From 8e7e2d470079a64b76a4660e69d20e11938ebc6f Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 22 Sep 2023 16:25:40 -0700 Subject: [PATCH] Use find_orf for ExpectedORF initialization --- tests/expected-results-large-csv/errors.csv | 272 +- tests/expected-results-large-csv/holistic.csv | 82 +- tests/expected-results-large-csv/orfs.csv | 902 +++--- tests/expected-results-large-hxb2/errors.json | 130 +- .../expected-results-large-hxb2/holistic.json | 96 +- tests/expected-results-large-hxb2/orfs.json | 1566 +++++----- tests/expected-results-large/errors.json | 402 +-- tests/expected-results-large/holistic.json | 96 +- tests/expected-results-large/orfs.json | 2530 ++++++++--------- tests/expected-results-single-csv/errors.csv | 5 +- .../expected-results-single-csv/holistic.csv | 2 +- tests/expected-results-single-csv/orfs.csv | 22 +- .../holistic.json | 2 +- tests/expected-results-single-hxb2/orfs.json | 46 +- tests/expected-results-single/errors.json | 9 +- tests/expected-results-single/holistic.json | 2 +- tests/expected-results-single/orfs.json | 70 +- tests/expected-results-small-csv/errors.csv | 54 +- tests/expected-results-small-csv/holistic.csv | 10 +- tests/expected-results-small-csv/orfs.csv | 110 +- tests/expected-results-small/errors.json | 74 +- tests/expected-results-small/holistic.json | 16 +- tests/expected-results-small/orfs.json | 320 +-- util/expected_orf.py | 33 +- 24 files changed, 3320 insertions(+), 3531 deletions(-) diff --git a/tests/expected-results-large-csv/errors.csv b/tests/expected-results-large-csv/errors.csv index 3920c31..9c9b3a6 100644 --- a/tests/expected-results-large-csv/errors.csv +++ b/tests/expected-results-large-csv/errors.csv @@ -1,209 +1,179 @@ sequence_name,error,message -KX505501.1,DeletionInOrf,"ORF pol at 2084-5095 can have maximum deletions 30, got 2721" -KX505501.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6323 -KX505501.1,InsertionInOrf,"Smaller ORF vif at 5040-5618 can have maximum insertions 90, got 909" -KX505501.1,DeletionInOrf,"Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 84" -KX505501.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893 -KX505501.1,InternalStopInOrf,Smaller ORF rev_exon1 at 5969-6044 contains an internal stop codon at 6005 -KX505501.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" -KX505501.1,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 71 positions. -KX505501.1,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 96" -KX505501.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1116" +KX505501.1,DeletionInOrf,"ORF pol at 2084-5096 can have maximum deletions 30, got 2721" +KX505501.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6323 +KX505501.1,InsertionInOrf,"Smaller ORF vif at 5040-5619 can have maximum insertions 90, got 909" +KX505501.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 195 positions. +KX505501.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893 +KX505501.1,InternalStopInOrf,Smaller ORF rev_exon1 at 5969-6047 contains an internal stop codon at 6005 +KX505501.1,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 40 positions. +KX505501.1,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 96" +KX505501.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1116" KX505501.1,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions. KX505501.1,LongDeletion,Query sequence contains a long deletion. KX505501.1,Scramble,Sequence is plus-scrambled. -MN691959,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN691959,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" -MN691959,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 249" -MN692074,DeletionInOrf,"ORF pol at 2084-5095 can have maximum deletions 30, got 981" -MN692074,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6551 -MN692074,DeletionInOrf,"Smaller ORF vif at 5040-5618 can have maximum deletions 30, got 81" -MN692074,InsertionInOrf,"Smaller ORF vpr at 5558-5849 can have maximum insertions 90, got 234" -MN692074,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893 -MN692074,FrameshiftInOrf,Smaller ORF vpu at 6061-6309 contains out of frame indels that impact 168 positions. -MN692074,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 76 positions. -MN692074,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 204" -MN692074,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1131" +MN691959,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MN691959,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 249" +MN692074,DeletionInOrf,"ORF pol at 2084-5096 can have maximum deletions 30, got 981" +MN692074,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6551 +MN692074,DeletionInOrf,"Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81" +MN692074,InsertionInOrf,"Smaller ORF vpr at 5558-5843 can have maximum insertions 90, got 261" +MN692074,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893 +MN692074,InsertionInOrf,"Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 108" +MN692074,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 76 positions. +MN692074,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 204" +MN692074,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1131" MN692074,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions. MN692074,LongDeletion,Query sequence contains a long deletion. -MN692145,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN692145,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" -MN692145,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 231" -MN090335,InternalStopInOrf,ORF gag at 789-2291 contains an internal stop codon at 822 -MN090335,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN090335,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" -MN090335,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 282" +MN692145,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MN692145,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 231" +MN090335,InternalStopInOrf,ORF gag at 789-2292 contains an internal stop codon at 822 +MN090335,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MN090335,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 282" MN090335,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 42 deletions with max tolerance of 10 deletions. MN090335,MajorSpliceDonorSiteMutated,"Query sequence has a mutated splice donor site, AT." MN090335,Scramble,Sequence is minus-scrambled. MN090335,InternalInversion,Sequence contains an internal inversion. -MN090376,InternalStopInOrf,ORF gag at 789-2291 contains an internal stop codon at 822 -MN090376,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN090376,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 219" -MN090376,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 252" +MN090376,InternalStopInOrf,ORF gag at 789-2292 contains an internal stop codon at 822 +MN090376,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MN090376,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 252" MN090376,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 95 deletions with max tolerance of 10 deletions. MN090376,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." MN090376,Scramble,Sequence is minus-scrambled. MN090376,InternalInversion,Sequence contains an internal inversion. -MK115581.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115581.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115581.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK115690.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK115690.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115690.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" +MK115581.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115581.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK115690.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK115690.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" MK115690.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 12 deletions with max tolerance of 10 deletions. MK115690.1,MajorSpliceDonorSiteMutated,"Query sequence has a mutated splice donor site, G-." -MK115571.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115571.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115571.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" +MK115571.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115571.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" MK115571.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 79 deletions with max tolerance of 10 deletions. MK115571.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." -MK115514.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115514.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115514.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" +MK115514.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115514.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" MK115514.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." -MK115488.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115488.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115488.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" +MK115488.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115488.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" MK115488.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." -MK115030.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK115030.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115030.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 270" +MK115030.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK115030.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 270" MK115030.1,Scramble,Sequence is minus-scrambled. MK115030.1,InternalInversion,Sequence contains an internal inversion. -MK115498.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115498.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115498.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK115211.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6890 -MK115211.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK115211.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 219" -MK115211.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 252" +MK115498.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115498.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK115211.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6890 +MK115211.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK115211.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 252" MK115211.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions. MK115211.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." MK115211.1,Scramble,Sequence is minus-scrambled. MK115211.1,InternalInversion,Sequence contains an internal inversion. MK115158.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 20 deletions with max tolerance of 10 deletions. MK115158.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." -MK114705.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 77 positions. -MK114705.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK114705.1,FrameshiftInOrf,Smaller ORF nef at 8796-9416 contains out of frame indels that impact 122 positions. -MK114856.1,InternalStopInOrf,ORF gag at 789-2291 contains an internal stop codon at 873 -MK114856.1,InternalStopInOrf,ORF pol at 2084-5095 contains an internal stop codon at 2213 -MK114856.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6326 -MK114856.1,InternalStopInOrf,Smaller ORF vif at 5040-5618 contains an internal stop codon at 5172 -MK114856.1,InternalStopInOrf,Smaller ORF vpr at 5558-5849 contains an internal stop codon at 5594 -MK114856.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5920 -MK114856.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 111" -MK114856.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434 -MK114856.1,InternalStopInOrf,Smaller ORF nef at 8796-9416 contains an internal stop codon at 8853 +MK114705.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 74 positions. +MK114705.1,FrameshiftInOrf,Smaller ORF nef at 8796-9417 contains out of frame indels that impact 122 positions. +MK114856.1,InternalStopInOrf,ORF gag at 789-2292 contains an internal stop codon at 873 +MK114856.1,InternalStopInOrf,ORF pol at 2084-5096 contains an internal stop codon at 2213 +MK114856.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6326 +MK114856.1,InternalStopInOrf,Smaller ORF vif at 5040-5619 contains an internal stop codon at 5172 +MK114856.1,InternalStopInOrf,Smaller ORF vpr at 5558-5843 contains an internal stop codon at 5594 +MK114856.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5920 +MK114856.1,FrameshiftInOrf,Smaller ORF vpu at 6259-6310 contains out of frame indels that impact 67 positions. +MK114856.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434 +MK114856.1,InternalStopInOrf,Smaller ORF nef at 8796-9417 contains an internal stop codon at 8853 MK114856.1,APOBECHypermutationDetected,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 4.399685326687554e-65). -MK115009.1,InternalStopInOrf,ORF gag at 789-2291 contains an internal stop codon at 834 -MK115009.1,InternalStopInOrf,ORF pol at 2084-5095 contains an internal stop codon at 2183 -MK115009.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6350 -MK115009.1,InternalStopInOrf,Smaller ORF vif at 5040-5618 contains an internal stop codon at 5373 -MK115009.1,InternalStopInOrf,Smaller ORF vpr at 5558-5849 contains an internal stop codon at 5717 -MK115009.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5860 -MK115009.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 177" -MK115009.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434 -MK115009.1,InternalStopInOrf,Smaller ORF nef at 8796-9416 contains an internal stop codon at 8874 +MK115009.1,InternalStopInOrf,ORF gag at 789-2292 contains an internal stop codon at 834 +MK115009.1,InternalStopInOrf,ORF pol at 2084-5096 contains an internal stop codon at 2183 +MK115009.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6350 +MK115009.1,InternalStopInOrf,Smaller ORF vif at 5040-5619 contains an internal stop codon at 5373 +MK115009.1,DeletionInOrf,"Smaller ORF vpr at 5558-5843 can have maximum deletions 30, got 75" +MK115009.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5860 +MK115009.1,InsertionInOrf,"Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 177" +MK115009.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434 +MK115009.1,InternalStopInOrf,Smaller ORF nef at 8796-9417 contains an internal stop codon at 8874 MK115009.1,APOBECHypermutationDetected,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 1.3731449337509935e-41). MK115009.1,Scramble,Sequence is minus-scrambled. MK115009.1,InternalInversion,Sequence contains an internal inversion. -MK115387.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK115387.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115387.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 279" -MK115491.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115491.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115491.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK116110.1,InternalStopInOrf,ORF gag at 140-1642 contains an internal stop codon at 185 +MK115387.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK115387.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 279" +MK115491.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115491.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK116110.1,InternalStopInOrf,ORF gag at 140-1643 contains an internal stop codon at 185 MK116110.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 22 deletions with max tolerance of 10 deletions. MK116110.1,MajorSpliceDonorSiteMutated,"Query sequence has a mutated splice donor site, CC." -MK115527.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115527.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115527.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK114997.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6512 -MK114997.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK114997.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK114997.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 270" -MK115518.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115518.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115518.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK115065.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK115065.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115065.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 270" +MK115527.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115527.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK114997.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6512 +MK114997.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK114997.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 270" +MK115518.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115518.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK115065.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK115065.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 270" MK115065.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 75 deletions with max tolerance of 10 deletions. MK115065.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." MK115065.1,Scramble,Sequence is plus-scrambled. MK115065.1,InternalInversion,Sequence contains an internal inversion. -MK115464.1,InternalStopInOrf,ORF gag at 789-2291 contains an internal stop codon at 924 -MK115464.1,InternalStopInOrf,ORF pol at 2084-5095 contains an internal stop codon at 2183 -MK115464.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6425 -MK115464.1,InternalStopInOrf,Smaller ORF vif at 5040-5618 contains an internal stop codon at 5247 -MK115464.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK115464.1,FrameshiftInOrf,Smaller ORF tat_exon1 at 5830-6044 contains out of frame indels that impact 104 positions. -MK115464.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 108" -MK115464.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434 -MK115464.1,FrameshiftInOrf,Smaller ORF nef at 8796-9416 contains out of frame indels that impact 393 positions. +MK115464.1,InternalStopInOrf,ORF gag at 789-2292 contains an internal stop codon at 924 +MK115464.1,InternalStopInOrf,ORF pol at 2084-5096 contains an internal stop codon at 2183 +MK115464.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6425 +MK115464.1,InternalStopInOrf,Smaller ORF vif at 5040-5619 contains an internal stop codon at 5247 +MK115464.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK115464.1,FrameshiftInOrf,Smaller ORF tat_exon1 at 5830-6046 contains out of frame indels that impact 104 positions. +MK115464.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434 +MK115464.1,FrameshiftInOrf,Smaller ORF nef at 8796-9417 contains out of frame indels that impact 393 positions. MK115464.1,APOBECHypermutationDetected,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 5.391006513622446e-23). -MK115530.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115530.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115530.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK115520.1,InternalStopInOrf,ORF pol at 2084-5095 contains an internal stop codon at 2198 -MK115520.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115520.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115520.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" +MK115530.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115530.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK115520.1,InternalStopInOrf,ORF pol at 2084-5096 contains an internal stop codon at 2198 +MK115520.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115520.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" MK115520.1,MajorSpliceDonorSiteMutated,"Query sequence has a mutated splice donor site, GA." MK115520.1,InternalInversion,Sequence contains an internal inversion. -MK115503.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115503.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115503.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK115570.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115570.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115570.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" +MK115503.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115503.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK115570.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115570.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" MK115570.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 11 deletions with max tolerance of 10 deletions. MK115570.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." -MK115509.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115509.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115509.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK115702.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 77 positions. -MK115702.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 204" -MK115702.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 249" +MK115509.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115509.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK115702.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 74 positions. +MK115702.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 249" MK115702.1,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 54 deletions with max tolerance of 10 deletions. MK115702.1,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." MK115702.1,Scramble,Sequence is plus-scrambled. MK115702.1,InternalInversion,Sequence contains an internal inversion. -MK115095.1,InternalStopInOrf,ORF gag at 789-2291 contains an internal stop codon at 927 -MK115095.1,InternalStopInOrf,ORF pol at 2084-5095 contains an internal stop codon at 2183 -MK115095.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6551 -MK115095.1,InternalStopInOrf,Smaller ORF vif at 5040-5618 contains an internal stop codon at 5151 -MK115095.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MK115095.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5860 -MK115095.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 108" -MK115095.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434 -MK115095.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 213" +MK115095.1,InternalStopInOrf,ORF gag at 789-2292 contains an internal stop codon at 927 +MK115095.1,InternalStopInOrf,ORF pol at 2084-5096 contains an internal stop codon at 2183 +MK115095.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6551 +MK115095.1,InternalStopInOrf,Smaller ORF vif at 5040-5619 contains an internal stop codon at 5151 +MK115095.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MK115095.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5860 +MK115095.1,FrameshiftInOrf,Smaller ORF vpu at 6259-6310 contains out of frame indels that impact 43 positions. +MK115095.1,InternalStopInOrf,Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434 +MK115095.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 213" MK115095.1,APOBECHypermutationDetected,Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 9.460464921885922e-40). -MK115490.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115490.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115490.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -MK115576.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions. -MK115576.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -MK115576.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" -OQ092466,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -OQ092466,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -OQ092466,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 261" +MK115490.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115490.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +MK115576.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MK115576.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" +OQ092466,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +OQ092466,InsertionInOrf,"Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 195" +OQ092466,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 261" OQ092466,MajorSpliceDonorSiteMutated,"Query sequence has a mutated splice donor site, GC." OQ092463,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 26 deletions with max tolerance of 10 deletions. OQ092463,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." OQ092465,MajorSpliceDonorSiteMutated,"Query sequence has a mutated splice donor site, GA." -OQ092462,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -OQ092462,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" -OQ092462,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 255" +OQ092462,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +OQ092462,InsertionInOrf,"Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 195" +OQ092462,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 255" OQ092462,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 23 deletions with max tolerance of 10 deletions. OQ092462,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." OQ092464,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 19 deletions with max tolerance of 10 deletions. OQ092464,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." -OQ092467,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -OQ092467,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 213" -OQ092467,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 237" +OQ092467,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +OQ092467,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 237" OQ092467,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 22 deletions with max tolerance of 10 deletions. OQ092467,MajorSpliceDonorSiteMutated,"Query sequence has a missing splice donor site, --." diff --git a/tests/expected-results-large-csv/holistic.csv b/tests/expected-results-large-csv/holistic.csv index 65841c6..fc0954a 100644 --- a/tests/expected-results-large-csv/holistic.csv +++ b/tests/expected-results-large-csv/holistic.csv @@ -1,42 +1,42 @@ seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs -KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,0.17663960024984385,789,8794,4 -MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,1.0,789,8794,3 -MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,0.4114928169893816,789,8794,4 -MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8794,3 -MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,1.0003747657713928,789,8794,3 -MN090376,8985,0.026007919521734202,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,0.9784957300133759,1.0604340567612687,0.9943785134291068,789,8794,3 -MK115581.1,9495,0.6897199265079494,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,0.9870357032616525,1.0046340179041602,1.0,789,8794,2 -MK115690.1,9689,0.05065930954004094,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,0.988167506945159,0.9949427185468056,1.0,789,8794,2 -MK115571.1,9394,0.8012585672082311,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,0.9866241382858318,1.0113902490951672,1.0,789,8794,2 -MK115514.1,9382,0.6458974386368621,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,0.9864183557979216,1.0173736943082499,1.0,789,8794,2 -MK115488.1,9623,0.6511896911074662,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0255170285008746,1.0325262392185388,1.0,789,8794,6 -MK115030.1,9126,0.031598631869680704,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,0.9940323078506019,1.0655270655270654,1.0,789,8794,3 -MK115498.1,9461,0.8339748776671196,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,0.9866241382858318,1.0080329774865235,1.0,789,8794,2 -MK115211.1,9032,0.11689558806708,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,0.9950612202901533,1.0598981399468557,1.0,789,8794,3 -MK115158.1,9143,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1.0,234,8211,1 -MK114705.1,9411,0.14449377496074622,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.0980553554892478,1.122622463075125,1.0,789,8794,6 -MK114856.1,9477,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0485646671468258,1.0812493405085997,1.0,789,8794,4 -MK115009.1,9207,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,0.9965016977055252,1.0590854784403172,1.0,789,8794,3 -MK115387.1,9136,0.5412311092694289,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,0.9796275336968824,1.040936952714536,1.0,789,8794,2 -MK115491.1,9422,0.8951015182445495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.003704084782385,1.0299299511780937,1.0,789,8794,2 -MK116110.1,8967,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,0.9986220719027934,140,8123,3 -MK115527.1,9481,0.7689834393883834,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,0.986727029529787,1.0056956017297753,1.0,789,8794,2 -MK114997.1,9055,0.054959132555391754,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,0.9784957300133759,1.0516841524019878,1.0,789,8794,2 -MK115518.1,9537,0.6385326595592609,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9847720958946393,0.9996854356715948,1.0,789,8794,3 -MK115065.1,9214,0.033517722768753644,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.0080255170285009,1.069459518124593,1.0,789,8794,6 -MK115464.1,9663,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9838460746990431,0.9893407844354756,1.0,789,8794,2 -MK115530.1,9544,0.5789377103398377,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9866241382858318,0.9992665549036044,1.0,789,8794,2 -MK115520.1,9589,0.5200353682902832,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.9787015125012861,0.987902805297737,1.0121174266083697,789,8794,3 -MK115503.1,9617,0.4263025132504157,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.9870357032616525,0.9953207861079338,1.0,789,8794,2 -MK115570.1,9485,0.738578434638724,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,0.9866241382858318,1.0057986294148655,1.0,789,8794,2 -MK115509.1,9353,0.7866198309713798,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,0.9866241382858318,1.0197797498128942,1.0,789,8794,2 -MK115702.1,9098,0.14401391767451666,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,0.987447268237473,1.0596834469114091,1.0198625858838226,789,8794,4 -MK115095.1,9137,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,0.9907397880440375,1.060085367188355,1.0,789,8794,2 -MK115490.1,9347,0.8863248655310947,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,0.9848749871385946,1.0204343639670483,1.0,789,8794,3 -MK115576.1,9266,0.818189227062389,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,0.9917687004835889,1.0342110943233327,1.0,789,8794,3 -OQ092466,9686,0.3876036547663967,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1015536577837226,1.1192442700805285,1.0,789,8794,3 -OQ092463,9605,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,1.0,140,8123,2 -OQ092465,9659,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,1.0,200,8207,2 -OQ092462,9714,0.10777665573070194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.130671879823027,1.1301214741610048,1.0,789,8794,3 -OQ092464,9556,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,1.0,200,8207,2 -OQ092467,9936,0.6416537859942263,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.1308776623109373,1.0962157809983897,1.0,789,8794,3 +KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,0.17661753684736448,789,8795,4 +MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,1.0,789,8795,3 +MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,0.41144141893579816,789,8795,4 +MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8795,3 +MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,1.0003747189607795,789,8795,3 +MN090376,8985,0.026007919521734202,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,8985,0.9784957300133759,1.0604340567612687,0.9943792155883088,789,8795,3 +MK115581.1,9495,0.6897199265079494,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9495,0.9870357032616525,1.0046340179041602,1.0,789,8795,2 +MK115690.1,9689,0.05065930954004094,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,0.988167506945159,0.9949427185468056,1.0,789,8795,2 +MK115571.1,9394,0.8012585672082311,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9394,0.9866241382858318,1.0113902490951672,1.0,789,8795,2 +MK115514.1,9382,0.6458974386368621,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9382,0.9864183557979216,1.0173736943082499,1.0,789,8795,2 +MK115488.1,9623,0.6511896911074662,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9623,1.0255170285008746,1.0325262392185388,1.0,789,8795,6 +MK115030.1,9126,0.031598631869680704,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9126,0.9940323078506019,1.0655270655270654,1.0,789,8795,3 +MK115498.1,9461,0.8339748776671196,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9461,0.9866241382858318,1.0080329774865235,1.0,789,8795,2 +MK115211.1,9032,0.11689558806708,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9032,0.9950612202901533,1.0598981399468557,1.0,789,8795,3 +MK115158.1,9143,0.002572269807584293,Ref.47_BF.ES.08.P1942.GQ372987,9143,0.9921577414295317,0.9699223449633599,1.0,234,8212,1 +MK114705.1,9411,0.14449377496074622,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9411,1.0980553554892478,1.122622463075125,1.0,789,8795,6 +MK114856.1,9477,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9477,1.0485646671468258,1.0812493405085997,1.0,789,8795,4 +MK115009.1,9207,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9207,0.9965016977055252,1.0590854784403172,1.0,789,8795,3 +MK115387.1,9136,0.5412311092694289,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9136,0.9796275336968824,1.040936952714536,1.0,789,8795,2 +MK115491.1,9422,0.8951015182445495,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9422,1.003704084782385,1.0299299511780937,1.0,789,8795,2 +MK116110.1,8967,0.07021438897893317,Ref.B.TH.90.BK132.AY173951,8967,0.9957759004001778,0.9972119995539199,0.998622244488978,140,8124,3 +MK115527.1,9481,0.7689834393883834,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9481,0.986727029529787,1.0056956017297753,1.0,789,8795,2 +MK114997.1,9055,0.054959132555391754,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9055,0.9784957300133759,1.0516841524019878,1.0,789,8795,2 +MK115518.1,9537,0.6385326595592609,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9537,0.9847720958946393,0.9996854356715948,1.0,789,8795,3 +MK115065.1,9214,0.033517722768753644,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9214,1.0080255170285009,1.069459518124593,1.0,789,8795,6 +MK115464.1,9663,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9663,0.9838460746990431,0.9893407844354756,1.0,789,8795,2 +MK115530.1,9544,0.5789377103398377,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9544,0.9866241382858318,0.9992665549036044,1.0,789,8795,2 +MK115520.1,9589,0.5200353682902832,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9589,0.9787015125012861,0.987902805297737,1.012115913065201,789,8795,3 +MK115503.1,9617,0.4263025132504157,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9617,0.9870357032616525,0.9953207861079338,1.0,789,8795,2 +MK115570.1,9485,0.738578434638724,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9485,0.9866241382858318,1.0057986294148655,1.0,789,8795,2 +MK115509.1,9353,0.7866198309713798,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9353,0.9866241382858318,1.0197797498128942,1.0,789,8795,2 +MK115702.1,9098,0.14401391767451666,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9098,0.987447268237473,1.0596834469114091,1.019860104921309,789,8795,4 +MK115095.1,9137,1.0,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9137,0.9907397880440375,1.060085367188355,1.0,789,8795,2 +MK115490.1,9347,0.8863248655310947,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9347,0.9848749871385946,1.0204343639670483,1.0,789,8795,3 +MK115576.1,9266,0.818189227062389,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9266,0.9917687004835889,1.0342110943233327,1.0,789,8795,3 +OQ092466,9686,0.3876036547663967,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9686,1.1015536577837226,1.1192442700805285,1.0,789,8795,3 +OQ092463,9605,0.21628713708846803,Ref.B.TH.90.BK132.AY173951,9605,1.0529124055135617,0.9884435190005205,1.0,140,8124,2 +OQ092465,9659,0.02412789935966586,Ref.28_BF.BR.99.BREPM12817.DQ085874,9659,1.0429696287964005,0.9620043482762191,1.0,200,8208,2 +OQ092462,9714,0.10777665573070194,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9714,1.130671879823027,1.1301214741610048,1.0,789,8795,3 +OQ092464,9556,0.006887768010151674,Ref.28_BF.BR.99.BREPM12817.DQ085874,9556,1.0427446569178853,0.9678735872750105,1.0,200,8208,2 +OQ092467,9936,0.6416537859942263,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9936,1.1308776623109373,1.0962157809983897,1.0,789,8795,3 diff --git a/tests/expected-results-large-csv/orfs.csv b/tests/expected-results-large-csv/orfs.csv index 87d8217..e85fe8e 100644 --- a/tests/expected-results-large-csv/orfs.csv +++ b/tests/expected-results-large-csv/orfs.csv @@ -1,452 +1,452 @@ seqid,name,start,end,subtype_start,subtype_end,orientation,distance,protein,aminoacids,nucleotides -KX505501.1,env,0,1824,6224,8794,forward,0.7626080297560442,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,vif,0,1824,5040,5618,forward,0.7647696476964769,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,nef,0,1824,8796,9416,forward,0.7645782478980201,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,tat_exon1,1,1750,5830,6044,forward,0.7680130480667754,MRKLQNGIDCIQCMQGLLHQAR,VSLVRPDLSLGALWLTREPTA*ASIKLALSASSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKGKPEELSRRRTRLAERAQQEARGGDW*VRRKFFD*RRLEGERWVRERQY*AGENWIDGKKFG*GQEEIKDIN*NI*YGQAES*NDSQLILAC*KHQKAVDKYWDSYNRLLRQDQKNLSLCIIQ*QSSIVYIKK*M*KTPRKP*TR*KKSKTKLGNKHSKPQLQEVAVRSATITL*CRIIRGKWYIRPCHQEL*MHG*K**RKRLSAQK*YPCFQHYQKEPPHKT*TPC*IQWGDIKQPCKC*KRPLMRKLQNGIDCIQCMQGLLHQAR*ENQGEVT*QELLVPFRNK*HG*QIIHLSQ*ERFIKDG*S*G*IK**ECIALSAFWT*DKDQRNLLETM*TGSIKP*EPNKPHRK*KIG*QKPCWSRMRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,GTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT -KX505501.1,gag,336,1824,789,2291,forward,0.3997973809613161,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,rev_exon1,1306,1750,5969,6044,forward,0.758082497212932,MRTQIVKLF,MRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,ATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT -KX505501.1,vpr,1599,1749,5558,5849,forward,0.7677189534455227,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR,ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGA -KX505501.1,pol,1627,1927,2084,5095,forward,0.7724330674761569,GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,IL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,ATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA -KX505501.1,tat_exon2,1746,1824,8376,8468,forward,0.7616257781032589,RCIRSTTRTADTELFTRDFPLGTFQ,RCIRSTTRTADTELFTRDFPLGTFQ*,AGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,vpu,1747,1927,6061,6309,forward,0.769715460635405,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA -KX505501.1,rev_exon2,1747,1927,8377,8652,forward,0.7620186257236345,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA -MN691959,gag,639,2142,789,2291,forward,0.0801186943620179,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN691959,pol,1934,4946,2084,5095,forward,0.054722889368558514,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN691959,vif,4890,5469,5040,5618,forward,0.09157509157509158,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN691959,vpr,5408,5702,5558,5849,forward,0.5562531391260666,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*T,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MN691959,tat_exon1,5679,5895,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG -MN691959,rev_exon1,5818,5896,5969,6044,forward,0.48200514138817474,MAGRSGDSDEDLLKTVRLIKFLYQSS,MAGRSGDSDEDLLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT -MN691959,vpu,5910,6156,6061,6309,forward,0.53246275519588,SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL,MQPIQIAIVALVVAIIIAIVV*SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL*,ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG -MN691959,env,6070,8656,6224,8794,forward,0.13638128518734216,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN691959,tat_exon2,8237,8333,8376,8468,forward,0.6672629695885509,RPTSQTRGDPTGPKE,RPTSQTRGDPTGPKE*KKKVERETETDPFD**,AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGA -MN691959,rev_exon2,8238,8514,8377,8652,forward,0.210025203024363,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE*,GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG -MN691959,nef,8657,9278,8796,9416,forward,0.08588605782994552,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN692074,nef,0,4059,8796,9416,forward,0.763072203234748,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,WKG*FGPRKDKISLICGSTTHKATSLIGRTTHQGRGPDFH*PLDGASS*YQLIQRR*KRPMQERTTACYTL*ACMGWRTRRKKC*CGSLTAA*HFITWPESCIRSTTRIADFELSTRDFPLGTFQGGVAWAGLGSGEPSDAAYKQLLFACTGSLWLDQI*AWELSG*LGNPLLKPQ*SLP*VL*VVCARLLCDSGN*RSLRPF*SVWKISSSGARTGT*KRKRNQRSSLDAGLGLLKRARQEARGGDW*VRQKF*LAEARRREMGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*G*GGN*RKLY*IQEQMIQY*KK*ICQEDGNQK**GELEVLSK*DSMIRYS*KSVDIKL*VQY**DLHLST*LEEIC*LRLVAL*IFPLVLLKLYQ*N*SQEWMAQKLNNGH*QKKK*KH**KFVQKWKRKGKFQKLGLKIHTILQYLP*RKKTVLNGEN**ISENLIRELKTSGKFN*EYHIPQG*ERKNQ*QYWMWGMHIFQFP*IKNLGNILHLPYPVETMRHQGLDISTMCFHRDGKDHQQYSKVA*QKF*SLLENKIQK*LSINTWMIYM*DLI*K*GSIE*K*RN*DNIC*DGDLPHQTKNIRKNPHSFGWVMNSILINGQYSL*CCQKKTAGLSMTYRS*WEN*IGQVRFTQGLK*GNYVNSLGEPKH*QK*YH*QKKQSWNWQKTGKF*KNQYMEYIMTHQKT**QKYRSRGKVNGHIKFIKSHLKI*KQENMQK*GVPTLMM*NN*QRQCKK*PQKA**YGERLLNLNYPYKKKHGKHGGQSIGKPPGFLSGSLSIPLP**NYGTS*RKNP**EQKPSM*MGQLTGRLN*EKQDMLLTEEDKKLSP*LTQQIRRLNYKQFI*LCRIRD*K*T**QTHNMH*,TGGAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAG -MN692074,env,2,4115,6224,8794,forward,0.7604257801108195,MNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA -MN692074,gag,789,2292,789,2291,forward,0.19470123431286457,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692074,pol,2084,4115,2084,5095,forward,0.5617851221088768,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA -MN692074,vif,3617,4115,5040,5618,forward,0.7631664499349805,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA -MN692074,vpr,3617,4085,5558,5849,forward,0.7632679688399402,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACC -MN692074,tat_exon1,3823,4084,5830,6044,forward,0.7659115426105717,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC -MN692074,rev_exon1,3823,4084,5969,6044,forward,0.7645569620253164,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC -MN692074,vpu,4080,4164,6061,6309,forward,0.7708418891170431,IPSGEPSDAAYKQLLFACTGSLWLDQI,IPSGEPSDAAYKQLLFACTGSLWLDQI*,ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA -MN692074,tat_exon2,4080,4164,8376,8468,forward,0.7699443413729128,IPSGEPSDAAYKQLLFACTGSLWLDQI,IPSGEPSDAAYKQLLFACTGSLWLDQI*,ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA -MN692074,rev_exon2,4081,4153,8377,8652,forward,0.7667894365645325,YPVASPQMLHISSCFLPVLGLSG,YPVASPQMLHISSCFLPVLGLSG*,TACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAG -MN692145,gag,775,2281,789,2291,forward,0.20784453738651432,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692145,pol,2070,5085,2084,5095,forward,0.14843087362171337,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN692145,vif,5029,5608,5040,5618,forward,0.2608047690014903,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG -MN692145,vpr,5547,5838,5558,5849,forward,0.6002510555745751,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG -MN692145,tat_exon1,5818,6034,5830,6044,forward,0.31992687385740404,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG -MN692145,rev_exon1,5957,6035,5969,6044,forward,0.4267425320056898,MAGRSGDSDEELLKTVRLIKFLYQSS,MAGRSGDSDEELLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT -MN692145,vpu,6049,6298,6061,6309,forward,0.5326633165829145,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL*,ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN692145,env,6212,8783,6224,8794,forward,0.47520309038232134,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA -MN692145,tat_exon2,8364,8457,8376,8468,forward,0.3921568627450981,RPASQPRGDPTGPKESKKKVERETETDPLH,RPASQPRGDPTGPKESKKKVERETETDPLH*,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG -MN692145,rev_exon2,8365,8641,8377,8652,forward,0.29843322556577967,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG -MN692145,nef,8784,9387,8796,9416,forward,0.4049958673891082,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA -MN090335,gag,315,1665,789,2291,forward,0.596665989022159,MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ,MYTIEGGYCII**SKFF*SCLKGWL*LFQYLFTAS*CF*QARINCESF*FPACPYYMF*SIFFLSPWP*PNFLPSI*FSPAQY*RSRTHLSPSSLR*SKMAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ*,ATGTACACAATAGAGGGTGGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTTCCAATATCTGTTTACAGCCTCCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGTTCCCTGCTTGCCCATACTATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA -MN090335,pol,1427,4469,2084,5095,forward,0.27887169154684477,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN090335,vif,4413,4992,5040,5618,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -MN090335,vpr,4931,5225,5558,5849,forward,0.6187165775401071,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*T,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAGACT -MN090335,tat_exon1,5202,5418,5830,6044,forward,0.42503863987635226,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ,ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAG -MN090335,rev_exon1,5341,5419,5969,6044,forward,0.5275498241500586,MAGRSGDRDEDLLKTVRLIKFLYQSS,MAGRSGDRDEDLLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT -MN090335,vpu,5433,5682,6061,6309,forward,0.5368311327310633,MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL,MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL*,ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG -MN090335,env,5596,8158,6224,8794,forward,0.5139610675592354,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ*,ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA -MN090335,tat_exon2,7739,7835,8376,8468,forward,0.6842105263157894,RPSSQPRGDQTGPKE,RPSSQPRGDQTGPKE*KKKVERETEADPED**,AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGA -MN090335,rev_exon2,7740,8016,8377,8652,forward,0.4267425320056898,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE*,GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG -MN090335,nef,8159,8813,8796,9416,forward,0.49485619884358334,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA -MN090376,gag,522,1590,789,2291,forward,0.6828741441147701,MYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP,MHTIEDCYCIK**FKFL*SCLKCWL*LSQYLSTAF*CF*QARINCESFQFPACPYYMF*LISFLSPWP*PNFLPFV*FSPA*YRRSRTHLSPSSLR*SKRRTHRSPPLASCCARFSKPSPIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP*SQ*,ATGCACACAATAGAGGACTGCTACTGTATTAAATAATGATTTAAGTTCCTCTGATCCTGTCTGAAGTGCTGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCCAGTTCCCTGCTTGCCCATACTATATGTTTTAACTTATATCTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATTTGTCTAATTCTCCCCCGCTTAATACCGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAAGGCGTACTCACCGTTCGCCGCCCCTCGCCTCTTGCTGTGCGCGCTTCAGCAAGCCGAGTCCGATAATTCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA -MN090376,pol,1382,4394,2084,5095,forward,0.23966680468616797,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTGACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGATGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCTGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTCGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN090376,vif,4338,4920,5040,5618,forward,0.42479043044174425,MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH,MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAGCATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGACTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAAAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -MN090376,vpr,4859,5153,5558,5849,forward,0.5940302029259086,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*T,ATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCTTGGCTTCATGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MN090376,tat_exon1,5130,5346,5830,6044,forward,0.484764542936288,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAG -MN090376,rev_exon1,5269,5347,5969,6044,forward,0.5797101449275363,MAGRSGDSDEELLRIAGTIKFLYQSS,MAGRSGDSDEELLRIAGTIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGT -MN090376,vpu,5361,5631,6061,6309,forward,0.5776066350710902,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL*,ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA -MN090376,env,5524,8110,6224,8794,forward,0.4972760674014952,MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAGGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAGCCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTAACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTAACATCAATAGCACTAATATAAACAATACCAATAGTATAGAAAGAGAAATGACAAACTGCTCTTTTAATGTCACCACAGTCATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAAACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGCTATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTAAAAATGTTAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAACACAGAAGTAAATATTATCACACTCCCATGCAAGATAAGGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACATTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA -MN090376,tat_exon2,7691,7784,8376,8468,forward,0.4464285714285715,RPSSQPRGDPTGPKESEKKVERETETDPVT,RPSSQPRGDPTGPKESEKKVERETETDPVT*,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG -MN090376,rev_exon2,7692,7968,8377,8652,forward,0.4267425320056898,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE*,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG -MN090376,nef,8111,8735,8796,9416,forward,0.5093153589821267,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC*,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACATCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA -MK115581.1,gag,680,2180,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115581.1,pol,1972,4984,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115581.1,vif,4928,5507,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115581.1,vpr,5446,5740,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115581.1,tat_exon1,5717,5936,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA -MK115581.1,rev_exon1,5856,5934,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115581.1,vpu,5948,6194,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115581.1,env,6111,8652,6224,8794,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115581.1,tat_exon2,8233,8329,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115581.1,rev_exon2,8234,8510,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115581.1,nef,8653,9268,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115690.1,gag,777,2286,789,2291,forward,0.24076694150363465,MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAGTGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAGATTAAAGCATATCGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATAATGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTGTATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGGCGCAGGAAACAGCAGTCAGACCAGCACCAGCCAAAATTACCCTATAGTACAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTCTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACTAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCCACCAGCATTCTAGACATAAGACAAGGACCAAAGGAGCCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTAGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAGGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGTCATAAAGCAAGAGTTTTAGCGGAAGCAATGAGCCAAGCAACAAATTCAGCTGCCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAAAGTGTTAAGTGTTTTAATTGCGGCAAAGATGGGCACATAGCAAAAAATTGCAGGGCCCCTAGAAGAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTCCAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115690.1,pol,2078,5090,2084,5095,forward,0.185747174550021,FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGCACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATAGGGCCTGAGAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGACTTCAGAGAACTAAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGACAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAACCTTATAGAAAACAAAATCCAAACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACCTAGAAATAGGGCAGCATAGAATAAAAATAGAAGAACTGAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGACAAGTGGACAGTACAGCCTATAGCGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATCTACCCAGGAATTAAAGTAAGGCAATTATGTAAACTACTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAATTAGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGAGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTGACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAATCAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTATCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACATGCAATTTATCTAGCTTTGCAAGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTGTTTTTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAGCAGCATACTTTATTTTAAAATTAGCAGGACGATGGCCAGTAGCAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACAGTTAAGGCCGCCTGCTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAACAATGAATTGAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACGTAATAGCAACAGACTTACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAACATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115690.1,vif,5034,5613,5040,5618,forward,0.39766435115272314,MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH,MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAAGATGGCACAGTTTAGTAAAACACCATATATATATTTCAGGGAAAGCTAGAGGATGGGTTTATAAACATCACTATGAAAACACTCATCCAAGAATAAGTTCAAAAGTATACATCCCACTAGGGGAAGCTAGACTGGCAGTAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGGAATATAGCACACAAGTAGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGTCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTTTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTATTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAG -MK115690.1,vpr,5552,5846,5558,5849,forward,0.6098398169336385,MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS,MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS*S,ATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACACATCTATGAGACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAAGTCTGCAACAACTGCTGTTCATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGAATTACTCCACAGAGGAGAACAAGAAATGGAGCCAGTAGATCCTAATCT -MK115690.1,tat_exon1,5823,6039,5830,6044,forward,0.42503863987635226,MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ,MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ,ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAG -MK115690.1,rev_exon1,5962,6040,5969,6044,forward,0.5275498241500586,MAGRSGDNDEDLLKTVRFIKLLYQSS,MAGRSGDNDEDLLKTVRFIKLLYQSS,ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGT -MK115690.1,vpu,6054,6300,6061,6309,forward,0.4999062089664228,MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL,MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL*,ATGCAATCTTTAGCAATATTAGCAATAGTAGCATTAGTAGTAGCAGCCATAATAGCAATAGTTGTGTGGACCATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGATAGTGGCAATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAG -MK115690.1,env,6217,8800,6224,8794,forward,0.4988789237668163,MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ,MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ*,ATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCTACAGACCCCAACCCACAAGAAATAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTACATTGCACTAAGTTGGAGATTAATAGCACTAAGAAGACTAATAGCACTAATAATGGTACTAACATCAATGCCACTGATGATAGTTGGGGGGAAATGAAAAACTGCTCTTTCAATACCACTGCAAGCATAAGAGATAAGGTACAGAGAGAATTTGCGCTTTTTTATAAACTTGATATAGTACCAATAGATAATGATGATATCAACTATAGGTTAATAAGTTGTAACACCTCAGTCCTTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAAAATTTCAATGGAACAGGACAATGTAAAAATGTCAGCACAGTGCAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTCAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAAATCTGAAAATATCACAGACAATACTAAAACTATAATAGTACAGCTGAATGCATCTGTAGCAATTGTTTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGGCCAGGGAGAGCGTTTTATGCAGCAGGAGATATAATAGGAGACATAAGACGAGCACACTGTATCCTTAACAAAACAACATGGGATAACACAATAGAACAGGTAGCTAAAAAATTAAGAGAACAATTTGAGAATAAGACAATAGTCTTTAGTGAATCCTCGGGAGGGGACCCAGAAATTACAATGATTAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAGTACAACTGTTTAATAGTACTTGGCATAATAATGGGAGTAGTACTACAGGGTCAAGTAGCAGTGAAGGCAATATCACACTCCCATGCAAAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACCAATTAGCTGCGAGTCAAATATTACAGGGTTGCTACTAACAAGAGATGGTGGGAATGACGCTAACGGGAACAACACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGCGAAGTGAATTATATAGATACAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCACAGAGAAGAGTGGTACAGAGAGAAAAAAGAGCAGTGGGTCTCGGAGCCTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGCTTTGGGGTTGCTCTGGAAAACTCATCTGCAACACTGCTGTGCCTTGGAATACTAGTTGGAGTAACAAATCTCTGGATGATATTTGGCATAACATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAACATAATATACAGCTTAATTGAGGCATCGCAAACCCAGCAAGAAAAGAATGAACAAGAATTACTAGAATTAGACAAATGGGCAAGTCTGTGGAATTGGTTTAGCATATCAAACTGGCTGTGGTACATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTATACTTTCTATAGTGAATAGAGTTAGGAAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAGTGCTGTTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGAGTTATAGAAGGATTGCGCAGAGCTTTTAGAGCTATTATCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGAGCTTTGCAATAA -MK115690.1,tat_exon2,8381,8474,8376,8468,forward,0.5303030303030303,RPTSQPRGDPTGPKEPETKVESKTETDPLT,RPTSQPRGDPTGPKEPETKVESKTETDPLT*,AGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAG -MK115690.1,rev_exon2,8382,8658,8377,8652,forward,0.42982806877249113,DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE,DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE*,GACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAG -MK115690.1,nef,8801,9425,8796,9416,forward,0.46635769262600346,MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN,MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN*NC*,ATGGGTGGTAAATGGTCAAAATGTAGTATAGTTGGATGGCCTACTGTAAGGGAAAGAATAAGACGAGCAGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAAGCATGGAGCAATCACAAGTAGCAATGCTAACAATGCTGATTGTACCTGGCTGGAAGCCCAAAAAGAAGAGGAGGAGGTAGGCTTTCCAGTCAGGCCTCAGGTACCCTTAAGACCAATGACTTACAAGGCAGCCTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATAATTTACTCCCAAAAAAGACAAGATATTCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACTAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGAGAGGGTAGAAGAGGAGAATAAAAGAGAGAACCGCTGCTTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGCTACAGTGGAGGTTTGACAGCCGCCTAGCCTTTCACCACGTAGCCAGAGAGCTGCATCCGGAGTACTATAAGAACTAGAACTGCTGA -MK115571.1,gag,579,2079,789,2291,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115571.1,pol,1871,4883,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115571.1,vif,4827,5406,5040,5618,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115571.1,vpr,5345,5639,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115571.1,tat_exon1,5616,5832,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115571.1,rev_exon1,5755,5833,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115571.1,vpu,5847,6093,6061,6309,forward,0.5399181166837258,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115571.1,env,6010,8551,6224,8794,forward,0.4569687738004571,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115571.1,tat_exon2,8132,8228,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115571.1,rev_exon2,8133,8409,8377,8652,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115571.1,nef,8552,9167,8796,9416,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115514.1,gag,584,2084,789,2291,forward,0.3014827756125966,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115514.1,pol,1876,4888,2084,5095,forward,0.19298018391400085,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115514.1,vif,4832,5411,5040,5618,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115514.1,vpr,5350,5644,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115514.1,tat_exon1,5621,5840,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA -MK115514.1,rev_exon1,5760,5838,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115514.1,vpu,5852,6098,6061,6309,forward,0.5399181166837258,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115514.1,env,6015,8556,6224,8794,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115514.1,tat_exon2,8137,8233,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115514.1,rev_exon2,8138,8414,8377,8652,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115514.1,nef,8557,9172,8796,9416,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115488.1,gag,707,2207,789,2291,forward,0.3014827756125966,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115488.1,pol,1999,5011,2084,5095,forward,0.19298018391400085,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115488.1,vif,4955,5534,5040,5618,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115488.1,vpr,5473,5767,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115488.1,tat_exon1,5744,5963,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA -MK115488.1,rev_exon1,5883,5961,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115488.1,vpu,5975,6221,6061,6309,forward,0.5399181166837258,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115488.1,env,6138,8679,6224,8794,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115488.1,tat_exon2,8260,8356,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115488.1,rev_exon2,8261,8537,8377,8652,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115488.1,nef,8680,9295,8796,9416,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115030.1,gag,176,1685,789,2291,forward,0.27304152847199525,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP*YQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA -MK115030.1,pol,1477,4489,2084,5095,forward,0.26443159013103534,FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115030.1,vif,4433,5012,5040,5618,forward,0.3903081914030819,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG -MK115030.1,vpr,4951,5245,5558,5849,forward,0.6069164265129684,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*L,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGACATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTT -MK115030.1,tat_exon1,5222,5441,5830,6044,forward,0.5060292850990527,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA -MK115030.1,rev_exon1,5361,5439,5969,6044,forward,0.531617235590375,MAGRSGDSDEELLTAVRIIKRLYQSS,MAGRSGDSDEELLTAVRIIKRLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT -MK115030.1,vpu,5453,5699,6061,6309,forward,0.5783961231827419,MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM,MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM*,ATGCATATCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG -MK115030.1,env,5616,8217,6224,8794,forward,0.510440428145289,MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*,ATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAGAAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAATACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGAAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAACCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA -MK115030.1,tat_exon2,7798,7891,8376,8468,forward,0.4918032786885247,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,RPSSQPRGDPTGPKEQKKEVERETEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG -MK115030.1,rev_exon2,7799,8075,8377,8652,forward,0.40871934604904625,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG -MK115030.1,nef,8218,8860,8796,9416,forward,0.5478186258332784,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC*,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATACTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA -MK115498.1,gag,663,2163,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115498.1,pol,1955,4967,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115498.1,vif,4911,5490,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115498.1,vpr,5429,5723,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115498.1,tat_exon1,5700,5916,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115498.1,rev_exon1,5839,5917,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115498.1,vpu,5931,6177,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115498.1,env,6094,8635,6224,8794,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115498.1,tat_exon2,8216,8312,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115498.1,rev_exon2,8217,8493,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115498.1,nef,8636,9251,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115211.1,gag,250,1753,789,2291,forward,0.25132972351334526,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP*SQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACACAGGACATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCATTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAGCCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAAACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAAATGCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA -MK115211.1,pol,1545,4557,2084,5095,forward,0.2540106951871657,FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED,FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAGAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCTGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATTTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTACCACCTGTAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGGCAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGTAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA -MK115211.1,vif,4501,5083,5040,5618,forward,0.40472673559822736,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -MK115211.1,vpr,5022,5313,5558,5849,forward,0.6010897287271042,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*,ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAACTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGGGCAAGAAATGGAGCCAGTAGATCCTAG -MK115211.1,tat_exon1,5293,5509,5830,6044,forward,0.48719691819623834,MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ,MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAG -MK115211.1,rev_exon1,5432,5510,5969,6044,forward,0.5698711595639246,MAGRSGDSDEELLRITRTIKFLYQNS,MAGRSGDSDEELLRITRTIKFLYQNS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGT -MK115211.1,vpu,5524,5794,6061,6309,forward,0.5867864387134165,MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL,MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL*,ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTATTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAA -MK115211.1,env,5687,8198,6224,8794,forward,0.6631820277358986,MHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,MRVKGTKKNWQPSWRWGTMLIWGWATMLLGRSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDPEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTTINNTSSIEEGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSATITQACPKVSFEPIPIH*VQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRKAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACCCAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTACTATAAACAATACCAGTAGTATAGAAGAAGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCGCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTAGGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACACTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGTAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA -MK115211.1,tat_exon2,7779,7872,8376,8468,forward,0.4464285714285715,RPSSQPRGDPTGPKESEKKVERETETDPVT,RPSSQPRGDPTGPKESEKKVERETETDPVT*,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG -MK115211.1,rev_exon2,7780,8056,8377,8652,forward,0.4267425320056898,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE*,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG -MK115211.1,nef,8199,8823,8796,9416,forward,0.5148270181219111,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC*,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACCCAGATAAAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA -MK115158.1,gag,316,1819,234,1730,forward,0.38499312512276596,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP*SQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACCCAGGAAATAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACGATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAACCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA -MK115158.1,pol,1611,4623,1526,4534,forward,0.30843043180260443,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTCTACCAGGAAGATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATGGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTGCATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCTCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACTTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAGGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA -MK115158.1,vif,4567,5149,4479,5060,forward,0.48399487836107546,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGTTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACGGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -MK115158.1,vpr,5088,5379,5000,5290,forward,0.43053960964408733,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*,ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTGGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115158.1,tat_exon1,5359,5575,5271,5485,forward,0.46628407460545196,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAG -MK115158.1,rev_exon1,5498,5576,5410,5485,forward,0.5678884873515746,MAGRSGDSDEELLKITRTIKFLYQNS,MAGRSGDSDEELLKITRTIKFLYQNS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGT -MK115158.1,vpu,5590,5860,5502,5747,forward,0.5374618963580942,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL*,ATGCAATCCTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTTGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA -MK115158.1,env,5753,8315,5665,8211,forward,0.5078662118966413,MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTGAAAATGTTAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGGGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTATATCTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA -MK115158.1,tat_exon2,7896,7989,7793,7885,forward,0.5279960707269156,RPSSQPRGDPTGPKESEKKVERETETDPVT,RPSSQPRGDPTGPKESEKKVERETETDPVT*,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG -MK115158.1,rev_exon2,7897,8173,7794,8069,forward,0.45968205324650446,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE*,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG -MK115158.1,nef,8316,8940,8213,8833,forward,0.5254870367657829,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC*,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA -MK114705.1,gag,532,2047,789,2291,forward,0.27270615563298484,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGCGAATTAGATAGATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCGGTTAATCCTGGCCTGTTAGAAACATCAGAGGGCTGTAGGCAAATACTGGGACAGCTACAACCGTCCCTTCAAACAGGATCAGAAGAACTTAAATCATTATTTAATACAATAGCAGTCCTTTATTGCGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTCTAAATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGTCAGAGCAGTCAAGTCAGCCAAAATTACCCTATAGTGCAGAACCATCAGGGGCAAATGGTATATCAGGCTCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCCGAGGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAGGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGGACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCATGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTATCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCTTTTAGAGATTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAAGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTCTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCAGCCACAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGCAACAGGTGGTGCAACTAACATAATGATGCAGAAAGGCAATTTTAGGAACCAAGGAAAACCTATTAAGTGTTTCAATTGTGGCAAAGAAGGGCACCTAGCTAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCTCTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAA -MK114705.1,pol,1839,4851,2084,5095,forward,0.21944123990570308,FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTAAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAACCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCCGTATTTGCCATAAAGAAAAAGGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAGCTTAATAAAAGAACTCAAGACTTTTGGGAGGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTCTCAGTTCCTTTAGATGAAAGCTTCAGAAAGTATACTGCATTTACCATACCTAGTACTAACAATGAGACACCCGGGATTAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAAAGTAGCATGACGAAAATCTTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTACCAATACATGGATGATTTATACGTAGGATCTGACTTAGAAATAGAGCAGCATAGAGCAAAAGTAGAGAACCTGAGAGAGCATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGCTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAGCTAAGGCACTAACAGAAGTGATAACACTAACAGAAGAAGCAGAGCTAGAATTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAGTAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACCTATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAAGCAGTTCAAAAAATAGCCACAGAGAGCATAGTAGTATGGGGAAAGATTCCTAAATTTAGATTACCCATACAGAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAGTACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGGCAAAAAGTTATCCCCTTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGACAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAGAAGGTCTACCTGACATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCAGGAATCAGGAAAATACTATTTTTGGATGGAATAGATAAGGCCCAGGAAGATCATGAGAAATATCATAGTAATTGGAAAGCAATGGCTAGTGATTTTAACATACCACCTGTGGTAGCAAAAGAGATAATAGCCAGCTGTGATAAATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGGTACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAATAATACATACAGATAATGGTAGCAATTTCACCAGCACTACAGTCAAGGCCGCCTGCTGGTGGGCAGGTGTTAAGCAGGAGTTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGAGTACTGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATCAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK114705.1,vif,4795,5374,5040,5618,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTCTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGGTTTACAGACACCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAACACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATATGTACTATTTTGATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTGCAATACCTGGCACTAGCAGTATTAGTAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTCGCGAGACTGACAGAGGATAGATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAG -MK114705.1,vpr,5313,5598,5558,5849,forward,0.599565614997714,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS*,ATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAGCTTAAGAGGGAAGCTGTTAGACATTTTCCTAGGGAATGGCTCCATAGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGGCAGGAGTAGGAGCCATAATAAGAATACTGCAACAATTACTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGGATAGGCATACTGAGGAGAACAAGAAATGGAGCCCGTAGATCCTAG -MK114705.1,tat_exon1,5578,5794,5830,6044,forward,0.5284227381905524,MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ,MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ,ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAG -MK114705.1,rev_exon1,5717,5795,5969,6044,forward,0.5989458552946814,MAGRSGDRDEDLLETVRFIKFLYQNS,MAGRSGDRDEDLLETVRFIKFLYQNS,ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGT -MK114705.1,vpu,5809,6055,6061,6309,forward,0.5826369545032498,MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL,MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL*,ATGCAACCTTTAGAGATATCAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTACTCTTAGAGTATAGGAAAATATTAAGGCAAAAGAAAATAGACAGATTAATTAATAGAATAAGTGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAG -MK114705.1,env,5972,8549,6224,8794,forward,0.49969138043715056,MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL,MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL*,ATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAGTGCTACAAACATGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGATGCAACCACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGATACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCGACCCACAAGAAGTAGTACTGGAAAATGTGACAGAAAATTATAATATGGGAAAAAATAACATGGTGGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTACTCTTAACCCCATTCTGTGTCACTTTAAATTGCACTGATGCTAACATCACCAGCACTAATAATAGTAGAGATAAGAAGGAAGGAGAAAGTACATTGGAGGAGACGAAAGGAGAAATAAAAAACTGCTCTTTCAATATGACTTCAAGCATGAGCGATAAGTCTCAGAAACAACGTGCACTTTTTTATAAGCTTGATGTGGTACAAATAGATGAGACTAATAATAATAGTTATAGGTTGATAAGTTGTAACACCTCAGTCGTCACACAGGCTTGTCCAAAGGTATCCTTTGATCCAATCCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGAAATTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAACCTGTAGTGTCAACCCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAAGTAATGATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTGCAGCTGAAGACACCTGTACAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGGATAAGTATGGGACCAGGGAGAGTAATTTATGCAACAGGACAAATAATAGGAGATATAAGAAAAGCACATTGCAACATTAGTAGAGCAGAATGGAATACAACTTTAAAGCAGATAGTTACACAATTAAGAAAGCAGTGGAATAGAACCATAATCTTTAACTCATCCTCAGGAGGGGACCCAGAAATTGTGATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACAAAACTATTTAATAGTACTTGGCCACGTAATAGTACTTGGAATAATACTGAAGGGTCAAATGACACTGAAATAATCACACTCCCGTGCAGAATAAAACAAATTGTAAACAGGTGGCAGGAAGTAGGCAAAGCAATGTATGCCCCTCCCATCCAAGGACAAATTAGTTGTTCATCAAATATTACAGGGCTGCTACTAGTTAGAGATGGTGGAATTAACACCAGTGAGAGCAACGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAGGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATGCTGGGAGCTATGTTCCTTGGGCTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGTTGACGGTACAGACCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGACTCCTAGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAACACTAGTTGGAGTAATAGATCTTATGAAGATATTTGGAACAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAGGCTTAATATACACCTTAATTGAAAAATCGCAGAACCAGCAGGAAATAAATGAACAAGAACTATTGTCATTGGATAAGTGGGCAAGCCTGTGGAATTGGTTTAATATAACAAATTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAGTGCTATCAGCTTGCTCAACGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGAATCATAGGAGTAGTACAAAGAACTTGGAGAGCTTTTATCCACATACCTAGGAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA -MK114705.1,tat_exon2,8130,8223,8376,8468,forward,0.6074396517609815,RPPAQPQGDPTGPKKSKKEVEKETETDQCD,RPPAQPQGDPTGPKKSKKEVEKETETDQCD*,AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAG -MK114705.1,rev_exon2,8131,8407,8377,8652,forward,0.47688921496698455,DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE,DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE*,GACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAG -MK114705.1,nef,8550,8985,8796,9416,forward,0.6054615099468668,MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK,MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK*,ATGGGTGGAAAATGGTCAAAAAAGAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAAAATGAAGCGAACTGAGCCAGCAGCAGAGGGGGTGGGAGCAGCATCTCGAGACCTGGACAAATATGGAGCAATCACAAGTAGCAATACAGCACAGACCAATCCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAAGAGGTAGGCTTTCCAGTCAGACCCCAGGTACCTTTGAGACCAATGACTTACAAGGCAGCTGTGGATATGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAGAGACAAGATATCCTTGATCTGTGGATCTATCACACACAAGGCTACTTCCCTGATTGGCAAAATTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGAGGGTGCTTCAAGTAG -MK114856.1,gag,120,2022,789,2291,forward,0.7511039743075072,MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC,MLHISSCFLPVLGLSGWTRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSGSNLVQPERPSSSGARTGT*KRK*NQRRSLDAGLGLLKRARQEAKGGDW*VRHF*LAEARRRKIGARASVLSRGELDR*EKIQLRPRRKKKYRLKHIV*ASKELERFAVNPGLLETSGGCKQILEQLQPSLQTRSEELRSLYNTVATLYYVHQKIDVKDTKEALDKVEEEQNKSKKKAQQAAADTRNRGQTSQNFPIVQNLQGQIVHQAISPRTLNA*VKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVSRHQAAMQMLKKVINEEAAE*DRLHPVHARPIAPGQIREPKRSDIARTTSTLQEQIR*MTHNPPIPVRKIYKR*IILGLNKIVKMYSPTSILDIKQRPKEPFRDYVDQFYKTLRAKQATQKVKN*MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC*KYRKERHQMKNCTKRQANFLKKIWPSHKGRPENFPQSRPEIPQSRPEPTAPPAPPEKSFKFEEATTPSQKQETIDKELYPLTSLRSLFGNDPSSQ*,ATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACTGGGTCTCTCTGGTTGGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGTTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGGCTCAAATCTGGTCCAACCAGAGAGACCCAGTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGTAGAACCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAAAGGCGGCGACTGGTGAGTACGCCATTTTTGACTAGCAGAGGCTAGAAGGAGAAAGATAGGTGCGAGAGCGTCAGTATTGAGCAGAGGAGAATTAGATAGATAGGAGAAAATTCAGTTAAGGCCAAGGAGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAAGGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGGAGGCTGTAAACAGATATTAGAACAGCTACAACCATCCCTTCAGACAAGATCAGAAGAACTTAGATCATTATATAATACAGTAGCCACCCTCTATTATGTACATCAAAAGATAGATGTAAAAGACACCAAAGAAGCGTTAGACAAAGTAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCGGCAGCTGACACAAGAAACAGAGGCCAGACCAGTCAAAATTTCCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAGGCCATATCACCTAGAACTTTAAATGCATAAGTAAAAGTAGTAGAAGAAAAAGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACTATGCTAAACACAGTAAGTAGACACCAAGCAGCTATGCAAATGTTAAAAAAGGTCATCAATGAAGAAGCTGCAGAATAAGATAGATTACATCCAGTGCATGCAAGGCCTATTGCACCAGGCCAGATAAGAGAACCAAAAAGAAGTGACATAGCAAGAACTACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAAGAAAGATTTATAAAAGATAAATAATTCTAGGACTAAATAAAATAGTAAAAATGTATAGCCCTACCAGCATTTTAGACATAAAGCAAAGGCCAAAAGAACCCTTTAGAGACTATGTAGACCAGTTCTATAAAACTTTAAGAGCCAAGCAAGCTACACAGAAAGTAAAAAATTAGATGACAAAAACCTTGTTAGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAAAAAGTAAGAAGACCCGGCCATAAAGCAAAAGTTTTAGCTGAAGCAATGAGCCAAGCAACAGGTGCAGCCAACATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAAAGCACATAGCCAAAAATTGCAAGGCCCCTAAGAAAAAAGGCTGTTAGAAATATAGAAAAGAAAGACACCAAATGAAAAATTGCACTAAGAGACAGGCTAATTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA -MK114856.1,pol,1787,4826,2084,5095,forward,0.7637180771917039,ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI,FFKENLAFPQRKARELSSKPTRNSSEQTRANSPTSPSRKELQV*RSNNSLSEAGDNRQGAVSSNFPQITLWQRPIVTIKIREQLKKALLNTGADDTVLEDIDLPRK*KPKMIRRIRSFIKVRQYEQVPIEISRHKAISTVLVGPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPRMDGPKVKQ*PLTEEKIKALVEICTEIEKEKKISKIRPENPYNTPVFAIKKKDGTK*KKLVDFRELNKKTQDF*EIQLSIPHPAKLKKKKSVTVLDVGDAYFSVPLDKDFKKYTAFTIPSINNETPKIRYQYNVLPQR*KRSPAIFQSSMTKILEPFRKTNPDIVIYQYIDDLYVRSDLEIRQHRTKVKELRQHLMR*RFTTPDKKHQKEPPFL*MRYELHPDKWTVQPIVLPEKES*TVNDIQKLVRKLN*ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI*RRTPKFRLPIQKET*ET*WTDYWQAT*IPK*EFVNTPPLVKL*YQLEKEPIIRAETFYVDKAANKDNKSRKARYVTDRRRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALKIIQAQPDKSESELVSQIIEQLIKKKKVYLA*VPAHKRIKRNEQVNKLVSARIKKVLFLDKIEKAQEDHKKYHSN*RTMASNFNLPPIVAKEIVASCDKCQLKKEAMHRQVDCSPGIWQLDCTHLEEKIILVAVHVASRYIEAEVIPAETRQETAYFILKLARRWPVKTIHTDNGRNFTSNTVKAAC**AKIKQEFSIPYNPQSQEVVKSINNELKKIIRQVKDQAKHLKTAVQMAVFIHNFKRKGGIEGYSAEERIVDIIATEIQTKELQKQITKIQNFQVYYKDSRDPL*KGPAKLLWKGEKAVVIQDNSDIKVVPRRKAKIIKDYKKQMASDDCVASRQDED*,TTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAAGAGAGCAACTAAAGAAAGCTTTATTAAATACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAAGAAAATAGAAACCAAAAATGATAAGAAGAATTAGAAGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCAGACACAAAGCTATAAGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTAGAAGAAATCTGTTGACTCAGCTTAGTTGCACTCTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAAGAATGGACGGCCCAAAAGTTAAACAATAGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATAGAAAAAGAAAAGAAAATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATAGAAAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGATTTCTAAGAAATTCAATTAAGTATACCACATCCTGCAAAGCTAAAAAAGAAAAAATCAGTCACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAAGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAAAGATTAGATATCAGTATAATGTGCTTCCACAAAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTAGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAAGGCAACATAGAACAAAAGTAAAGGAACTGAGGCAACATCTAATGAGGTAAAGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTAGATGAGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAAGAAAGTTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAATTGAATTAGGCAAGTCAGATTTATGCAAAGATTAAAGTGAAGCAATTATGTAAGCTCCTTAAAAGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAAAGATTCTAAAAGATCCAGTACATAGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAAAGAGAAGGTCAGTGGACATATCAAATTTATCAAAAGCCATTTAAAAATCTAAAAACAGAGAAATATGCAAGAACGAAAGGTGCCCATACTAATGATGTAAAGCAATTAACAGAAGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATAAAGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATAAGAAACATAGTGGACAGATTATTGGCAAGCCACCTAGATCCCTAAGTAAGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATAGTACCAGTTAGAAAAAGAACCCATAATAAGAGCAGAAACCTTCTATGTAGATAAGGCAGCTAATAAAGATAATAAATCAAGAAAAGCAAGATATGTTACTGACAGAAGAAGACAAAAAGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAAGATTCAGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAAAGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGAAAAAAGTCTACCTGGCATAAGTGCCAGCCCACAAAAGAATTAAAAGAAATGAACAGGTAAATAAACTAGTCAGTGCTAGAATCAAGAAAGTACTATTTTTAGATAAAATAGAAAAAGCCCAAGAAGACCATAAAAAATATCACAGTAATTAAAGAACAATGGCTAGTAATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAAAAGAAGCTATGCATAGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAAGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAAGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTAGTAGGCAAAAATCAAGCAAGAATTTAGTATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAAAATCTATAAATAATGAATTAAAGAAAATTATAAGACAAGTAAAAGATCAGGCTAAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGAAGGATACAGTGCAGAGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCAGGTTTATTACAAGGACAGCAGAGATCCACTTTAGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAAAGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAAGGATTATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAG -MK114856.1,vif,4425,5349,5040,5618,forward,0.7566838361540349,MIVWQVDRMKIRTWKSLVKYHMYISKKAKK,MN*RKL*DK*KIRLNILRQQYKWQYSSTILKEKGGLKDTVQRKE**T**QQKYRLKNYKNKLQKFKIFRFITRTAEIHFRKDQQSFSGKVKRQ**YKITVT*K*CQEEKQKSLRIIKNRWQVMIVWQVDRMKIRTWKSLVKYHMYISKKAKK*AYRHHYETTHPRISSEVHIPLGGARLVITTY*GLHTGEKDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVRSLQYLALTALITPKKIKPPLPSVKKLTEDR*NKPQKTKGHRKSHTMNRH*,ATGAATTAAAGAAAATTATAAGACAAGTAAAAGATCAGGCTAAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGAAGGATACAGTGCAGAGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCAGGTTTATTACAAGGACAGCAGAGATCCACTTTAGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAAAGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAAGGATTATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAGAAATAGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGGTGCTAGATTAGTAATAACAACATATTAAGGTCTGCATACAGGAGAAAAAGACTGGCATTTAGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAG -MK114856.1,vpr,5033,5582,5558,5849,forward,0.7667502687208886,MLFIHFRIKCHHSRIGIVLQRRARNRASRS,MEKKEI*HTSRP*LSRPSNSCALF*LFFRICYKKCHIRT*S*P*V*ISSRT*QSKISTILGTNSINNTKEDKATFA*CKETDRR*IEQAPEDQRPQKEPYNE*TLELLEELKREAVRHFPRPWLQNLRQYIYETYKDTWTRVEAIIRILQQMLFIHFRIKCHHSRIGIVLQRRARNRASRS*T,ATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAAACCTAAGACAATATATCTATGAAACTTATAAAGATACTTGGACAAGAGTAGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACT -MK114856.1,tat_exon1,5486,5777,5830,6044,forward,0.7619181418001311,MLFIHFRIKCHHSRIGIVLQRRARNRASRS,MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAV,ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTA -MK114856.1,rev_exon1,5698,5776,5969,6044,forward,0.5275498241500586,MAGRSGDRDEDLLKTVRLIKFLYQSS,MAGRSGDRDEDLLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT -MK114856.1,vpu,5790,6039,6061,6309,forward,0.6962169553327257,TIVGIKYKKILRQRKIDRIINRIRKRAEDSGNESEKDQEELSALVVEIRHDAP,MQPLKILAIVALVVAAIIAIVV*TIVGIKYKKILRQRKIDRIINRIRKRAEDSGNESEKDQEELSALVVEIRHDAP*DVDDL*,ATGCAACCTTTAAAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGACCATAGTAGGCATAAAATATAAGAAAATATTAAGACAAAGAAAAATAGATAGAATAATTAATAGAATAAGAAAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAG -MK114856.1,env,5953,8521,6224,8794,forward,0.7529994904340572,MTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD,MRAKKIRKNCQHL**K*GTMLLKMLMICSAAENL*VTVYYEVPV*RDANTTLFCASDAKAYDTEVHNV*ATHACVPTDPNPHEVELKNVTENFNM*KNNMVDQMHEDIINL*DQSLKPCAKLTPLCVTLNCTDLKNNTVGNQTNYHLNETNTIQRKEMTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD*SDTLSKIVEKLKEKFRKNKTIIFKQSSREDIEIETHSFNCREEFFYCNTTRLFNST*SVNRTSINRTNNKNITLPCRIKQIINRWQEVRKAMYAPPISKIIRCSSNITGLILTRDSSTTNSKEETFRPRERNMKDN*RSELYKYKVVKIEPLKVAPTKAQRKVVQREKRAIRTLGAMFLRFLRTAGSTIGAASLTLTVQARQLLSGIVQQQNNLLKAIEAQQHMLQLTV*GIKQLQARVLSVERYLQDQQLLKI*SCSRKLICTTTVP*NTS*SNKSYSTI*DNMT*MQ*DREIQNYTKIIYNLLKESQIQQKKNEKELLELDQ*ANL*N*FSITKWL*YIKIFIMIVGGLVSLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRRPDRPERIEEEGEKRDRGRSRRLVTRFLPLI*DDLQSLCLFSYHHLKDLLLIVLKTVQILGHKK*EILKY**SLLQY*IQELKNSAVSLLNTIAIAVAERTDKVIEVRQKISRAFLHIPRKIRQGLEKALQ*,ATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTAAGTCACAGTCTATTATGAGGTACCTGTGTAAAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTAGAAAAATAACATGGTAGACCAGATGCATGAGGATATAATCAATTTATGAGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAAAAATAATACTGTAGGAAATCAAACAAATTATCATCTCAATGAAACTAATACAATACAAAGAAAAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAATATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAAAGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTAGTTTTGCAATTCTAAAGTGTAAAGATGAGATGTTCAATAGAACAAGACCATGTAAGAATGTCAGCACAGTACAATGTACACATAGAATTAGACCAGTAGTGTCAACTCAACTGCTGTTAAATAGTAGCCTAGCAGAAAAAAAGATAGTACTTAGATCTGAAAATTTCACAGACAATACTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAAAGAGAGCAATTTATGCAACAAGACAGATAATAGAAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGAAGTGACACTTTAAGCAAAATAGTTGAAAAATTAAAGGAAAAATTTAGAAAAAATAAAACAATAATCTTTAAGCAATCATCAAGAGAGGACATAGAAATTGAAACGCACAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGAAGTGTTAATAGAACTAGCATAAACAGAACTAACAATAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATCAACAGGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCTATCAGTAAGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATAGTAGTACAACTAATAGTAAAGAAGAGACCTTCAGACCTAGAGAAAGAAATATGAAGGACAATTAGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAAAAGTAGCACCCACCAAGGCACAAAGAAAAGTAGTGCAGAGAGAAAAAAGAGCAATAAGAACGTTAGGAGCTATGTTCCTCAGGTTCTTAAGAACAGCAGGAAGCACTATAGGCGCAGCGTCACTGACGCTGACAGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTAAGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTAGAAAGATACCTACAAGATCAACAGCTCCTGAAGATTTGAAGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATACTAGTTAGAGCAATAAATCTTACAGTACCATCTAAGATAACATGACCTAGATGCAGTAGGACAGAGAAATTCAAAATTACACAAAGATAATATACAACTTACTTAAAGAATCGCAAATCCAACAGAAAAAGAATGAAAAAGAATTATTAGAACTAGATCAATGAGCAAATTTGTAGAATTAGTTTAGTATAACAAAATGGCTATAGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAAGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAAAGAACAGATAAAGTTATAGAAGTAAGACAAAAAATTAGCAGAGCTTTTCTCCACATACCTAGAAAGATAAGACAAGGCTTAGAAAAGGCTTTGCAATAA -MK114856.1,nef,7916,9176,8796,9416,forward,0.7587548638132295,MSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,MKKNY*N*INEQICRISLV*QNGYSI*KYS****EA**V*K*FLLYFL**IELGKDTHHYHCRPSSQPQEDQTGPKE*KKKVKRETEADPED**LDSYHSSKTTCKACAFSATTT*KTYS*LC*RLCKF*DTRSKRSSSTSRVSCSIKFRN*RIVLLACSIP*LSQ*LKEQIKL*K*DKKLAELFSTYLER*DKA*KRLCNKISGK*SKSCMAR*PAVKERIERVNPRPAAKKEQAEPAAAKVRAASRDLEKYRAITSSNTSTTNAACA*LEAQEEEEVGFPVRPQVPLRPMTYKAALNLSHFLKEKGGLEGLI*SQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRNPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC*,ATGAAAAAGAATTATTAGAACTAGATCAATGAGCAAATTTGTAGAATTAGTTTAGTATAACAAAATGGCTATAGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAAGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAAAGAACAGATAAAGTTATAGAAGTAAGACAAAAAATTAGCAGAGCTTTTCTCCACATACCTAGAAAGATAAGACAAGGCTTAGAAAAGGCTTTGCAATAAAATAAGTGGCAAGTAGTCAAAAAGTTGTATGGCTAGATAGCCTGCTGTAAAAGAAAGAATAGAAAGAGTTAATCCAAGGCCTGCTGCAAAGAAAGAACAAGCTGAGCCAGCAGCAGCTAAGGTAAGAGCAGCATCTCGAGACTTAGAAAAATATAGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTAGCTAGAAGCACAAGAGGAAGAAGAAGTAGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTAAATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTAGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGAAATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATAGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA -MK114856.1,tat_exon2,8102,8198,8376,8468,forward,0.7134052388289676,RPSSQPQEDQTGPKE,RPSSQPQEDQTGPKE*KKKVKRETEADPED**,AGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGA -MK114856.1,rev_exon2,8103,8379,8377,8652,forward,0.6888374145157732,ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE,DPPPNPKKTRQARKNRRRR*KERQRQIQKISD*ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE*,GACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAG -MK115009.1,gag,302,1715,789,2291,forward,0.7484174646972894,MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC,MGARASVLSGGKLDR*EKIYLRPEGKKKYRLKHIV*ASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQIVHQPLSPRTLNA*VKVIEEKAFSPEVIPMFSALSERATPQDLNTMLNTVRGHQAAMQMLKETINDEAAE*DRLHPVHAGPIAPGQMKEPRGSDIAGTTSTLQEQIR*MTHNPPIPVGEIYKRWIILRLNKIVRMYSPVSILDIRQRPKEPFKDYVDRFYKTLKAEQASQDVKN*MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC*KCRKERHQMKDCTKRPD*DG*FFKEDLAFPQGKAREFSPEQTRANSPASRELQV*,ATGGGTGCTAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGAAGGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATAGTACATCAACCATTATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAAGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATAGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAAGAACCAAGAGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAAGGCTAAACAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAAGAACCCTTTAAAGATTATGTAGACCGATTCTATAAAACTCTAAAGGCTGAGCAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAAGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATAGTGCAAGGAGGCAATTTTAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAAGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAAAAATGTAGAAAGGAAAGACACCAAATGAAAGATTGTACTAAGAGACCAGACTAAGACGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGA -MK115009.1,pol,1613,4625,2084,5095,forward,0.7535201229073285,AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED,FFKEDLAFPQGKAREFSPEQTRANSPASRELQV*GRGNSSSSEAGDERPRTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEINLPGK*KPKMIKRIRSFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKSGIDSPRVKQWPLTEEKIKALIEICAEIEKEKKITKIRPENPYNTPVFAIKKKDSTK*KKLVDFRELNKRTQDF*EVQLRIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPEVKYQYNVLPQR*KRSPAIFQSSMTKILEPFRKENPDIVIYQYIDDLYVRSDLEIEQHRTKIEELRQHLLR*GLTTPDKKHQKKPPFL*ISYELHPDK*TVQPIQLPDKDS*TVNDIQKLVRKLN*ASQIYPEIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQRRDQ*TYQIYQEPFKNLKTEKYARTRGAHTNDVKQLTEAVQKIALEAIVI*RKTPKFKLPIQKET*KMWWTKYWQAT*IPE*EFVNTPPLVKL*YQLEKEPIVRAETFYVNKAANRKTKLRKAEYVTDRRRQKVVSLIDTTNQRTKLHAIHLALQDSGSTVNIVTDSQYALKIIQAQPDKSESELVSQIIEQLIKKEKIYLA*VPAHKRIEGNEQVDKLVSNRIRRVLFLDRIDKAQEEHEKYHSN*RAMASNFNLPPVVAKEIVASCDKCQLKGEPMHRQVDCSPGIWQLDCTHLERKIILVAVHVASRYIEAEVIAAETGQETAYFILKLAGR*PVKIIHTDNGSNFTSTTVKAAC**AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED*,TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATGATAAAAAGAATTAGAAGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACGCCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGTCAGGAATAGATAGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAGGAAAAGAAAATTACAAAAATTAGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAAAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTAAGAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGAAGTTAAATATCAGTACAATGTGCTTCCACAGAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAGGTGAGGACTCACCACACCAGACAAGAAACATCAGAAAAAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTGATAAATAGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTGAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGAGGAGAGACCAATAGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGAAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAGAAAATGTGGTGGACAAAGTATTGGCAAGCCACCTAGATTCCTGAGTAAGAATTTGTCAATACCCCTCCCTTAGTAAAACTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAAATAAGGCAGCTAATAGAAAGACTAAATTAAGAAAAGCAGAGTATGTTACGGACAGAAGAAGACAAAAGGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAGATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTGAAGGAAATGAACAAGTAGATAAATTAGTCAGTAATAGAATCAGAAGAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAGAGAGCAATGGCTAGTAATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATAGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATAGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAAGCCGCCTGTTAGTAGGCAGAGATCAAGCAGGAATTTAGTATTCCCTACAATCCTCAAAGTCAAAGAGTAGTAAAATCTATGAATAATAAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCATAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGAGATTATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115009.1,vif,4289,4988,5040,5618,forward,0.7624714704923379,MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED,MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED*NIEQFSKTPYAYFKESSEIGLQTSL*KP*SKNKFRSTHPIRGSKIGNKNILRSAYRRKRLAFRPGSIHRMEEKEI*HTSRPWPGRPTNSPVLF*LFFRICYKECHIRT*S*S*V*ISGRT*,ATGGCAGTATTCATTCATAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGAGATTATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATAGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATAGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGAGGAAGCAAGATTGGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAA -MK115009.1,vpr,5087,5381,5558,5849,forward,0.6769116239498034,MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT,MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT*AEVEAIIRTLQQLLFIHFRIRCQHSRIGIIRQRRARNRASRS*L,ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTT -MK115009.1,tat_exon1,5235,5574,5830,6044,forward,0.6412492073557388,MTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ,MEILRQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPSLEP*KHPGSQPMTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ,ATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAG -MK115009.1,rev_exon1,5497,5575,5969,6044,forward,0.5678884873515746,MAGRSRDSDEELLTAVRIIKRLYQSS,MAGRSRDSDEELLTAVRIIKRLYQSS,ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT -MK115009.1,vpu,5589,5835,6061,6309,forward,0.6428836863619473,MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP,MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP*DSNDM*,ATGCATGCCTTAGAAATAGCAGCAATAGCAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAG -MK115009.1,env,5752,8353,6224,8794,forward,0.7518870380010406,MKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD,MRVTRTKRNYPHLWR*GILFLKIVMICSANNL*VTVYYKVPV*KEATTTLFCASDAKAYETEKHNV*ATHACVPTDPSPQEVALENVTETFNM*KNDMVEQMHEDIISL*DQSLKPCVKLTPLCVTLNCTDNLNLTCPNNNTCSNNTNYNMKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD*NDTLKQIVIKLKEKFKNKTIVFTQSSGEDPEIVMHSFNCREEFFYCNTTQLFNST*NNST*NSTDN*NSTESNSTITLPCRIKQIINLWQEVRRAMYAPPIQRQIRCSSNITGLLLVRDGRSNNSSNDTETFRPRGRDMKDN*RSKLYKYKVVKIKPLRIAPTHAKRRVVQKEKRAIRLEAFFLRFLRAAGSTIGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTV*GIKQLQAKVLALERYLKDQQLLRI*SCSGKLICTTNVPWNISWSPRWNRSLDKI*TNMT*KQ*EKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDK*ASL*N*FDITQ*L*YIKIFIIIVRGLISLRIVFTILSIVNKVRQGYSPLSLQTLLPTQRGPDRPERTEERGRKKDRGTSTRLVHRFLALI*DDLRSLFLFSYHRLKDLLLIAARIVELLGRRK*EALKY**NLLQY*SQEIKNSAVSLLNTTAIAVAERTDKIIEVLQRGFRAILHIPTRIRQGLEKALL*,ATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATAAAGTACCTGTATAGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTAGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTAGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAACTTGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATGAAAGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTAATGTAGTACCAATAGATGAAGATAGTAAAAATACTACGGGCAAATATAAGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTAAACGGCAGTCTAGCAGAAGAAAAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATAGAACAGACATAATAAGAGATATAAGACAAGCGCATTATAACATTAGTAAGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGAAGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAGTACTTAGAATAGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAAGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTAGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTAGAGGAAGAGATATGAAGGACAATTAGAGAAGTAAATTATATAAATATAAAGTAGTCAAAATTAAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAAGGAAAAAAGAGCAATAAGACTTGAAGCTTTCTTCCTTAGGTTCTTAAGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACAGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAAAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTAAGGATTTAAAGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAAGAAAAAGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTAGGCAAGTTTGTAAAATTAGTTTGACATTACACAGTAGCTATAGTATATAAAAATATTCATAATAATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGAGGACAGATAAAATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA -MK115009.1,nef,7748,8996,8796,9416,forward,0.7344808947652905,MTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR,MNKTYWH*ISRQVCKISLTLHSSYSI*KYS****EA**V*E*FLLYCL**IKLGKDTHHCHCRPSSQPREDPTGPKEQKKEVERKTEAHPRD*CIDS*HSSRTTCGACSSSVTTA*KTYS*LRREL*NF*DAESKKH*SISRISCSIRVRK*RIVQLACSTPQQ*Q*LRGQIKL*KYYKEALELFSTYLHE*DRA*KRLCYKIGNKLSRRLKARWPAIKEKIRRARPVREPEPATAKVRAASRDLKRHGALTSSNTAATNADVACLEAQQKKEEVSFPVRPQVPLRPMTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR*CFKLVPVDPDKVEEASVRENNCLLSPENLHRMEDEHREVLQWRFDSRLAFHHIARELHPEYYKDC*,ATGAACAAGACTTATTGGCATTAGATAAGTAGGCAAGTTTGTAAAATTAGTTTGACATTACACAGTAGCTATAGTATATAAAAATATTCATAATAATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGAGGACAGATAAAATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAAGATAGGTAACAAGTTGTCAAGAAGGCTCAAGGCTAGATGGCCTGCCATAAAGGAGAAAATAAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAACAGCTAAGGTAAGAGCAGCATCTCGAGACCTGAAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGAAGAAAGAAGAGGTAAGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGAGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAAGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATAGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAAGTAGAAGAGGCCAGTGTAAGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATAGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA -MK115009.1,tat_exon2,7934,8027,8376,8468,forward,0.5633802816901408,RPSSQPREDPTGPKEQKKEVERKTEAHPRD,RPSSQPREDPTGPKEQKKEVERKTEAHPRD*,AGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAG -MK115009.1,rev_exon2,7935,8211,8377,8652,forward,0.6781884553958476,ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE,DPPPNPERTRQARKNRRKR*KERQRHIHEISA*ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG -MK115387.1,gag,292,1795,789,2291,forward,0.25442849599155104,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAGGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACACAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCGCAGGATGTAAAAAATTGGATGACAGAAACCTTATTGGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115387.1,pol,1587,4599,2084,5095,forward,0.17509882471546434,FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAGGGAAGATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAGAAATCAATAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTGGGGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAGACAATTATGTAAACTCCTTAAGGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGGGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATGGGATACCTGGTGGACAGAATATTGGCAAGCCACCTGGATTCCCGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAAAAAGAACCTATTGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAGGAATAATTCAAGCACAACCAGATAGGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAGGGTCTACCTTGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTCCTATTTTTGGATGGAATAGATAAGGCCCAAGAGGAGCATGAGAAATATCACAATAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAGGGAGAAGCCACGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTGGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115387.1,vif,4543,5122,5040,5618,forward,0.34158090650317496,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAGGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGCAGGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAG -MK115387.1,vpr,5061,5352,5558,5849,forward,0.5777089783281733,MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS,MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115387.1,tat_exon1,5332,5548,5830,6044,forward,0.40192926045016075,MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ,MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAG -MK115387.1,rev_exon1,5471,5549,5969,6044,forward,0.48200514138817474,MAGRSGDSDEDLLKTVRLIKYLYQSS,MAGRSGDSDEDLLKTVRLIKYLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGT -MK115387.1,vpu,5563,5809,6061,6309,forward,0.5669679539852097,MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL,MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL*,ATGCAATCCTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAGGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAG -MK115387.1,env,5726,8288,6224,8794,forward,0.52645935624659,MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL,MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL*,ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAGCATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGACTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAGATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGCTGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGCATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAAAGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAAGAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA -MK115387.1,tat_exon2,7869,7962,8376,8468,forward,0.6995153473344102,RPSSQLRGEPTGPKE,RPSSQLRGEPTGPKE*KKEVERETKADPVD*,AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG -MK115387.1,rev_exon2,7870,8146,8377,8652,forward,0.32366339007432277,DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE,DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE*,GACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAG -MK115387.1,nef,8289,8940,8796,9416,forward,0.46946145391741245,MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC,MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTAGGGTTGGATGGAATGCAGTGAGGGAAAGAATGAGACGAGCTCAGCCAACAGCAGATAGGGAACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAATATGGAGCACTTACAAGTAGGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGATGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATGGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGGTTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA -MK115491.1,gag,521,2021,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115491.1,pol,1813,4825,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115491.1,vif,4769,5348,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115491.1,vpr,5287,5581,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115491.1,tat_exon1,5558,5777,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA -MK115491.1,rev_exon1,5697,5775,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115491.1,vpu,5789,6035,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115491.1,env,5952,8493,6224,8794,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115491.1,tat_exon2,8074,8170,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115491.1,rev_exon2,8075,8351,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115491.1,nef,8494,9109,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK116110.1,gag,62,1601,140,1642,forward,0.7362754920106639,MSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ,MAGSPHSLTCCHHFF*CSCWSQCF*NSLTIWVCILDQQGFCHPIFYIL*SLLGS*GFIEPVYIVSKGFLWSLFYVQNADRTIHSYYFI*SQDYPSFIDISYWDRWIICHPSYLFLKGTSSSCYITSPWFSHLAWCNRPCMHWMQSIPFCSFLIDGLF*YLHCCLMSPHCI*HGVYILWGGSFC*C*KHRYYFWAKSLFFYYFYPCI*SSR*HGLMYHLPLEVLHYRVVLADLAVISCASCCLLCFHLAFVLLFLYLI*RSLGVLYLYPLMYTIEDRYCII**FKLF*PCLEGWL*LSQYLSTAF*CF*QARINCESF*LPACPY*MF*PIFFLSSWP*PNFFPLV*FSPA*Y*RSRTHLSPSSLR*VKTFLAYSPVAEAMSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ*,ATGGCCGGGTCCCCCCACTCCCTGACATGCTGTCATCATTTCTTCTAGTGTAGCTGCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTTGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACATCCTGTGAAGCTTGCTCGGCTCTTAGGGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTTTTATGTCCAAAATGCTGACAGGACTATACATTCTTACTATTTTATTTAATCCCAGGATTACCCATCTTTTATAGATATCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATATCACTTCCCCTTGGTTCTCTCATTTGGCCTGGTGCAACAGGCCCTGCATGCACTGGATGCAATCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAATATTTGCATTGCTGCTTGATGTCCCCCCACTGTATTTAGCATGGTGTTTATATCTTGTGGGGTGGCTCCTTCTGCTAATGCTGAAAACATAGGTATTACTTCTGGGCTAAAAGCCTTTTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGACATGGCCTGATGTACCATTTGCCCCTGGAGGTTTTGCACTATAGGGTAGTTTTGGCTGACCTGGCTGTTATTTCCTGCGCCAGCTGCTGCTTGCTGTGCTTTCATCTTGCTTTTGTTTTGCTCTTCCTCTATCTTATCTAGCGCTCCCTTGGTGTCTTGTATCTCTATCCTTTGATGTATACAATAGAGGACCGCTACTGTATTATATAATGATTTAAGCTCTTCTGACCCTGTTTGGAGGGATGGCTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTAGATGTTTTAACCTATATTTTTTCTTTCCTCCTGGCCTTAACCGAATTTTTTCCCATTGGTCTAATTTTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGGTAAAAACTTTTTTGGCGTACTCACCAGTCGCCGAAGCAATGAGCCAAGTAAATTCAACTACCGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAAGAAGACTGTTAAGTGTTTCAACTGTGGTAAAATAGGGCATATAGCAAAAAATTGCAGGGCCCCCAGGAGAAAGGGCTGTTGGAAATGTGGACAGGAAGGACACCAGATGAAAGATTGTAGTGAGAGACAGGCTAATTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAA -MK116110.1,pol,1393,4405,1435,4446,forward,0.2475474244944199,FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED,FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED*,TTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAAAAGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGTGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACCCATAGAAATCTATGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGGTGCACTTTAAATTTTCCCATTAGTCCTATCGAAACTGTACCAGTAAGATTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAGATTTCAAAGATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTGGGAATACCGCATCCCGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCATTAGATAAAGACTTTAGGAAGTATACTGCATTTACCATACCCAGTGTAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAGCTAGCATGACAAAAATTTTAGAGCCTTTTAGGAAGCAAAATCCAGACATGGTTATTTATCAATACATGGATGATCTATATATAGGATCTGACTTGGAATTAGGACAGCATAGGACAAAAATAGAGGAACTGAGACAACATCTATTGAGGTGGGGGTTTACCACACCAGACAAGAAGCATCAGAAAGAACCTCCATTCCTCTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAACACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTAGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAGTTATGTAAACTCCTTAGAGGAACCAAATCACTAACAGAAGTAGTACCACTAACAAGAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAAAAGCAGGGACAAGGCCAGTGGACTTATCAGATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATAAAGGGTACCCACACTAATGATGTAAAACAATTAACACAGGCTGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACATTCTATGTCGATGGGGCAGCCAATAGGGATACTAAATTAGGAAAAGCAGGATATGTTACTGACAGGGGAAGACAAAAAATTGTCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTACCTAGCTCTGCAGGATTCAGGATCAGAAGTAAACATAGTATCAGACTCACAGTATGCAATAGGAATTCTTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGACATGGGTGCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTATTATTCTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCCATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGTTAAAAGGAGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTGGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTGGCCAGTGGATATATTGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATATTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAACACAATACATACAGACAATGGCAGCAACTTCACTAGCACTGCGGTTAAAGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGGGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGAGGTACAGTGCAGGGGAAAGAATAGTAGACATGATAGCATCAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGACTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAA -MK116110.1,vif,4349,4928,4391,4969,forward,0.4686663095875737,MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH,MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAATGCATGGAAAAGCTTAGTAAAGCACCATATGCATGTTTCAAGGAAAGTTGAGAGATGGGTTTATAAACATCACTATGAAAGTACTAATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAAAAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAAGAGCTATAATACACAAGTAGACCCTGAAGTAGCAGACCAACTAATCCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAAAGCCATAGTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCCCTACAGTACTTGGCATTAGCAGCATTAGTAAAATCAAAAAAGACAAAGCCACCTTTGCCTAGCGTTACGAAGCTGACGGAGGATAGATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAG -MK116110.1,vpr,4867,5158,4909,5199,forward,0.4534005037783373,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP*,ATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGGATATGGCTTCAGAGCTTAGGACAATACGTCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTTTGCAACAAATGCTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACGAGGGAGAACAAGAAATGGAGCCAGTAGACCCTAG -MK116110.1,tat_exon1,5138,5357,5180,5394,forward,0.5040543502081963,MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ,MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ*,ATGGAGCCAGTAGACCCTAGCCTAGCGCCCTGGAAGCACCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGCTATTGTAAAAAGTGCTGCTTACATTGCCAAGTTTGTTTCACAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGTAA -MK116110.1,rev_exon1,5277,5355,5319,5394,forward,0.48200514138817474,MAGRSGDSDEDLLKAVRLIKILYQSS,MAGRSGDSDEDLLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGT -MK116110.1,vpu,5369,5615,5411,5656,forward,0.46373488953730724,MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL,MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL*,ATGCAATCTTTGCAAATAGGAGCAATAGTAGCATTAGTAGTAGGAACAATAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAAGAAAATAGATAGAATAATAGATAGAATAGTAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAG -MK116110.1,env,5532,8073,5574,8123,forward,0.47450452559300893,MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL,MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL*,ATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAGTGCTGCAACAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACGCATGCCTGTGTACCCACGGACCCCAACCCACAAGAAGTATTATTGGGAAATGTGACAGAAGATTTTAATGCATGGAAAAATAACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTTTGTGTTATTTTGCATTGCACTGATGTCAACAATACTAGAAATGGGATGACAGGAGAACTAAAAAACTGCTCTTTCAATATCACCACAAAAATAACAAATAAGGTACAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTACCAATAAATAATAAGGATAATGATACTAGCTTTAATAATAATAGCTATAGGTTGATAAGTTGTAACACCTCAGTTATTACACAGGCTTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTACTGTACCCCGGCTGGTTATGCAATTCTAAGGTGTAACAATGAGACATTCAGTGGAAAAGGGCCATGTACAAATGTCAGCTCAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGCAGTCTAGCAAAACAGGAGGTAGTAATTAGATCTCAAAATTTCTCGGACAATGTTAAAACCATAATAGTACAGCTGAAGACCCCTGTAAAAATTAACTGTACAAGGCCCAATAACAATACAAGAAAAAGTATACATGCAGGACCAGGGAAAGTAATTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGCAACATTAGTGCAGCAGAGTGGAATGATACTTTAGGACAGATAGTTACAAAATTACAAGAACAATTTGGGAATAAAACAATAGTCTTCAATCAATCGTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTTTACTGTAATTCAACACAACTGTTTAATAGTACTTGGAATAATAATGGTACTAATACTTGGAATAGTACAGGTAATATCACACTCCCATGTAAAATAAGGCAAATTGTAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCTCCTCCCATCCGTGGACAAATTAAATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAACGAGAGTGAGAGCGAAACCTTCAGACCTGGCGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGACTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAACACTGGGAGCTGTGTTCCTTGGGTTCTTGGGAACAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGCAATAAATCTCTGAATGAAATTTGGGATAACATGACCTGGATGGAGTGGGAAAAAGAAATTAGTAATTACACACAATTAATATACACTTTAATTGAAGAATCGCAGAGCCAGCAAGAAAAGAATGAACAAGAATTATTGGCACTAGATAAGTGGGACAGCTTGTGGAGTTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAATAGGGTTAAGAATAGTTTTTACTGTACTTTCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTGTCATTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTAGTACAAAGAGCTTGTAGAGCTATTCTCCACATACCTGTAAGACTAAGACAAGGCTTAGAAAGAGCTTTGCTATAA -MK116110.1,tat_exon2,7654,7750,7705,7797,forward,0.4464285714285715,RPASQPRGDPTGPKESKKTVERETETDPHA,RPASQPRGDPTGPKESKKTVERETETDPHA**,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAA -MK116110.1,rev_exon2,7655,7940,7706,7981,forward,0.46126825660935467,DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC,DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAGTGCTGTTAG -MK116110.1,nef,8074,8695,8125,8751,forward,0.437094682230869,MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC,MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTGTAAGGGAAAGAATAAGAAGAGCTGGGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGACAAACATGGAGCAATCACAAGTAACAATACACCAGCTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTTAGGCCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATGATATACTCCCAGCAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGACCAGGGGTCAGGTTTCCACTGACCTTTGGATGGTGCTTCAAACTAGTACCACTTGAGACAGAGCAGGTAGAAGCGGCCACTGGAGGAGAGAACAACTGCTTGTTACACCCTTTGAACCAGCATGGGATGGATGACCCGGAGAGAGAAGTACTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAGAGCCAAAGAGCTGCATCCGGAGTACTACAAAGACTGCTGA -MK115527.1,gag,683,2183,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115527.1,pol,1975,4987,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115527.1,vif,4931,5510,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115527.1,vpr,5449,5743,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115527.1,tat_exon1,5720,5939,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA -MK115527.1,rev_exon1,5859,5937,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115527.1,vpu,5951,6197,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115527.1,env,6114,8655,6224,8794,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115527.1,tat_exon2,8236,8332,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115527.1,rev_exon2,8237,8513,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115527.1,nef,8656,9271,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK114997.1,gag,210,1719,789,2291,forward,0.27936962750716343,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP*YQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACACAATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGCACTTTAAATGCATGGGTAAAAGTGATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGATCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCCTCCATAATGGCGCAAGGAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA -MK114997.1,pol,1511,4523,2084,5095,forward,0.26443159013103534,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAGAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGACAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACCAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTCCATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTAACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCCGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK114997.1,vif,4467,5046,5040,5618,forward,0.3903081914030819,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTAGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCCTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG -MK114997.1,env,4985,8207,6224,8794,forward,0.6663239775063792,MHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*LRALETSRKSTYDCL*QLLL*TVLLSLPSLLHHKRLRHLLWQEEAETATKSFSQQSESSSGSTRAVSITCNACLKNSSNSRISSSNNNSNSCVVYSINRI*ENFKTKENRQVN**NKRKSRRQWQ*K*RRPGGIISTCGDGASCSLGW**YVVPTTCGSQSIMGYLYGKKQLPLYFVHQMLKHMRQRSIMFGQPMPVYPQTPAHRK*H*KM*QKHLTCGKMTW*SRCMRI*SVYGIKA*SHV*N*PHSVLL*IALIV*VIILIII*RKKEK*KTALSMSPQE*EIG*QKNMHFSIDLM*YQ*MKIVEILRANIG**IVTPQSLHKPVQRYPLSQFPYIFVPRLVLRF*SVEIRNSMEQENVEMSAQYNVHMELGQ*YQLNCC*TAV*QKKR**LDLPISRTMLKP**YS*INL*KLIVQDPTIIQEEVYT*DQGEHFMEQT**GI*DKRIVTLVGKIGMTL*KKIVIKLKEKFENKTIVFNQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAGTATTACATGTAATGCATGCCTTAAAAATAGCAGCAATAGTAGGATTAGTAGTAGCAACAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGAAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAGTTTAAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAGACTTGATGTAGTATCAATAGATGAAGATAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAACCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGAGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAAAAAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAACACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCTCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACATTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAGTGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAGGCAGTGGGAAAAGGAAATTGACAATTACACAGACACAATATATAACTTAATTGAACTATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAGTTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTACTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA -MK114997.1,vpr,4985,5279,5558,5849,forward,0.5928358208955226,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*L,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTT -MK114997.1,tat_exon1,5256,5472,5830,6044,forward,0.48719691819623834,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAG -MK114997.1,rev_exon1,5395,5473,5969,6044,forward,0.531617235590375,MAGRSGDSDEELLTAVRIIKRLYQSS,MAGRSGDSDEELLTAVRIIKRLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT -MK114997.1,vpu,5487,5733,6061,6309,forward,0.5852080576385825,MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM,MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM*,ATGCATGCCTTAAAAATAGCAGCAATAGTAGGATTAGTAGTAGCAACAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG -MK114997.1,tat_exon2,7788,7881,8376,8468,forward,0.4918032786885247,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,RPSSQPRGDPTGPKEQKKEVERETEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG -MK114997.1,rev_exon2,7789,8065,8377,8652,forward,0.40871934604904625,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG -MK114997.1,nef,8208,8850,8796,9416,forward,0.5478186258332784,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC*,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTTCAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTTCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGTCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAATATTTCAAGGACTGCTGA -MK115518.1,gag,739,2239,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115518.1,pol,2031,5043,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115518.1,vif,4987,5566,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115518.1,vpr,5505,5796,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115518.1,tat_exon1,5776,5992,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115518.1,rev_exon1,5915,5993,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115518.1,vpu,6007,6253,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115518.1,env,6170,8711,6224,8794,forward,0.45675101255163,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115518.1,tat_exon2,8292,8385,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD*,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG -MK115518.1,rev_exon2,8293,8569,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115518.1,nef,8712,9327,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115065.1,gag,221,1730,789,2291,forward,0.2880084183556756,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP*YQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCCAAGGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGCTGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA -MK115065.1,pol,1522,4534,2084,5095,forward,0.25117173416656646,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGATTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTATCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAAATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCTATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAGAAAAGTACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115065.1,vif,4478,5057,5040,5618,forward,0.3903081914030819,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG -MK115065.1,vpr,4996,5290,5558,5849,forward,0.6000000000000001,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*L,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTT -MK115065.1,tat_exon1,5267,5486,5830,6044,forward,0.5060292850990527,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA -MK115065.1,rev_exon1,5406,5484,5969,6044,forward,0.531617235590375,MAGRSGDSDEELLTAVRIIKRLYQSS,MAGRSGDSDEELLTAVRIIKRLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT -MK115065.1,vpu,5498,5744,6061,6309,forward,0.5852080576385825,MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM,MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM*,ATGCATGCCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG -MK115065.1,env,5661,8262,6224,8794,forward,0.5065440396179699,MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*,ATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAGTACTTGGAATGGTACTGACAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATAGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACCTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA -MK115065.1,tat_exon2,7843,7936,8376,8468,forward,0.4918032786885247,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,RPSSQPRGDPTGPKEQKKEVERETEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG -MK115065.1,rev_exon2,7844,8120,8377,8652,forward,0.40871934604904625,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG -MK115065.1,nef,8263,8905,8796,9416,forward,0.5386842636859471,MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC*,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTACCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAGAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA -MK115464.1,gag,527,2297,789,2291,forward,0.706855791962175,MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ,MLQAIPGLRVGPAGSHPFALTSLSASCCHQSQSPGCSGATARTGT*KRK*NQRSSLDAGLGLLKRARQEARGGDW*VRQF*LAEARRREISARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADKGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNA*VKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAE*DRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIA*MTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKN*MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ*,ATGCTTCAGGCCATCCCTGGTTTGAGGGTGGGTCCCGCCGGGTCCCACCCCTTTGCACTCACGAGCCTGTCTGCCTCCTGCTGCCATCAATCACAGAGCCCAGGCTGTTCGGGCGCCACTGCCCGAACAGGGACCTGAAAGCGAAAGTAGAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGAGGCGGCGACTGGTGAGTACGCCAATTTTGACTAGCAGAGGCTAGAAGGAGAGAGATAAGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAAGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACAAAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATAGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATAAGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATAGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCACAGGATGTAAAAAATTAGATGACAGAAACCTTATTAGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTAAGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAAGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MK115464.1,pol,2089,5101,2084,5095,forward,0.7518376924488996,AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED,FFKEDLAFLQRKAKELSSEQTRANSPTRRELQV*GGDSNSSSEAGAGGQRSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEISLPGR*KPKMIEGIGGFIKVRQYDQITIEICGHKAIGTVLVGPTPVNIIGRNLLTQISCTLNFPISPIETVPVQLKPGIDSPKVKQWPLTEEKIKALVEICTEMEKEKKISKIRPENPYNTPVFAIKKKDSTK*RKLVDFKELNKRTQDF*EVQLRIPHPARLKKKKSITVLDVGDAYFSIPLDKDFKKYTAFTIPSINNKTPEIRYQYNVLPQG*KRSPAIFQSSMIKILEPFRKQNPDIVIYQYIDDLYVRSDLEIRQHRTKIEELRQHLLK*RLTTPDKKHQKEPPFLWISYELHPDKWTVQPIVLPDKDSWTVNDIQKLVRKLN*ASQIYAEIKVRQLCKLLKGAKALTEVIQLTEEAELELAENKEILKEPVHEVYYDPSKDLIAELQKQRQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVI*GKTPRFKLPIQKET*DT*WTEYWQAT*IPE*EFVNTPPLVKL*YQLEKEPIVGAETFYVDRAANKETKLRKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALRIIQAQPDKSESEIVNQIIEQLIKKEKVYLA*VPAHKRIRRNEQVDKLVSARIRKVLFLDRIDKAQEEHKKYHNN*RAMASDFNLPPVVAKKIVASCDKCQLKKEATHRQVDCSPRIWQLDCTHLEGKVILVAVHVASRYIEAEVIPAETGQETAYFLLKLAGR*PVKAIHTDNGTNFTSATVKAAC**AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED*,TTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATAAGTTTGCCAGGAAGATAGAAACCAAAAATGATAGAAGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATAACTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTAGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAACCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAAAAAAGATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAGAAAATTAGTAGATTTCAAGGAACTTAATAAAAGAACTCAAGACTTCTAAGAAGTTCAATTAAGAATACCACACCCCGCAAGGTTAAAAAAGAAGAAATCAATAACAGTACTAGATGTAGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAAGAAGTATACTGCATTTACCATACCTAGTATAAATAATAAGACACCAGAGATTAGATATCAGTACAATGTGCTTCCACAGGGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATAGATGACTTGTATGTAAGATCTGACTTAGAAATAAGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTAGAGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATAAGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTAAGAAAATTGAATTGAGCAAGTCAGATTTATGCAGAGATTAAAGTGAGACAATTATGTAAACTCCTTAAAGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAAGAAATTCTAAAAGAACCAGTACATGAAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGAGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGAGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGAGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATAAGATACCTAGTGGACAGAATATTGGCAAGCCACCTAGATTCCCGAGTAAGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATAGTACCAATTAGAAAAAGAGCCTATTGTAGGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAAAGAGACTAAATTAAGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAAGAATAATTCAAGCACAACCAGATAAGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTTGCATAGGTACCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTGCTAGAATCAGGAAAGTCCTATTTTTAGATAGAATAGATAAGGCCCAAGAAGAGCATAAGAAATATCACAATAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAAAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAAAAAGAAGCCACGCATAGACAAGTAGACTGTAGTCCAAGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTAGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTAGTAGGCAAAGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAAGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115464.1,vif,5045,5624,5040,5618,forward,0.7067546928117459,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY*GLHTGERDWHLGQGVSIEWKERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRRSQTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAAGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTAGGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGTCAAGGAGTCTCCATAGAATGGAAGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAG -MK115464.1,vpr,5563,5857,5558,5849,forward,0.6021542339864789,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS*I,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGATT -MK115464.1,tat_exon1,5563,6052,5830,6044,forward,0.7571801566579635,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS*IRALEASRKSA*DSLYQMLL*KMLLSLPSLFHNKRLRHLLWQEEAETATKTSSRQSDSSSTSTKAV,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTG -MK115464.1,rev_exon1,5973,6051,5969,6044,forward,0.48200514138817474,MAGRSGDSDEDLLKTVRLIKYLYQSS,MAGRSGDSDEDLLKTVRLIKYLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGT -MK115464.1,vpu,6065,6311,6061,6309,forward,0.6838632273545291,AIVLIEYKKILKQRRIDRLIDRIIDKAEDSGNESEGDQEELSAFVEIGHHAP,MQSLYILTIVALVVAAILAIVV*AIVLIEYKKILKQRRIDRLIDRIIDKAEDSGNESEGDQEELSAFVEIGHHAP*NVDDL*,ATGCAATCTTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTAGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAAGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAG -MK115464.1,env,6228,8799,6224,8794,forward,0.7471048806788873,MTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST,MRVKEIKRSYQHL*R*GIMLLRMLMIYSTADQWWVTVYYKVPVWREANTTLFCASDAKAYSTEAHNV*ATHACVPTDPNPQEIVIENVTEDFNMWKNNMVDQMHEDIISL*DQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTSNTT*GEMTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST*TKNGTDSWQSNDTQNSNITLQCRIKQIINLWQEVRKAMYAPPISRQINCTSNITGLVLTRDRRNETKTFRPGRENMKDNWRSKLYKYKVVRIEPLRIAPTKAKRRVVQREKRAVRLGAMFLKFLGAARSTIGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTV*GIKQLQARVLAVERYLQDQQLLGL*GCSRKLICTTTVP*NRS*GRHNKNYKSLDDI*DNMT*IE*EKEIDNYTSLIYTLITESHSQQKKNEQELLALDK*ASL*N*FDISQWLWYIKIFIMIVGGLVSLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPERIEERGRERDKGRSGRLVNGFLALI*DDLRSLCLFSYHRLSDLLLIVIKIVELLRRKR*EALKY**NLLQY*SQELKNSAVSLLNTTAIVVAERTDKIIEILQRISRAFLHIPRRIRQGLEKALL*,ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATAAGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGAAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTTCTAATACTACTTAGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATAAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAAAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAAGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATAGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAAGAAGCCATACGAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAAGACCAGGAAGAGCATTTTATACAACAGGAGATATAATAAGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAGTAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAAAGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAAAGAAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTAGACTAAAAATGGTACTGATAGTTGGCAGTCTAATGATACTCAGAATAGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCCATCAGTAGACAAATTAACTGTACATCAAATATTACAGGGCTAGTTTTAACAAGAGATAGGAGGAATGAAACTAAGACCTTTAGACCTGGAAGAGAAAATATGAAGGATAATTGGAGAAGTAAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAAGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAGAAAAGAGCAGTAAGACTAGGAGCTATGTTCCTTAAGTTCTTAGGAGCAGCCAGAAGCACTATAGGCGCAGCGTCGATAGCGCTGACGGAACAGGCCAGACGAGTCTTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTAAGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATCGTAGTTGAGGTAGGCATAACAAAAATTACAAAAGTCTAGATGACATTTAGGATAACATGACCTAGATAGAGTAGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAAAAAAGAATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA -MK115464.1,nef,8194,9451,8796,9416,forward,0.6542937183493158,MTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC,MNKNYWH*INRQVCRISLTYHNGCGI*KYS****EA**V*E*FLLYFL**IKLGKDTHHYHFRPSSQPREEPTGPKE*KKEVERETKADPVD**TDS*HLSRTIYGAYASSATTA*ATYS*L*SRLWNF*DARGKKPSNISRISCSTRVRN*RIVLSACSTPQL***LKGQIR**KYYKELVELFSTYLGE*DRA*KRLCYKIGGKWSKSSKVK*NAVKERIRRAQPTADKERAEPAADKVRAASRDLEKYGALTSKNTAATNADCAWLEAQEEEDEVGFPVRPQLPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC*,ATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAAGATAGGTGGCAAGTGGTCAAAAAGTAGTAAGGTTAAATAGAATGCAGTGAAAGAAAGAATAAGACGAGCTCAGCCAACAGCAGATAAAGAACGAGCTGAGCCAGCAGCAGATAAGGTAAGAGCAGCATCTAGAGACCTAGAAAAATATGGAGCACTTACAAGTAAGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGATGAGGTAGGTTTTCCAGTCAGACCTCAGTTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAAGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATAGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGATTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA -MK115464.1,tat_exon2,8380,8476,8376,8468,forward,0.6995153473344102,RPSSQPREEPTGPKE,RPSSQPREEPTGPKE*KKEVERETKADPVD**,AGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGA -MK115464.1,rev_exon2,8381,8657,8377,8652,forward,0.5716671727907683,RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE,DPPPSPERSRQARKNRRKR*RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE*,GACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAG -MK115530.1,gag,746,2246,789,2291,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115530.1,pol,2038,5050,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115530.1,vif,4994,5573,5040,5618,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115530.1,vpr,5512,5806,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115530.1,tat_exon1,5783,6002,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA -MK115530.1,rev_exon1,5922,6000,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115530.1,vpu,6014,6260,6061,6309,forward,0.5399181166837258,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115530.1,env,6177,8718,6224,8794,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115530.1,tat_exon2,8299,8395,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115530.1,rev_exon2,8300,8576,8377,8652,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115530.1,nef,8719,9334,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115520.1,gag,695,2195,789,2291,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115520.1,pol,1986,5004,2084,5095,forward,0.6033592883813991,YGKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,IF*GKSGLPTKEGQGTSFRADQSQQPHQKRASGLGKRQ*LPLRSRSQETRNCIPCLPSNHSLAATPRQNKNRGAI*RSFLRYRSR*YSIRRNEFARKMETKNDRGNWRFYQSKTV*SDTHRNLWTQSYRYSINRTYTCQHNWKKSVDSAWLYLKFSH*SY*NCTSKIKARNGWPKS*TMAIDRRKNKSISRNLYRNGKGRKNFKNWA*KSIQYSSICYKEKRQY*MEKISRFQRTQ*ENSRLLGSSIRNTTSCRVKKEKISNSTGCG*CIFFNSLR*RIQEVYCIYHT*YK**DTRD*V*VQCAATGMERITSNIPK*HDKNLRAF*KAKSRYSYLSIHG*FVCRI*LRNRAA*NKNRGTKTTSVKVGTYHTRQKTSERTSIPLDGL*TPS*YGKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,ATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATATGGGAAATGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115520.1,vif,4948,5527,5040,5618,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115520.1,vpr,5466,5757,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115520.1,tat_exon1,5737,5953,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115520.1,rev_exon1,5876,5954,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115520.1,vpu,5968,6214,6061,6309,forward,0.5399181166837258,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115520.1,env,6131,8672,6224,8794,forward,0.4569687738004571,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115520.1,tat_exon2,8253,8346,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD*,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG -MK115520.1,rev_exon2,8254,8530,8377,8652,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115520.1,nef,8673,9288,8796,9416,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115503.1,gag,817,2317,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115503.1,pol,2109,5121,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115503.1,vif,5065,5644,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115503.1,vpr,5583,5874,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -MK115503.1,tat_exon1,5854,6070,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115503.1,rev_exon1,5993,6071,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115503.1,vpu,6085,6331,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115503.1,env,6248,8789,6224,8794,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115503.1,tat_exon2,8370,8463,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD*,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG -MK115503.1,rev_exon2,8371,8647,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115503.1,nef,8790,9405,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115570.1,gag,687,2187,789,2291,forward,0.2967573174581697,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCGACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115570.1,pol,1979,4991,2084,5095,forward,0.19298018391400085,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115570.1,vif,4935,5514,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115570.1,vpr,5453,5747,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115570.1,tat_exon1,5724,5940,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115570.1,rev_exon1,5863,5941,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115570.1,vpu,5955,6201,6061,6309,forward,0.5399181166837258,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115570.1,env,6118,8659,6224,8794,forward,0.4604674291397314,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCTCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAGGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115570.1,tat_exon2,8240,8336,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115570.1,rev_exon2,8241,8517,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115570.1,nef,8660,9275,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115509.1,gag,555,2055,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115509.1,pol,1847,4859,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115509.1,vif,4803,5382,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115509.1,vpr,5321,5615,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115509.1,tat_exon1,5592,5808,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115509.1,rev_exon1,5731,5809,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115509.1,vpu,5823,6069,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115509.1,env,5986,8527,6224,8794,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115509.1,tat_exon2,8108,8204,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115509.1,rev_exon2,8109,8385,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115509.1,nef,8528,9143,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115702.1,gag,246,1782,789,2291,forward,0.35613851839948674,MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ,MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ*SQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGCGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAGGAAACGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGCTTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAAACAAATAATGGGACAACTCCAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAGAGGATAGATGTAAAGGATACCAAAGAAGCTTTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCCGCTGACACAGGAAACAACAGCCAAGTCAGCCAAAATTACCCCATAGTGCAGAACATGCAGGGACAAATGGTACATCAGGCCATATCACCCAGAACCCTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCATTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAGGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCAGGCAGGACCTGTTGCACCAGGCCAGATAAGGGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACACATAATCCACCCGTCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCTGAGCAAGCTTCACAGGAAGTAAAAGGTTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCAGCCATAAGGCAAGGGTTTTGGCAGAAGCAATGAGCCAAGCAACAGGTGCACATGCCATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAA -MK115702.1,pol,1544,4586,2084,5095,forward,0.24526399193257942,FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAAAGATAGGGGGGCAAATAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGAAATGGCGTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATAGCCATAGAAATTTGTGGACATAAAGCAATTGGTACAGTATTAGTAGGACCTACACCTGTCAATATAATTGGAAGAAATCTATTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAGTTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATAGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGATTAAAAAAGAAAAAATCAATAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATATTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGACGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGAGGACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAGGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAGATGGACAGTACAGCCTATAAAGCTGCCAGAGAAAGAAATCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAAACAATTATGTAAACTCCTTAGGGGAACCAAAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGAATTAATAGCAGAAATACAGAAGCAAGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCGAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAGAAAATATCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAATATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAGTACTCCTCCCCTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCATCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATATTACTGACAGAGGAAGACAAAAGGTTGTCACCCTAAATGACACAACCAATCAAAAGACAGAGTTACAAGCAATTCTTCTAGCATTGCAGGATTCAGGATTAGAAGCAAACATAGTGACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGATCTACCTGACATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAGCAAGTAGATAAATTAGTCAGTACTGGGATTAGGAAAGTATTATTTTTAGATGGAATAGATAGGGCCCAAGAAGAGCATGAGAGATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTCAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAATAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACCACAGTTAAGGCCGCCTGTTGGTGGGCGGGGGTCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTGGTAGAATCTATGAATAAAGAATTAAAGAAAATAATAGGACAGGTCAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCAAAAATTCAAAACTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCGGTAGTAATACAGGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115702.1,vif,4530,5109,5040,5618,forward,0.3566796368352788,MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAACTAAGGGATGGTTTTATAGACATCACTATGAGAGCACTCATCCAAAAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTGAGTTGGTAGTAACAACATATTGGGGTTTGCAGCCAGGGGAAAGGGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAGGGAGATATAGAACACACGTGGACCCTAACCTAGCAGACCAACTAATTCATCTGCATTACTTTGATTGTTTTTCAGAATCTGCTATAAGACATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAAGAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACTAAACTAACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG -MK115702.1,vpr,5048,5342,5558,5849,forward,0.5873733056539261,MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS,MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS*P,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAACTTAAGCAGGAAGCTGTTAGGCATTTTCCTAGGCCATGGCTTCATAGCTTAGGGCAATATATCTATGAAACTTATGGGGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATGCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATCCCACAGAGGAGAGCAAGAAATGGATCCAGTAGATCCTAACCT -MK115702.1,tat_exon1,5319,5535,5830,6044,forward,0.5375823324292911,MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ,MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ,ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAG -MK115702.1,rev_exon1,5458,5536,5969,6044,forward,0.5657327586206896,MAGRSGDGDEDLLKAVRLIKTLYQSS,MAGRSGDGDEDLLKAVRLIKTLYQSS,ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGT -MK115702.1,vpu,5550,5808,6061,6309,forward,0.6128024980483997,MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND,MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND**,ATGTTATCTTTAGAAGTAATAGTAGCAATAACAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGACCATAGTACTTATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAAGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAA -MK115702.1,env,5716,8257,6224,8794,forward,0.5208458282639616,MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE,MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE*DRA*,ATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAATGCTACAGAACAATTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACAACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTATTAATAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAAATGCAAGAGGACATAGTCAGCTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACCTTAAATTGCACTAATTTGACCATTGAGCCAAACAATGCTACTAAAGCCAATATTAGTGGGAGGTTAGAGGGGAAAGGAGAAATGACAAACTGCTCTTTCAATGTCACCACAAGCCTAAGAGATAAGAGGAAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTAGCAACAGGTGAAAATAATAACAGCTTTAGGTTGATAAGTTGTAATACCTCAGAGATTACACAGGCCTGTCCAAAGGTATCATTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAAAAGTTCAATGGAACAGGAAAATGTAACAATGTCAGCATAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAGTTAGATCTGCCAATTTCTCAGACAATACTAAGACCATAATAGTACAGCTGAACAAAACTGTAGTAATTAATTGTACAAGACCCAACAACAATACAAGGAGAAGTATACATATAGCACCAGGGAGAGCATTTTATGCAACAGGAGATATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAAAGAAGATTGGAATACCACTTTAAACCAGGTGGCTAAAAAATTACAAGAACAATTTGAGAATGCAACAATAGACTTTAAACCATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACGGAACTATTTTCTTGGAATGCTACAACAAAACTGTTTACTTGGAATGCTACAAATAGCAATAATGGAACCATCATACTCCCATGTAGAATAAAACAAATTATAAACATGTGGCAAGAGGTAGGAAAAGCAATGTATGCCCCTCCCATTCGTGGACAAATTAGATGTTCGTCAAATATTACAGGACTGCTATTAACAAGAGATGGTGGGACTAACGGGACAGGGAACAGGAATGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAGAAATTAAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGACCATAGGAGCTATGTTCCTTGGGTTCCTGGGGGCAGCAGGAAGCACTATGGGCGCAGCATCACTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCGATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATAAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGGTACCTAAGAGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATATTAGTTGGAGTAATAGAACTCTGAATAACATTTGGGACAATTTGACTTGGATGCAGTGGGATACAGAAATTAACAATTACACAAACAAAATATACCAATTACTTGAGGAAGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAATTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTACATAAAAATATTCATATTAATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTAAATGCCACAGCCATAGTAGTAGCTGAGGGGACAGATAGGATTATAGAATTAGCACAAAGAATTTGTAGAGCAGAATAAGACAGGGCTTGA -MK115702.1,tat_exon2,7874,7970,8376,8468,forward,0.3921568627450981,RPASQSRGDPTGPKEPKKKVERETETDPTD,RPASQSRGDPTGPKEPKKKVERETETDPTD**,AGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGA -MK115702.1,rev_exon2,7875,8151,8377,8652,forward,0.3471418653089562,DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE*,GACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAG -MK115702.1,nef,8275,8896,8796,9416,forward,0.47444962236863253,MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC,MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC*,ATGGGTGGCAAGTGGTCAAAACATAGTAAGAGTGAATGGGCTGATGTAAGGGAAAGAATGGCACAAACTGAGGCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGATCTGGAAAGACATGGAGCAATCACAAGTAGCAATACAGCAACTAACAATGCTGCTTGTGCTTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAAACCTCAGGTGCCTTTGAGACCAATGACCTACAAGGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATTCCCAAAAAAGACAAGACATCCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGAACCAGATTCCCACTGTGCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTGGAAGAAGCCAATAAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACACCGAGAGAGAGGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACGTAGCCAGAGAGAAACATCCGGAGTACTTCAAGGACTGCTGA -MK115095.1,gag,2,1697,789,2291,forward,0.7478034493979825,MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC,SGFSFTFKSLFGRHLGDLKAKEKPEELSRRRTRLAERARQEARRGD**VRRNF*LAEARRREIGARASVLSGGELDR*EKIYLRPGRKKKYRLKHIVWASRELERFAVNPGLLKSSEGCRQILGQLQPALQTGSEELRSLYNTIAVLYCVHQKINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQIVHQPISPRTLNA*VKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVRRHQAAMQMLKETINDEAAE*DRLHPVHAGPIAPGQMKEPRRSDIAGTTSTLQEQIR*MTNNPPIPVGEIYKR*IILRLNKIVKMYSPVSILDIRQRPKEPFKDYVDRFYKTLKAEQASQDVKN*MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC*KCGKKGHQMKDCTKRQANFLRKIWPSHKGRPKNFLQSRPEPTAPPAESFRFKEEATAPPQKQETKDQELYPLASLRSLFGNDP*YQ*,TCTGGTTTCTCTTTCACTTTCAAGTCCCTGTTCGGGCGCCATCTAGGGGACCTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCACGGCAAGAGGCGAGGCGCGGCGACTAGTGAGTACGCCGAAATTTTTGACTAGCAGAGGCTAGAAGGAGAGAGATAGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGGGAGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAAAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAACCCATATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAAGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAAGCTGCAGAATAAGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAGGAACCAAGAAGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATAGATAATCCTGAGGTTAAATAAAATAGTAAAAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAGGAACCCTTTAAGGATTATGTAGACCGGTTCTATAAAACTCTAAAGGCTGAACAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAAGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATGGTGCAAGGAGGCAATTTAAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAGAAATGTGGAAAGAAAGGACACCAAATGAAAGATTGTACTAAGAGACAGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA -MK115095.1,pol,1489,4501,2084,5095,forward,0.7624366800883231,KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK,FFKEDLAFPQGKAKEFSPEQTRANSPASRELQV*GRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIRRQLKEALLDTGADDTVLKEINLPGK*KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK*RKLVDFRELNKRTQDF*KVQLRIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPRVRYQYNVLPQR*KGSPAIFQSSMTKILEPFRKENPDIVIYQYIDDLYVRSDLEIEQHRTKIEELRQHLLK*RLTTPDKKHQKEPPFL*ISYELHPNKWTVQPIQLPDKDS*TVNDIQKLVRKLN*ASQIYPEIKVKQLCKLLRRTKALTEVVPLTEEAELELAENKEILKEPVHRAYYDPSKDLIAEVQKQGGDQWTYQIYQKPFKNLKTKKYARTRGAHTNDVKQLTEAVQKIALEAIVI*RKTPKFKLPIQKET*EM**TEYWQAT*IPE*EFVNTPPLVKL*YQLEKEPIVRAETFYVDRAANRETKLRKARYVTDRRRQKVVSLIDTTNQRTKLHAIHLALQDSGSTVNIVTDSQYALKIIQAQPDKSESELVSQIIEQLIKKEKIYLA*VPAHKRIRRNEQVDKLVSSRIRKVLFLDRIDKAQEEHEKYHSN*RAMASDFNLPPVVAKEIVASCDKCQLKREPMHGQVDCSPGIWQLDCTHLERKIILVAVHVASRYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAAC**ARIKQKFSIPYNPQSQGVVESMNNELKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKRGIGGYSAEERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLL*KGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAAGAAGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAAAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATAATAGAAGGAATTAGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAAAAATTTGTAGACATAAAGCTATAAGTACAGTATTAATAAGACCTACACCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAAGAAAGGAAAATTACAAAAATTAGGCCTAAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATAGAGAAAATTAGTAGATTTCCGAGAACTTAATAAAAGAACACAAGACTTTTAAAAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAAGAGTTAGATATCAGTACAATGTGCTTCCACAAAGATAGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAAGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAAGTAAAGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTAAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAAGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAGGAGATTCTAAAAGAACCAGTACATAGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGAGGAGACCAATGGACATATCAAATTTATCAGAAGCCATTTAAAAATCTGAAAACAAAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATAGAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAAGAAATGTAGTAGACAGAGTATTGGCAAGCCACCTAGATTCCTGAGTAGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAGAGAGACTAAATTAAGAAAAGCAAGGTATGTTACGGACAGAAGAAGACAAAAAGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAAATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTAGTAGAATCAGAAAAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAAGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTAGTAGGCAAGGATCAAGCAGAAATTTAGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAAGGGGGATTGGGGGGTACAGTGCAGAGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTAGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAG -MK115095.1,vif,4445,5024,5040,5618,forward,0.7494633160752622,MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR,MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR*VYRHHHESHNPKTSSEVHIPLREARLVIKTY*GLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVRPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKRSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGAGTTTACAGACATCACCATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAAGGGAAGCAAGATTAGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGGCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAG -MK115095.1,vpr,4963,5257,5558,5849,forward,0.6216628527841345,MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS,MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS*L,ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAACTT -MK115095.1,tat_exon1,5111,5453,5830,6044,forward,0.6491818320844167,MTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ,MEILGQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPNLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ*,ATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAACTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA -MK115095.1,rev_exon1,5373,5451,5969,6044,forward,0.531617235590375,MAGRSGDSDEELLTAVRIIKRLYQSS,MAGRSGDSDEELLTAVRIIKRLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT -MK115095.1,vpu,5465,5711,6061,6309,forward,0.6918418647166362,SIVLIEYKKILRQKKIDRLIDRIRERAEDSGNESDEDQEELSAIVEIGHLVP,MHALEIAAIVRLVVAAIIAIVV*SIVLIEYKKILRQKKIDRLIDRIRERAEDSGNESDEDQEELSAIVEIGHLVP*DSDDM*,ATGCATGCCTTAGAAATAGCAGCAATAGTAAGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAG -MK115095.1,env,5628,8229,6224,8794,forward,0.7513561129398668,MTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD,MKVTRTRKNYQQL*R*GILFLRIVMICSANNL*VTVYYEVPVWKEATTTLFCASDAKAYKTEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISL*DQSLKPCVKLTPLCVTLNCTDELNLNCPNNNTCSNNTKYNMTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD*NDTLKQIVIKLKEKFKNKTIVFNQSSGKDPEIVMHSFNCREEFFYCNTTQLFNST*NNNT*NGTDN*NSTESNSTITLPCRIKQIINLWQEVRRAMYAPPIQGQIRCSSNITGLLLVRDSKSNNSSNDTKTFRPRRGDMKDN*RSELYKYKVVKIEPLRIAPTHAKRRVVQKEKRAIGLRAFFLKFLGAAGSTIGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTV*GIKQLQARVLALERYLKDQQLLKI*GCSRKLICTTNVP*NVS*SPR*NRSLDKI*TNMT*KQ*EKEIDNYTDTIYNLIEQSQNQQEQNEQDLLALDK*ASL*N*FDITQWL*YIKIFIMIVRGLISLRIVFTILSIVNRVRQRYSPLSLQTLLPTQRGPDRPERTEEGGREKDRGTSTRLVHRFLALI*DDLRSLFLFSYHRLRDLLLIVARIVELLRRRK*EALKYW*NLLQY*SQEIKNSAVSLLNTTAIAVAERTDKIIEVLQRGFRAILHIPTRIRQGLEKALL*,ATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATGAGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAAGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATGAATTAAATCTAAATTGCCCTAACAATAATACTTGTAGTAATAATACTAAATATAATATGACGGAAGAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAAGTAGTGGAAATACTACAGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAAGGAGGGCATTTTATAGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTAGGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAAAGGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAAGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAATACTTAGAATGGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATAGTAAGAGCAACAATAGTAGTAATGATACAAAGACCTTCAGGCCTAGAAGAGGAGATATGAAGGACAATTAGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAGAAAGAAAAAAGAGCAATAGGACTTAGAGCTTTCTTCCTTAAGTTCTTAGGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTGAAGATTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTAATGTGCCCTAAAATGTTAGTTAGAGCCCTAGATAGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAGGAAAAAGAAATTGACAATTATACAGACACAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTAGCATTAGATAAGTAGGCAAGTTTGTAGAATTAGTTTGACATTACACAGTGGCTATAGTATATAAAAATATTCATAATGATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGAGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAAAGGACAGATAAGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAAGCTTTGCTATAA -MK115095.1,nef,7624,8872,8796,9416,forward,0.5872017754762344,MRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,MNKTY*H*ISRQVCRISLTLHSGYSI*KYS****EA**V*E*FLLYCL**IELGRDTHHCHCRPSSQPRGDPTGPKEQKKEVERKTEAHPRD*CIDS*HSSRTTCGACSSSVTTA*ETYS*LWRGL*NF*DAESKKH*SIGRISCSIRVRK*RIVQLACSTPQQ*Q*LKGQIRL*KYYKEALELFSTYLHE*DRA*KKLCYKIGNKLSRRLRARWPAIKERMRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC*,ATGAACAAGACTTATTAGCATTAGATAAGTAGGCAAGTTTGTAGAATTAGTTTGACATTACACAGTGGCTATAGTATATAAAAATATTCATAATGATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGAGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAAAGGACAGATAAGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAAGCTTTGCTATAAGATAGGTAACAAGTTGTCAAGAAGGCTCAGGGCTAGATGGCCTGCCATAAAAGAAAGAATGAGACAAGCTAGGCCAGTAAGAAAGCCAGAGCCAGCAGCAACTAAGGTAAGAGCAGCATCTCGAGACCTAGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGGAAGAAAAAGAGGTAGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAAGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAAGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATAGAAGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA -MK115095.1,tat_exon2,7810,7903,8376,8468,forward,0.5303030303030303,RPSSQPRGDPTGPKEQKKEVERKTEAHPRD,RPSSQPRGDPTGPKEQKKEVERKTEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAG -MK115095.1,rev_exon2,7811,8087,8377,8652,forward,0.6667847862036381,ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE,DPPPNPEGTRQARKNRRRR*RERQRHIHEISA*ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG -MK115490.1,gag,549,2049,789,2291,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115490.1,pol,1841,4853,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115490.1,vif,4797,5376,5040,5618,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115490.1,vpr,5315,5609,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115490.1,tat_exon1,5586,5802,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115490.1,rev_exon1,5725,5803,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115490.1,vpu,5817,6063,6061,6309,forward,0.5399181166837258,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115490.1,env,5980,8521,6224,8794,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115490.1,tat_exon2,8102,8198,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115490.1,rev_exon2,8103,8379,8377,8652,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115490.1,nef,8522,9137,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -MK115576.1,gag,468,1968,789,2291,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA -MK115576.1,pol,1760,4772,2084,5095,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MK115576.1,vif,4716,5295,5040,5618,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -MK115576.1,vpr,5234,5528,5558,5849,forward,0.5994382022471912,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MK115576.1,tat_exon1,5505,5721,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG -MK115576.1,rev_exon1,5644,5722,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKILYQSS,MAGRSGDSDEELLKAVRLIKILYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT -MK115576.1,vpu,5736,5982,6061,6309,forward,0.5513972055888224,MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL,MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*,ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG -MK115576.1,env,5899,8440,6224,8794,forward,0.45675101255163,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA -MK115576.1,tat_exon2,8021,8117,8376,8468,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA -MK115576.1,rev_exon2,8022,8298,8377,8652,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG -MK115576.1,nef,8441,9056,8796,9416,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA -OQ092466,gag,825,2361,789,2291,forward,0.2559303794507086,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTAGCCTGTTAGAAACAGCAGAAGGCTGTAGACAAATATTGGGACAGTTACAACCGTCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACATTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCCTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGCAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGCCAGGTTAGACACACAGGAAACAGCAGCCAGGTCAGCCAAAATTACCCTATAGTACAGAACCTTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGACTACATCCAGTGCATGCAGGGCCCATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTACACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGGGGACCCGGACATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCACGTAACAAATTCAAGTGCCATAATGATGCAGAGGGGCAATTTTAGAAACCAAAGAAAGGCTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092466,pol,2147,5165,2084,5095,forward,0.19193360134872262,FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAATCTTAGAGCCTTATAGAACACGAAATCCAGAAATGGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGATTTACTACCCCAGACAAAAAACATCAAAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAAACAAAGACAGCTGGACTGTCAATGACATACAGAAACTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCGCTAACAGAAGAAGCAGAGTTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGGGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGGAAGGACAATGGACATTTCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGCGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAAATTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATACGCATTGGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTGATAAAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGAAAGTATTATTTTTAGATGGAATAGAGAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGCTAAAAGGAGAAGCCATACATGGACAGGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATCCAAACCAAAGAACTACAAAAACAAATTATAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTTATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092466,vif,5109,5688,5040,5618,forward,0.24677296886864086,MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAGGAAAGCTAAGGGATGGTTTTATAGACATCACTTTGAAAGCAATCATCCAAAAATAAGTTCAGAAGTACACATCCCACTGGAGGATGCTAGACTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGGGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTACTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACACATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGAGAAAGCCACCCTTGCCTAGTGTTAAGAAGCTAACAGAAGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG -OQ092466,vpr,5627,5921,5558,5849,forward,0.577708978328173,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*A,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAGCTTAGGGCAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATTCTGCAACAACTGTTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGGCT -OQ092466,tat_exon1,5898,6114,5830,6044,forward,0.42503863987635226,MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ,MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ,ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAG -OQ092466,rev_exon1,6037,6115,5969,6044,forward,0.48200514138817474,MAGRSGDSDDELLKTVRLIKVLYQSS,MAGRSGDSDDELLKTVRLIKVLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGT -OQ092466,vpu,6129,6375,6061,6309,forward,0.5210204814947899,MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL,MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL*,ATGCAACCTTTAACAATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATACTAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAA -OQ092466,env,6292,8875,6224,8794,forward,0.4841544358231281,MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL,MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL*,ATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAATGCTACAGAACAACAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGGTATTAAAAAATGTGACAGAAAATTTTAATATGTGGGAAAATAACATGGTAGAACAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAGTTAACTCCACTCTGTGTTACTCTAAATTGCACTAATACCACTAGGAGTAGTGGAAATACTACCAATGAAATGAAAAACTGCTCTTTCTATACCGAAACAGACATAAGAGATAAGAAGAGAAAGGAATATGCACTTTTTTATGAACTTGATATAGTACCCATAGATGAGGATAATAAGAATAAGAGTAATAATATTAGCTATTCTAGGTTAATAAGTTGCAACACCTCAGTTATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGGCCATGTACAAATGTCAGCACAGTGCAATGTACACATGGTATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGACGAGGTAGTAATTAAATCTAGCAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTAAATGAAACTGTAAAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGACATAAGACAAGCACATTGTAACATCTCTAGAGCAAACTGGACAAACACTTTAAAACAGATAGCTGAAAAATTAGGAAAACAATTTGAGGAAAATAAAACAATAGTCTTTAATCCCTCCTCAGGAGGGGACCCAGAGGTTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATTCAACACCACTGTTTAATAGTACTTGGAAGGAGACTAATGGGATTTGGACTCGTATTGGAGAGTCAAATGATAGTGCTACTATCACACTAAATGATAGTGATACTATCACACTCCAATGTAAAATAAGGCAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAAAGGACAAATTAGCTGTTTATCAAACATTACAGGGCTGCTATTAGTAAGAGATGGTGGCAATAACACGAACGGGACCGAGATCTTCAGACCTGTAGGAGGAGAAATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAGAAAAGAGCGACATTGGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTGGAAAAAATTTGGAATAATATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACCTTACTTGAAGAATCGCAGAACCAGCAAGAAAAAAATGAAAAAGAATTATTGGAATTAGATACATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAATAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAATATTACAAAGAGCTTGTAGAGCTATTCTCCATATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA -OQ092466,tat_exon2,8456,8552,8376,8468,forward,0.5303030303030303,RSTPQLRGDPTGPKESKEKVERETETDPVH,RSTPQLRGDPTGPKESKEKVERETETDPVH**,AGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGA -OQ092466,rev_exon2,8457,8733,8377,8652,forward,0.3690449563855961,DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE,DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE*,GATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAG -OQ092466,nef,8876,9509,8796,9416,forward,0.4195274186357557,MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC,MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC*,ATGGGTGGCAAATGGTCAAAAAGTAAGCTATTTGGATGGCCTGCTGTAAGGGAAAGAATGAGAAGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACACGGAGCAATCACAAGTAGCAATACACCAACTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCGGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCCAAAAAGACAAGAGATCCTTGATCTGTGGGTCTATCATACACAAGGTTTCTTCCCTGATTGGGATAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGTTAGTACCAGTGGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAATAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAGGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCGTCACGTGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA -OQ092463,gag,801,2313,140,1642,forward,0.2589641434262949,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAGCGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAACACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGACCAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTCAGGAACCAGAGAAAGAATGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092463,pol,2105,5117,1435,4446,forward,0.14797358397500882,FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAGCAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCTATAGTAGGAGCAGAAACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGACCATGAGAAATATCACAGTAATTGGAGGGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGGTATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCAGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092463,vif,5061,5640,4391,4969,forward,0.32379004771642805,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTATTCATCCAAGAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAGGAGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATCGGTATTACTTTGATTGTTTTTCAGAATCTGCCATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG -OQ092463,vpr,5579,5870,4909,5199,forward,0.34013605442176864,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTCCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAACAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAACCAGTAGATCCTAG -OQ092463,tat_exon1,5850,6066,5180,5394,forward,0.41088854648176687,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ,ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAG -OQ092463,rev_exon1,5989,6067,5319,5394,forward,0.35828025477707015,MAGRSGDSDEELIKTVRLIKLLYQSS,MAGRSGDSDEELIKTVRLIKLLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT -OQ092463,vpu,6081,6330,5411,5656,forward,0.44033465433729635,MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL,MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL*,ATGCAACCTTTAGAAATATCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG -OQ092463,env,6244,8827,5574,8123,forward,0.44564023273792597,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL*,ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTATGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATACCACTAGTACCAAGAATACCACCCCTAGTACCACTGCTAGTAGCGGGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGAAGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAAGGATTCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTCTTGGAAGGATGAGTCAAATGGCACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAATAATGAGAGCAACACCACCGAGATTTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAGAGAAATTGACAATTACACAAGCTTGATATACACTTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGACAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCAACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA -OQ092463,tat_exon2,8408,8504,7705,7797,forward,0.4918032786885247,RPTPQPRGDPTGQKESEKKVERETETDPDH,RPTPQPRGDPTGQKESEKKVERETETDPDH**,AGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGA -OQ092463,rev_exon2,8409,8685,7706,7981,forward,0.3471418653089562,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE*,GACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092463,nef,8828,9461,8125,8751,forward,0.33768732280275404,MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC*,ATGGGTGGTAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGATTGCTGA -OQ092465,gag,855,2358,200,1699,forward,0.38383718162342295,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGGTGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAACTAGAACGGTTTGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAACAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCAACTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAATCTCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGACTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTGCCATAATGATGCAGAGAGGCAATTTCAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092465,pol,2150,5162,1492,4503,forward,0.21863141758600757,FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTTTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAAGACAGTTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAGACTCCTTAAGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGACAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAGCTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTTTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACCACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAAGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGTGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATACCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATACACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092465,vif,5106,5685,4448,5026,forward,0.3589413907639558,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGATTTATAAGCATCACTATGACAGTATTAATCCAAAAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCAAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCGACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGGTCTCTACAGTACTTGGCACTAACAGCACTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG -OQ092465,vpr,5624,5915,4966,5256,forward,0.2400808693454637,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTACACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCCTTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092465,tat_exon1,5895,6111,5237,5451,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAG -OQ092465,rev_exon1,6034,6112,5376,5451,forward,0.48200514138817474,MAGRSGDSDEELIKTVRLIKLLYQSS,MAGRSGDSDEELIKTVRLIKLLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT -OQ092465,vpu,6126,6375,5468,5707,forward,0.3726554787759131,MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL,MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL*,ATGCAACCTTTAGTAATATCAGCAATAGTAGCATTAGTAGTAGTAGCGATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGGGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG -OQ092465,env,6289,8881,5631,8207,forward,0.48107374453325313,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL*,ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGCGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTGTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACTGGTGCCAATAATACCACTAGTACCAATACTACCACCCCTAGTACCACTGTTAGTAGCGAGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGACGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAAACATAATAGTACATCTGAATAAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGAGCAGGTATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGGTGAAGGGTCAAACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTACTATTAACAAGAGATGGTGGTAACGAGAGCGAGACCACCGACACTGAGACCTTCAGACCTGTAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGATTAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAAATCTGACACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCAGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA -OQ092465,tat_exon2,8462,8558,7789,7881,forward,0.5337214944201844,RPTSQPRGDPTGQKESKEKVERETETDPDH,RPTSQPRGDPTGQKESKEKVERETETDPDH**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGA -OQ092465,rev_exon2,8463,8739,7790,8065,forward,0.41220115416323155,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092465,nef,8882,9515,8209,8841,forward,0.3463855421686747,MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTAAAATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAACCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA -OQ092462,gag,767,2270,789,2291,forward,0.316486214000789,MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAGCTAAAACATATAGTATGGGCAAGCAGGGAACTAGAGCGATTTGCAGTTAATCCCGGCCTGTTAGAAACATCGGAGGGCTGTAGACAAATACTAGGGCAACTACAGCCCGCTCTTCAGACAGGATCAGAAGAACTTAAATCACTATTTCATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTGAAAGACACCAAAGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAAGAAAAGTAAGAAAAAGGCACAGCAAGCAGCCGCTGACACAGGAAATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGACAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGATAAGGCTTTCAGTCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCAGGCAGGGCCTGTTGCGCCAGGCCAGCTACGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAGCAAATAGCATGGATGACACATAATCCACCTGTCCCAGTAGGAGAAATCTATAAAAGATGGATACTTCTGGGATTAAATAAAATAGTAAGAATGTACAGCCCCGTCAGCATTCTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCTGAGCAGGCTTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAGCAAATCCAGCTAGCATAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGCCTATTAAGTGTTTCAACTGTGGCAAAGAGGGGCATATTGCTAAAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092462,pol,2062,5074,2084,5095,forward,0.1943357603710517,FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTTTTAGAAGAAATGAATTTGCCAGGAAAATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGCTGCACTTTAAATTTTCCCATTAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAGAAAGATGGTAATAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCTGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGACTTCAGGAAGTATACTGCATTTACAATACCTAGCACAAACAATGAGACACCAGGGATTAGATACCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTGGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTATATGTAGGGTCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAACTGAGACAACATCTATTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCCGATAAATGGACAGTACAGCCTATATTGCTGCCAGAAAAAGACAGCTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATCAAAGTAAGGCAGCTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTGGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAACTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATATAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGAGGTACCCACACTAATGATATAAAACAATTAACAGAGGCAGTGCAAAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAATTACCCATACAAAAGGAAACATGGGAAGCATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGTCAATACCCCTCCCTTAGTGAAACTATGGTACCAGTTGGAAAAAGAACCCATAGTGGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTTACTGACAAAGGAAGACAAAAAGTTGTCCCCCTAACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAACTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAGTTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAGTTGTGACAAATGTCAGTTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCTGGAATATGGCAGCTAGATTGTACACATCTAAAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTGAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAACAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCAGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCGTCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACCAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATCAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092462,vif,5018,5597,5040,5618,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAGAGTTTAGTAAAACATCATATGCATATATCAAGGAAAGCTAAGAATTGGTTGTATAGACATCACTATGAAAGCATTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAGTAATAACAACATATTGGGGTCTGCTTACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAGGAGATATAGAACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCTTATTAGGACGTGTAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACCAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAG -OQ092462,vpr,5536,5830,5558,5849,forward,0.5799857380556208,MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS,MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS*P,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAACTTAAAAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTTCATGGATTGGGACAGCATATCTATGAAACATATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCGACATAGCAGAATAGGCATTAATCTACAGAGGAGAGCAAGGAATGGATCCAGTAGATCCTAGCCT -OQ092462,tat_exon1,5807,6026,5830,6044,forward,0.40192926045016075,MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ,MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ*,ATGGATCCAGTAGATCCTAGCCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGTTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGTAA -OQ092462,rev_exon1,5946,6024,5969,6044,forward,0.48200514138817474,MAGRSGDSDEELLKAVRLIKFLYQSS,MAGRSGDSDEELLKAVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGT -OQ092462,vpu,6038,6284,6061,6309,forward,0.5576513039199607,MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL,MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL*,ATGCGACCTTTAGAAATAGCAGCAATAGTAGCACTAGTAGTAGCAGTACTAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAGTGGATAGAATAAGAGAAAGAGCAGAAGATAGTGGAAATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAG -OQ092462,env,6201,8778,6224,8794,forward,0.5096088152339575,MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL,MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAGTGCCACAGAGAACTTGTGGGTCACAGTCTACTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAGATGCCAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAGCCCACAAGAAGTAGTATTGAAAAATGTGACAGAAAAGTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATCAGGATATAATCAGTTTATGGGATGAAAGCCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATGCTACTATCAGTGGTAATGCAACAGAAGAAATAAAAAACTGCTCTTTCAATGTCAATACAAAAATAGGAGGTAAGAAGCAGAAAGAACGTGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAGTACTAATAGGACTAATACCAGCTATAGGTTGATAAGTTGTAACACTTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTGGAGATAAAGAGTTCAATGGAACAGGACTATGTAGAAATGTCAGCACAGTCCAATGTACACATGGAATCAGGCCAGTAGTATCAACTCAATTGCTGTTGAATGGCAGTCTAGCAGAAGGAGAGGTAGTAATTAAATCTGAAAATATCACGAACAATGTTAAAACCATAATAGTACAGCTAAATGAAACTGTATCAATTAATTGTACAAGACCTAGCAACAATACAAGAAGAAGCATACATATGGGACCAGGGAGAGCCTTTTATGCAACAGGAGAAATAATAGGAGATATAAGGAAAGCACAGTGTATCCTGAATAAGACAGACTGGAGTGACACTTTAACAAGGATAGCTAAAAAATTACACAAGCAATTTCATGGACCAATAGCATTTGAGCAATCCTCAGGAGGGGACCCTGAAATTACAATGCACACTTTTAATTGTGGAGGGGAATTTTTCTACTGCAACACATCAGCGTTGTTTAGCGGGACCTGGAATGGTACTGCTTGGACTAATGCTACTTGGGGTAATATTGCAGGTAACAATATCACACTCCAATGCAGAATAAAACAATTTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGAGAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGCAGTAACACAACAAATGGTGGCGAGAATGGGACCCAGATTGGCGAGAATGTGACCCAGATCTTCAGACCTGGAGGAGGGGATATGAGGGACAATTGGAGAAGTGAATTATACAAATATAAAGTAGTAAAAATTGAGCCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTAACATTCGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACACTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTTCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAAAAAATTTGGGGGAACATGACCTGGATGGAGTGGGAGAGAGAAATTGACAATTATACAGACTTAATATACACCTTAATTGAACAATCGCAGAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAGGCTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGTTTAGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTATCATTCCAGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGTTTGCTTAATGCTATAGCTATATCAGTAGCGGAGGGAACAGATAGGATTATAGAAGCAATACAAAGAATTTGTAGAGCTATCTTACACATACCTACAAGGATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA -OQ092462,tat_exon2,8359,8455,8376,8468,forward,0.4542873367404884,RPASQPRGDPTGPKEPTKKVERETETDPDH,RPASQPRGDPTGPKEPTKKVERETETDPDH**,AGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGA -OQ092462,rev_exon2,8360,8636,8377,8652,forward,0.38952607660679506,DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE,DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE*,GACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092462,nef,8779,9406,8796,9416,forward,0.5105263157894737,MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC,MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC*,ATGGGTGGCAAGGGGTCAAAAATGAGGGGATGGGTTGCTGTAAGGGAAAAAATGAGGCGAACTAAGCCAGAAGATGAGCCAGCAGCAAATGGGGTGGGGGCAGCATCTCGAGACTTGGAGAAATATGGCGCACTCACAAGTAGCAATACAGTAGCTACTAATGCTGATTTAGCTTGGCTAGAAGCACAAGAGGAAGAGGAGGTGGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTCAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGGTTCACCACACACAAGGCTATCTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATCAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATAGAGGACCCGGAGAAAGAAGTCTTAATGTGGAAGTTTGACAGCCACCTAGCATTCCGTCACATGGCCCGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA -OQ092464,gag,773,2276,200,1699,forward,0.3910844507174782,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAGCCAGCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCGCTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTAAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCTCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092464,pol,2068,5080,1492,4503,forward,0.21863141758600757,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAATACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAGAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGTAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAGACTACCCATACAAAGAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGACGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAGAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092464,vif,5024,5603,4448,5026,forward,0.3264662839130924,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTACTCATCCAAGAATAAGTTCAGAAGTTCACATCCCGCTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAGAAGAGATATAGCACACAAGTAGACCCTGGCTTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGCTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG -OQ092464,vpr,5542,5833,4966,5256,forward,0.2678354029705382,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG -OQ092464,tat_exon1,5813,6032,5237,5451,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ*,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA -OQ092464,rev_exon1,5952,6030,5376,5451,forward,0.48200514138817474,MAGRSGDSDEELLQTVRLIKLLYQSS,MAGRSGDSDEELLQTVRLIKLLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT -OQ092464,vpu,6044,6290,5468,5707,forward,0.4384802297327147,MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL,MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL*,ATGCAACCTTTACACATAGCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAG -OQ092464,env,6207,8790,5631,8207,forward,0.4684887834843129,MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL,MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL*,ATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATAATACCACTAGTACCAATGATACCACTAGTACCACTACTAGTAGCGGGGAAAAGATGAAGGAAGGAGAGATAAAAAACTGCTCTTTCAATATCACCACAAGCATAAGAGATAAGGTGCAGAAAGAATATGCACTCTTTTATAAACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACGATAAGAAGTTCAATGGAACAGGATCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAATGAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAAGACAATAATCTTTACTCACTCCTCAGGAGGGGACCCAGAAGTTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGATGAAAGGTCAAATGACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGATACATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAACGAGAGCAACACCACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAAAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGGATAACATGACCTGGATGGAATGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACTTTAATTGAAAAATCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA -OQ092464,tat_exon2,8371,8467,7789,7881,forward,0.4971219256933542,RPTSQPRGDPTGQKESKKKVERETETDPDH,RPTSQPRGDPTGQKESKKKVERETETDPDH**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGA -OQ092464,rev_exon2,8372,8648,7790,8065,forward,0.3934495959166312,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG -OQ092464,nef,8791,9412,8209,8841,forward,0.36363636363636354,MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC*,ATGGGTGGTAAGTGGTCAAAGAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGATGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGGTATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGGTTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA -OQ092467,gag,808,2308,789,2291,forward,0.2880084183556755,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAAAAACAATACAAATTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTCTAGAGACATCAGAAGGGTGTAGACAAATACTGGGACAGCTACAACCAGCTCTTCAGACAGGATCAGAAGAATTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAAATAGAGGAAGAGCAAAACCAAAGTAAGAAAAAAGCAGCAGCTGCAGCAGCTGACACAGGAAACAGAAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCTTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAATAGGTGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCAGGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATTCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTGAGAGCCGAGCAAGCAACACAGGAAGTAAAGAATTGGATGACAGAAACTTTGCTGGTCCAAAATGCAAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCAGGCCACAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAACTGTAATGATGCAGAGAGGCAATTTTAGGAATCAAAGAAAGACAGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCATATAGCAAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAAGAAGGGCACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA -OQ092467,pol,2100,5112,2084,5095,forward,0.1880239208210378,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAAATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGATATGAATTTACCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCCTATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGGATATTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTGGGATCTGACTTAGAAATAGGACAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGATAGCTGGACTGTCAATGACATCCAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACTAAAGCACTAACAGAAGTAGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCAACGAAAGACCTAATAGCAGAACTACAGAAGCAGGGGCAAAGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTGAAACAATTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAAATTACCTATACAAAAAGAAACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGATTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAGATCAGGCAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCCCCCTAACAGACACAACAAACCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCACTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGGAAGATACTATTTTTAGATGGAATAACTAAGGCCCAAGATGATCATGAGAGATACCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCACGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAACTAGATTGTACGCATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTAAAACTAGCAGGAAGATGGCCAGTAAAGACAGTACATACAGATAATGGCAGCAATTTCACCAGTGCTGCGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAAAGTCAAGGAGTCATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAGAAACAAATCACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAGGCAAAGATCATTAGAGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -OQ092467,vif,5056,5635,5040,5618,forward,0.41532297468972923,MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATACATGTCTCAAAGAAAGCTAAGGGATGGGTTTATAGACACCACTATGAAAGCACCCATCCAAGAATAAGTTCAGAAGTACATATCCCGCTAGGGGAAGCTAGATTAGTAATAGCAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAAGAAATATATCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGCATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCATAGTAGGACGTTTAGTTAGCCCTCAGTGTGAATATCAAACAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGTAGCATTAATAACGCCAAAAAAGAGAAAGCCACCTTTGCCTAGTGTTAGGAAATTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAG -OQ092467,vpr,5574,5865,5558,5849,forward,0.5928358208955226,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAGAGCTTTTAGAGGAACTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAACTTAGGACAATACATCTATGCAACTTATGGGGATACTTGGACAGGAGTGGAAGCTTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGGAATGGAGCCAGTAGATCCTAG -OQ092467,tat_exon1,5845,6061,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAG -OQ092467,rev_exon1,5984,6062,5969,6044,forward,0.48200514138817474,MAGRSGDSDEDLLKTVRLIKQLYQSS,MAGRSGDSDEDLLKTVRLIKQLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGT -OQ092467,vpu,6076,6340,6061,6309,forward,0.5091759625764664,MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL,MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL*,ATGCAACCCTTAGTAATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAGAAAGATAGATAGCATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAG -OQ092467,env,6239,8807,6224,8794,forward,0.4718646278993922,MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL,MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL*,ATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAGTGCTAAAGAACAATTGTGGGTCACAGTTTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAAATGCTAAAGCATATGACCCAGAGGTGCATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAAGAAGTACCATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGACATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTATTTTAAATTGCACTAATGTGAATGTTACTACTAACAATAATAGTAGTAGTGAGGAACAGATGGAGGTAGGAGAAATAAAAAACTGCTCTTTCAATATTGCCACAAGAATAAAAAATAAGATAAAGAAAGAATATGCACTTTTTAATAGACTTGATGTAGTACCAATAGAGGATGATAATACAAGCTATATGTTGATAAATTGTAATACCTCAGTCACTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATACTAAAATGTAATGATAAAAAGTTCAATGGAACAGGACCATGTAACAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGATAGTAGTTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTGAATAAAACTATAGAAATTAATTGTATAAGACCCAACAATAATACAAGAAAAAGTATATCTTTAAGACCGGGGCAAGCAATTTATGCAACAGAAGACATAATAGGAAATATAAGACAAGCACATTGTAACATTAGGAGAAAAGACTGGGATAAAGCTTTAGAACAGGTAGTTGCAAAATTAAGAGAACAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGAGACCCAGAAATTGTAATGCATAGTTTTAATTGTGCAGGGGAATTTTTCTACTGTAACACAACAAAGCTGTTTAATAGTACTTGGAATGTTAATAACACTCGGAATAATACTACTGATAATAGCACCATCACTCTCCCGTGCAGAATAAAACAAATTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCTCCTCCCATCAAAGGGCAAATTAAATGTTCATCAAATATTACAGGGTTATTATTAACAAGAGATGGTGGTGTCCGCGAGGACAACGCCCCTGAGATCTTTAGACCTGGAGGAGGAGATATGAGGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTGGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAAAGAGGAAAAAGAGCAGTAACGCTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACTTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTACCTTGGAATACTAGTTGGAGTAATAAATCTTTGGAAAAGATTTGGAAAAACATGACCTGGATGGAGTGGGAGAAAGAAATTAACAATTACACAAGGACAATATACACCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAGGAATTATTGGAATTGGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATCATGATAGTAGGAGGTATAGTAGGGTTAAGAATAGTTTTTACTGTGCTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTGTCATTCCAGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAATTGGTAGAGCTATTCTCCACATCCCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA -OQ092467,tat_exon2,8388,8481,8376,8468,forward,0.4918032786885247,RPASQLRGDQTGPKEQKKKVERETETDPGN,RPASQLRGDQTGPKEQKKKVERETETDPGN*,AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAG -OQ092467,rev_exon2,8389,8674,8377,8652,forward,0.4123815285339786,DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC,DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC*,GACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAG -OQ092467,nef,8808,9417,8796,9416,forward,0.49080954243253805,MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC,MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC*,ATGGGTGGCAAGTGGTCAAAATGTAGTCTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCTCCAGCAGCAGAAGGGGTGGGAGCAGCATCTCGAGACTTGGAAAAACATGGAGCACTCACAACTAGTAATACAGCAGCTAATAATGCTGCTTGTGCCTGGCTGGAAGCACAAGAGGAGGAAGAGGTGGGGTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTATGGATCTTGGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCCAAAAAGACAAGAAATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTATACACCAGGGCCAGGGACTAGATATCCATTAACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATGAAGGAGAGAATAGCTGTTTGCTACACCCGATGAACCAGCATGGGGCAGATGACACAGAAAGAGAAGTATTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAAGGCCCGAGAGCTGCATCCGGAGTACTACAAAAACTGCTGA +KX505501.1,env,0,1824,6224,8795,forward,0.7626080297560442,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,vif,0,1824,5040,5619,forward,0.7647696476964769,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,nef,0,1824,8796,9417,forward,0.7645782478980201,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,tat_exon1,1,1750,5830,6046,forward,0.7680130480667754,MRKLQNGIDCIQCMQGLLHQAR,VSLVRPDLSLGALWLTREPTA*ASIKLALSASSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKGKPEELSRRRTRLAERAQQEARGGDW*VRRKFFD*RRLEGERWVRERQY*AGENWIDGKKFG*GQEEIKDIN*NI*YGQAES*NDSQLILAC*KHQKAVDKYWDSYNRLLRQDQKNLSLCIIQ*QSSIVYIKK*M*KTPRKP*TR*KKSKTKLGNKHSKPQLQEVAVRSATITL*CRIIRGKWYIRPCHQEL*MHG*K**RKRLSAQK*YPCFQHYQKEPPHKT*TPC*IQWGDIKQPCKC*KRPLMRKLQNGIDCIQCMQGLLHQAR*ENQGEVT*QELLVPFRNK*HG*QIIHLSQ*ERFIKDG*S*G*IK**ECIALSAFWT*DKDQRNLLETM*TGSIKP*EPNKPHRK*KIG*QKPCWSRMRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,GTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT +KX505501.1,gag,336,1824,789,2292,forward,0.3997973809613161,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,rev_exon1,1306,1750,5969,6047,forward,0.758082497212932,MRTQIVKLF,MRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,ATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT +KX505501.1,vpr,1599,1824,5558,5843,forward,0.7638478800047243,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,pol,1627,1927,2084,5096,forward,0.7724330674761569,GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,IL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,ATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA +KX505501.1,tat_exon2,1746,1824,8376,8469,forward,0.7616257781032589,RCIRSTTRTADTELFTRDFPLGTFQ,RCIRSTTRTADTELFTRDFPLGTFQ*,AGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,rev_exon2,1747,1927,8377,8653,forward,0.7620186257236345,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA +KX505501.1,vpu,1748,1778,6259,6310,forward,0.7688723205964585,MHPEYYKDC,MHPEYYKDC*,ATGCATCCGGAGTACTACAAGGACTGCTGA +MN691959,gag,639,2142,789,2292,forward,0.0801186943620179,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MN691959,pol,1934,4946,2084,5096,forward,0.054722889368558514,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN691959,vif,4890,5469,5040,5619,forward,0.09157509157509158,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG +MN691959,vpr,5408,5699,5558,5843,forward,0.5391891891891883,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MN691959,tat_exon1,5679,5898,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA +MN691959,rev_exon1,5818,5899,5969,6047,forward,0.4807692307692307,MAGRSGDSDEDLLKTVRLIKFLYQSSK,MAGRSGDSDEDLLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG +MN691959,env,6070,8656,6224,8795,forward,0.13638128518734216,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA +MN691959,vpu,6105,6156,6259,6310,forward,0.2321981424148607,MGVEMGHHAPWDIDDL,MGVEMGHHAPWDIDDL*,ATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG +MN691959,tat_exon2,8237,8333,8376,8469,forward,0.6672629695885509,RPTSQTRGDPTGPKE,RPTSQTRGDPTGPKE*KKKVERETETDPFD**,AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGA +MN691959,rev_exon2,8238,8514,8377,8653,forward,0.210025203024363,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE*,GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG +MN691959,nef,8657,9278,8796,9417,forward,0.08588605782994552,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA +MN692074,nef,0,4059,8796,9417,forward,0.763072203234748,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,WKG*FGPRKDKISLICGSTTHKATSLIGRTTHQGRGPDFH*PLDGASS*YQLIQRR*KRPMQERTTACYTL*ACMGWRTRRKKC*CGSLTAA*HFITWPESCIRSTTRIADFELSTRDFPLGTFQGGVAWAGLGSGEPSDAAYKQLLFACTGSLWLDQI*AWELSG*LGNPLLKPQ*SLP*VL*VVCARLLCDSGN*RSLRPF*SVWKISSSGARTGT*KRKRNQRSSLDAGLGLLKRARQEARGGDW*VRQKF*LAEARRREMGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*G*GGN*RKLY*IQEQMIQY*KK*ICQEDGNQK**GELEVLSK*DSMIRYS*KSVDIKL*VQY**DLHLST*LEEIC*LRLVAL*IFPLVLLKLYQ*N*SQEWMAQKLNNGH*QKKK*KH**KFVQKWKRKGKFQKLGLKIHTILQYLP*RKKTVLNGEN**ISENLIRELKTSGKFN*EYHIPQG*ERKNQ*QYWMWGMHIFQFP*IKNLGNILHLPYPVETMRHQGLDISTMCFHRDGKDHQQYSKVA*QKF*SLLENKIQK*LSINTWMIYM*DLI*K*GSIE*K*RN*DNIC*DGDLPHQTKNIRKNPHSFGWVMNSILINGQYSL*CCQKKTAGLSMTYRS*WEN*IGQVRFTQGLK*GNYVNSLGEPKH*QK*YH*QKKQSWNWQKTGKF*KNQYMEYIMTHQKT**QKYRSRGKVNGHIKFIKSHLKI*KQENMQK*GVPTLMM*NN*QRQCKK*PQKA**YGERLLNLNYPYKKKHGKHGGQSIGKPPGFLSGSLSIPLP**NYGTS*RKNP**EQKPSM*MGQLTGRLN*EKQDMLLTEEDKKLSP*LTQQIRRLNYKQFI*LCRIRD*K*T**QTHNMH*,TGGAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAG +MN692074,env,2,4115,6224,8795,forward,0.7604257801108195,MNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,vpu,2,2084,6259,6310,forward,0.7659115426105717,MGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ,EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*,GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAA +MN692074,gag,789,2292,789,2292,forward,0.19470123431286457,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MN692074,pol,2084,4115,2084,5096,forward,0.5617851221088768,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,vif,3617,4115,5040,5619,forward,0.7631664499349805,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,vpr,3617,4115,5558,5843,forward,0.7635778016363703,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,tat_exon1,3823,4084,5830,6046,forward,0.7659115426105717,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC +MN692074,rev_exon1,3823,4084,5969,6047,forward,0.7610789980732178,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC +MN692074,tat_exon2,4080,4164,8376,8469,forward,0.7699443413729128,IPSGEPSDAAYKQLLFACTGSLWLDQI,IPSGEPSDAAYKQLLFACTGSLWLDQI*,ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA +MN692074,rev_exon2,4081,4153,8377,8653,forward,0.7667894365645325,YPVASPQMLHISSCFLPVLGLSG,YPVASPQMLHISSCFLPVLGLSG*,TACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAG +MN692145,gag,775,2281,789,2292,forward,0.20784453738651432,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MN692145,pol,2070,5085,2084,5096,forward,0.14843087362171337,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN692145,vif,5029,5608,5040,5619,forward,0.2608047690014903,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG +MN692145,vpr,5547,5838,5558,5843,forward,0.587876570313453,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG +MN692145,tat_exon1,5818,6037,5830,6046,forward,0.28735632183908044,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA +MN692145,rev_exon1,5957,6038,5969,6047,forward,0.4274965800273598,MAGRSGDSDEELLKTVRLIKFLYQSSK,MAGRSGDSDEELLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG +MN692145,env,6212,8783,6224,8795,forward,0.47520309038232134,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA +MN692145,vpu,6253,6298,6259,6310,forward,0.3649167733674775,MEMGHHAPWDVDDL,MEMGHHAPWDVDDL*,ATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG +MN692145,tat_exon2,8364,8460,8376,8469,forward,0.3921568627450981,RPASQPRGDPTGPKESKKKVERETETDPLH,RPASQPRGDPTGPKESKKKVERETETDPLH**,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA +MN692145,rev_exon2,8365,8641,8377,8653,forward,0.29843322556577967,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG +MN692145,nef,8784,9387,8796,9417,forward,0.4049958673891082,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA +MN090335,gag,315,1665,789,2292,forward,0.596665989022159,MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ,MYTIEGGYCII**SKFF*SCLKGWL*LFQYLFTAS*CF*QARINCESF*FPACPYYMF*SIFFLSPWP*PNFLPSI*FSPAQY*RSRTHLSPSSLR*SKMAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ*,ATGTACACAATAGAGGGTGGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTTCCAATATCTGTTTACAGCCTCCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGTTCCCTGCTTGCCCATACTATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA +MN090335,pol,1427,4469,2084,5096,forward,0.27887169154684477,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN090335,vif,4413,4992,5040,5619,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG +MN090335,vpr,4931,5222,5558,5843,forward,0.6083541998634192,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG +MN090335,tat_exon1,5202,5421,5830,6046,forward,0.40192926045016075,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ*,ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAA +MN090335,rev_exon1,5341,5422,5969,6047,forward,0.524971623155505,MAGRSGDRDEDLLKTVRLIKFLYQSSK,MAGRSGDRDEDLLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAG +MN090335,env,5596,8158,6224,8795,forward,0.5139610675592354,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ*,ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA +MN090335,vpu,5643,5682,6259,6310,forward,0.5690703735881842,MGHDAPWDVDDL,MGHDAPWDVDDL*,ATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG +MN090335,tat_exon2,7739,7835,8376,8469,forward,0.6842105263157894,RPSSQPRGDQTGPKE,RPSSQPRGDQTGPKE*KKKVERETEADPED**,AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGA +MN090335,rev_exon2,7740,8016,8377,8653,forward,0.4267425320056898,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE*,GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG +MN090335,nef,8159,8813,8796,9417,forward,0.49485619884358334,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA +MN090376,gag,522,1590,789,2292,forward,0.6828741441147701,MYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP,MHTIEDCYCIK**FKFL*SCLKCWL*LSQYLSTAF*CF*QARINCESFQFPACPYYMF*LISFLSPWP*PNFLPFV*FSPA*YRRSRTHLSPSSLR*SKRRTHRSPPLASCCARFSKPSPIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP*SQ*,ATGCACACAATAGAGGACTGCTACTGTATTAAATAATGATTTAAGTTCCTCTGATCCTGTCTGAAGTGCTGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCCAGTTCCCTGCTTGCCCATACTATATGTTTTAACTTATATCTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATTTGTCTAATTCTCCCCCGCTTAATACCGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAAGGCGTACTCACCGTTCGCCGCCCCTCGCCTCTTGCTGTGCGCGCTTCAGCAAGCCGAGTCCGATAATTCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA +MN090376,pol,1382,4394,2084,5096,forward,0.23966680468616797,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAGAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTGACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGATGAGCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCTGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTCGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN090376,vif,4338,4920,5040,5619,forward,0.42479043044174425,MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH,MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAGCATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGACTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAAAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG +MN090376,vpr,4859,5150,5558,5843,forward,0.580763397371082,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*,ATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCTTGGCTTCATGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MN090376,tat_exon1,5130,5349,5830,6046,forward,0.46628407460545196,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ*,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGTAA +MN090376,rev_exon1,5269,5350,5969,6047,forward,0.5763097949886105,MAGRSGDSDEELLRIAGTIKFLYQSSK,MAGRSGDSDEELLRIAGTIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGTAAG +MN090376,env,5524,8110,6224,8795,forward,0.4972760674014952,MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAGGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAGCCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTAACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTAACATCAATAGCACTAATATAAACAATACCAATAGTATAGAAAGAGAAATGACAAACTGCTCTTTTAATGTCACCACAGTCATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAAACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGCTATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTAAAAATGTTAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAACACAGAAGTAAATATTATCACACTCCCATGCAAGATAAGGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACATTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA +MN090376,vpu,5568,5631,6259,6310,forward,0.6063492063492064,MGNHAHLDMGHHAPWDVNDL,MGNHAHLDMGHHAPWDVNDL*,ATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA +MN090376,tat_exon2,7691,7784,8376,8469,forward,0.4464285714285715,RPSSQPRGDPTGPKESEKKVERETETDPVT,RPSSQPRGDPTGPKESEKKVERETETDPVT*,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG +MN090376,rev_exon2,7692,7968,8377,8653,forward,0.4267425320056898,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE*,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG +MN090376,nef,8111,8735,8796,9417,forward,0.5093153589821267,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC*,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACATCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA +MK115581.1,gag,680,2180,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115581.1,pol,1972,4984,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115581.1,vif,4928,5507,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115581.1,vpr,5446,5737,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115581.1,tat_exon1,5717,5936,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115581.1,rev_exon1,5856,5937,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115581.1,env,6111,8652,6224,8795,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115581.1,vpu,6149,6194,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115581.1,tat_exon2,8233,8329,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115581.1,rev_exon2,8234,8510,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115581.1,nef,8653,9268,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115690.1,gag,777,2286,789,2292,forward,0.24076694150363465,MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAGTGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAGATTAAAGCATATCGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATAATGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTGTATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGGCGCAGGAAACAGCAGTCAGACCAGCACCAGCCAAAATTACCCTATAGTACAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTCTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACTAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCCACCAGCATTCTAGACATAAGACAAGGACCAAAGGAGCCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTAGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAGGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGTCATAAAGCAAGAGTTTTAGCGGAAGCAATGAGCCAAGCAACAAATTCAGCTGCCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAAAGTGTTAAGTGTTTTAATTGCGGCAAAGATGGGCACATAGCAAAAAATTGCAGGGCCCCTAGAAGAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTCCAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MK115690.1,pol,2078,5090,2084,5096,forward,0.185747174550021,FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGAAGATAGACCAGGACCTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGCACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATAGGGCCTGAGAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGACTTCAGAGAACTAAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGACAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAACCTTATAGAAAACAAAATCCAAACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACCTAGAAATAGGGCAGCATAGAATAAAAATAGAAGAACTGAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGACAAGTGGACAGTACAGCCTATAGCGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATCTACCCAGGAATTAAAGTAAGGCAATTATGTAAACTACTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAAAAGAAGCAGAGCTAGAATTAGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGAGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTGACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGGGAAAGACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAATCAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTATCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACATGCAATTTATCTAGCTTTGCAAGATTCGGGATTAGAAGTAAATATAGTAACAGACTCACAGTATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTGTTTTTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAGCAGCATACTTTATTTTAAAATTAGCAGGACGATGGCCAGTAGCAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACAGTTAAGGCCGCCTGCTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAACAATGAATTGAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACGTAATAGCAACAGACTTACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAACATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115690.1,vif,5034,5613,5040,5619,forward,0.39766435115272314,MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH,MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAAGATGGCACAGTTTAGTAAAACACCATATATATATTTCAGGGAAAGCTAGAGGATGGGTTTATAAACATCACTATGAAAACACTCATCCAAGAATAAGTTCAAAAGTATACATCCCACTAGGGGAAGCTAGACTGGCAGTAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGGAATATAGCACACAAGTAGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGTCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTTTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTATTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAG +MK115690.1,vpr,5552,5843,5558,5843,forward,0.5980919765166341,MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS,MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS*,ATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACACATCTATGAGACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAAGTCTGCAACAACTGCTGTTCATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGAATTACTCCACAGAGGAGAACAAGAAATGGAGCCAGTAGATCCTAA +MK115690.1,tat_exon1,5823,6042,5830,6046,forward,0.40192926045016075,MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ,MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ*,ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGTAA +MK115690.1,rev_exon1,5962,6043,5969,6047,forward,0.524971623155505,MAGRSGDNDEDLLKTVRFIKLLYQSSK,MAGRSGDNDEDLLKTVRFIKLLYQSSK,ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGTAAG +MK115690.1,env,6217,8800,6224,8795,forward,0.4988789237668163,MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ,MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ*,ATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCTACAGACCCCAACCCACAAGAAATAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTACATTGCACTAAGTTGGAGATTAATAGCACTAAGAAGACTAATAGCACTAATAATGGTACTAACATCAATGCCACTGATGATAGTTGGGGGGAAATGAAAAACTGCTCTTTCAATACCACTGCAAGCATAAGAGATAAGGTACAGAGAGAATTTGCGCTTTTTTATAAACTTGATATAGTACCAATAGATAATGATGATATCAACTATAGGTTAATAAGTTGTAACACCTCAGTCCTTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAAAATTTCAATGGAACAGGACAATGTAAAAATGTCAGCACAGTGCAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTCAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAAATCTGAAAATATCACAGACAATACTAAAACTATAATAGTACAGCTGAATGCATCTGTAGCAATTGTTTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGGCCAGGGAGAGCGTTTTATGCAGCAGGAGATATAATAGGAGACATAAGACGAGCACACTGTATCCTTAACAAAACAACATGGGATAACACAATAGAACAGGTAGCTAAAAAATTAAGAGAACAATTTGAGAATAAGACAATAGTCTTTAGTGAATCCTCGGGAGGGGACCCAGAAATTACAATGATTAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAGTACAACTGTTTAATAGTACTTGGCATAATAATGGGAGTAGTACTACAGGGTCAAGTAGCAGTGAAGGCAATATCACACTCCCATGCAAAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACCAATTAGCTGCGAGTCAAATATTACAGGGTTGCTACTAACAAGAGATGGTGGGAATGACGCTAACGGGAACAACACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGCGAAGTGAATTATATAGATACAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCACAGAGAAGAGTGGTACAGAGAGAAAAAAGAGCAGTGGGTCTCGGAGCCTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGCTTTGGGGTTGCTCTGGAAAACTCATCTGCAACACTGCTGTGCCTTGGAATACTAGTTGGAGTAACAAATCTCTGGATGATATTTGGCATAACATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAACATAATATACAGCTTAATTGAGGCATCGCAAACCCAGCAAGAAAAGAATGAACAAGAATTACTAGAATTAGACAAATGGGCAAGTCTGTGGAATTGGTTTAGCATATCAAACTGGCTGTGGTACATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTATACTTTCTATAGTGAATAGAGTTAGGAAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAGTGCTGTTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGAGTTATAGAAGGATTGCGCAGAGCTTTTAGAGCTATTATCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGAGCTTTGCAATAA +MK115690.1,vpu,6255,6300,6259,6310,forward,0.3649167733674775,MEMGHHAPWDVDDL,MEMGHHAPWDVDDL*,ATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAG +MK115690.1,tat_exon2,8381,8474,8376,8469,forward,0.5303030303030303,RPTSQPRGDPTGPKEPETKVESKTETDPLT,RPTSQPRGDPTGPKEPETKVESKTETDPLT*,AGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAG +MK115690.1,rev_exon2,8382,8658,8377,8653,forward,0.42982806877249113,DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE,DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE*,GACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAG +MK115690.1,nef,8801,9425,8796,9417,forward,0.46635769262600346,MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN,MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN*NC*,ATGGGTGGTAAATGGTCAAAATGTAGTATAGTTGGATGGCCTACTGTAAGGGAAAGAATAAGACGAGCAGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAAGCATGGAGCAATCACAAGTAGCAATGCTAACAATGCTGATTGTACCTGGCTGGAAGCCCAAAAAGAAGAGGAGGAGGTAGGCTTTCCAGTCAGGCCTCAGGTACCCTTAAGACCAATGACTTACAAGGCAGCCTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATAATTTACTCCCAAAAAAGACAAGATATTCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACTAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGAGAGGGTAGAAGAGGAGAATAAAAGAGAGAACCGCTGCTTGTTACACCCTATGAGCCAGCATGGGATGGATGACCCAGAGAAAGAAGTGCTACAGTGGAGGTTTGACAGCCGCCTAGCCTTTCACCACGTAGCCAGAGAGCTGCATCCGGAGTACTATAAGAACTAGAACTGCTGA +MK115571.1,gag,579,2079,789,2292,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115571.1,pol,1871,4883,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115571.1,vif,4827,5406,5040,5619,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115571.1,vpr,5345,5636,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115571.1,tat_exon1,5616,5835,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115571.1,rev_exon1,5755,5836,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115571.1,env,6010,8551,6224,8795,forward,0.4569687738004571,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115571.1,vpu,6048,6093,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115571.1,tat_exon2,8132,8228,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115571.1,rev_exon2,8133,8409,8377,8653,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115571.1,nef,8552,9167,8796,9417,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115514.1,gag,584,2084,789,2292,forward,0.3014827756125966,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115514.1,pol,1876,4888,2084,5096,forward,0.19298018391400085,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115514.1,vif,4832,5411,5040,5619,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115514.1,vpr,5350,5641,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115514.1,tat_exon1,5621,5840,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115514.1,rev_exon1,5760,5841,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115514.1,env,6015,8556,6224,8795,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115514.1,vpu,6053,6098,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115514.1,tat_exon2,8137,8233,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115514.1,rev_exon2,8138,8414,8377,8653,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115514.1,nef,8557,9172,8796,9417,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115488.1,gag,707,2207,789,2292,forward,0.3014827756125966,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115488.1,pol,1999,5011,2084,5096,forward,0.19298018391400085,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAATACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115488.1,vif,4955,5534,5040,5619,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGAGATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115488.1,vpr,5473,5764,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115488.1,tat_exon1,5744,5963,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115488.1,rev_exon1,5883,5964,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115488.1,env,6138,8679,6224,8795,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115488.1,vpu,6176,6221,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115488.1,tat_exon2,8260,8356,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115488.1,rev_exon2,8261,8537,8377,8653,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115488.1,nef,8680,9295,8796,9417,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACGGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115030.1,gag,176,1685,789,2292,forward,0.27304152847199525,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP*YQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA +MK115030.1,pol,1477,4489,2084,5096,forward,0.26443159013103534,FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115030.1,vif,4433,5012,5040,5619,forward,0.3903081914030819,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG +MK115030.1,vpr,4951,5242,5558,5843,forward,0.594871162618666,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGACATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115030.1,tat_exon1,5222,5441,5830,6046,forward,0.48954161103693805,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA +MK115030.1,rev_exon1,5361,5442,5969,6047,forward,0.5290287574606619,MAGRSGDSDEELLTAVRIIKRLYQSSK,MAGRSGDSDEELLTAVRIIKRLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG +MK115030.1,env,5616,8217,6224,8795,forward,0.510440428145289,MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*,ATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAGAAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAATACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGAAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAACCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA +MK115030.1,vpu,5660,5699,6259,6310,forward,0.6901936289818864,MGHLVPWDGDDM,MGHLVPWDGDDM*,ATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG +MK115030.1,tat_exon2,7798,7891,8376,8469,forward,0.4918032786885247,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,RPSSQPRGDPTGPKEQKKEVERETEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG +MK115030.1,rev_exon2,7799,8075,8377,8653,forward,0.40871934604904625,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG +MK115030.1,nef,8218,8860,8796,9417,forward,0.5478186258332784,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC*,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATACTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA +MK115498.1,gag,663,2163,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115498.1,pol,1955,4967,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115498.1,vif,4911,5490,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115498.1,vpr,5429,5720,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115498.1,tat_exon1,5700,5919,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115498.1,rev_exon1,5839,5920,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115498.1,env,6094,8635,6224,8795,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115498.1,vpu,6132,6177,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115498.1,tat_exon2,8216,8312,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115498.1,rev_exon2,8217,8493,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115498.1,nef,8636,9251,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115211.1,gag,250,1753,789,2292,forward,0.25132972351334526,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP*SQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAAGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACACAGGACATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCATTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAGCCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAAACCATTTTAAAAGCATTGGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAAATGCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA +MK115211.1,pol,1545,4557,2084,5096,forward,0.2540106951871657,FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED,FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAACTTTCCTCAAAGCAGGACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTCTGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAGAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCTGACATAGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATTTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAAGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTACCACCTGTAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGGCAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTGGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGTAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA +MK115211.1,vif,4501,5083,5040,5619,forward,0.40472673559822736,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGCTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACTGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG +MK115211.1,vpr,5022,5313,5558,5843,forward,0.5885636318531382,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*,ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAACTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGGGCAAGAAATGGAGCCAGTAGATCCTAG +MK115211.1,tat_exon1,5293,5512,5830,6046,forward,0.4691531785127845,MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ,MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ*,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGA +MK115211.1,rev_exon1,5432,5513,5969,6047,forward,0.564475664826894,MAGRSGDSDEELLRITRTIKFLYQNSE,MAGRSGDSDEELLRITRTIKFLYQNSE,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGAG +MK115211.1,env,5687,8198,6224,8795,forward,0.6631820277358986,MHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,MRVKGTKKNWQPSWRWGTMLIWGWATMLLGRSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDPEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTTINNTSSIEEGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSATITQACPKVSFEPIPIH*VQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRKAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACCCAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTACTATAAACAATACCAGTAGTATAGAAGAAGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCGCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTAGGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACACTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGTAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA +MK115211.1,vpu,5731,5794,6259,6310,forward,0.6405797101449275,MGNHAHLGMGHHAPWEVNDL,MGNHAHLGMGHHAPWEVNDL*,ATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAA +MK115211.1,tat_exon2,7779,7872,8376,8469,forward,0.4464285714285715,RPSSQPRGDPTGPKESEKKVERETETDPVT,RPSSQPRGDPTGPKESEKKVERETETDPVT*,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG +MK115211.1,rev_exon2,7780,8056,8377,8653,forward,0.4267425320056898,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE*,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG +MK115211.1,nef,8199,8823,8796,9417,forward,0.5148270181219111,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC*,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACCCAGATAAAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA +MK115158.1,gag,316,1819,234,1731,forward,0.38499312512276596,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP*SQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGACAAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAACTGGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCTGACCCAGGAAATAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCAGCCTTATCAGAAGGAGCCACCCCACAAGATTTGAACACGATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACTATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCGATTGCACCAGGCCAGATGAGAGAACCGAGGGGAAGTGACATAGCAGGAACCACCAGTACCCTTCAGGAGCAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTACAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGTCCAGCAGCTACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAACTCAGCTTCCGTGATGATGCAGAAAGGCAATTTTAGGAGCCAAAGAAAGATGGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCAAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAA +MK115158.1,pol,1611,4623,1526,4535,forward,0.30843043180260443,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED,FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAACTTTCCTCAAAGCAGAACAGAACCAACAGCCCCACCAGAGGAGAGCTTCAGTTTTGGGGAAAAGACAACAACTCCTTCTCAGAAGCAGGAGCCGATAGACCAGGAACTATACCCTATGTCTTCCCTCAAATCACTCTTTGGCAACGACCCTTAGTCACAGTAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTGTTAGAAGAAATGAGTCTACCAGGAAGATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAAACAGTATGATCAGATATCAGTAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAATATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTAAGGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGTACTAAATGGAGAAAGTTAGTAGATTTTAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCCTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATGGTTATATATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAGTAGAGGAACTGAGACAACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAACTGCCAGAGAAAGACAGCTGGACTGTCAATGATATACAGAAATTAGTGGGAAAGTTGAATTGGGCAAGTCAGATTTATCCAGGGATTAAAGTAAGGCAGTTATGTAAACTACTTAGAGGAGCCAAGGCGCTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATCCTAAAAGAACCAGTGCATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAACTTGAAAACAGGAAAATATGCAAGAACAAGGGGTGCTCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAGACTACCCATTCAAAAAGAAACATGGGAATCATGGTGGACAGAATATTGGCAAGCCACTTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACAAAATCAGGAAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAAGTTATCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAGGCACAACCAGATAAAAGCGAATCAGAGTTAGTCAATCAAATAATAGAGCAATTAATACAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGGGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTACTATTTTTGGATGGAATAGATAAGGCCCAAGAAGATCATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACATACCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCTAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCCCCAACTTCACCAGTAATGCAGTTAAGGCCGCCTGTTGGTGGGCAGGGGTCAAACAAGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTTGTAGAATCTATGAATAAACAATTAAAACAAATTATAGGACAGGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAGGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAAATTATTAGGGATTATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAA +MK115158.1,vif,4567,5149,4479,5061,forward,0.48399487836107546,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH,MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGGTTGTGTGGCAAGTAGACAGGATGAGGATTAACATGTGGAAAAGTTTAGTAAAATACCATATACATAATTCAGGGAAGGCTAGGGGATGGTGTTATAGACATCACTATGAAAGCAGACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGAGGAGAGGCTAGACTAGTAGTGACAACATATTGGGGCCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTAATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAATAAGGTAGGATCTCTGCAGTACTTGGCACTAATAGCATTAATACCACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTGAGGAAACGGACAGAGGATAGATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG +MK115158.1,vpr,5088,5379,5000,5291,forward,0.43053960964408733,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*,ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTGGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115158.1,tat_exon1,5359,5578,5271,5487,forward,0.4464285714285715,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ*,ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGA +MK115158.1,rev_exon1,5498,5579,5410,5488,forward,0.5622384937238494,MAGRSGDSDEELLKITRTIKFLYQNSE,MAGRSGDSDEELLKITRTIKFLYQNSE,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGAG +MK115158.1,vpu,5590,5860,5502,5748,forward,0.5374618963580942,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL,MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL*,ATGCAATCCTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTTGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA +MK115158.1,env,5753,8315,5665,8212,forward,0.5078662118966413,MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL,MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTGAAAATGTTAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGGGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTATATCTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA +MK115158.1,tat_exon2,7896,8004,7793,7901,forward,0.5826923076923078,RPSSQPRGDPTGPKESEKKVERETETDPVT,RPSSQPRGDPTGPKESEKKVERETETDPVT*RMDS*,AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAA +MK115158.1,rev_exon2,7897,8173,7794,8070,forward,0.45968205324650446,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE,DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE*,GACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGAGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAG +MK115158.1,nef,8316,8940,8213,8834,forward,0.5254870367657829,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC,MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC*,ATGGGAGGCATATTCTCAAAACGTAATGGGGGTGGGTGGCCTGCAATAAGGGAAAGAATGAGACGAGCTCAGCCAGCAGCAGATGGGGTAGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCTCTCACAAGTAACAATACACCAGCTAACAATGCTGATTGTGCCTGGCTGGAAGCACAGGAAGAGGGGGAAGAAGTGGGTTTCCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACATACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAACAAAGACAAGATATCCTTGATCTGTGGGTCTACAACACACAGGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAATGGAACCAGAGGATGTAGAAAAGGCCACTGAAGGAGAGGACAACATCTTGTTACACCCTGTGAACCAGCATGGGATGGACGACTCAGAGAGAGAAGTGTTAGTGTGGAAGTTTGACAGCCGCCTAGCATTCCGTCACGTGGCCCGAGAGATACATCCGGAGTACTACAAGAACTGCTGA +MK114705.1,gag,532,2047,789,2292,forward,0.27270615563298484,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGCGAATTAGATAGATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCGGTTAATCCTGGCCTGTTAGAAACATCAGAGGGCTGTAGGCAAATACTGGGACAGCTACAACCGTCCCTTCAAACAGGATCAGAAGAACTTAAATCATTATTTAATACAATAGCAGTCCTTTATTGCGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTCTAAATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGTCAGAGCAGTCAAGTCAGCCAAAATTACCCTATAGTGCAGAACCATCAGGGGCAAATGGTATATCAGGCTCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCCGAGGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAGGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTACATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGGACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCATGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTATCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCTTTTAGAGATTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAAGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTCTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCAGCCACAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGCAACAGGTGGTGCAACTAACATAATGATGCAGAAAGGCAATTTTAGGAACCAAGGAAAACCTATTAAGTGTTTCAATTGTGGCAAAGAAGGGCACCTAGCTAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCTCTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAA +MK114705.1,pol,1839,4851,2084,5096,forward,0.21944123990570308,FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTCCAGAACAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAGCTCCCCCTCAGAAGCAGGAGCCGACAGACAAGGAACTGTATCCCTTCTCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAGTAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAAATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTAAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAACCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCCGTATTTGCCATAAAGAAAAAGGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAGCTTAATAAAAGAACTCAAGACTTTTGGGAGGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTCTCAGTTCCTTTAGATGAAAGCTTCAGAAAGTATACTGCATTTACCATACCTAGTACTAACAATGAGACACCCGGGATTAGGTATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTTCAAAGTAGCATGACGAAAATCTTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTACCAATACATGGATGATTTATACGTAGGATCTGACTTAGAAATAGAGCAGCATAGAGCAAAAGTAGAGAACCTGAGAGAGCATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGCTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAGCTAAGGCACTAACAGAAGTGATAACACTAACAGAAGAAGCAGAGCTAGAATTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAGTAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACCTATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACAAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAAGCAGTTCAAAAAATAGCCACAGAGAGCATAGTAGTATGGGGAAAGATTCCTAAATTTAGATTACCCATACAGAAAGAAACATGGGAAACATGGTGGATGGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAGTACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGGCAAAAAGTTATCCCCTTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGACAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAGAAGGTCTACCTGACATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCAGGAATCAGGAAAATACTATTTTTGGATGGAATAGATAAGGCCCAGGAAGATCATGAGAAATATCATAGTAATTGGAAAGCAATGGCTAGTGATTTTAACATACCACCTGTGGTAGCAAAAGAGATAATAGCCAGCTGTGATAAATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTCCATGTAGCCAGTGGGTACATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAATAATACATACAGATAATGGTAGCAATTTCACCAGCACTACAGTCAAGGCCGCCTGCTGGTGGGCAGGTGTTAAGCAGGAGTTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGAGTACTGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAGGGACCAGCAAAGCTGCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAAATCAAAGTAGTACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK114705.1,vif,4795,5374,5040,5619,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTCTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGGTTTACAGACACCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAACACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATATGTACTATTTTGATTGTTTTTCAGAATCTGCTATAAGACAAGCCATATTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTGCAATACCTGGCACTAGCAGTATTAGTAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTCGCGAGACTGACAGAGGATAGATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAG +MK114705.1,vpr,5313,5598,5558,5843,forward,0.5871212121212123,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS*,ATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAGCTTAAGAGGGAAGCTGTTAGACATTTTCCTAGGGAATGGCTCCATAGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGGCAGGAGTAGGAGCCATAATAAGAATACTGCAACAATTACTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGGATAGGCATACTGAGGAGAACAAGAAATGGAGCCCGTAGATCCTAG +MK114705.1,tat_exon1,5578,5797,5830,6046,forward,0.5138258357408172,MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ,MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ*,ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGTAA +MK114705.1,rev_exon1,5717,5798,5969,6047,forward,0.5940834141610087,MAGRSGDRDEDLLETVRFIKFLYQNSK,MAGRSGDRDEDLLETVRFIKFLYQNSK,ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGTAAG +MK114705.1,env,5972,8549,6224,8795,forward,0.49969138043715056,MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL,MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL*,ATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAGTGCTACAAACATGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGATGCAACCACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGATACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCGACCCACAAGAAGTAGTACTGGAAAATGTGACAGAAAATTATAATATGGGAAAAAATAACATGGTGGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTACTCTTAACCCCATTCTGTGTCACTTTAAATTGCACTGATGCTAACATCACCAGCACTAATAATAGTAGAGATAAGAAGGAAGGAGAAAGTACATTGGAGGAGACGAAAGGAGAAATAAAAAACTGCTCTTTCAATATGACTTCAAGCATGAGCGATAAGTCTCAGAAACAACGTGCACTTTTTTATAAGCTTGATGTGGTACAAATAGATGAGACTAATAATAATAGTTATAGGTTGATAAGTTGTAACACCTCAGTCGTCACACAGGCTTGTCCAAAGGTATCCTTTGATCCAATCCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGAAATTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAACCTGTAGTGTCAACCCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAAGTAATGATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTGCAGCTGAAGACACCTGTACAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGGATAAGTATGGGACCAGGGAGAGTAATTTATGCAACAGGACAAATAATAGGAGATATAAGAAAAGCACATTGCAACATTAGTAGAGCAGAATGGAATACAACTTTAAAGCAGATAGTTACACAATTAAGAAAGCAGTGGAATAGAACCATAATCTTTAACTCATCCTCAGGAGGGGACCCAGAAATTGTGATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACAAAACTATTTAATAGTACTTGGCCACGTAATAGTACTTGGAATAATACTGAAGGGTCAAATGACACTGAAATAATCACACTCCCGTGCAGAATAAAACAAATTGTAAACAGGTGGCAGGAAGTAGGCAAAGCAATGTATGCCCCTCCCATCCAAGGACAAATTAGTTGTTCATCAAATATTACAGGGCTGCTACTAGTTAGAGATGGTGGAATTAACACCAGTGAGAGCAACGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAGGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATGCTGGGAGCTATGTTCCTTGGGCTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGTTGACGGTACAGACCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGACTCCTAGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAACACTAGTTGGAGTAATAGATCTTATGAAGATATTTGGAACAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAGGCTTAATATACACCTTAATTGAAAAATCGCAGAACCAGCAGGAAATAAATGAACAAGAACTATTGTCATTGGATAAGTGGGCAAGCCTGTGGAATTGGTTTAATATAACAAATTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAGTGCTATCAGCTTGCTCAACGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGAATCATAGGAGTAGTACAAAGAACTTGGAGAGCTTTTATCCACATACCTAGGAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA +MK114705.1,vpu,6010,6055,6259,6310,forward,0.5970755483346872,MEMGRLAPWNVDDL,MEMGRLAPWNVDDL*,ATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAG +MK114705.1,tat_exon2,8130,8226,8376,8469,forward,0.6074396517609815,RPPAQPQGDPTGPKKSKKEVEKETETDQCD,RPPAQPQGDPTGPKKSKKEVEKETETDQCD**,AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGA +MK114705.1,rev_exon2,8131,8407,8377,8653,forward,0.47688921496698455,DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE,DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE*,GACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAG +MK114705.1,nef,8550,8985,8796,9417,forward,0.6054615099468668,MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK,MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK*,ATGGGTGGAAAATGGTCAAAAAAGAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAAAATGAAGCGAACTGAGCCAGCAGCAGAGGGGGTGGGAGCAGCATCTCGAGACCTGGACAAATATGGAGCAATCACAAGTAGCAATACAGCACAGACCAATCCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAAGAGGTAGGCTTTCCAGTCAGACCCCAGGTACCTTTGAGACCAATGACTTACAAGGCAGCTGTGGATATGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAGAGACAAGATATCCTTGATCTGTGGATCTATCACACACAAGGCTACTTCCCTGATTGGCAAAATTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGAGGGTGCTTCAAGTAG +MK114856.1,gag,120,2022,789,2292,forward,0.7511039743075072,MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC,MLHISSCFLPVLGLSGWTRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSGSNLVQPERPSSSGARTGT*KRK*NQRRSLDAGLGLLKRARQEAKGGDW*VRHF*LAEARRRKIGARASVLSRGELDR*EKIQLRPRRKKKYRLKHIV*ASKELERFAVNPGLLETSGGCKQILEQLQPSLQTRSEELRSLYNTVATLYYVHQKIDVKDTKEALDKVEEEQNKSKKKAQQAAADTRNRGQTSQNFPIVQNLQGQIVHQAISPRTLNA*VKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVSRHQAAMQMLKKVINEEAAE*DRLHPVHARPIAPGQIREPKRSDIARTTSTLQEQIR*MTHNPPIPVRKIYKR*IILGLNKIVKMYSPTSILDIKQRPKEPFRDYVDQFYKTLRAKQATQKVKN*MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC*KYRKERHQMKNCTKRQANFLKKIWPSHKGRPENFPQSRPEIPQSRPEPTAPPAPPEKSFKFEEATTPSQKQETIDKELYPLTSLRSLFGNDPSSQ*,ATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACTGGGTCTCTCTGGTTGGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGTTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGGCTCAAATCTGGTCCAACCAGAGAGACCCAGTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGTAGAACCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAAAGGCGGCGACTGGTGAGTACGCCATTTTTGACTAGCAGAGGCTAGAAGGAGAAAGATAGGTGCGAGAGCGTCAGTATTGAGCAGAGGAGAATTAGATAGATAGGAGAAAATTCAGTTAAGGCCAAGGAGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAAGGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGGAGGCTGTAAACAGATATTAGAACAGCTACAACCATCCCTTCAGACAAGATCAGAAGAACTTAGATCATTATATAATACAGTAGCCACCCTCTATTATGTACATCAAAAGATAGATGTAAAAGACACCAAAGAAGCGTTAGACAAAGTAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCGGCAGCTGACACAAGAAACAGAGGCCAGACCAGTCAAAATTTCCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAGGCCATATCACCTAGAACTTTAAATGCATAAGTAAAAGTAGTAGAAGAAAAAGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACTATGCTAAACACAGTAAGTAGACACCAAGCAGCTATGCAAATGTTAAAAAAGGTCATCAATGAAGAAGCTGCAGAATAAGATAGATTACATCCAGTGCATGCAAGGCCTATTGCACCAGGCCAGATAAGAGAACCAAAAAGAAGTGACATAGCAAGAACTACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAAGAAAGATTTATAAAAGATAAATAATTCTAGGACTAAATAAAATAGTAAAAATGTATAGCCCTACCAGCATTTTAGACATAAAGCAAAGGCCAAAAGAACCCTTTAGAGACTATGTAGACCAGTTCTATAAAACTTTAAGAGCCAAGCAAGCTACACAGAAAGTAAAAAATTAGATGACAAAAACCTTGTTAGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAAAAAGTAAGAAGACCCGGCCATAAAGCAAAAGTTTTAGCTGAAGCAATGAGCCAAGCAACAGGTGCAGCCAACATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAAAGCACATAGCCAAAAATTGCAAGGCCCCTAAGAAAAAAGGCTGTTAGAAATATAGAAAAGAAAGACACCAAATGAAAAATTGCACTAAGAGACAGGCTAATTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA +MK114856.1,pol,1787,4826,2084,5096,forward,0.7637180771917039,ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI,FFKENLAFPQRKARELSSKPTRNSSEQTRANSPTSPSRKELQV*RSNNSLSEAGDNRQGAVSSNFPQITLWQRPIVTIKIREQLKKALLNTGADDTVLEDIDLPRK*KPKMIRRIRSFIKVRQYEQVPIEISRHKAISTVLVGPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPRMDGPKVKQ*PLTEEKIKALVEICTEIEKEKKISKIRPENPYNTPVFAIKKKDGTK*KKLVDFRELNKKTQDF*EIQLSIPHPAKLKKKKSVTVLDVGDAYFSVPLDKDFKKYTAFTIPSINNETPKIRYQYNVLPQR*KRSPAIFQSSMTKILEPFRKTNPDIVIYQYIDDLYVRSDLEIRQHRTKVKELRQHLMR*RFTTPDKKHQKEPPFL*MRYELHPDKWTVQPIVLPEKES*TVNDIQKLVRKLN*ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI*RRTPKFRLPIQKET*ET*WTDYWQAT*IPK*EFVNTPPLVKL*YQLEKEPIIRAETFYVDKAANKDNKSRKARYVTDRRRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALKIIQAQPDKSESELVSQIIEQLIKKKKVYLA*VPAHKRIKRNEQVNKLVSARIKKVLFLDKIEKAQEDHKKYHSN*RTMASNFNLPPIVAKEIVASCDKCQLKKEAMHRQVDCSPGIWQLDCTHLEEKIILVAVHVASRYIEAEVIPAETRQETAYFILKLARRWPVKTIHTDNGRNFTSNTVKAAC**AKIKQEFSIPYNPQSQEVVKSINNELKKIIRQVKDQAKHLKTAVQMAVFIHNFKRKGGIEGYSAEERIVDIIATEIQTKELQKQITKIQNFQVYYKDSRDPL*KGPAKLLWKGEKAVVIQDNSDIKVVPRRKAKIIKDYKKQMASDDCVASRQDED*,TTTTTTAAAGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGAGAACTTTCCTCAAAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAAAGAGCTTCAAGTTTGAAGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATAGACAAGGAGCTGTATCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAAGAGAGCAACTAAAGAAAGCTTTATTAAATACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAAGAAAATAGAAACCAAAAATGATAAGAAGAATTAGAAGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCAGACACAAAGCTATAAGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTAGAAGAAATCTGTTGACTCAGCTTAGTTGCACTCTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAAGAATGGACGGCCCAAAAGTTAAACAATAGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATAGAAAAAGAAAAGAAAATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATAGAAAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAACTCAAGATTTCTAAGAAATTCAATTAAGTATACCACATCCTGCAAAGCTAAAAAAGAAAAAATCAGTCACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAAGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAAAGATTAGATATCAGTATAATGTGCTTCCACAAAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTAGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAAGGCAACATAGAACAAAAGTAAAGGAACTGAGGCAACATCTAATGAGGTAAAGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTAGATGAGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAAGAAAGTTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAATTGAATTAGGCAAGTCAGATTTATGCAAAGATTAAAGTGAAGCAATTATGTAAGCTCCTTAAAAGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAAAGATTCTAAAAGATCCAGTACATAGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAAAGAGAAGGTCAGTGGACATATCAAATTTATCAAAAGCCATTTAAAAATCTAAAAACAGAGAAATATGCAAGAACGAAAGGTGCCCATACTAATGATGTAAAGCAATTAACAGAAGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATAAAGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATAAGAAACATAGTGGACAGATTATTGGCAAGCCACCTAGATCCCTAAGTAAGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATAGTACCAGTTAGAAAAAGAACCCATAATAAGAGCAGAAACCTTCTATGTAGATAAGGCAGCTAATAAAGATAATAAATCAAGAAAAGCAAGATATGTTACTGACAGAAGAAGACAAAAAGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAAGATTCAGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAAAGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGAAAAAAGTCTACCTGGCATAAGTGCCAGCCCACAAAAGAATTAAAAGAAATGAACAGGTAAATAAACTAGTCAGTGCTAGAATCAAGAAAGTACTATTTTTAGATAAAATAGAAAAAGCCCAAGAAGACCATAAAAAATATCACAGTAATTAAAGAACAATGGCTAGTAATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAAAAGAAGCTATGCATAGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAAGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAAGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTAGTAGGCAAAAATCAAGCAAGAATTTAGTATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAAAATCTATAAATAATGAATTAAAGAAAATTATAAGACAAGTAAAAGATCAGGCTAAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGAAGGATACAGTGCAGAGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCAGGTTTATTACAAGGACAGCAGAGATCCACTTTAGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAAAGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAAGGATTATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAG +MK114856.1,vif,4425,5349,5040,5619,forward,0.7566838361540349,MIVWQVDRMKIRTWKSLVKYHMYISKKAKK,MN*RKL*DK*KIRLNILRQQYKWQYSSTILKEKGGLKDTVQRKE**T**QQKYRLKNYKNKLQKFKIFRFITRTAEIHFRKDQQSFSGKVKRQ**YKITVT*K*CQEEKQKSLRIIKNRWQVMIVWQVDRMKIRTWKSLVKYHMYISKKAKK*AYRHHYETTHPRISSEVHIPLGGARLVITTY*GLHTGEKDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVRSLQYLALTALITPKKIKPPLPSVKKLTEDR*NKPQKTKGHRKSHTMNRH*,ATGAATTAAAGAAAATTATAAGACAAGTAAAAGATCAGGCTAAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGAAGGATACAGTGCAGAGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCAGGTTTATTACAAGGACAGCAGAGATCCACTTTAGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAAAGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAAGGATTATAAAAAACAGATGGCAAGTGATGATTGTGTGGCAAGTAGACAGGATGAAGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAGAAATAGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGAGGTGCTAGATTAGTAATAACAACATATTAAGGTCTGCATACAGGAGAAAAAGACTGGCATTTAGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAG +MK114856.1,vpr,5033,5579,5558,5843,forward,0.7661862444925697,MLFIHFRIKCHHSRIGIVLQRRARNRASRS,MEKKEI*HTSRP*LSRPSNSCALF*LFFRICYKKCHIRT*S*P*V*ISSRT*QSKISTILGTNSINNTKEDKATFA*CKETDRR*IEQAPEDQRPQKEPYNE*TLELLEELKREAVRHFPRPWLQNLRQYIYETYKDTWTRVEAIIRILQQMLFIHFRIKCHHSRIGIVLQRRARNRASRS*,ATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAAACCTAAGACAATATATCTATGAAACTTATAAAGATACTTGGACAAGAGTAGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAG +MK114856.1,tat_exon1,5486,5780,5830,6046,forward,0.7619181418001311,MLFIHFRIKCHHSRIGIVLQRRARNRASRS,MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAVN,ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAAT +MK114856.1,vpu,5486,6206,6259,6310,forward,0.7713277742142399,MRYLCKEMQIPLYSVHQMLKHMIQKYIMFRPHMPVYPQTLTHMK,MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAVNNTCNATFKDISNSSISSSSNNSNSCVDHSRHKI*ENIKTKKNR*NN**NKKKSRRQWQ*ERKRSGRIVSTCSRNKARCSLRC**SVVLRKICKSQSIMRYLCKEMQIPLYSVHQMLKHMIQKYIMFRPHMPVYPQTLTHMK*,ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAATAATACATGTAATGCAACCTTTAAAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGACCATAGTAGGCATAAAATATAAGAAAATATTAAGACAAAGAAAAATAGATAGAATAATTAATAGAATAAGAAAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTAAGTCACAGTCTATTATGAGGTACCTGTGTAAAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAG +MK114856.1,rev_exon1,5698,5779,5969,6047,forward,0.524971623155505,MAGRSGDRDEDLLKTVRLIKFLYQSSK,MAGRSGDRDEDLLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAA +MK114856.1,env,5953,8521,6224,8795,forward,0.7529994904340572,MTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD,MRAKKIRKNCQHL**K*GTMLLKMLMICSAAENL*VTVYYEVPV*RDANTTLFCASDAKAYDTEVHNV*ATHACVPTDPNPHEVELKNVTENFNM*KNNMVDQMHEDIINL*DQSLKPCAKLTPLCVTLNCTDLKNNTVGNQTNYHLNETNTIQRKEMTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD*SDTLSKIVEKLKEKFRKNKTIIFKQSSREDIEIETHSFNCREEFFYCNTTRLFNST*SVNRTSINRTNNKNITLPCRIKQIINRWQEVRKAMYAPPISKIIRCSSNITGLILTRDSSTTNSKEETFRPRERNMKDN*RSELYKYKVVKIEPLKVAPTKAQRKVVQREKRAIRTLGAMFLRFLRTAGSTIGAASLTLTVQARQLLSGIVQQQNNLLKAIEAQQHMLQLTV*GIKQLQARVLSVERYLQDQQLLKI*SCSRKLICTTTVP*NTS*SNKSYSTI*DNMT*MQ*DREIQNYTKIIYNLLKESQIQQKKNEKELLELDQ*ANL*N*FSITKWL*YIKIFIMIVGGLVSLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRRPDRPERIEEEGEKRDRGRSRRLVTRFLPLI*DDLQSLCLFSYHHLKDLLLIVLKTVQILGHKK*EILKY**SLLQY*IQELKNSAVSLLNTIAIAVAERTDKVIEVRQKISRAFLHIPRKIRQGLEKALQ*,ATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTAAGTCACAGTCTATTATGAGGTACCTGTGTAAAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTAGAAAAATAACATGGTAGACCAGATGCATGAGGATATAATCAATTTATGAGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAAAAATAATACTGTAGGAAATCAAACAAATTATCATCTCAATGAAACTAATACAATACAAAGAAAAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAATATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAAAGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTAGTTTTGCAATTCTAAAGTGTAAAGATGAGATGTTCAATAGAACAAGACCATGTAAGAATGTCAGCACAGTACAATGTACACATAGAATTAGACCAGTAGTGTCAACTCAACTGCTGTTAAATAGTAGCCTAGCAGAAAAAAAGATAGTACTTAGATCTGAAAATTTCACAGACAATACTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAAAGAGAGCAATTTATGCAACAAGACAGATAATAGAAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGAAGTGACACTTTAAGCAAAATAGTTGAAAAATTAAAGGAAAAATTTAGAAAAAATAAAACAATAATCTTTAAGCAATCATCAAGAGAGGACATAGAAATTGAAACGCACAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGAAGTGTTAATAGAACTAGCATAAACAGAACTAACAATAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATCAACAGGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCTATCAGTAAGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATAGTAGTACAACTAATAGTAAAGAAGAGACCTTCAGACCTAGAGAAAGAAATATGAAGGACAATTAGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAAAAGTAGCACCCACCAAGGCACAAAGAAAAGTAGTGCAGAGAGAAAAAAGAGCAATAAGAACGTTAGGAGCTATGTTCCTCAGGTTCTTAAGAACAGCAGGAAGCACTATAGGCGCAGCGTCACTGACGCTGACAGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTAAGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTAGAAAGATACCTACAAGATCAACAGCTCCTGAAGATTTGAAGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATACTAGTTAGAGCAATAAATCTTACAGTACCATCTAAGATAACATGACCTAGATGCAGTAGGACAGAGAAATTCAAAATTACACAAAGATAATATACAACTTACTTAAAGAATCGCAAATCCAACAGAAAAAGAATGAAAAAGAATTATTAGAACTAGATCAATGAGCAAATTTGTAGAATTAGTTTAGTATAACAAAATGGCTATAGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAAGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAAAGAACAGATAAAGTTATAGAAGTAAGACAAAAAATTAGCAGAGCTTTTCTCCACATACCTAGAAAGATAAGACAAGGCTTAGAAAAGGCTTTGCAATAA +MK114856.1,nef,7916,9176,8796,9417,forward,0.7587548638132295,MSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,MKKNY*N*INEQICRISLV*QNGYSI*KYS****EA**V*K*FLLYFL**IELGKDTHHYHCRPSSQPQEDQTGPKE*KKKVKRETEADPED**LDSYHSSKTTCKACAFSATTT*KTYS*LC*RLCKF*DTRSKRSSSTSRVSCSIKFRN*RIVLLACSIP*LSQ*LKEQIKL*K*DKKLAELFSTYLER*DKA*KRLCNKISGK*SKSCMAR*PAVKERIERVNPRPAAKKEQAEPAAAKVRAASRDLEKYRAITSSNTSTTNAACA*LEAQEEEEVGFPVRPQVPLRPMTYKAALNLSHFLKEKGGLEGLI*SQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRNPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC*,ATGAAAAAGAATTATTAGAACTAGATCAATGAGCAAATTTGTAGAATTAGTTTAGTATAACAAAATGGCTATAGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAAGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAAAGAACAGATAAAGTTATAGAAGTAAGACAAAAAATTAGCAGAGCTTTTCTCCACATACCTAGAAAGATAAGACAAGGCTTAGAAAAGGCTTTGCAATAAAATAAGTGGCAAGTAGTCAAAAAGTTGTATGGCTAGATAGCCTGCTGTAAAAGAAAGAATAGAAAGAGTTAATCCAAGGCCTGCTGCAAAGAAAGAACAAGCTGAGCCAGCAGCAGCTAAGGTAAGAGCAGCATCTCGAGACTTAGAAAAATATAGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTAGCTAGAAGCACAAGAGGAAGAAGAAGTAGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTAAATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTAGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGAAATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATAGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA +MK114856.1,tat_exon2,8102,8198,8376,8469,forward,0.7134052388289676,RPSSQPQEDQTGPKE,RPSSQPQEDQTGPKE*KKKVKRETEADPED**,AGACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGA +MK114856.1,rev_exon2,8103,8379,8377,8653,forward,0.6888374145157732,ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE,DPPPNPKKTRQARKNRRRR*KERQRQIQKISD*ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE*,GACCCTCCTCCCAACCCCAAGAAGACCAGACAGGCCCGAAAGAATAGAAGAAGAAGGTGAAAAGAGAGACAGAGGCAGATCCAGAAGATTAGTGACTAGATTCTTACCACTCATCTAAGACGACCTGCAAAGCCTGTGCCTTTTCAGCTACCACCACTTAAAAGACTTACTCTTGATTGTGCTGAAGACTGTGCAAATTCTAGGACACAAGAAGTAAGAGATCCTCAAGTACTAGTAGAGTCTCCTGCAGTATTAAATTCAGGAACTAAAGAATAG +MK115009.1,gag,302,1715,789,2292,forward,0.7484174646972894,MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC,MGARASVLSGGKLDR*EKIYLRPEGKKKYRLKHIV*ASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQIVHQPLSPRTLNA*VKVIEEKAFSPEVIPMFSALSERATPQDLNTMLNTVRGHQAAMQMLKETINDEAAE*DRLHPVHAGPIAPGQMKEPRGSDIAGTTSTLQEQIR*MTHNPPIPVGEIYKRWIILRLNKIVRMYSPVSILDIRQRPKEPFKDYVDRFYKTLKAEQASQDVKN*MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC*KCRKERHQMKDCTKRPD*DG*FFKEDLAFPQGKAREFSPEQTRANSPASRELQV*,ATGGGTGCTAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGAAGGAAAGAAAAAATATAGATTAAAACATATAGTATAGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATAGTACATCAACCATTATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAAGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATAGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAAGAACCAAGAGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACACATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAAGGCTAAACAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAAGAACCCTTTAAAGATTATGTAGACCGATTCTATAAAACTCTAAAGGCTGAGCAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAAGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATAGTGCAAGGAGGCAATTTTAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAAGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAAAAATGTAGAAAGGAAAGACACCAAATGAAAGATTGTACTAAGAGACCAGACTAAGACGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGA +MK115009.1,pol,1613,4625,2084,5096,forward,0.7535201229073285,AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED,FFKEDLAFPQGKAREFSPEQTRANSPASRELQV*GRGNSSSSEAGDERPRTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEINLPGK*KPKMIKRIRSFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKSGIDSPRVKQWPLTEEKIKALIEICAEIEKEKKITKIRPENPYNTPVFAIKKKDSTK*KKLVDFRELNKRTQDF*EVQLRIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPEVKYQYNVLPQR*KRSPAIFQSSMTKILEPFRKENPDIVIYQYIDDLYVRSDLEIEQHRTKIEELRQHLLR*GLTTPDKKHQKKPPFL*ISYELHPDK*TVQPIQLPDKDS*TVNDIQKLVRKLN*ASQIYPEIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQRRDQ*TYQIYQEPFKNLKTEKYARTRGAHTNDVKQLTEAVQKIALEAIVI*RKTPKFKLPIQKET*KMWWTKYWQAT*IPE*EFVNTPPLVKL*YQLEKEPIVRAETFYVNKAANRKTKLRKAEYVTDRRRQKVVSLIDTTNQRTKLHAIHLALQDSGSTVNIVTDSQYALKIIQAQPDKSESELVSQIIEQLIKKEKIYLA*VPAHKRIEGNEQVDKLVSNRIRRVLFLDRIDKAQEEHEKYHSN*RAMASNFNLPPVVAKEIVASCDKCQLKGEPMHRQVDCSPGIWQLDCTHLERKIILVAVHVASRYIEAEVIAAETGQETAYFILKLAGR*PVKIIHTDNGSNFTSTTVKAAC**AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED*,TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGAGGAAGAGGCAACAGCTCCTCCTCAGAAGCAGGAGACGAAAGACCAAGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATGATAAAAAGAATTAGAAGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACGCCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGTCAGGAATAGATAGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAGGAAAAGAAAATTACAAAAATTAGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAAAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTAAGAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGAAGTTAAATATCAGTACAATGTGCTTCCACAGAGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAGGTGAGGACTCACCACACCAGACAAGAAACATCAGAAAAAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTGATAAATAGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTGAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGAGGAGAGACCAATAGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGAAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAGAAAATGTGGTGGACAAAGTATTGGCAAGCCACCTAGATTCCTGAGTAAGAATTTGTCAATACCCCTCCCTTAGTAAAACTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAAATAAGGCAGCTAATAGAAAGACTAAATTAAGAAAAGCAGAGTATGTTACGGACAGAAGAAGACAAAAGGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAGATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTGAAGGAAATGAACAAGTAGATAAATTAGTCAGTAATAGAATCAGAAGAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAGAGAGCAATGGCTAGTAATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATAGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATAGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAAGCCGCCTGTTAGTAGGCAGAGATCAAGCAGGAATTTAGTATTCCCTACAATCCTCAAAGTCAAAGAGTAGTAAAATCTATGAATAATAAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCATAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGAGATTATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115009.1,vif,4289,4988,5040,5619,forward,0.7624714704923379,MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED,MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED*NIEQFSKTPYAYFKESSEIGLQTSL*KP*SKNKFRSTHPIRGSKIGNKNILRSAYRRKRLAFRPGSIHRMEEKEI*HTSRPWPGRPTNSPVLF*LFFRICYKECHIRT*S*S*V*ISGRT*,ATGGCAGTATTCATTCATAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGAGATTATAGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATAGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATAGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGAGGAAGCAAGATTGGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAA +MK115009.1,vpr,5087,5249,5558,5843,forward,0.6703246973639554,MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT,MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT*,ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTAG +MK115009.1,tat_exon1,5235,5577,5830,6046,forward,0.6325105553751218,MTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ,MEILRQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPSLEP*KHPGSQPMTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ*,ATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA +MK115009.1,rev_exon1,5497,5578,5969,6047,forward,0.564475664826894,MAGRSRDSDEELLTAVRIIKRLYQSSK,MAGRSRDSDEELLTAVRIIKRLYQSSK,ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG +MK115009.1,vpu,5589,5835,6259,6310,forward,0.7710195148641255,MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP,MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP*DSNDM*,ATGCATGCCTTAGAAATAGCAGCAATAGCAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAG +MK115009.1,env,5752,8353,6224,8795,forward,0.7518870380010406,MKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD,MRVTRTKRNYPHLWR*GILFLKIVMICSANNL*VTVYYKVPV*KEATTTLFCASDAKAYETEKHNV*ATHACVPTDPSPQEVALENVTETFNM*KNDMVEQMHEDIISL*DQSLKPCVKLTPLCVTLNCTDNLNLTCPNNNTCSNNTNYNMKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD*NDTLKQIVIKLKEKFKNKTIVFTQSSGEDPEIVMHSFNCREEFFYCNTTQLFNST*NNST*NSTDN*NSTESNSTITLPCRIKQIINLWQEVRRAMYAPPIQRQIRCSSNITGLLLVRDGRSNNSSNDTETFRPRGRDMKDN*RSKLYKYKVVKIKPLRIAPTHAKRRVVQKEKRAIRLEAFFLRFLRAAGSTIGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTV*GIKQLQAKVLALERYLKDQQLLRI*SCSGKLICTTNVPWNISWSPRWNRSLDKI*TNMT*KQ*EKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDK*ASL*N*FDITQ*L*YIKIFIIIVRGLISLRIVFTILSIVNKVRQGYSPLSLQTLLPTQRGPDRPERTEERGRKKDRGTSTRLVHRFLALI*DDLRSLFLFSYHRLKDLLLIAARIVELLGRRK*EALKY**NLLQY*SQEIKNSAVSLLNTTAIAVAERTDKIIEVLQRGFRAILHIPTRIRQGLEKALL*,ATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATAAAGTACCTGTATAGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTAGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTAGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAACTTGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATGAAAGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTAATGTAGTACCAATAGATGAAGATAGTAAAAATACTACGGGCAAATATAAGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTAAACGGCAGTCTAGCAGAAGAAAAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATAGAACAGACATAATAAGAGATATAAGACAAGCGCATTATAACATTAGTAAGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGAAGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAGTACTTAGAATAGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAAGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTAGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTAGAGGAAGAGATATGAAGGACAATTAGAGAAGTAAATTATATAAATATAAAGTAGTCAAAATTAAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAAGGAAAAAAGAGCAATAAGACTTGAAGCTTTCTTCCTTAGGTTCTTAAGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACAGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAAAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTAAGGATTTAAAGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAAGAAAAAGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTAGGCAAGTTTGTAAAATTAGTTTGACATTACACAGTAGCTATAGTATATAAAAATATTCATAATAATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGAGGACAGATAAAATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA +MK115009.1,nef,7748,8996,8796,9417,forward,0.7344808947652905,MTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR,MNKTYWH*ISRQVCKISLTLHSSYSI*KYS****EA**V*E*FLLYCL**IKLGKDTHHCHCRPSSQPREDPTGPKEQKKEVERKTEAHPRD*CIDS*HSSRTTCGACSSSVTTA*KTYS*LRREL*NF*DAESKKH*SISRISCSIRVRK*RIVQLACSTPQQ*Q*LRGQIKL*KYYKEALELFSTYLHE*DRA*KRLCYKIGNKLSRRLKARWPAIKEKIRRARPVREPEPATAKVRAASRDLKRHGALTSSNTAATNADVACLEAQQKKEEVSFPVRPQVPLRPMTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR*CFKLVPVDPDKVEEASVRENNCLLSPENLHRMEDEHREVLQWRFDSRLAFHHIARELHPEYYKDC*,ATGAACAAGACTTATTGGCATTAGATAAGTAGGCAAGTTTGTAAAATTAGTTTGACATTACACAGTAGCTATAGTATATAAAAATATTCATAATAATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGAGGACAGATAAAATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAAGATAGGTAACAAGTTGTCAAGAAGGCTCAAGGCTAGATGGCCTGCCATAAAGGAGAAAATAAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAACAGCTAAGGTAAGAGCAGCATCTCGAGACCTGAAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGAAGAAAGAAGAGGTAAGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGAGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAAGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATAGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAAGTAGAAGAGGCCAGTGTAAGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATAGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA +MK115009.1,tat_exon2,7934,8027,8376,8469,forward,0.5633802816901408,RPSSQPREDPTGPKEQKKEVERKTEAHPRD,RPSSQPREDPTGPKEQKKEVERKTEAHPRD*,AGACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAG +MK115009.1,rev_exon2,7935,8211,8377,8653,forward,0.6781884553958476,ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE,DPPPNPERTRQARKNRRKR*KERQRHIHEISA*ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGAGGACCCGACAGGCCCGAAAGAACAGAAGAAAGAGGTAGAAAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAAAGACTTACTCTTAATTGCGGCGAGAATTGTAGAACTTCTAGGACGCAGAAAGTAAGAAGCACTGAAGTATTAGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG +MK115387.1,gag,292,1795,789,2292,forward,0.25442849599155104,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAGGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACACAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCGCAGGATGTAAAAAATTGGATGACAGAAACCTTATTGGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MK115387.1,pol,1587,4599,2084,5096,forward,0.17509882471546434,FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGGAAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTGGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAGGGAAGATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACACCCCGCAGGGTTAAAAAAGAAGAAATCAATAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAATAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATGGATGACTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTGGGGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAGACAATTATGTAAACTCCTTAAGGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGGGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATGGGATACCTGGTGGACAGAATATTGGCAAGCCACCTGGATTCCCGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAAAAAGAACCTATTGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAGGAATAATTCAAGCACAACCAGATAGGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAGGGTCTACCTTGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTCCTATTTTTGGATGGAATAGATAAGGCCCAAGAGGAGCATGAGAAATATCACAATAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAGGGAGAAGCCACGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTGGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115387.1,vif,4543,5122,5040,5619,forward,0.34158090650317496,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAGGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTCCATAGAATGCAGGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAG +MK115387.1,vpr,5061,5352,5558,5843,forward,0.5625083211290105,MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS,MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115387.1,tat_exon1,5332,5551,5830,6046,forward,0.37688442211055273,MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ,MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ*,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGA +MK115387.1,rev_exon1,5471,5552,5969,6047,forward,0.4807692307692307,MAGRSGDSDEDLLKTVRLIKYLYQSSE,MAGRSGDSDEDLLKTVRLIKYLYQSSE,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGAG +MK115387.1,env,5726,8288,6224,8795,forward,0.52645935624659,MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL,MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL*,ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAGCATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGACTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAGATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGCTGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGCATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAAAGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAAGAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA +MK115387.1,vpu,5770,5809,6259,6310,forward,0.5690703735881842,MGHHAPWNVDDL,MGHHAPWNVDDL*,ATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAG +MK115387.1,tat_exon2,7869,7965,8376,8469,forward,0.6995153473344102,RPSSQLRGEPTGPKE,RPSSQLRGEPTGPKE*KKEVERETKADPVD**,AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGA +MK115387.1,rev_exon2,7870,8146,8377,8653,forward,0.32366339007432277,DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE,DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE*,GACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAG +MK115387.1,nef,8289,8940,8796,9417,forward,0.46946145391741245,MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC,MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTAGGGTTGGATGGAATGCAGTGAGGGAAAGAATGAGACGAGCTCAGCCAACAGCAGATAGGGAACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAATATGGAGCACTTACAAGTAGGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGATGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATGGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGGTTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA +MK115491.1,gag,521,2021,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115491.1,pol,1813,4825,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115491.1,vif,4769,5348,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115491.1,vpr,5287,5578,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115491.1,tat_exon1,5558,5777,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115491.1,rev_exon1,5697,5778,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115491.1,env,5952,8493,6224,8795,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115491.1,vpu,5990,6035,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115491.1,tat_exon2,8074,8170,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115491.1,rev_exon2,8075,8351,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115491.1,nef,8494,9109,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK116110.1,gag,62,1601,140,1643,forward,0.7362754920106639,MSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ,MAGSPHSLTCCHHFF*CSCWSQCF*NSLTIWVCILDQQGFCHPIFYIL*SLLGS*GFIEPVYIVSKGFLWSLFYVQNADRTIHSYYFI*SQDYPSFIDISYWDRWIICHPSYLFLKGTSSSCYITSPWFSHLAWCNRPCMHWMQSIPFCSFLIDGLF*YLHCCLMSPHCI*HGVYILWGGSFC*C*KHRYYFWAKSLFFYYFYPCI*SSR*HGLMYHLPLEVLHYRVVLADLAVISCASCCLLCFHLAFVLLFLYLI*RSLGVLYLYPLMYTIEDRYCII**FKLF*PCLEGWL*LSQYLSTAF*CF*QARINCESF*LPACPY*MF*PIFFLSSWP*PNFFPLV*FSPA*Y*RSRTHLSPSSLR*VKTFLAYSPVAEAMSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ*,ATGGCCGGGTCCCCCCACTCCCTGACATGCTGTCATCATTTCTTCTAGTGTAGCTGCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTTGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACATCCTGTGAAGCTTGCTCGGCTCTTAGGGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTTTTATGTCCAAAATGCTGACAGGACTATACATTCTTACTATTTTATTTAATCCCAGGATTACCCATCTTTTATAGATATCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATATCACTTCCCCTTGGTTCTCTCATTTGGCCTGGTGCAACAGGCCCTGCATGCACTGGATGCAATCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAATATTTGCATTGCTGCTTGATGTCCCCCCACTGTATTTAGCATGGTGTTTATATCTTGTGGGGTGGCTCCTTCTGCTAATGCTGAAAACATAGGTATTACTTCTGGGCTAAAAGCCTTTTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGACATGGCCTGATGTACCATTTGCCCCTGGAGGTTTTGCACTATAGGGTAGTTTTGGCTGACCTGGCTGTTATTTCCTGCGCCAGCTGCTGCTTGCTGTGCTTTCATCTTGCTTTTGTTTTGCTCTTCCTCTATCTTATCTAGCGCTCCCTTGGTGTCTTGTATCTCTATCCTTTGATGTATACAATAGAGGACCGCTACTGTATTATATAATGATTTAAGCTCTTCTGACCCTGTTTGGAGGGATGGCTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTAGATGTTTTAACCTATATTTTTTCTTTCCTCCTGGCCTTAACCGAATTTTTTCCCATTGGTCTAATTTTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGGTAAAAACTTTTTTGGCGTACTCACCAGTCGCCGAAGCAATGAGCCAAGTAAATTCAACTACCGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAAGAAGACTGTTAAGTGTTTCAACTGTGGTAAAATAGGGCATATAGCAAAAAATTGCAGGGCCCCCAGGAGAAAGGGCTGTTGGAAATGTGGACAGGAAGGACACCAGATGAAAGATTGTAGTGAGAGACAGGCTAATTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAA +MK116110.1,pol,1393,4405,1435,4447,forward,0.2475474244944199,FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED,FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED*,TTTTTTAGGGAAACTCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTTCAGGTTTGGGGAAGAGACAGCAACTCCCCCTCAGAAGCAGGAGCCGATAGACAGGGAACTATATTCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCTCAATAAAAGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGTGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACCCATAGAAATCTATGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATTTGTTGACTCAGATTGGGTGCACTTTAAATTTTCCCATTAGTCCTATCGAAACTGTACCAGTAAGATTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAGATTTCAAAGATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGGGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTGGGAATACCGCATCCCGCAGGATTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCATTAGATAAAGACTTTAGGAAGTATACTGCATTTACCATACCCAGTGTAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAGCTAGCATGACAAAAATTTTAGAGCCTTTTAGGAAGCAAAATCCAGACATGGTTATTTATCAATACATGGATGATCTATATATAGGATCTGACTTGGAATTAGGACAGCATAGGACAAAAATAGAGGAACTGAGACAACATCTATTGAGGTGGGGGTTTACCACACCAGACAAGAAGCATCAGAAAGAACCTCCATTCCTCTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAACACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTAGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAGTTATGTAAACTCCTTAGAGGAACCAAATCACTAACAGAAGTAGTACCACTAACAAGAGAGGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAAAAGCAGGGACAAGGCCAGTGGACTTATCAGATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATAAAGGGTACCCACACTAATGATGTAAAACAATTAACACAGGCTGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGACCCCATAGTAGGAGCAGAAACATTCTATGTCGATGGGGCAGCCAATAGGGATACTAAATTAGGAAAAGCAGGATATGTTACTGACAGGGGAAGACAAAAAATTGTCCCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTACCTAGCTCTGCAGGATTCAGGATCAGAAGTAAACATAGTATCAGACTCACAGTATGCAATAGGAATTCTTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGACATGGGTGCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTATTATTCTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCCATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGTTAAAAGGAGAAGCCATACATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTGGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTGGCCAGTGGATATATTGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATATTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAACACAATACATACAGACAATGGCAGCAACTTCACTAGCACTGCGGTTAAAGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGGGATCAAGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGAGGTACAGTGCAGGGGAAAGAATAGTAGACATGATAGCATCAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGAAATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGACTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAA +MK116110.1,vif,4349,4928,4391,4970,forward,0.4686663095875737,MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH,MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAATGCATGGAAAAGCTTAGTAAAGCACCATATGCATGTTTCAAGGAAAGTTGAGAGATGGGTTTATAAACATCACTATGAAAGTACTAATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAAAAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAAGAGCTATAATACACAAGTAGACCCTGAAGTAGCAGACCAACTAATCCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAAAGCCATAGTAGGACATAGAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCCCTACAGTACTTGGCATTAGCAGCATTAGTAAAATCAAAAAAGACAAAGCCACCTTTGCCTAGCGTTACGAAGCTGACGGAGGATAGATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAG +MK116110.1,vpr,4867,5158,4909,5200,forward,0.4534005037783373,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP,MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP*,ATGGAACAAGCCCCAGAGGACCAAGGGCCGCAGAGGGAACCATATAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGGATATGGCTTCAGAGCTTAGGACAATACGTCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTTTGCAACAAATGCTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACGAGGGAGAACAAGAAATGGAGCCAGTAGACCCTAG +MK116110.1,tat_exon1,5138,5357,5180,5399,forward,0.48719691819623834,MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ,MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ*,ATGGAGCCAGTAGACCCTAGCCTAGCGCCCTGGAAGCACCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGCTATTGTAAAAAGTGCTGCTTACATTGCCAAGTTTGTTTCACAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGTAA +MK116110.1,rev_exon1,5277,5358,5319,5397,forward,0.4807692307692307,MAGRSGDSDEDLLKAVRLIKILYQSSK,MAGRSGDSDEDLLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGTAAG +MK116110.1,vpu,5369,5615,5411,5657,forward,0.46373488953730724,MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL,MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL*,ATGCAATCTTTGCAAATAGGAGCAATAGTAGCATTAGTAGTAGGAACAATAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAAGAAAATAGATAGAATAATAGATAGAATAGTAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAG +MK116110.1,env,5532,8073,5574,8124,forward,0.47450452559300893,MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL,MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL*,ATGAGAGTGAAGGAGACCAGGAAGAGTTATCAGCACTGGTGGAAAGGGGGCATGATGCTCCTTGGAATGTTAATGATCTGTAGTGCTGCAACAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACGCATGCCTGTGTACCCACGGACCCCAACCCACAAGAAGTATTATTGGGAAATGTGACAGAAGATTTTAATGCATGGAAAAATAACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTTTGTGTTATTTTGCATTGCACTGATGTCAACAATACTAGAAATGGGATGACAGGAGAACTAAAAAACTGCTCTTTCAATATCACCACAAAAATAACAAATAAGGTACAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTACCAATAAATAATAAGGATAATGATACTAGCTTTAATAATAATAGCTATAGGTTGATAAGTTGTAACACCTCAGTTATTACACAGGCTTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTACTGTACCCCGGCTGGTTATGCAATTCTAAGGTGTAACAATGAGACATTCAGTGGAAAAGGGCCATGTACAAATGTCAGCTCAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGCAGTCTAGCAAAACAGGAGGTAGTAATTAGATCTCAAAATTTCTCGGACAATGTTAAAACCATAATAGTACAGCTGAAGACCCCTGTAAAAATTAACTGTACAAGGCCCAATAACAATACAAGAAAAAGTATACATGCAGGACCAGGGAAAGTAATTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGCAACATTAGTGCAGCAGAGTGGAATGATACTTTAGGACAGATAGTTACAAAATTACAAGAACAATTTGGGAATAAAACAATAGTCTTCAATCAATCGTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTTTACTGTAATTCAACACAACTGTTTAATAGTACTTGGAATAATAATGGTACTAATACTTGGAATAGTACAGGTAATATCACACTCCCATGTAAAATAAGGCAAATTGTAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCTCCTCCCATCCGTGGACAAATTAAATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAACGAGAGTGAGAGCGAAACCTTCAGACCTGGCGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGACTAGCACCCACTAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAACACTGGGAGCTGTGTTCCTTGGGTTCTTGGGAACAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAAGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGCAATAAATCTCTGAATGAAATTTGGGATAACATGACCTGGATGGAGTGGGAAAAAGAAATTAGTAATTACACACAATTAATATACACTTTAATTGAAGAATCGCAGAGCCAGCAAGAAAAGAATGAACAAGAATTATTGGCACTAGATAAGTGGGACAGCTTGTGGAGTTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAATAGGGTTAAGAATAGTTTTTACTGTACTTTCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTGTCATTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTAGTACAAAGAGCTTGTAGAGCTATTCTCCACATACCTGTAAGACTAAGACAAGGCTTAGAAAGAGCTTTGCTATAA +MK116110.1,tat_exon2,7654,7750,7705,7798,forward,0.4464285714285715,RPASQPRGDPTGPKESKKTVERETETDPHA,RPASQPRGDPTGPKESKKTVERETETDPHA**,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAA +MK116110.1,rev_exon2,7655,7940,7706,7982,forward,0.46126825660935467,DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC,DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGACGGTGGAGAGAGAGACAGAGACAGATCCACACGCTTAGTAACCGGATTCTTACCACTTTTCTGGGACGACCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTACAATATTGGAGTCAGGAACTAAAAAAAAGTGCTGTTAG +MK116110.1,nef,8074,8695,8125,8752,forward,0.437094682230869,MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC,MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTGTAAGGGAAAGAATAAGAAGAGCTGGGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGACAAACATGGAGCAATCACAAGTAACAATACACCAGCTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTTAGGCCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGATGATATACTCCCAGCAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGACCAGGGGTCAGGTTTCCACTGACCTTTGGATGGTGCTTCAAACTAGTACCACTTGAGACAGAGCAGGTAGAAGCGGCCACTGGAGGAGAGAACAACTGCTTGTTACACCCTTTGAACCAGCATGGGATGGATGACCCGGAGAGAGAAGTACTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAGAGCCAAAGAGCTGCATCCGGAGTACTACAAAGACTGCTGA +MK115527.1,gag,683,2183,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115527.1,pol,1975,4987,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115527.1,vif,4931,5510,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115527.1,vpr,5449,5740,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115527.1,tat_exon1,5720,5939,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115527.1,rev_exon1,5859,5940,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115527.1,env,6114,8655,6224,8795,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115527.1,vpu,6152,6197,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115527.1,tat_exon2,8236,8332,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115527.1,rev_exon2,8237,8513,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115527.1,nef,8656,9271,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK114997.1,gag,210,1719,789,2292,forward,0.27936962750716343,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP*YQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCAAAGGATAAATGTAAAAGACACCAAGGAAGCTCTAGACACAATAGAGGAAGAACAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCATATCACCTAGCACTTTAAATGCATGGGTAAAAGTGATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATACTCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGATCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCCTCCATAATGGCGCAAGGAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA +MK114997.1,pol,1511,4523,2084,5096,forward,0.26443159013103534,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAGAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGACAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAGGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACCAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAGATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTCCATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTAACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAATGGAATCAGAAAAATACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCCGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK114997.1,vif,4467,5046,5040,5619,forward,0.3903081914030819,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTAGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCCTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG +MK114997.1,env,4985,8207,6224,8795,forward,0.6663239775063792,MHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*LRALETSRKSTYDCL*QLLL*TVLLSLPSLLHHKRLRHLLWQEEAETATKSFSQQSESSSGSTRAVSITCNACLKNSSNSRISSSNNNSNSCVVYSINRI*ENFKTKENRQVN**NKRKSRRQWQ*K*RRPGGIISTCGDGASCSLGW**YVVPTTCGSQSIMGYLYGKKQLPLYFVHQMLKHMRQRSIMFGQPMPVYPQTPAHRK*H*KM*QKHLTCGKMTW*SRCMRI*SVYGIKA*SHV*N*PHSVLL*IALIV*VIILIII*RKKEK*KTALSMSPQE*EIG*QKNMHFSIDLM*YQ*MKIVEILRANIG**IVTPQSLHKPVQRYPLSQFPYIFVPRLVLRF*SVEIRNSMEQENVEMSAQYNVHMELGQ*YQLNCC*TAV*QKKR**LDLPISRTMLKP**YS*INL*KLIVQDPTIIQEEVYT*DQGEHFMEQT**GI*DKRIVTLVGKIGMTL*KKIVIKLKEKFENKTIVFNQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAGTATTACATGTAATGCATGCCTTAAAAATAGCAGCAATAGTAGGATTAGTAGTAGCAACAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGAAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAGTTTAAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAGACTTGATGTAGTATCAATAGATGAAGATAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGATATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAACCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGAGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAAAAAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAACACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCTCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACATTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAGTGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAGGCAGTGGGAAAAGGAAATTGACAATTACACAGACACAATATATAACTTAATTGAACTATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAGTTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTACTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA +MK114997.1,vpr,4985,5276,5558,5843,forward,0.5793112277557293,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK114997.1,tat_exon1,5256,5475,5830,6046,forward,0.4691531785127845,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA +MK114997.1,rev_exon1,5395,5476,5969,6047,forward,0.5290287574606619,MAGRSGDSDEELLTAVRIIKRLYQSSK,MAGRSGDSDEELLTAVRIIKRLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG +MK114997.1,vpu,5694,5733,6259,6310,forward,0.6901936289818864,MGHLVPWDGDDM,MGHLVPWDGDDM*,ATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG +MK114997.1,tat_exon2,7788,7881,8376,8469,forward,0.4918032786885247,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,RPSSQPRGDPTGPKEQKKEVERETEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG +MK114997.1,rev_exon2,7789,8065,8377,8653,forward,0.40871934604904625,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG +MK114997.1,nef,8208,8850,8796,9417,forward,0.5478186258332784,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC,MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC*,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTGCCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTTCAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTTCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGTCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAATATTTCAAGGACTGCTGA +MK115518.1,gag,739,2239,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115518.1,pol,2031,5043,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115518.1,vif,4987,5566,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115518.1,vpr,5505,5796,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115518.1,tat_exon1,5776,5995,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115518.1,rev_exon1,5915,5996,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115518.1,env,6170,8711,6224,8795,forward,0.45675101255163,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115518.1,vpu,6208,6253,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115518.1,tat_exon2,8292,8388,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115518.1,rev_exon2,8293,8569,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115518.1,nef,8712,9327,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115065.1,gag,221,1730,789,2292,forward,0.2880084183556756,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP,MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP*YQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATGGGAAAAAATTTACCTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAGTCATCAGAAGGCTGCAGGCAAATTCTGGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGCGTACATCCAAGGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAGGGGCAAATGGTACATCAACCCTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGCTGAGGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAGGGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATCCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGGGATTATGTAGACCGGTTCTATAAAACTCTAAGGGCTGAGCAAGCGTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAATCAGCCTCCATAATGGTGCAGGGAGGCAATTTTAGGAACCAAAGAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA +MK115065.1,pol,1522,4534,2084,5096,forward,0.25117173416656646,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED,FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTGGGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAGGAGGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATTTGTGGACATAAAGCTATAGGTACAGTATTAATAGGACCTACACCAGTCAACATAATTGGAAGAAATCTGTTGACCCAGCTTGGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATGGAAAAGGAAGGGAAAATTACAAAAATTGGGCCTGAGAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCCGAGAACTTAATAAGAGAACACAAGACTTCTGGGAAGTTCAACTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAGGAGTTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGGAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGATTTAGAAATAGAGCAGCACAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGGGGTGGGGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTGGACTATCAATGACATACAGAAGTTAGTGGGAAAGTTGAATTGGGCAAGCCAAATCTATCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGGGGAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCTATACAAAAGGAAACATGGGAAATGTGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAGACTAAATTAGGAAAAGCAGGGTATGTTACGGACAGAGGAAGACAAAAGGTTGTCTCCCTAATGGACACAACAAATCAGAGGACTGAGTTACACGCTATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAGGGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAGGAAAAGATTTACCTGGCATGGGTCCCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTAGTGGAATCAGAAAAGTACTATTTCTGGATGGAATAGATAAGGCCCAAGAAGAACATGAAAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAGGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115065.1,vif,4478,5057,5040,5619,forward,0.3903081914030819,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGGGTTTACAGACATCACTATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAGGGGAAGCAAGATTGGTAATAAAAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGCCAGGGAGTATCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAG +MK115065.1,vpr,4996,5287,5558,5843,forward,0.5872377841979652,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*,ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115065.1,tat_exon1,5267,5486,5830,6046,forward,0.48954161103693805,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ,MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*,ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA +MK115065.1,rev_exon1,5406,5487,5969,6047,forward,0.5290287574606619,MAGRSGDSDEELLTAVRIIKRLYQSSK,MAGRSGDSDEELLTAVRIIKRLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG +MK115065.1,env,5661,8262,6224,8795,forward,0.5065440396179699,MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL,MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*,ATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAGTACTTGGAATGGTACTGACAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATAGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACCTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA +MK115065.1,vpu,5705,5744,6259,6310,forward,0.6901936289818864,MGHLVPWDGDDM,MGHLVPWDGDDM*,ATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG +MK115065.1,tat_exon2,7843,7936,8376,8469,forward,0.4918032786885247,RPSSQPRGDPTGPKEQKKEVERETEAHPRD,RPSSQPRGDPTGPKEQKKEVERETEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAG +MK115065.1,rev_exon2,7844,8120,8377,8653,forward,0.40871934604904625,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE,DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAG +MK115065.1,nef,8263,8905,8796,9417,forward,0.5386842636859471,MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC*,ATGGGTAACAAGTTGTCAAGAGGGCTCAGGGCTGGATGGCCTACCATAAGGGAGAGAATGAGACGAGCTAGGCCAGTAAGAGAGCCAGAGCCAGCAGCAGCTGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTGGAAGCACAACAGGAGGAGGAAGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAGGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAGGGCCAGGAGTCAGATTTCCACTGTGTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAGAACCTGCATGGAATGGAGGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA +MK115464.1,gag,527,2297,789,2292,forward,0.706855791962175,MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ,MLQAIPGLRVGPAGSHPFALTSLSASCCHQSQSPGCSGATARTGT*KRK*NQRSSLDAGLGLLKRARQEARGGDW*VRQF*LAEARRREISARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADKGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNA*VKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAE*DRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIA*MTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKN*MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ*,ATGCTTCAGGCCATCCCTGGTTTGAGGGTGGGTCCCGCCGGGTCCCACCCCTTTGCACTCACGAGCCTGTCTGCCTCCTGCTGCCATCAATCACAGAGCCCAGGCTGTTCGGGCGCCACTGCCCGAACAGGGACCTGAAAGCGAAAGTAGAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGAGGCGGCGACTGGTGAGTACGCCAATTTTGACTAGCAGAGGCTAGAAGGAGAGAGATAAGTGCGAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAAGGATGTAAACAAATACTGGAACAGCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAAGATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACAAAGGAAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCGCCTAGAACTTTAAATGCATAGGTGAAAGTAGTAGAAGAGAAGGCCTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATAAGATAGAGTGCATCCAGTGCATGCAGGGCCTGTTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATAGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGTCCTAAGAGCCGAGCAAGCATCACAGGATGTAAAAAATTAGATGACAGAAACCTTATTAGTCCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTAAGACCAGCAGCAACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAAGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MK115464.1,pol,2089,5101,2084,5096,forward,0.7518376924488996,AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED,FFKEDLAFLQRKAKELSSEQTRANSPTRRELQV*GGDSNSSSEAGAGGQRSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEISLPGR*KPKMIEGIGGFIKVRQYDQITIEICGHKAIGTVLVGPTPVNIIGRNLLTQISCTLNFPISPIETVPVQLKPGIDSPKVKQWPLTEEKIKALVEICTEMEKEKKISKIRPENPYNTPVFAIKKKDSTK*RKLVDFKELNKRTQDF*EVQLRIPHPARLKKKKSITVLDVGDAYFSIPLDKDFKKYTAFTIPSINNKTPEIRYQYNVLPQG*KRSPAIFQSSMIKILEPFRKQNPDIVIYQYIDDLYVRSDLEIRQHRTKIEELRQHLLK*RLTTPDKKHQKEPPFLWISYELHPDKWTVQPIVLPDKDSWTVNDIQKLVRKLN*ASQIYAEIKVRQLCKLLKGAKALTEVIQLTEEAELELAENKEILKEPVHEVYYDPSKDLIAELQKQRQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVI*GKTPRFKLPIQKET*DT*WTEYWQAT*IPE*EFVNTPPLVKL*YQLEKEPIVGAETFYVDRAANKETKLRKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALRIIQAQPDKSESEIVNQIIEQLIKKEKVYLA*VPAHKRIRRNEQVDKLVSARIRKVLFLDRIDKAQEEHKKYHNN*RAMASDFNLPPVVAKKIVASCDKCQLKKEATHRQVDCSPRIWQLDCTHLEGKVILVAVHVASRYIEAEVIPAETGQETAYFLLKLAGR*PVKAIHTDNGTNFTSATVKAAC**AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED*,TTTTTTAAGGAAGATCTGGCCTTCCTCCAAAGGAAGGCCAAGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTAGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAAGAAGTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAAATAAGTTTGCCAGGAAGATAGAAACCAAAAATGATAGAAGGAATTGGAGGCTTTATCAAAGTAAGACAGTATGATCAGATAACTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTAGTTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAACCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAAGAAAAAAAGATTTCAAAAATTAGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGAAGAAAATTAGTAGATTTCAAGGAACTTAATAAAAGAACTCAAGACTTCTAAGAAGTTCAATTAAGAATACCACACCCCGCAAGGTTAAAAAAGAAGAAATCAATAACAGTACTAGATGTAGGTGATGCATATTTTTCAATTCCCTTAGATAAAGACTTCAAGAAGTATACTGCATTTACCATACCTAGTATAAATAATAAGACACCAGAGATTAGATATCAGTACAATGTGCTTCCACAGGGATAGAAAAGATCACCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGACATAGTTATCTATCAATACATAGATGACTTGTATGTAAGATCTGACTTAGAAATAAGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTAGAGATTGACCACACCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATAAGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGACATACAGAAGCTAGTAAGAAAATTGAATTGAGCAAGTCAGATTTATGCAGAGATTAAAGTGAGACAATTATGTAAACTCCTTAAAGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAAGAAATTCTAAAAGAACCAGTACATGAAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGAGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAACGAGAGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTGATATGAGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAAACATAAGATACCTAGTGGACAGAATATTGGCAAGCCACCTAGATTCCCGAGTAAGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATAGTACCAATTAGAAAAAGAGCCTATTGTAGGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAAAGAGACTAAATTAAGAAAAGCAGGATATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCACTAAGAATAATTCAAGCACAACCAGATAAGAGTGAATCAGAGATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTACCTTGCATAGGTACCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTGCTAGAATCAGGAAAGTCCTATTTTTAGATAGAATAGATAAGGCCCAAGAAGAGCATAAGAAATATCACAATAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAAAAATAGTAGCCAGTTGTGATAAATGCCAGCTAAAAAAAGAAGCCACGCATAGACAAGTAGACTGTAGTCCAAGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTAGTAGCAGTTCATGTAGCCAGTAGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTAGCCAGTGAAAGCAATACATACAGACAATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTAGTAGGCAAAGATCAAGCAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTCCAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAAGGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAAGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115464.1,vif,5045,5624,5040,5619,forward,0.7067546928117459,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY,MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY*GLHTGERDWHLGQGVSIEWKERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRRSQTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAAACACCATATGTACATTTCAAAGAAAGCCCAAGGATGGTTTTATAGACATCACTATGAAAATCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAATAACAACATATTAGGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGTCAAGGAGTCTCCATAGAATGGAAGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGTGACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAG +MK115464.1,vpr,5563,5854,5558,5843,forward,0.5898566703417862,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG +MK115464.1,tat_exon1,5563,6052,5830,6046,forward,0.7571801566579635,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS,MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS*IRALEASRKSA*DSLYQMLL*KMLLSLPSLFHNKRLRHLLWQEEAETATKTSSRQSDSSSTSTKAV,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTG +MK115464.1,rev_exon1,5973,6054,5969,6047,forward,0.4807692307692307,MAGRSGDSDEDLLKTVRLIKYLYQSSE,MAGRSGDSDEDLLKTVRLIKYLYQSSE,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGAG +MK115464.1,env,6228,8799,6224,8795,forward,0.7471048806788873,MTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST,MRVKEIKRSYQHL*R*GIMLLRMLMIYSTADQWWVTVYYKVPVWREANTTLFCASDAKAYSTEAHNV*ATHACVPTDPNPQEIVIENVTEDFNMWKNNMVDQMHEDIISL*DQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTSNTT*GEMTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST*TKNGTDSWQSNDTQNSNITLQCRIKQIINLWQEVRKAMYAPPISRQINCTSNITGLVLTRDRRNETKTFRPGRENMKDNWRSKLYKYKVVRIEPLRIAPTKAKRRVVQREKRAVRLGAMFLKFLGAARSTIGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTV*GIKQLQARVLAVERYLQDQQLLGL*GCSRKLICTTTVP*NRS*GRHNKNYKSLDDI*DNMT*IE*EKEIDNYTSLIYTLITESHSQQKKNEQELLALDK*ASL*N*FDISQWLWYIKIFIMIVGGLVSLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPERIEERGRERDKGRSGRLVNGFLALI*DDLRSLCLFSYHRLSDLLLIVIKIVELLRRKR*EALKY**NLLQY*SQELKNSAVSLLNTTAIVVAERTDKIIEILQRISRAFLHIPRRIRQGLEKALL*,ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATAAGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGAAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTTCTAATACTACTTAGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATAAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAAAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAAGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATAGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAAGAAGCCATACGAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAAGACCAGGAAGAGCATTTTATACAACAGGAGATATAATAAGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAGTAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAAAGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAAAGAAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTAGACTAAAAATGGTACTGATAGTTGGCAGTCTAATGATACTCAGAATAGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCCATCAGTAGACAAATTAACTGTACATCAAATATTACAGGGCTAGTTTTAACAAGAGATAGGAGGAATGAAACTAAGACCTTTAGACCTGGAAGAGAAAATATGAAGGATAATTGGAGAAGTAAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAAGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAGAAAAGAGCAGTAAGACTAGGAGCTATGTTCCTTAAGTTCTTAGGAGCAGCCAGAAGCACTATAGGCGCAGCGTCGATAGCGCTGACGGAACAGGCCAGACGAGTCTTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTAAGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATCGTAGTTGAGGTAGGCATAACAAAAATTACAAAAGTCTAGATGACATTTAGGATAACATGACCTAGATAGAGTAGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAAAAAAGAATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA +MK115464.1,vpu,6228,6276,6259,6310,forward,0.7611885546588408,MRVKEIKRSYQHL,MRVKEIKRSYQHL*R*,ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAG +MK115464.1,nef,8194,9451,8796,9417,forward,0.6542937183493158,MTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC,MNKNYWH*INRQVCRISLTYHNGCGI*KYS****EA**V*E*FLLYFL**IKLGKDTHHYHFRPSSQPREEPTGPKE*KKEVERETKADPVD**TDS*HLSRTIYGAYASSATTA*ATYS*L*SRLWNF*DARGKKPSNISRISCSTRVRN*RIVLSACSTPQL***LKGQIR**KYYKELVELFSTYLGE*DRA*KRLCYKIGGKWSKSSKVK*NAVKERIRRAQPTADKERAEPAADKVRAASRDLEKYGALTSKNTAATNADCAWLEAQEEEDEVGFPVRPQLPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC*,ATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAAGATAGGTGGCAAGTGGTCAAAAAGTAGTAAGGTTAAATAGAATGCAGTGAAAGAAAGAATAAGACGAGCTCAGCCAACAGCAGATAAAGAACGAGCTGAGCCAGCAGCAGATAAGGTAAGAGCAGCATCTAGAGACCTAGAAAAATATGGAGCACTTACAAGTAAGAATACAGCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGATGAGGTAGGTTTTCCAGTCAGACCTCAGTTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTGAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTACACACCAGGGCCAAGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCACTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCTCTGAGCCAGCATAGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGATTTGACAGCCGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA +MK115464.1,tat_exon2,8380,8476,8376,8469,forward,0.6995153473344102,RPSSQPREEPTGPKE,RPSSQPREEPTGPKE*KKEVERETKADPVD**,AGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGA +MK115464.1,rev_exon2,8381,8657,8377,8653,forward,0.5716671727907683,RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE,DPPPSPERSRQARKNRRKR*RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE*,GACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAG +MK115530.1,gag,746,2246,789,2292,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115530.1,pol,2038,5050,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115530.1,vif,4994,5573,5040,5619,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115530.1,vpr,5512,5803,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115530.1,tat_exon1,5783,6002,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115530.1,rev_exon1,5922,6003,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115530.1,env,6177,8718,6224,8795,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115530.1,vpu,6215,6260,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115530.1,tat_exon2,8299,8395,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115530.1,rev_exon2,8300,8576,8377,8653,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115530.1,nef,8719,9334,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115520.1,gag,695,2195,789,2292,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115520.1,pol,1986,5004,2084,5096,forward,0.6033592883813991,YGKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,IF*GKSGLPTKEGQGTSFRADQSQQPHQKRASGLGKRQ*LPLRSRSQETRNCIPCLPSNHSLAATPRQNKNRGAI*RSFLRYRSR*YSIRRNEFARKMETKNDRGNWRFYQSKTV*SDTHRNLWTQSYRYSINRTYTCQHNWKKSVDSAWLYLKFSH*SY*NCTSKIKARNGWPKS*TMAIDRRKNKSISRNLYRNGKGRKNFKNWA*KSIQYSSICYKEKRQY*MEKISRFQRTQ*ENSRLLGSSIRNTTSCRVKKEKISNSTGCG*CIFFNSLR*RIQEVYCIYHT*YK**DTRD*V*VQCAATGMERITSNIPK*HDKNLRAF*KAKSRYSYLSIHG*FVCRI*LRNRAA*NKNRGTKTTSVKVGTYHTRQKTSERTSIPLDGL*TPS*YGKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,ATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAAATAGGGGGGCAATTTAAAGAAGCTTTCTTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATATGGGAAATGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115520.1,vif,4948,5527,5040,5619,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115520.1,vpr,5466,5757,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115520.1,tat_exon1,5737,5956,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115520.1,rev_exon1,5876,5957,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115520.1,env,6131,8672,6224,8795,forward,0.4569687738004571,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115520.1,vpu,6169,6214,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115520.1,tat_exon2,8253,8349,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115520.1,rev_exon2,8254,8530,8377,8653,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115520.1,nef,8673,9288,8796,9417,forward,0.4756067663643049,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAATGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGATACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTGCACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115503.1,gag,817,2317,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115503.1,pol,2109,5121,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115503.1,vif,5065,5644,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115503.1,vpr,5583,5874,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115503.1,tat_exon1,5854,6073,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115503.1,rev_exon1,5993,6074,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115503.1,env,6248,8789,6224,8795,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115503.1,vpu,6286,6331,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115503.1,tat_exon2,8370,8466,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115503.1,rev_exon2,8371,8647,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115503.1,nef,8790,9405,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115570.1,gag,687,2187,789,2292,forward,0.2967573174581697,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCGACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115570.1,pol,1979,4991,2084,5096,forward,0.19298018391400085,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115570.1,vif,4935,5514,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115570.1,vpr,5453,5744,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115570.1,tat_exon1,5724,5943,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115570.1,rev_exon1,5863,5944,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115570.1,env,6118,8659,6224,8795,forward,0.4604674291397314,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCTCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAGGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115570.1,vpu,6156,6201,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115570.1,tat_exon2,8240,8336,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115570.1,rev_exon2,8241,8517,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115570.1,nef,8660,9275,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115509.1,gag,555,2055,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115509.1,pol,1847,4859,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115509.1,vif,4803,5382,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115509.1,vpr,5321,5612,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115509.1,tat_exon1,5592,5811,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115509.1,rev_exon1,5731,5812,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115509.1,env,5986,8527,6224,8795,forward,0.4564898680537425,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115509.1,vpu,6024,6069,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115509.1,tat_exon2,8108,8204,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115509.1,rev_exon2,8109,8385,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115509.1,nef,8528,9143,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115702.1,gag,246,1782,789,2292,forward,0.35613851839948674,MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ,MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ*SQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGCGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAGGAAACGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGCTTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAAACAAATAATGGGACAACTCCAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAGTCCTCTATTGTGTACATCAGAGGATAGATGTAAAGGATACCAAAGAAGCTTTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGACACAGCAAGCAGCAGCCGCTGACACAGGAAACAACAGCCAAGTCAGCCAAAATTACCCCATAGTGCAGAACATGCAGGGACAAATGGTACATCAGGCCATATCACCCAGAACCCTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCATTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAGGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCAGGCAGGACCTGTTGCACCAGGCCAGATAAGGGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACACATAATCCACCCGTCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGACTCTAAGAGCTGAGCAAGCTTCACAGGAAGTAAAAGGTTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACCATTTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCAGCCATAAGGCAAGGGTTTTGGCAGAAGCAATGAGCCAAGCAACAGGTGCACATGCCATAATGATGCAGAGAGGCAATTTTAAGAACCAAAGAAAGACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGACTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAA +MK115702.1,pol,1544,4586,2084,5096,forward,0.24526399193257942,FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCCTCAGGCCAACAGCCCCACCAGTAGAGAGCTTCAGGTTTGGGGAAGAGACAGCAGCCCCCTTTCAGAAGCAGGAACCGAGAGACAAGGAGATGTCTCCCTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCAGTAGTCACAATAAAGATAGGGGGGCAAATAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGAAATGGCGTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATAGCCATAGAAATTTGTGGACATAAAGCAATTGGTACAGTATTAGTAGGACCTACACCTGTCAATATAATTGGAAGAAATCTATTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAGTTAAAGCCAGGAATGGATGGCCCAAAAATTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATAGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGACAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGATTAAAAAAGAAAAAATCAATAACAGTACTGGATGTGGGTGATGCCTATTTTTCAGTTCCCTTAGATGAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATATTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGACGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTAAGAGGACATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAGGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAGATGGACAGTACAGCCTATAAAGCTGCCAGAGAAAGAAATCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATTAAAGTAAAACAATTATGTAAACTCCTTAGGGGAACCAAAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGAATTAATAGCAGAAATACAGAAGCAAGGGCAAGGCCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCGAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAGAAAATATCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAATATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAGTACTCCTCCCCTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCATCTAATAGGGAGACTAAATTAGGAAAAGCAGGATATATTACTGACAGAGGAAGACAAAAGGTTGTCACCCTAAATGACACAACCAATCAAAAGACAGAGTTACAAGCAATTCTTCTAGCATTGCAGGATTCAGGATTAGAAGCAAACATAGTGACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGATCTACCTGACATGGGTACCAGCACATAAAGGAATTGGAGGAAATGAGCAAGTAGATAAATTAGTCAGTACTGGGATTAGGAAAGTATTATTTTTAGATGGAATAGATAGGGCCCAAGAAGAGCATGAGAGATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTCAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGACAAATAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACACTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTACCACAGTTAAGGCCGCCTGTTGGTGGGCGGGGGTCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTGGTAGAATCTATGAATAAAGAATTAAAGAAAATAATAGGACAGGTCAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTTCAAAAATTCAAAACTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCGGTAGTAATACAGGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115702.1,vif,4530,5109,5040,5619,forward,0.3566796368352788,MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAAGAAAACTAAGGGATGGTTTTATAGACATCACTATGAGAGCACTCATCCAAAAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTGAGTTGGTAGTAACAACATATTGGGGTTTGCAGCCAGGGGAAAGGGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAGGGAGATATAGAACACACGTGGACCCTAACCTAGCAGACCAACTAATTCATCTGCATTACTTTGATTGTTTTTCAGAATCTGCTATAAGACATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAAGAGCACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACTAAACTAACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG +MK115702.1,vpr,5048,5339,5558,5843,forward,0.5731147540983608,MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS,MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAACTTAAGCAGGAAGCTGTTAGGCATTTTCCTAGGCCATGGCTTCATAGCTTAGGGCAATATATCTATGAAACTTATGGGGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATGCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATCCCACAGAGGAGAGCAAGAAATGGATCCAGTAGATCCTAA +MK115702.1,tat_exon1,5319,5538,5830,6046,forward,0.5237430167597767,MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ,MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ*,ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGTAA +MK115702.1,rev_exon1,5458,5539,5969,6047,forward,0.5622384937238494,MAGRSGDGDEDLLKAVRLIKTLYQSSK,MAGRSGDGDEDLLKAVRLIKTLYQSSK,ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGTAAG +MK115702.1,env,5716,8257,6224,8795,forward,0.5208458282639616,MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE,MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE*DRA*,ATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAATGCTACAGAACAATTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACAACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTATTAATAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAAATGCAAGAGGACATAGTCAGCTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACCTTAAATTGCACTAATTTGACCATTGAGCCAAACAATGCTACTAAAGCCAATATTAGTGGGAGGTTAGAGGGGAAAGGAGAAATGACAAACTGCTCTTTCAATGTCACCACAAGCCTAAGAGATAAGAGGAAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTAGCAACAGGTGAAAATAATAACAGCTTTAGGTTGATAAGTTGTAATACCTCAGAGATTACACAGGCCTGTCCAAAGGTATCATTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAAAAGTTCAATGGAACAGGAAAATGTAACAATGTCAGCATAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAGTTAGATCTGCCAATTTCTCAGACAATACTAAGACCATAATAGTACAGCTGAACAAAACTGTAGTAATTAATTGTACAAGACCCAACAACAATACAAGGAGAAGTATACATATAGCACCAGGGAGAGCATTTTATGCAACAGGAGATATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAAAGAAGATTGGAATACCACTTTAAACCAGGTGGCTAAAAAATTACAAGAACAATTTGAGAATGCAACAATAGACTTTAAACCATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACGGAACTATTTTCTTGGAATGCTACAACAAAACTGTTTACTTGGAATGCTACAAATAGCAATAATGGAACCATCATACTCCCATGTAGAATAAAACAAATTATAAACATGTGGCAAGAGGTAGGAAAAGCAATGTATGCCCCTCCCATTCGTGGACAAATTAGATGTTCGTCAAATATTACAGGACTGCTATTAACAAGAGATGGTGGGACTAACGGGACAGGGAACAGGAATGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAGAAATTAAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGACCATAGGAGCTATGTTCCTTGGGTTCCTGGGGGCAGCAGGAAGCACTATGGGCGCAGCATCACTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCGATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATAAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGGTACCTAAGAGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATATTAGTTGGAGTAATAGAACTCTGAATAACATTTGGGACAATTTGACTTGGATGCAGTGGGATACAGAAATTAACAATTACACAAACAAAATATACCAATTACTTGAGGAAGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAATTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTACATAAAAATATTCATATTAATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTAAATGCCACAGCCATAGTAGTAGCTGAGGGGACAGATAGGATTATAGAATTAGCACAAAGAATTTGTAGAGCAGAATAAGACAGGGCTTGA +MK115702.1,vpu,5760,5808,6259,6310,forward,0.70010183299389,MGHNAHHAPWDIND,MGHNAHHAPWDIND**,ATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAA +MK115702.1,tat_exon2,7874,7970,8376,8469,forward,0.3921568627450981,RPASQSRGDPTGPKEPKKKVERETETDPTD,RPASQSRGDPTGPKEPKKKVERETETDPTD**,AGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGA +MK115702.1,rev_exon2,7875,8151,8377,8653,forward,0.3471418653089562,DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE*,GACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAG +MK115702.1,nef,8275,8896,8796,9417,forward,0.47444962236863253,MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC,MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC*,ATGGGTGGCAAGTGGTCAAAACATAGTAAGAGTGAATGGGCTGATGTAAGGGAAAGAATGGCACAAACTGAGGCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGATCTGGAAAGACATGGAGCAATCACAAGTAGCAATACAGCAACTAACAATGCTGCTTGTGCTTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAAACCTCAGGTGCCTTTGAGACCAATGACCTACAAGGGAGCTTTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTATTCCCAAAAAAGACAAGACATCCTTGATCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGAACCAGATTCCCACTGTGCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTGGAAGAAGCCAATAAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACACCGAGAGAGAGGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACGTAGCCAGAGAGAAACATCCGGAGTACTTCAAGGACTGCTGA +MK115095.1,gag,2,1697,789,2292,forward,0.7478034493979825,MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC,SGFSFTFKSLFGRHLGDLKAKEKPEELSRRRTRLAERARQEARRGD**VRRNF*LAEARRREIGARASVLSGGELDR*EKIYLRPGRKKKYRLKHIVWASRELERFAVNPGLLKSSEGCRQILGQLQPALQTGSEELRSLYNTIAVLYCVHQKINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQIVHQPISPRTLNA*VKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVRRHQAAMQMLKETINDEAAE*DRLHPVHAGPIAPGQMKEPRRSDIAGTTSTLQEQIR*MTNNPPIPVGEIYKR*IILRLNKIVKMYSPVSILDIRQRPKEPFKDYVDRFYKTLKAEQASQDVKN*MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC*KCGKKGHQMKDCTKRQANFLRKIWPSHKGRPKNFLQSRPEPTAPPAESFRFKEEATAPPQKQETKDQELYPLASLRSLFGNDP*YQ*,TCTGGTTTCTCTTTCACTTTCAAGTCCCTGTTCGGGCGCCATCTAGGGGACCTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCACGGCAAGAGGCGAGGCGCGGCGACTAGTGAGTACGCCGAAATTTTTGACTAGCAGAGGCTAGAAGGAGAGAGATAGGTGCGAGAGCGTCAGTATTAAGCGGCGGAGAATTAGATAGATAGGAAAAAATTTACCTAAGGCCAGGGAGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAAAGTCATCAGAAGGCTGCAGGCAAATTCTAGGACAGCTACAACCAGCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAGATAAATGTAAAAGACACCAAAGAAGCTCTAGACAAAATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAACAAGCAGCAGCTGACACAGGAAACAGCAGCAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTACAAGGGCAAATAGTACATCAACCCATATCACCTAGAACTTTAAATGCATAGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCACTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGAGAAGACATCAAGCAGCCATGCAGATGTTAAAAGAAACCATCAATGATGAAGCTGCAGAATAAGATAGATTGCATCCAGTGCATGCAGGCCCTATTGCACCAGGCCAGATGAAGGAACCAAGAAGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAAGATAGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATAGATAATCCTGAGGTTAAATAAAATAGTAAAAATGTATAGCCCTGTCAGCATTTTGGACATAAGACAAAGACCAAAGGAACCCTTTAAGGATTATGTAGACCGGTTCTATAAAACTCTAAAGGCTGAACAAGCGTCACAGGATGTAAAAAATTAGATGACAGAAACCTTGTTAGTCCAAAATGCGAATCCAGATTGTAAGACCATTTTAAAAGCATTAAGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCACAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAAGTCAGCCTCCATAATGGTGCAAGGAGGCAATTTAAAGAACCAAAGAAAGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGACACACAGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAGAAATGTGGAAAGAAAGGACACCAAATGAAAGATTGTACTAAGAGACAGGCTAATTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAA +MK115095.1,pol,1489,4501,2084,5096,forward,0.7624366800883231,KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK,FFKEDLAFPQGKAKEFSPEQTRANSPASRELQV*GRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIRRQLKEALLDTGADDTVLKEINLPGK*KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK*RKLVDFRELNKRTQDF*KVQLRIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPRVRYQYNVLPQR*KGSPAIFQSSMTKILEPFRKENPDIVIYQYIDDLYVRSDLEIEQHRTKIEELRQHLLK*RLTTPDKKHQKEPPFL*ISYELHPNKWTVQPIQLPDKDS*TVNDIQKLVRKLN*ASQIYPEIKVKQLCKLLRRTKALTEVVPLTEEAELELAENKEILKEPVHRAYYDPSKDLIAEVQKQGGDQWTYQIYQKPFKNLKTKKYARTRGAHTNDVKQLTEAVQKIALEAIVI*RKTPKFKLPIQKET*EM**TEYWQAT*IPE*EFVNTPPLVKL*YQLEKEPIVRAETFYVDRAANRETKLRKARYVTDRRRQKVVSLIDTTNQRTKLHAIHLALQDSGSTVNIVTDSQYALKIIQAQPDKSESELVSQIIEQLIKKEKIYLA*VPAHKRIRRNEQVDKLVSSRIRKVLFLDRIDKAQEEHEKYHSN*RAMASDFNLPPVVAKEIVASCDKCQLKREPMHGQVDCSPGIWQLDCTHLERKIILVAVHVASRYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAAC**ARIKQKFSIPYNPQSQGVVESMNNELKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKRGIGGYSAEERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLL*KGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED*,TTTTTTAAGGAAGATTTGGCCTTCCCACAAGGGAAGGCCAAAGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCGCCAGCAGAGAGCTTCAGGTTTAAGGAAGAGGCAACAGCTCCCCCTCAGAAGCAGGAGACGAAGGACCAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCATAATATCAATAAAAATAAGAAGGCAACTAAAAGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAAAAGAAATAAATTTGCCAGGAAAATAGAAACCAAAAATAATAGAAGGAATTAGAGGTTTTATCAAAGTAAGACAGTATGATCAGGTACCCATAAAAATTTGTAGACATAAAGCTATAAGTACAGTATTAATAAGACCTACACCAGTCAACATAATTAGAAGAAATCTGTTGACCCAGCTTAGTTGCACTTTAAATTTTCCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATAGATAGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAGGCATTAATAGAAATTTGTGCAGAAATAGAAAAAGAAAGGAAAATTACAAAAATTAGGCCTAAGAATCCATATAATACTCCAGTGTTTGCCATAAAGAAAAAAGACAGTACTAAATAGAGAAAATTAGTAGATTTCCGAGAACTTAATAAAAGAACACAAGACTTTTAAAAAGTTCAACTAAGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTAGGTGATGCATATTTTTCAGTTCCTTTAGACAAAGACTTCAGAAAGTATACTGCATTTACCATACCCAGTATAAACAATGAGGCACCAAGAGTTAGATATCAGTACAATGTGCTTCCACAAAGATAGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAAGAAAATCCAGACATAGTTATCTATCAATACATAGATGATTTGTATGTAAGATCTGACTTAGAAATAGAGCAGCACAGAACAAAAATAGAAGAACTGAGACAACATCTGTTAAAGTAAAGACTCACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTTCTTTAGATAAGTTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATACAGCTGCCAGACAAAGACAGCTAGACTGTCAATGACATACAGAAGTTAGTAAGAAAGTTAAATTAGGCAAGCCAGATCTATCCAGAGATTAAAGTAAAGCAATTATGTAAACTCCTTAGAAGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAAGGAGATTCTAAAAGAACCAGTACATAGAGCATATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGAGGAGACCAATGGACATATCAAATTTATCAGAAGCCATTTAAAAATCTGAAAACAAAGAAATATGCAAGAACGAGAGGTGCCCACACTAATGATGTAAAACAGTTAACAGAGGCAGTGCAAAAAATAGCCCTAGAAGCCATAGTAATATAGAGAAAGACTCCTAAATTTAAACTACCTATACAAAAAGAAACATAAGAAATGTAGTAGACAGAGTATTGGCAAGCCACCTAGATTCCTGAGTAGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATAGTACCAGTTAGAGAAAGAACCCATAGTAAGAGCAGAAACTTTCTATGTAGATAGGGCAGCTAATAGAGAGACTAAATTAAGAAAAGCAAGGTATGTTACGGACAGAAGAAGACAAAAAGTTGTCTCCCTAATAGACACAACAAATCAGAGGACTAAGTTACACGCAATTCATCTAGCTTTGCAGGATTCAGGATCAACAGTAAACATAGTGACAGACTCACAATATGCCTTAAAGATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGCTAATAAAAAAAGAAAAAATTTACCTGGCATAAGTCCCAGCACACAAAAGAATTAGAAGAAATGAACAAGTAGATAAATTAGTCAGTAGTAGAATCAGAAAAGTACTATTTCTAGATAGAATAGATAAAGCCCAAGAAGAACATGAAAAATATCACAGTAATTAAAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGTTAAAAAGAGAACCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAAGAAAAATTATCCTAGTAGCAGTTCATGTAGCCAGTAGATACATAGAAGCAGAAGTTATTGCAGCAGAAACAGGGCAGGAAACAGCATACTTTATCTTAAAATTGGCAGGAAGATGGCCAGTAAAAATAATACATACAGACAATGGCAGCAATTTCACCAGCACTACAGTTAAGGCCGCCTGTTAGTAGGCAAGGATCAAGCAGAAATTTAGTATTCCCTACAATCCTCAAAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAAAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAAGGGGGATTGGGGGGTACAGTGCAGAGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAACTACAAAAACAAATTTCAAAAATTCAAAATTTTCAGGTTTATTACAGGGACAGCAGAGAACCACTTTGGAAAGGACCAGCAAAGCTCCTCTAGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCCTTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAG +MK115095.1,vif,4445,5024,5040,5619,forward,0.7494633160752622,MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR,MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR*VYRHHHESHNPKTSSEVHIPLREARLVIKTY*GLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVRPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKRSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAAGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATATTTCAAAGAAAGCTCAGAGATGAGTTTACAGACATCACCATGAAAGCCATAATCCAAAAACAAGTTCAGAAGTACACATCCCATTAAGGGAAGCAAGATTAGTAATAAAAACATATTAAGGTCTGCATACAGGAGAAAGAGACTGGCATTTAGGCCAGGGAGTATCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGGCCTGGCAGACCAACTAATTCACCTGTATTATTTTGATTGTTTTTCAGAATCTGCTATAAGGAATGCCATATTAGGACATAGAGTTAGGCCTAGGTGTGAATATCAGGCAGGACATAACAAGGTAGGATCCCTACAATACTTGGCATTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCCAGTGTTACAAAATTAACAGAGGACAGATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAG +MK115095.1,vpr,4963,5254,5558,5843,forward,0.6112852664576804,MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS,MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS*,ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAA +MK115095.1,tat_exon1,5111,5453,5830,6046,forward,0.6409453748630458,MTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ,MEILGQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPNLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ*,ATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAACTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA +MK115095.1,rev_exon1,5373,5454,5969,6047,forward,0.5290287574606619,MAGRSGDSDEELLTAVRIIKRLYQSSK,MAGRSGDSDEELLTAVRIIKRLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG +MK115095.1,vpu,5465,5711,6259,6310,forward,0.7683007254341614,MHALEIAAIVRLVVAAIIAIVV,MHALEIAAIVRLVVAAIIAIVV*SIVLIEYKKILRQKKIDRLIDRIRERAEDSGNESDEDQEELSAIVEIGHLVP*DSDDM*,ATGCATGCCTTAGAAATAGCAGCAATAGTAAGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAG +MK115095.1,env,5628,8229,6224,8795,forward,0.7513561129398668,MTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD,MKVTRTRKNYQQL*R*GILFLRIVMICSANNL*VTVYYEVPVWKEATTTLFCASDAKAYKTEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISL*DQSLKPCVKLTPLCVTLNCTDELNLNCPNNNTCSNNTKYNMTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD*NDTLKQIVIKLKEKFKNKTIVFNQSSGKDPEIVMHSFNCREEFFYCNTTQLFNST*NNNT*NGTDN*NSTESNSTITLPCRIKQIINLWQEVRRAMYAPPIQGQIRCSSNITGLLLVRDSKSNNSSNDTKTFRPRRGDMKDN*RSELYKYKVVKIEPLRIAPTHAKRRVVQKEKRAIGLRAFFLKFLGAAGSTIGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTV*GIKQLQARVLALERYLKDQQLLKI*GCSRKLICTTNVP*NVS*SPR*NRSLDKI*TNMT*KQ*EKEIDNYTDTIYNLIEQSQNQQEQNEQDLLALDK*ASL*N*FDITQWL*YIKIFIMIVRGLISLRIVFTILSIVNRVRQRYSPLSLQTLLPTQRGPDRPERTEEGGREKDRGTSTRLVHRFLALI*DDLRSLFLFSYHRLRDLLLIVARIVELLRRRK*EALKYW*NLLQY*SQEIKNSAVSLLNTTAIAVAERTDKIIEVLQRGFRAILHIPTRIRQGLEKALL*,ATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAGTGCCAACAACTTGTAGGTCACAGTCTATTATGAGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTAGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATAAGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATGAATTAAATCTAAATTGCCCTAACAATAATACTTGTAGTAATAATACTAAATATAATATGACGGAAGAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAAGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAAGTAGTGGAAATACTACAGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATAGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATAGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAAGGAGGGCATTTTATAGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTAGGAAAGATTAGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAAAGGACCCAGAGATAGTGATGCATAGTTTTAATTGTAGAGAAGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTAGAATAATAATACTTAGAATGGTACTGATAATTAGAATAGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAAGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATAGTAAGAGCAACAATAGTAGTAATGATACAAAGACCTTCAGGCCTAGAAGAGGAGATATGAAGGACAATTAGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAAGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAGAAAGAAAAAAGAGCAATAGGACTTAGAGCTTTCTTCCTTAAGTTCTTAGGAGCAGCAGGAAGCACTATAGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTAAGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTAGAAAGATACCTAAAAGATCAACAGCTCCTGAAGATTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTAATGTGCCCTAAAATGTTAGTTAGAGCCCTAGATAGAATAGATCTCTAGATAAGATTTAGACTAACATGACCTAGAAGCAGTAGGAAAAAGAAATTGACAATTATACAGACACAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTAGCATTAGATAAGTAGGCAAGTTTGTAGAATTAGTTTGACATTACACAGTGGCTATAGTATATAAAAATATTCATAATGATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGAGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAAAGGACAGATAAGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAAGCTTTGCTATAA +MK115095.1,nef,7624,8872,8796,9417,forward,0.5872017754762344,MRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC,MNKTY*H*ISRQVCRISLTLHSGYSI*KYS****EA**V*E*FLLYCL**IELGRDTHHCHCRPSSQPRGDPTGPKEQKKEVERKTEAHPRD*CIDS*HSSRTTCGACSSSVTTA*ETYS*LWRGL*NF*DAESKKH*SIGRISCSIRVRK*RIVQLACSTPQQ*Q*LKGQIRL*KYYKEALELFSTYLHE*DRA*KKLCYKIGNKLSRRLRARWPAIKERMRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC*,ATGAACAAGACTTATTAGCATTAGATAAGTAGGCAAGTTTGTAGAATTAGTTTGACATTACACAGTGGCTATAGTATATAAAAATATTCATAATGATAGTAAGAGGCTTGATAAGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGAGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAAAGGACAGATAAGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAAAGCTTTGCTATAAGATAGGTAACAAGTTGTCAAGAAGGCTCAGGGCTAGATGGCCTGCCATAAAAGAAAGAATGAGACAAGCTAGGCCAGTAAGAAAGCCAGAGCCAGCAGCAACTAAGGTAAGAGCAGCATCTCGAGACCTAGAAAGACATGGAGCACTTACAAGTAGCAATACAGCAGCTACCAATGCTGATGTTGCCTGCCTAGAAGCACAACAGGAAGAAAAAGAGGTAGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGGCCAATGACTTACAAAGGAGCTCTAGATATTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAAAGAAGACAAGACATCCTTGATCTGTGGCTCTACAACACACAAGGCTACTTCCCTGACTGGCAGAACTACACACCAAGGCCAGGAGTCAGATTTCCACTGTGTTTTAGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGATAAGGTAGAAGAGGCCAGTGTAGGAGAGAACAACTGCTTGTTAAGCCCCGAAAACCTGCATAGAATAGAAGACGAACACAGAGAAGTATTGCAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTATTACAAGGACTGCTGA +MK115095.1,tat_exon2,7810,7903,8376,8469,forward,0.5303030303030303,RPSSQPRGDPTGPKEQKKEVERKTEAHPRD,RPSSQPRGDPTGPKEQKKEVERKTEAHPRD*,AGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAG +MK115095.1,rev_exon2,7811,8087,8377,8653,forward,0.6667847862036381,ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE,DPPPNPEGTRQARKNRRRR*RERQRHIHEISA*ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE*,GACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAAGAACAGAAGAAGGAGGTAGAGAGAAAGACAGAGGCACATCCACGAGATTAGTGCATAGATTCTTAGCACTCATCTAGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGTGGCGAGGATTGTAGAACTTCTAAGACGCAGAAAGTAAGAAGCACTGAAGTATTGGTAGAATCTCCTGCAGTATTAGAGTCAGGAAATAAAGAATAG +MK115490.1,gag,549,2049,789,2292,forward,0.3014827756125966,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATTCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGTTAATGTAATGATGCAAAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115490.1,pol,1841,4853,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACGTGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCCATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTGCTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115490.1,vif,4797,5376,5040,5619,forward,0.378905844492889,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACACATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAATCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115490.1,vpr,5315,5606,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115490.1,tat_exon1,5586,5805,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115490.1,rev_exon1,5725,5806,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115490.1,env,5980,8521,6224,8795,forward,0.4585964351370794,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115490.1,vpu,6018,6063,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115490.1,tat_exon2,8102,8198,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115490.1,rev_exon2,8103,8379,8377,8653,forward,0.40871934604904625,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115490.1,nef,8522,9137,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTACAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGGTGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +MK115576.1,gag,468,1968,789,2292,forward,0.3014827756125966,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE,MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE*,ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATTTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACATCAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGGTACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGGGGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTAATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAA +MK115576.1,pol,1760,4772,2084,5096,forward,0.19610372855115465,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGAAGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCAGCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATACTGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAAATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTCCCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATATACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGAGGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCTAGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCATTAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MK115576.1,vif,4716,5295,5040,5619,forward,0.36908151428872715,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH,MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH*,ATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGATGGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAGAATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAGTTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGAGAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +MK115576.1,vpr,5234,5525,5558,5843,forward,0.5872120921305184,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS,MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*,ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MK115576.1,tat_exon1,5505,5724,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA +MK115576.1,rev_exon1,5644,5725,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKILYQSSK,MAGRSGDSDEELLKAVRLIKILYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG +MK115576.1,env,5899,8440,6224,8795,forward,0.45675101255163,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL,MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA +MK115576.1,vpu,5937,5982,6259,6310,forward,0.46723952738990326,MEMGHHAPGDVDDL,MEMGHHAPGDVDDL*,ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG +MK115576.1,tat_exon2,8021,8117,8376,8469,forward,0.32608695652173914,RPTSQPRGDPTGPKESKKKVEKETETDQFD,RPTSQPRGDPTGPKESKKKVEKETETDQFD**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA +MK115576.1,rev_exon2,8022,8298,8377,8653,forward,0.38952607660679506,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE,DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAG +MK115576.1,nef,8441,9056,8796,9417,forward,0.4690990320178705,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC,MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGTAAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGACCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGCCTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCCTTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTACAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATATCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCAAGGAGAGAACAACTGCTTGTTACACCCTACGAGCCTGCATGGGATGGAGGACCCGGAAAAAGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTACTACAAAGACTGCTGA +OQ092466,gag,825,2361,789,2292,forward,0.2559303794507086,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAACCCTAGCCTGTTAGAAACAGCAGAAGGCTGTAGACAAATATTGGGACAGTTACAACCGTCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACATTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGGTAAAAGACACCAAGGAAGCCTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGCAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAGCAGCCAGGTTAGACACACAGGAAACAGCAGCCAGGTCAGCCAAAATTACCCTATAGTACAGAACCTTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGACTACATCCAGTGCATGCAGGGCCCATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTACACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGGGGACCCGGACATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCACGTAACAAATTCAAGTGCCATAATGATGCAGAGGGGCAATTTTAGAAACCAAAGAAAGGCTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA +OQ092466,pol,2147,5165,2084,5096,forward,0.19193360134872262,FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAATTTTCTCCAGAGCAGACCAGAGCCAACAGCCCCACCACCAGAAGAGATCTTCAGGTTTGTGGAAGAGACAACAACTCCCTCTCAGAAACAGGAGCCAATAGACAAGGAACTGTATCCTCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGGGCAGATGATACAGTATTAGAAGACATGAATTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGCATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAATCTTAGAGCCTTATAGAACACGAAATCCAGAAATGGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGACTTAGAAATAGGGCAACATAGAGCAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGATTTACTACCCCAGACAAAAAACATCAAAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAAACAAAGACAGCTGGACTGTCAATGACATACAGAAACTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCGCTAACAGAAGAAGCAGAGTTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGGGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGGAAGGACAATGGACATTTCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGCGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAGGCATAGTAATATGGGGAAAAATTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAAACTAAAAAAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATACGCATTGGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTGATAAAAAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTGCTGGAATCAGGAAAGTATTATTTTTAGATGGAATAGAGAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAGTGTCAGCTAAAAGGAGAAGCCATACATGGACAGGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTCATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATACGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATCCAAACCAAAGAACTACAAAAACAAATTATAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTTATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +OQ092466,vif,5109,5688,5040,5619,forward,0.24677296886864086,MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATGTATGTTTCAAGGAAAGCTAAGGGATGGTTTTATAGACATCACTTTGAAAGCAATCATCCAAAAATAAGTTCAGAAGTACACATCCCACTGGAGGATGCTAGACTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGGGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTACTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACACATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGAGAAAGCCACCCTTGCCTAGTGTTAAGAAGCTAACAGAAGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAG +OQ092466,vpr,5627,5918,5558,5843,forward,0.5625083211290103,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAGCTTAGGGCAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATTCTGCAACAACTGTTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +OQ092466,tat_exon1,5898,6117,5830,6046,forward,0.40192926045016075,MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ,MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ*,ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGTAA +OQ092466,rev_exon1,6037,6118,5969,6047,forward,0.4807692307692307,MAGRSGDSDDELLKTVRLIKVLYQSSK,MAGRSGDSDDELLKTVRLIKVLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGTAAG +OQ092466,vpu,6129,6375,6259,6310,forward,0.7548091006737623,MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL,MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL*,ATGCAACCTTTAACAATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATACTAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAA +OQ092466,env,6292,8875,6224,8795,forward,0.4841544358231281,MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL,MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL*,ATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAATGCTACAGAACAACAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGGTATTAAAAAATGTGACAGAAAATTTTAATATGTGGGAAAATAACATGGTAGAACAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAGTTAACTCCACTCTGTGTTACTCTAAATTGCACTAATACCACTAGGAGTAGTGGAAATACTACCAATGAAATGAAAAACTGCTCTTTCTATACCGAAACAGACATAAGAGATAAGAAGAGAAAGGAATATGCACTTTTTTATGAACTTGATATAGTACCCATAGATGAGGATAATAAGAATAAGAGTAATAATATTAGCTATTCTAGGTTAATAAGTTGCAACACCTCAGTTATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAGAAATTCAATGGAACAGGGCCATGTACAAATGTCAGCACAGTGCAATGTACACATGGTATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGACGAGGTAGTAATTAAATCTAGCAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTAAATGAAACTGTAAAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGACATAAGACAAGCACATTGTAACATCTCTAGAGCAAACTGGACAAACACTTTAAAACAGATAGCTGAAAAATTAGGAAAACAATTTGAGGAAAATAAAACAATAGTCTTTAATCCCTCCTCAGGAGGGGACCCAGAGGTTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATTCAACACCACTGTTTAATAGTACTTGGAAGGAGACTAATGGGATTTGGACTCGTATTGGAGAGTCAAATGATAGTGCTACTATCACACTAAATGATAGTGATACTATCACACTCCAATGTAAAATAAGGCAAATTATAAACTTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAAAGGACAAATTAGCTGTTTATCAAACATTACAGGGCTGCTATTAGTAAGAGATGGTGGCAATAACACGAACGGGACCGAGATCTTCAGACCTGTAGGAGGAGAAATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAGAAAAGAGCGACATTGGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGTAATAAATCTCTGGAAAAAATTTGGAATAATATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACCTTACTTGAAGAATCGCAGAACCAGCAAGAAAAAAATGAAAAAGAATTATTGGAATTAGATACATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAATAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTCCAGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAATATTACAAAGAGCTTGTAGAGCTATTCTCCATATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA +OQ092466,tat_exon2,8456,8552,8376,8469,forward,0.5303030303030303,RSTPQLRGDPTGPKESKEKVERETETDPVH,RSTPQLRGDPTGPKESKEKVERETETDPVH**,AGATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGA +OQ092466,rev_exon2,8457,8733,8377,8653,forward,0.3690449563855961,DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE,DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE*,GATCCACCCCCCAGCTCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGTCCATTAGTGAACGGATTCTTAGCACTTTTCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCTCTCAAATACGGGTGGAGTCTCCTACAATATTGGAGCCAGGAACTAAAGAATAG +OQ092466,nef,8876,9509,8796,9417,forward,0.4195274186357557,MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC,MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC*,ATGGGTGGCAAATGGTCAAAAAGTAAGCTATTTGGATGGCCTGCTGTAAGGGAAAGAATGAGAAGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAGACACGGAGCAATCACAAGTAGCAATACACCAACTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAAGAGGAGGAGGTGGGTTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCGGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCCAAAAAGACAAGAGATCCTTGATCTGTGGGTCTATCATACACAAGGTTTCTTCCCTGATTGGGATAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGTTAGTACCAGTGGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAATAACTGCTTGTTACACCCTATGAGCCAGCATGGGATGGAGGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCGTCACGTGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA +OQ092463,gag,801,2313,140,1643,forward,0.2589641434262949,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAGCGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAACACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGACCAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGACTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTCAGGAACCAGAGAAAGAATGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +OQ092463,pol,2105,5117,1435,4447,forward,0.14797358397500882,FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGGTAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAGTCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAGCAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAGAAAGAACCTATAGTAGGAGCAGAAACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGATACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGACCATGAGAAATATCACAGTAATTGGAGGGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGGTATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAACAACAATACATACAGACAATGGCAGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +OQ092463,vif,5061,5640,4391,4970,forward,0.32379004771642805,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTATTCATCCAAGAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAGGAGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCAACTAATTCATCGGTATTACTTTGATTGTTTTTCAGAATCTGCCATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG +OQ092463,vpr,5579,5870,4909,5200,forward,0.34013605442176864,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTCCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAACAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAACCAGTAGATCCTAG +OQ092463,tat_exon1,5850,6069,5180,5399,forward,0.38728632478632474,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ*,ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA +OQ092463,rev_exon1,5989,6070,5319,5397,forward,0.3620426829268293,MAGRSGDSDEELIKTVRLIKLLYQSSK,MAGRSGDSDEELIKTVRLIKLLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG +OQ092463,vpu,6081,6330,5411,5657,forward,0.44033465433729635,MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL,MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL*,ATGCAACCTTTAGAAATATCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG +OQ092463,env,6244,8827,5574,8124,forward,0.44564023273792597,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL*,ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTATGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATACCACTAGTACCAAGAATACCACCCCTAGTACCACTGCTAGTAGCGGGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGAAGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAATACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAAGGATTCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTCTTGGAAGGATGAGTCAAATGGCACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAATAATGAGAGCAACACCACCGAGATTTTCAGACCTGAAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAGAGAAATTGACAATTACACAAGCTTGATATACACTTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGACAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCAACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA +OQ092463,tat_exon2,8408,8504,7705,7798,forward,0.4918032786885247,RPTPQPRGDPTGQKESEKKVERETETDPDH,RPTPQPRGDPTGQKESEKKVERETETDPDH**,AGACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGA +OQ092463,rev_exon2,8409,8685,7706,7982,forward,0.3471418653089562,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE*,GACCCACCCCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG +OQ092463,nef,8828,9461,8125,8752,forward,0.33768732280275404,MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC*,ATGGGTGGTAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTAGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGTTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATAAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTGCCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGATTGCTGA +OQ092465,gag,855,2358,200,1700,forward,0.38383718162342295,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGGTGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAACTAGAACGGTTTGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAACAGATAGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGTACAGCAAGCAGCAACTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAATCTCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAATCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGACTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTGCCATAATGATGCAGAGAGGCAATTTCAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +OQ092465,pol,2150,5162,1492,4504,forward,0.21863141758600757,FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTCGGGGAGGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTTGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTTTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGATGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGGTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGTTGCCAGAAAAAGACAGTTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAGGTAAAGCAATTATGTAGACTCCTTAAGGGAACCAAGGCACTAACAGAAGTAGTACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGACAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAGCTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAAACTACCCATACAAAGAGAAACATGGGACACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTTTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACCACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATCCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAAGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTGGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTATCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGTGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGCATACCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATCATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATACACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +OQ092465,vif,5106,5685,4448,5027,forward,0.3589413907639558,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAAGAAAGCTAAGGGATGGATTTATAAGCATCACTATGACAGTATTAATCCAAAAATAAGTTCAGAAGTTCACATCCCACTAGGGGATGCAAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGGCCTAGCAGACCGACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACGTATAGTTAGTCCTAGTTGTGATTATCAAGCAGGACATAACAAGGTAGGGTCTCTACAGTACTTGGCACTAACAGCACTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG +OQ092465,vpr,5624,5915,4966,5257,forward,0.2400808693454637,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTACACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATCCTTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +OQ092465,tat_exon1,5895,6114,5237,5456,forward,0.2515090543259557,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ*,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA +OQ092465,rev_exon1,6034,6115,5376,5454,forward,0.4807692307692307,MAGRSGDSDEELIKTVRLIKLLYQSSK,MAGRSGDSDEELIKTVRLIKLLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG +OQ092465,vpu,6126,6375,5468,5708,forward,0.3726554787759131,MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL,MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL*,ATGCAACCTTTAGTAATATCAGCAATAGTAGCATTAGTAGTAGTAGCGATAATAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAATTGATAGAATAAGGGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAG +OQ092465,env,6289,8881,5631,8208,forward,0.48107374453325313,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL,MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL*,ATGAGAGCGAGGGGGATCAGGAAGAATTATCAGCACTTGTTGTGGAGATGGGGCACGATGCTCCTTGGGATGTTAATGATCTGTAGCGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTGTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACTGGTGCCAATAATACCACTAGTACCAATACTACCACCCCTAGTACCACTGTTAGTAGCGAGGAAAGGATGGGGGAAGGAGAAATAAAAAACTGCTCTTTCAATATCACCACAAGCCTAAGAGATAAGATGCAGAAAGAATATGCACTCTTTTATAGACCTGATATAGTACCAATAGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACAATAAGACGTTCAATGGATCAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAAACATAATAGTACATCTGAATAAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATGGGACCAGGGAAAGCATTTTTTGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGAGCAGGTATTTAAAAAGTTAAGAGAACAATTTGGGAATAATAAGACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACATCAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGGTGAAGGGTCAAACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACTCATTAAATGTTCATCAAATATCACAGGGCTACTATTAACAAGAGATGGTGGTAACGAGAGCGAGACCACCGACACTGAGACCTTCAGACCTGTAGGAGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGATTAGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAAAATCTGACACAGATTTGGGATAACATGACCTGGATGCAATGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTGAAGAATCGCAAAACCAACAAGAAAAGAATGAACTAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCTACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCAGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA +OQ092465,tat_exon2,8462,8558,7789,7885,forward,0.5337214944201844,RPTSQPRGDPTGQKESKEKVERETETDPDH,RPTSQPRGDPTGQKESKEKVERETETDPDH**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGA +OQ092465,rev_exon2,8463,8739,7790,8066,forward,0.41220115416323155,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGGAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCGGGGGGTGGGAACTCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG +OQ092465,nef,8882,9515,8209,8842,forward,0.3463855421686747,MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGGTTGGATGGCCTAAAATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGAACCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTATAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAGAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGATTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA +OQ092462,gag,767,2270,789,2292,forward,0.316486214000789,MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAGCTAAAACATATAGTATGGGCAAGCAGGGAACTAGAGCGATTTGCAGTTAATCCCGGCCTGTTAGAAACATCGGAGGGCTGTAGACAAATACTAGGGCAACTACAGCCCGCTCTTCAGACAGGATCAGAAGAACTTAAATCACTATTTCATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTGAAAGACACCAAAGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAAGAAAAGTAAGAAAAAGGCACAGCAAGCAGCCGCTGACACAGGAAATAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATGCAGGGACAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGATAAGGCTTTCAGTCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCAGGCAGGGCCTGTTGCGCCAGGCCAGCTACGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAGCAAATAGCATGGATGACACATAATCCACCTGTCCCAGTAGGAGAAATCTATAAAAGATGGATACTTCTGGGATTAAATAAAATAGTAAGAATGTACAGCCCCGTCAGCATTCTGGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCTGAGCAGGCTTCACAGGATGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTAGGACCAGCAGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAGCAAATCCAGCTAGCATAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGCCTATTAAGTGTTTCAACTGTGGCAAAGAGGGGCATATTGCTAAAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +OQ092462,pol,2062,5074,2084,5096,forward,0.1943357603710517,FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGGTCTGGCCTTCCCGCAAGGGGAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACGACAACTCCCTCTCAGAAACCGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGAGGGCAACTAAAGGAAGCTCTATTAGACACAGGAGCAGATGATACAGTTTTAGAAGAAATGAATTTGCCAGGAAAATGGAAGCCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGCTGCACTTTAAATTTTCCCATTAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAGAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATCTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAGAAAGATGGTAATAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTTTGGGAAGTTCAATTAGGAATACCACATCCTGCTGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAGGACTTCAGGAAGTATACTGCATTTACAATACCTAGCACAAACAATGAGACACCAGGGATTAGATACCAGTACAATGTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTGGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTATATGTAGGGTCTGACTTAGAAATAGGGCAGCATAGAGCAAAAATAGAGGAACTGAGACAACATCTATTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCCGATAAATGGACAGTACAGCCTATATTGCTGCCAGAAAAAGACAGCTGGACTGTTAATGACATACAGAAGTTAGTGGGAAAATTGAACTGGGCAAGTCAGATTTACCCAGGAATCAAAGTAAGGCAGCTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAGTGGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGATTTAATAGCAGAACTACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATATAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGAGGTACCCACACTAATGATATAAAACAATTAACAGAGGCAGTGCAAAAAATAGCTACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAATTACCCATACAAAAGGAAACATGGGAAGCATGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAATGGGAGTTTGTCAATACCCCTCCCTTAGTGAAACTATGGTACCAGTTGGAAAAAGAACCCATAGTGGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAAACTAAATTAGGAAAAGCAGGATATGTTACTGACAAAGGAAGACAAAAAGTTGTCCCCCTAACGGACACAACAAATCAGAAGACTGAGCTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAACTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAGTTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAGTTGTGACAAATGTCAGTTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCTGGAATATGGCAGCTAGATTGTACACATCTAAAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTGAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGCTGCCTGTTGGTGGGCGGGGATCAAACAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGAAGTAATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCAGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCGTCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACCAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATCAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +OQ092462,vif,5018,5597,5040,5619,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAGAGTTTAGTAAAACATCATATGCATATATCAAGGAAAGCTAAGAATTGGTTGTATAGACATCACTATGAAAGCATTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTAGTAATAACAACATATTGGGGTCTGCTTACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAGGAGATATAGAACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGTATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCTTATTAGGACGTGTAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACCAGGTAGGATCTCTACAGTACTTGGCACTAACAGCATTAATAACACCAAAAAGGAGAAAGCCACCTTTGCCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAG +OQ092462,vpr,5536,5827,5558,5843,forward,0.5656050955414005,MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS,MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAACTTAAAAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTTCATGGATTGGGACAGCATATCTATGAAACATATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCGACATAGCAGAATAGGCATTAATCTACAGAGGAGAGCAAGGAATGGATCCAGTAGATCCTAG +OQ092462,tat_exon1,5807,6026,5830,6046,forward,0.37688442211055273,MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ,MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ*,ATGGATCCAGTAGATCCTAGCCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGTTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGTAA +OQ092462,rev_exon1,5946,6027,5969,6047,forward,0.4807692307692307,MAGRSGDSDEELLKAVRLIKFLYQSSK,MAGRSGDSDEELLKAVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGTAAG +OQ092462,vpu,6038,6284,6259,6310,forward,0.7548091006737623,MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL,MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL*,ATGCGACCTTTAGAAATAGCAGCAATAGTAGCACTAGTAGTAGCAGTACTAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAGTGGATAGAATAAGAGAAAGAGCAGAAGATAGTGGAAATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAG +OQ092462,env,6201,8778,6224,8795,forward,0.5096088152339575,MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL,MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL*,ATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAGTGCCACAGAGAACTTGTGGGTCACAGTCTACTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAGATGCCAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAGCCCACAAGAAGTAGTATTGAAAAATGTGACAGAAAAGTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATCAGGATATAATCAGTTTATGGGATGAAAGCCTAAAACCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATGCTACTATCAGTGGTAATGCAACAGAAGAAATAAAAAACTGCTCTTTCAATGTCAATACAAAAATAGGAGGTAAGAAGCAGAAAGAACGTGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAGTACTAATAGGACTAATACCAGCTATAGGTTGATAAGTTGTAACACTTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTGGAGATAAAGAGTTCAATGGAACAGGACTATGTAGAAATGTCAGCACAGTCCAATGTACACATGGAATCAGGCCAGTAGTATCAACTCAATTGCTGTTGAATGGCAGTCTAGCAGAAGGAGAGGTAGTAATTAAATCTGAAAATATCACGAACAATGTTAAAACCATAATAGTACAGCTAAATGAAACTGTATCAATTAATTGTACAAGACCTAGCAACAATACAAGAAGAAGCATACATATGGGACCAGGGAGAGCCTTTTATGCAACAGGAGAAATAATAGGAGATATAAGGAAAGCACAGTGTATCCTGAATAAGACAGACTGGAGTGACACTTTAACAAGGATAGCTAAAAAATTACACAAGCAATTTCATGGACCAATAGCATTTGAGCAATCCTCAGGAGGGGACCCTGAAATTACAATGCACACTTTTAATTGTGGAGGGGAATTTTTCTACTGCAACACATCAGCGTTGTTTAGCGGGACCTGGAATGGTACTGCTTGGACTAATGCTACTTGGGGTAATATTGCAGGTAACAATATCACACTCCAATGCAGAATAAAACAATTTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGAGAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGCAGTAACACAACAAATGGTGGCGAGAATGGGACCCAGATTGGCGAGAATGTGACCCAGATCTTCAGACCTGGAGGAGGGGATATGAGGGACAATTGGAGAAGTGAATTATACAAATATAAAGTAGTAAAAATTGAGCCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTAACATTCGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACACTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTTCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATGCTAGTTGGAGTAATAAAACTCTGGAAAAAATTTGGGGGAACATGACCTGGATGGAGTGGGAGAGAGAAATTGACAATTATACAGACTTAATATACACCTTAATTGAACAATCGCAGAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAGGCTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGTTTAGTAGGTTTAAGAATAGTTTTTGCTGTGCTTTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTATCATTCCAGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGTTTGCTTAATGCTATAGCTATATCAGTAGCGGAGGGAACAGATAGGATTATAGAAGCAATACAAAGAATTTGTAGAGCTATCTTACACATACCTACAAGGATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA +OQ092462,tat_exon2,8359,8455,8376,8469,forward,0.4542873367404884,RPASQPRGDPTGPKEPTKKVERETETDPDH,RPASQPRGDPTGPKEPTKKVERETETDPDH**,AGACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGA +OQ092462,rev_exon2,8360,8636,8377,8653,forward,0.38952607660679506,DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE,DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE*,GACCCGCCTCCCAGCCTCGAGGGGACCCGACAGGCCCGAAGGAACCGACGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGATCATTAGTGAACGGATTCTTAGCACTCATCTGGATCGACCTGCGGAGTCTGTTCCTCTTCAGCTACCTCCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG +OQ092462,nef,8779,9406,8796,9417,forward,0.5105263157894737,MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC,MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC*,ATGGGTGGCAAGGGGTCAAAAATGAGGGGATGGGTTGCTGTAAGGGAAAAAATGAGGCGAACTAAGCCAGAAGATGAGCCAGCAGCAAATGGGGTGGGGGCAGCATCTCGAGACTTGGAGAAATATGGCGCACTCACAAGTAGCAATACAGTAGCTACTAATGCTGATTTAGCTTGGCTAGAAGCACAAGAGGAAGAGGAGGTGGGCTTTCCAGTCAGACCTCAAGTACCTTTAAGACCAATGACTTACAAGGGAGCTCTAGATCTCAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCAAAAAAGGCAAGATATCCTTGATCTGTGGGTTCACCACACACAAGGCTATCTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCCCTGACTTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATCAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATAGAGGACCCGGAGAAAGAAGTCTTAATGTGGAAGTTTGACAGCCACCTAGCATTCCGTCACATGGCCCGAGAGCTGCATCCGGAGTATTACAAAGACTGCTGA +OQ092464,gag,773,2276,200,1700,forward,0.3910844507174782,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ,MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGGTTCGCAGTTAATCCTGGCCTGTTAGAAACATCAAAAGGCTGTAGACAAATATTGGAACAGCTACAGCCAGCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATCGATGTAAAAGACACCAAGGAAGCTTTAGATAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGCCAGCCAAAATTACCCTATAGTGCAGAACATTCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCGCTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAACAATGTACACAGGAGGTAAAAAATTGGATGACAGAAACATTATTGGTCCAAAATGCAAACCCAGATTGCAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCAGAAGCAATGAGCCAAGTAACAACTCCAGGTACCATAATGATGCAGAGAGGCAATTTAAGGAGCCAAAGAAAGACTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCATATAGCTAAAAATTGCAGGGCTCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAAGAAGGACACCAAATGAAAGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +OQ092464,pol,2068,5080,1492,4504,forward,0.21863141758600757,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAAGAAATGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGACTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTAACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATCTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTAAGACAACATCTGTTGAAGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTAGGGGAACCAAGGCACTAACAGAAGTAATACCACTAACAAGAGAAGCAGAGCTAGAACTGGCAGAGAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCAACAAAAGACTTAATAGTAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGAGGTGCCCACACTAATGATGTAAAACAACTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAGTTTAGACTACCCATACAAAGAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGTGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTTAATCAAATAATAGAACAGTTAATAAAAAAGGACAAGGTATACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGACGATCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGACTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGCCAGTTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACACTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCGGCAATTTCATCAGTACTACAGTTAAGGCCGCCTGCTGGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATACATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAGAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAACTTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +OQ092464,vif,5024,5603,4448,5027,forward,0.3264662839130924,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGGACATGGAAAAGTTTAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGGATGGGTTTATAAGCATCACTATGACAGTACTCATCCAAGAATAAGTTCAGAAGTTCACATCCCGCTAGGGGATGCGAAATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAGAAGAGATATAGCACACAAGTAGACCCTGGCTTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGCTGTGATTATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAG +OQ092464,vpr,5542,5833,4966,5257,forward,0.2678354029705382,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTCCACAGCTTAGGGCAACATATCTATGAAACATATGGAGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCATCATTCAACGGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +OQ092464,tat_exon1,5813,6032,5237,5456,forward,0.2515090543259557,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ,MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ*,ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA +OQ092464,rev_exon1,5952,6033,5376,5454,forward,0.4807692307692307,MAGRSGDSDEELLQTVRLIKLLYQSSK,MAGRSGDSDEELLQTVRLIKLLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG +OQ092464,vpu,6044,6290,5468,5708,forward,0.4384802297327147,MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL,MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL*,ATGCAACCTTTACACATAGCAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGATAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAG +OQ092464,env,6207,8790,5631,8208,forward,0.4684887834843129,MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL,MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL*,ATGAGAGTGAGGGGGATCAAGAAGAATTATCAGCACTTGTGGAGATGGGGCATTGTGCTCCTTGGGATGTTAATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGGAAATGTGACAGAGAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTGAATTGCACTAATTATAATAACACCAATAATACCACTAGTACCAATGATACCACTAGTACCACTACTAGTAGCGGGGAAAAGATGAAGGAAGGAGAGATAAAAAACTGCTCTTTCAATATCACCACAAGCATAAGAGATAAGGTGCAGAAAGAATATGCACTCTTTTATAAACCTGATATAGTACCAATAGATAATGATAATACTAGTTATAGGTTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGTAACGATAAGAAGTTCAATGGAACAGGATCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCTCAGACAATGCTAAAACCATAATAGTACATCTGAATGAATCTGTAGAAATTACTTGTATAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGACCAGGGAGAGCATTTTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAAAATTAATAAAACGAAATGGAATAACACTTTAGACCAGATATTTAAAAAGTTAAGAGAACAATTTGGGAATAAGACAATAATCTTTACTCACTCCTCAGGAGGGGACCCAGAAGTTGTAACGCACAATTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGAGACTAGTTATTGGAAGGATGAAAGGTCAAATGACAATGACACTATCACACTCCCATGCAGAATAAAGCAAATTATAAACCTGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGATACATTAAATGTTCATCAAATATCACAGGGCTGCTAATAACAAGAGATGGTGGGAAAAACGAGAGCAACACCACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACTATTATTGTCTGGTATAGTGCAACAGCAAAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGGATAACATGACCTGGATGGAATGGGAAAGAGAAATTGACAATTACACAAGCTTAATATACACTTTAATTGAAAAATCGCAAAACCAACAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGCGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAGTGCTATTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGCTGGGAGAGCTTTTCTCCACATACCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA +OQ092464,tat_exon2,8371,8467,7789,7885,forward,0.4971219256933542,RPTSQPRGDPTGQKESKKKVERETETDPDH,RPTSQPRGDPTGQKESKKKVERETETDPDH**,AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGA +OQ092464,rev_exon2,8372,8648,7790,8066,forward,0.3934495959166312,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE,DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE*,GACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCAGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGACCATTAGTGAACGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAACTCCTCAAATATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAACTAAAGAATAG +OQ092464,nef,8791,9412,8209,8842,forward,0.36363636363636354,MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC,MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC*,ATGGGTGGTAAGTGGTCAAAGAGTAGTGTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGATGAGGAGGTGGGTTTTCCAGTCAGGCCTCAGGTACCTTTAAGACCAATGACTTACAAGGGAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGGTATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACCCTATGAGCCTGCATGGGATGGATGACCCAGAGAAAGAAGTGTTAGTATGGAGGTTTGACAGTCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGGACTGCTGA +OQ092467,gag,808,2308,789,2292,forward,0.2880084183556755,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ,MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCCAGGAGGAAAAAAACAATACAAATTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTCTAGAGACATCAGAAGGGTGTAGACAAATACTGGGACAGCTACAACCAGCTCTTCAGACAGGATCAGAAGAATTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAAATAGAGGAAGAGCAAAACCAAAGTAAGAAAAAAGCAGCAGCTGCAGCAGCTGACACAGGAAACAGAAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCTTTATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTACAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAATAGGTGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCAGGCAGGGCCTGTTGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTAACCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGATATCTATAAAAGATGGATAATTCTGGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTGAGAGCCGAGCAAGCAACACAGGAAGTAAAGAATTGGATGACAGAAACTTTGCTGGTCCAAAATGCAAATCCAGATTGTAAGACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCAGGCCACAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAACTGTAATGATGCAGAGAGGCAATTTTAGGAATCAAAGAAAGACAGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCATATAGCAAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAAGAAGGGCACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAA +OQ092467,pol,2100,5112,2084,5096,forward,0.1880239208210378,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGGAGGCCAGGGAATTTTCTTCAGAGCAAACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGGGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAAATAGGGGGGCAGCTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGATATGAATTTACCAGGAAAATGGAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCGTAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGTACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGATAGTACTAAATGGAGAAAGTTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTAGATGTGGGTGATGCCTATTTTTCAATTCCCTTAGATAAAGACTTCAGGAAGTACACTGCATTTACCATACCTAGTACAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAGGATATTAGAACCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTGGGATCTGACTTAGAAATAGGACAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAAGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGATAGCTGGACTGTCAATGACATCCAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGAATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACTAAAGCACTAACAGAAGTAGTACCACTAACAGCAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCAACGAAAGACCTAATAGCAGAACTACAGAAGCAGGGGCAAAGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTGAAACAATTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTAATATGGGGAAAGACCCCTAAATTTAAATTACCTATACAAAAAGAAACATGGGAAACATGGTGGACAGACTATTGGCAAGCCACCTGGATTCCTGATTGGGAGTTTGTCAATACCCCTCCCTTAGTAAAATTATGGTACCAGTTAGAAAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGAGAAACTAGATCAGGCAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCCCCCTAACAGACACAACAAACCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCACTAGGAATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAGAAGGAAAAGGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGGAAGATACTATTTTTAGATGGAATAACTAAGGCCCAAGATGATCATGAGAGATACCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTATAATAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCACGCATGGACAAGTAGACTGCAGTCCAGGAATATGGCAACTAGATTGTACGCATTTAGAAGGAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTCTCTTAAAACTAGCAGGAAGATGGCCAGTAAAGACAGTACATACAGATAATGGCAGCAATTTCACCAGTGCTGCGGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAAAGTCAAGGAGTCATAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAAGTAAGAGAGCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAAAGAATTACAGAAACAAATCACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGACCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAGGCAAAGATCATTAGAGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +OQ092467,vif,5056,5635,5040,5619,forward,0.41532297468972923,MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH,MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAACAGTTTAGTAAAACACCATATACATGTCTCAAAGAAAGCTAAGGGATGGGTTTATAGACACCACTATGAAAGCACCCATCCAAGAATAAGTTCAGAAGTACATATCCCGCTAGGGGAAGCTAGATTAGTAATAGCAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCACTTGGGTCAGGGAGTCTCCATAGAATGGAGGGAAAAGAAATATATCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGCATTATTTTGATTGTTTTTCAGAGTCTGCTATAAGAAATGCCATAGTAGGACGTTTAGTTAGCCCTCAGTGTGAATATCAAACAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCACTAGTAGCATTAATAACGCCAAAAAAGAGAAAGCCACCTTTGCCTAGTGTTAGGAAATTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAG +OQ092467,vpr,5574,5865,5558,5843,forward,0.5793112277557293,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAGAGCTTTTAGAGGAACTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAACTTAGGACAATACATCTATGCAACTTATGGGGATACTTGGACAGGAGTGGAAGCTTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGGAATGGAGCCAGTAGATCCTAG +OQ092467,tat_exon1,5845,6064,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ,MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGTAA +OQ092467,rev_exon1,5984,6065,5969,6047,forward,0.4807692307692307,MAGRSGDSDEDLLKTVRLIKQLYQSSK,MAGRSGDSDEDLLKTVRLIKQLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGTAAG +OQ092467,env,6239,8807,6224,8795,forward,0.4718646278993922,MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL,MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL*,ATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAGTGCTAAAGAACAATTGTGGGTCACAGTTTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAAATGCTAAAGCATATGACCCAGAGGTGCATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAAGAAGTACCATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGACATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTATTTTAAATTGCACTAATGTGAATGTTACTACTAACAATAATAGTAGTAGTGAGGAACAGATGGAGGTAGGAGAAATAAAAAACTGCTCTTTCAATATTGCCACAAGAATAAAAAATAAGATAAAGAAAGAATATGCACTTTTTAATAGACTTGATGTAGTACCAATAGAGGATGATAATACAAGCTATATGTTGATAAATTGTAATACCTCAGTCACTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATACTAAAATGTAATGATAAAAAGTTCAATGGAACAGGACCATGTAACAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGATAGTAGTTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTGAATAAAACTATAGAAATTAATTGTATAAGACCCAACAATAATACAAGAAAAAGTATATCTTTAAGACCGGGGCAAGCAATTTATGCAACAGAAGACATAATAGGAAATATAAGACAAGCACATTGTAACATTAGGAGAAAAGACTGGGATAAAGCTTTAGAACAGGTAGTTGCAAAATTAAGAGAACAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGAGACCCAGAAATTGTAATGCATAGTTTTAATTGTGCAGGGGAATTTTTCTACTGTAACACAACAAAGCTGTTTAATAGTACTTGGAATGTTAATAACACTCGGAATAATACTACTGATAATAGCACCATCACTCTCCCGTGCAGAATAAAACAAATTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCTCCTCCCATCAAAGGGCAAATTAAATGTTCATCAAATATTACAGGGTTATTATTAACAAGAGATGGTGGTGTCCGCGAGGACAACGCCCCTGAGATCTTTAGACCTGGAGGAGGAGATATGAGGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTGGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAAAGAGGAAAAAGAGCAGTAACGCTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACTTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTACCTTGGAATACTAGTTGGAGTAATAAATCTTTGGAAAAGATTTGGAAAAACATGACCTGGATGGAGTGGGAGAAAGAAATTAACAATTACACAAGGACAATATACACCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAGGAATTATTGGAATTGGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATCATGATAGTAGGAGGTATAGTAGGGTTAAGAATAGTTTTTACTGTGCTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTGTCATTCCAGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAATTGGTAGAGCTATTCTCCACATCCCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA +OQ092467,vpu,6283,6340,6259,6310,forward,0.4380664652567977,MGHHVEMGHHAPWNVDDL,MGHHVEMGHHAPWNVDDL*,ATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAG +OQ092467,tat_exon2,8388,8484,8376,8469,forward,0.4918032786885247,RPASQLRGDQTGPKEQKKKVERETETDPGN,RPASQLRGDQTGPKEQKKKVERETETDPGN**,AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGA +OQ092467,rev_exon2,8389,8674,8377,8653,forward,0.4123815285339786,DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC,DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC*,GACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAG +OQ092467,nef,8808,9417,8796,9417,forward,0.49080954243253805,MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC,MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC*,ATGGGTGGCAAGTGGTCAAAATGTAGTCTGGTTGGATGGCCTGCTATAAGGGAAAGAATGAGACGAGCTGAGCCAGCTCCAGCAGCAGAAGGGGTGGGAGCAGCATCTCGAGACTTGGAAAAACATGGAGCACTCACAACTAGTAATACAGCAGCTAATAATGCTGCTTGTGCCTGGCTGGAAGCACAAGAGGAGGAAGAGGTGGGGTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGGAGCTATGGATCTTGGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTACTCCCCAAAAAGACAAGAAATCCTTGATCTGTGGGTCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTATACACCAGGGCCAGGGACTAGATATCCATTAACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCAGATGAAGGAGAGAATAGCTGTTTGCTACACCCGATGAACCAGCATGGGGCAGATGACACAGAAAGAGAAGTATTAATGTGGAAGTTTGACAGCAGCCTAGCATTTCATCACAAGGCCCGAGAGCTGCATCCGGAGTACTACAAAAACTGCTGA diff --git a/tests/expected-results-large-hxb2/errors.json b/tests/expected-results-large-hxb2/errors.json index 55666d9..ecef12f 100644 --- a/tests/expected-results-large-hxb2/errors.json +++ b/tests/expected-results-large-hxb2/errors.json @@ -3,52 +3,52 @@ { "sequence_name": "KX505501.1", "error": "DeletionInOrf", - "message": "ORF pol at 2084-5095 can have maximum deletions 30, got 2721" + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 2721" }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6322" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6322" }, { "sequence_name": "KX505501.1", "error": "InsertionInOrf", - "message": "Smaller ORF vif at 5040-5618 can have maximum insertions 90, got 909" + "message": "Smaller ORF vif at 5040-5619 can have maximum insertions 90, got 909" }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", - "message": "Smaller ORF vpr at 5558-5848 can have maximum deletions 30, got 66" + "message": "Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 66" }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6043 contains an internal stop codon at 5892" + "message": "Smaller ORF tat_exon1 at 5829-6045 contains an internal stop codon at 5892" }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon1 at 5968-6043 contains an internal stop codon at 6004" + "message": "Smaller ORF rev_exon1 at 5968-6046 contains an internal stop codon at 6004" }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6060-6308 contains out of frame indels that impact 190 positions." + "message": "Smaller ORF vpu at 6060-6309 contains out of frame indels that impact 189 positions." }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8467 contains out of frame indels that impact 51 positions." + "message": "Smaller ORF tat_exon2 at 8375-8471 contains out of frame indels that impact 51 positions." }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8651 can have maximum deletions 30, got 96" + "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 96" }, { "sequence_name": "KX505501.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8795-9415 can have maximum insertions 90, got 867" + "message": "Smaller ORF nef at 8795-9416 can have maximum insertions 90, got 867" }, { "sequence_name": "KX505501.1", @@ -70,59 +70,59 @@ { "sequence_name": "MN691959", "error": "InternalStopInOrf", - "message": "Smaller ORF vpu at 6060-6308 contains an internal stop codon at 6123" + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon at 6123" }, { "sequence_name": "MN691959", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8467 contains an internal stop codon at 8420" + "message": "Smaller ORF tat_exon2 at 8375-8471 contains an internal stop codon at 8420" } ], "MN692074": [ { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "ORF pol at 2084-5095 can have maximum deletions 30, got 981" + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 981" }, { "sequence_name": "MN692074", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6550" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6550" }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF vif at 5040-5618 can have maximum deletions 30, got 81" + "message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81" }, { "sequence_name": "MN692074", "error": "InsertionInOrf", - "message": "Smaller ORF vpr at 5558-5848 can have maximum insertions 90, got 207" + "message": "Smaller ORF vpr at 5558-5849 can have maximum insertions 90, got 207" }, { "sequence_name": "MN692074", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6043 contains an internal stop codon at 5892" + "message": "Smaller ORF tat_exon1 at 5829-6045 contains an internal stop codon at 5892" }, { "sequence_name": "MN692074", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6060-6308 can have maximum insertions 90, got 249" + "message": "Smaller ORF vpu at 6060-6309 can have maximum insertions 90, got 249" }, { "sequence_name": "MN692074", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8467 contains out of frame indels that impact 85 positions." + "message": "Smaller ORF tat_exon2 at 8375-8471 contains out of frame indels that impact 40 positions." }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8651 can have maximum deletions 30, got 204" + "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 204" }, { "sequence_name": "MN692074", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8795-9415 can have maximum insertions 90, got 882" + "message": "Smaller ORF nef at 8795-9416 can have maximum insertions 90, got 882" }, { "sequence_name": "MN692074", @@ -140,12 +140,12 @@ { "sequence_name": "MN090335", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 822" + "message": "ORF gag at 789-2292 contains an internal stop codon at 822" }, { "sequence_name": "MN090335", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8467 contains an internal stop codon at 8420" + "message": "Smaller ORF tat_exon2 at 8375-8471 contains an internal stop codon at 8420" }, { "sequence_name": "MN090335", @@ -172,7 +172,7 @@ { "sequence_name": "MN090376", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 822" + "message": "ORF gag at 789-2292 contains an internal stop codon at 822" }, { "sequence_name": "MN090376", @@ -256,7 +256,7 @@ { "sequence_name": "MK115211.1", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6889" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6889" }, { "sequence_name": "MK115211.1", @@ -295,59 +295,59 @@ { "sequence_name": "MK114705.1", "error": "DeletionInOrf", - "message": "Smaller ORF nef at 8795-9415 can have maximum deletions 30, got 186" + "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 186" } ], "MK114856.1": [ { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 873" + "message": "ORF gag at 789-2292 contains an internal stop codon at 873" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2213" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2213" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6325" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6325" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5172" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5172" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vpr at 5558-5848 contains an internal stop codon at 5594" + "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon at 5594" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6043 contains an internal stop codon at 5919" + "message": "Smaller ORF tat_exon1 at 5829-6045 contains an internal stop codon at 5919" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vpu at 6060-6308 contains an internal stop codon at 6126" + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon at 6126" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8467 contains an internal stop codon at 8420" + "message": "Smaller ORF tat_exon2 at 8375-8471 contains an internal stop codon at 8420" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8651 contains an internal stop codon at 8433" + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon at 8433" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF nef at 8795-9415 contains an internal stop codon at 8852" + "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon at 8852" }, { "sequence_name": "MK114856.1", @@ -359,42 +359,42 @@ { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 834" + "message": "ORF gag at 789-2292 contains an internal stop codon at 834" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2183" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2183" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6349" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6349" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5373" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5373" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vpr at 5558-5848 contains an internal stop codon at 5717" + "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon at 5717" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6043 contains an internal stop codon at 5859" + "message": "Smaller ORF tat_exon1 at 5829-6045 contains an internal stop codon at 5859" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8651 contains an internal stop codon at 8433" + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon at 8433" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF nef at 8795-9415 contains an internal stop codon at 8873" + "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon at 8873" }, { "sequence_name": "MK115009.1", @@ -416,7 +416,7 @@ { "sequence_name": "MK115387.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8467 contains an internal stop codon at 8420" + "message": "Smaller ORF tat_exon2 at 8375-8471 contains an internal stop codon at 8420" } ], "MK115491.1": [], @@ -424,7 +424,7 @@ { "sequence_name": "MK116110.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 834" + "message": "ORF gag at 789-2292 contains an internal stop codon at 834" }, { "sequence_name": "MK116110.1", @@ -447,7 +447,7 @@ { "sequence_name": "MK114997.1", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6511" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6511" } ], "MK115518.1": [], @@ -477,47 +477,47 @@ { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 924" + "message": "ORF gag at 789-2292 contains an internal stop codon at 924" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2183" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2183" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6424" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6424" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5247" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5247" }, { "sequence_name": "MK115464.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6043 contains out of frame indels that impact 104 positions." + "message": "Smaller ORF tat_exon1 at 5829-6045 contains out of frame indels that impact 104 positions." }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vpu at 6060-6308 contains an internal stop codon at 6126" + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon at 6126" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8467 contains an internal stop codon at 8420" + "message": "Smaller ORF tat_exon2 at 8375-8471 contains an internal stop codon at 8420" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8651 contains an internal stop codon at 8433" + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon at 8433" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "Smaller ORF nef at 8795-9415 contains an internal stop codon at 8873" + "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon at 8873" }, { "sequence_name": "MK115464.1", @@ -530,7 +530,7 @@ { "sequence_name": "MK115520.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2198" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2198" }, { "sequence_name": "MK115520.1", @@ -583,42 +583,42 @@ { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 927" + "message": "ORF gag at 789-2292 contains an internal stop codon at 927" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2183" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2183" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "ORF env at 6223-8793 contains an internal stop codon at 6550" + "message": "ORF env at 6223-8794 contains an internal stop codon at 6550" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5151" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5151" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6043 contains an internal stop codon at 5859" + "message": "Smaller ORF tat_exon1 at 5829-6045 contains an internal stop codon at 5859" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vpu at 6060-6308 contains an internal stop codon at 6126" + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon at 6126" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8651 contains an internal stop codon at 8433" + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon at 8433" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF nef at 8795-9415 contains an internal stop codon at 8873" + "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon at 8873" }, { "sequence_name": "MK115095.1", diff --git a/tests/expected-results-large-hxb2/holistic.json b/tests/expected-results-large-hxb2/holistic.json index 250dea5..2b57969 100644 --- a/tests/expected-results-large-hxb2/holistic.json +++ b/tests/expected-results-large-hxb2/holistic.json @@ -6,9 +6,9 @@ "blast_matched_qlen": 1997, "blast_sseq_coverage": 0.2498456472525211, "blast_qseq_coverage": 1.2158237356034052, - "blast_sseq_orfs_coverage": 0.17666166916541728, + "blast_sseq_orfs_coverage": 0.17663960024984385, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 4 }, "MN691959": { @@ -20,7 +20,7 @@ "blast_qseq_coverage": 1.1086063415148004, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MN692074": { @@ -30,9 +30,9 @@ "blast_matched_qlen": 4178, "blast_sseq_coverage": 0.5042189750977567, "blast_qseq_coverage": 1.1728099569171853, - "blast_sseq_orfs_coverage": 0.411544227886057, + "blast_sseq_orfs_coverage": 0.4114928169893816, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 4 }, "MN692145": { @@ -44,7 +44,7 @@ "blast_qseq_coverage": 1.1271545051088863, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MN090335": { @@ -54,9 +54,9 @@ "blast_matched_qlen": 9069, "blast_sseq_coverage": 0.9842560197571517, "blast_qseq_coverage": 1.0603153600176425, - "blast_sseq_orfs_coverage": 1.000374812593703, + "blast_sseq_orfs_coverage": 1.0003747657713928, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MN090376": { @@ -66,9 +66,9 @@ "blast_matched_qlen": 8985, "blast_sseq_coverage": 0.9784935171846059, "blast_qseq_coverage": 1.0604340567612687, - "blast_sseq_orfs_coverage": 0.9943778110944528, + "blast_sseq_orfs_coverage": 0.9943785134291068, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115581.1": { @@ -80,7 +80,7 @@ "blast_qseq_coverage": 1.0046340179041602, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115690.1": { @@ -92,7 +92,7 @@ "blast_qseq_coverage": 1.0113530808132933, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115571.1": { @@ -104,7 +104,7 @@ "blast_qseq_coverage": 1.0113902490951672, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115514.1": { @@ -116,7 +116,7 @@ "blast_qseq_coverage": 1.0173736943082499, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115488.1": { @@ -128,7 +128,7 @@ "blast_qseq_coverage": 1.0325262392185388, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 6 }, "MK115030.1": { @@ -140,7 +140,7 @@ "blast_qseq_coverage": 1.0655270655270654, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115498.1": { @@ -152,7 +152,7 @@ "blast_qseq_coverage": 1.0080329774865235, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115211.1": { @@ -164,7 +164,7 @@ "blast_qseq_coverage": 1.0598981399468557, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115158.1": { @@ -176,7 +176,7 @@ "blast_qseq_coverage": 1.043749316416931, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK114705.1": { @@ -188,7 +188,7 @@ "blast_qseq_coverage": 1.122622463075125, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 6 }, "MK114856.1": { @@ -200,7 +200,7 @@ "blast_qseq_coverage": 1.0812493405085997, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 4 }, "MK115009.1": { @@ -212,7 +212,7 @@ "blast_qseq_coverage": 1.0590854784403172, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115387.1": { @@ -224,7 +224,7 @@ "blast_qseq_coverage": 1.040936952714536, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115491.1": { @@ -236,7 +236,7 @@ "blast_qseq_coverage": 1.0299299511780937, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK116110.1": { @@ -246,9 +246,9 @@ "blast_matched_qlen": 8967, "blast_sseq_coverage": 0.9846676270837621, "blast_qseq_coverage": 1.0635664101706257, - "blast_sseq_orfs_coverage": 0.9986256871564217, + "blast_sseq_orfs_coverage": 0.9986258588382261, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 4 }, "MK115527.1": { @@ -260,7 +260,7 @@ "blast_qseq_coverage": 1.0056956017297753, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK114997.1": { @@ -272,7 +272,7 @@ "blast_qseq_coverage": 1.0516841524019878, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115518.1": { @@ -284,7 +284,7 @@ "blast_qseq_coverage": 0.9996854356715948, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115065.1": { @@ -296,7 +296,7 @@ "blast_qseq_coverage": 1.069459518124593, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 6 }, "MK115464.1": { @@ -308,7 +308,7 @@ "blast_qseq_coverage": 0.9893407844354756, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115530.1": { @@ -320,7 +320,7 @@ "blast_qseq_coverage": 0.9992665549036044, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115520.1": { @@ -330,9 +330,9 @@ "blast_matched_qlen": 9589, "blast_sseq_coverage": 0.9786993208479111, "blast_qseq_coverage": 0.987902805297737, - "blast_sseq_orfs_coverage": 1.012118940529735, + "blast_sseq_orfs_coverage": 1.0121174266083697, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115503.1": { @@ -344,7 +344,7 @@ "blast_qseq_coverage": 0.9953207861079338, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115570.1": { @@ -356,7 +356,7 @@ "blast_qseq_coverage": 1.0057986294148655, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115509.1": { @@ -368,7 +368,7 @@ "blast_qseq_coverage": 1.0197797498128942, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115702.1": { @@ -378,9 +378,9 @@ "blast_matched_qlen": 9098, "blast_sseq_coverage": 0.9874459765383824, "blast_qseq_coverage": 1.0596834469114091, - "blast_sseq_orfs_coverage": 1.0198650674662668, + "blast_sseq_orfs_coverage": 1.0198625858838226, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 4 }, "MK115095.1": { @@ -392,7 +392,7 @@ "blast_qseq_coverage": 1.060085367188355, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 2 }, "MK115490.1": { @@ -404,7 +404,7 @@ "blast_qseq_coverage": 1.0204343639670483, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "MK115576.1": { @@ -416,7 +416,7 @@ "blast_qseq_coverage": 1.0342110943233327, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "OQ092466": { @@ -428,7 +428,7 @@ "blast_qseq_coverage": 1.1192442700805285, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "OQ092463": { @@ -440,7 +440,7 @@ "blast_qseq_coverage": 1.1118167621030712, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "OQ092465": { @@ -452,7 +452,7 @@ "blast_qseq_coverage": 1.108292783932084, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "OQ092462": { @@ -464,7 +464,7 @@ "blast_qseq_coverage": 1.1301214741610048, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "OQ092464": { @@ -476,7 +476,7 @@ "blast_qseq_coverage": 1.115634156550858, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 }, "OQ092467": { @@ -488,7 +488,7 @@ "blast_qseq_coverage": 1.0962157809983897, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 } } \ No newline at end of file diff --git a/tests/expected-results-large-hxb2/orfs.json b/tests/expected-results-large-hxb2/orfs.json index 42a43bf..a7e0d52 100644 --- a/tests/expected-results-large-hxb2/orfs.json +++ b/tests/expected-results-large-hxb2/orfs.json @@ -5,7 +5,7 @@ "start": 0, "end": 1824, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.7623480451210163, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -17,7 +17,7 @@ "start": 0, "end": 1824, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7647696476964769, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -29,7 +29,7 @@ "start": 0, "end": 1824, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.764345579086969, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -41,7 +41,7 @@ "start": 1, "end": 1750, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", "distance": 0.7680130480667754, "protein": "MRKLQNGIDCIQCMQGLLHQAR", @@ -53,7 +53,7 @@ "start": 336, "end": 1824, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3997973809613161, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -65,7 +65,7 @@ "start": 1306, "end": 1750, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", "distance": 0.758082497212932, "protein": "MRTQIVKLF", @@ -77,7 +77,7 @@ "start": 1599, "end": 1824, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.763687959047407, "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -89,7 +89,7 @@ "start": 1599, "end": 1824, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.7672672672672673, "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -101,7 +101,7 @@ "start": 1627, "end": 1927, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7724330674761569, "protein": "GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", @@ -113,7 +113,7 @@ "start": 1746, "end": 1824, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.7616257781032589, "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", @@ -125,7 +125,7 @@ "start": 1747, "end": 1927, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.7620186257236345, "protein": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", @@ -139,7 +139,7 @@ "start": 639, "end": 2142, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.0801186943620179, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ", @@ -151,7 +151,7 @@ "start": 1934, "end": 4946, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.054722889368558514, "protein": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -163,7 +163,7 @@ "start": 4890, "end": 5469, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.09157509157509158, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -175,7 +175,7 @@ "start": 5408, "end": 5699, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.09157509157509158, "protein": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS", @@ -185,33 +185,33 @@ { "name": "tat_exon1", "start": 5679, - "end": 5895, + "end": 5898, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5818, - "end": 5896, + "end": 5899, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5910, "end": 6156, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.7333193453629878, "protein": "MQPIQIAIVALVVAIIIAIVV", @@ -223,7 +223,7 @@ "start": 6070, "end": 8656, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.1405525502318391, "protein": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL", @@ -235,7 +235,7 @@ "start": 8237, "end": 8333, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.6672629695885509, "protein": "RPTSQTRGDPTGPKE", @@ -247,7 +247,7 @@ "start": 8238, "end": 8514, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.210025203024363, "protein": "DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE", @@ -259,7 +259,7 @@ "start": 8657, "end": 9278, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.06582796957284964, "protein": "MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC", @@ -273,7 +273,7 @@ "start": 0, "end": 4059, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.7628009106462086, "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -285,7 +285,7 @@ "start": 2, "end": 4115, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.76033143808071, "protein": "MNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -297,7 +297,7 @@ "start": 789, "end": 2292, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.19470123431286457, "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -309,7 +309,7 @@ "start": 2084, "end": 4115, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.5617851221088768, "protein": "FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -321,7 +321,7 @@ "start": 3617, "end": 4115, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7631664499349805, "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -333,7 +333,7 @@ "start": 3617, "end": 4115, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.7629151983204618, "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -345,7 +345,7 @@ "start": 3617, "end": 4115, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.7634058115037564, "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -357,7 +357,7 @@ "start": 3823, "end": 4084, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", "distance": 0.7659115426105717, "protein": "MVPVRERTHSRSRNLLCRWGS", @@ -369,9 +369,9 @@ "start": 3823, "end": 4084, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.7645569620253164, + "distance": 0.7610789980732178, "protein": "MVPVRERTHSRSRNLLCRWGS", "aminoacids": "MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY", "nucleotides": "ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC" @@ -381,7 +381,7 @@ "start": 4080, "end": 4164, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.769292994661058, "protein": "IPSGEPSDAAYKQLLFACTGSLWLDQI", @@ -393,7 +393,7 @@ "start": 4081, "end": 4153, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.7667894365645325, "protein": "YPVASPQMLHISSCFLPVLGLSG", @@ -407,7 +407,7 @@ "start": 775, "end": 2281, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.20784453738651432, "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", @@ -419,7 +419,7 @@ "start": 2070, "end": 5085, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.14843087362171337, "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", @@ -431,7 +431,7 @@ "start": 5029, "end": 5608, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", @@ -443,7 +443,7 @@ "start": 5547, "end": 5838, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", @@ -453,33 +453,33 @@ { "name": "tat_exon1", "start": 5818, - "end": 6034, + "end": 6037, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5957, - "end": 6035, + "end": 6038, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.4267425320056898, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" + "distance": 0.4274965800273598, + "protein": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6049, "end": 6298, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.520618556701031, "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", @@ -491,7 +491,7 @@ "start": 6212, "end": 8783, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4766895145301081, "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", @@ -501,21 +501,21 @@ { "name": "tat_exon2", "start": 8364, - "end": 8457, + "end": 8460, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.3921568627450981, "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH*", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG" + "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH**", + "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA" }, { "name": "rev_exon2", "start": 8365, "end": 8641, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.29843322556577967, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", @@ -527,7 +527,7 @@ "start": 8784, "end": 9387, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.3966849799795139, "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", @@ -541,7 +541,7 @@ "start": 315, "end": 1665, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.596665989022159, "protein": "MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ", @@ -553,7 +553,7 @@ "start": 1427, "end": 4469, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.27887169154684477, "protein": "FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -565,7 +565,7 @@ "start": 4413, "end": 4992, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH", @@ -577,7 +577,7 @@ "start": 4931, "end": 5222, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3808935556928146, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS", @@ -587,33 +587,33 @@ { "name": "tat_exon1", "start": 5202, - "end": 5418, + "end": 5421, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.42503863987635226, + "distance": 0.40192926045016075, "protein": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ", - "aminoacids": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ", - "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAG" + "aminoacids": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ*", + "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5341, - "end": 5419, + "end": 5422, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT" + "distance": 0.524971623155505, + "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAG" }, { "name": "vpu", "start": 5433, "end": 5682, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5244381020335356, "protein": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL", @@ -625,7 +625,7 @@ "start": 5596, "end": 8158, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.5152420275849952, "protein": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ", @@ -637,7 +637,7 @@ "start": 7739, "end": 7835, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.6842105263157894, "protein": "RPSSQPRGDQTGPKE", @@ -649,7 +649,7 @@ "start": 7740, "end": 8016, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.4267425320056898, "protein": "DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE", @@ -661,7 +661,7 @@ "start": 8159, "end": 8813, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4891015417331206, "protein": "MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", @@ -675,7 +675,7 @@ "start": 522, "end": 1590, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.6828741441147701, "protein": "MYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", @@ -687,7 +687,7 @@ "start": 1382, "end": 4394, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.23966680468616797, "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", @@ -699,7 +699,7 @@ "start": 4338, "end": 4920, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.42479043044174425, "protein": "MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH", @@ -711,7 +711,7 @@ "start": 4859, "end": 5150, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.36116422349691957, "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", @@ -721,33 +721,33 @@ { "name": "tat_exon1", "start": 5130, - "end": 5346, + "end": 5349, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.484764542936288, + "distance": 0.46628407460545196, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5269, - "end": 5347, + "end": 5350, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5797101449275363, - "protein": "MAGRSGDSDEELLRIAGTIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLRIAGTIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGT" + "distance": 0.5763097949886105, + "protein": "MAGRSGDSDEELLRIAGTIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLRIAGTIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5361, "end": 5631, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5680096940321115, "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL", @@ -759,7 +759,7 @@ "start": 5524, "end": 8110, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.49863720871464445, "protein": "MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", @@ -771,7 +771,7 @@ "start": 7691, "end": 7784, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4464285714285715, "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", @@ -783,7 +783,7 @@ "start": 7692, "end": 7968, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.4267425320056898, "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", @@ -795,7 +795,7 @@ "start": 8111, "end": 8735, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5036770338593534, "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", @@ -809,7 +809,7 @@ "start": 680, "end": 2180, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -821,7 +821,7 @@ "start": 1972, "end": 4984, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -833,7 +833,7 @@ "start": 4928, "end": 5507, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -845,7 +845,7 @@ "start": 5446, "end": 5737, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -857,9 +857,9 @@ "start": 5717, "end": 5936, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -867,21 +867,21 @@ { "name": "rev_exon1", "start": 5856, - "end": 5934, + "end": 5937, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5948, "end": 6194, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -893,7 +893,7 @@ "start": 6111, "end": 8652, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4581222593756883, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -905,7 +905,7 @@ "start": 8233, "end": 8329, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -917,7 +917,7 @@ "start": 8234, "end": 8510, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -929,7 +929,7 @@ "start": 8653, "end": 9268, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -943,7 +943,7 @@ "start": 777, "end": 2286, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.24076694150363465, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ", @@ -955,7 +955,7 @@ "start": 2078, "end": 5090, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.185747174550021, "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -967,7 +967,7 @@ "start": 5034, "end": 5613, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.39766435115272314, "protein": "MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH", @@ -979,7 +979,7 @@ "start": 5552, "end": 5843, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.37688442211055273, "protein": "MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS", @@ -989,33 +989,33 @@ { "name": "tat_exon1", "start": 5823, - "end": 6039, + "end": 6042, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.42503863987635226, + "distance": 0.40192926045016075, "protein": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ", - "aminoacids": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5962, - "end": 6040, + "end": 6043, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDNDEDLLKTVRFIKLLYQSS", - "aminoacids": "MAGRSGDNDEDLLKTVRFIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGT" + "distance": 0.524971623155505, + "protein": "MAGRSGDNDEDLLKTVRFIKLLYQSSK", + "aminoacids": "MAGRSGDNDEDLLKTVRFIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6054, "end": 6300, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.48542752364408415, "protein": "MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL", @@ -1027,7 +1027,7 @@ "start": 6217, "end": 8800, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.49751604598034604, "protein": "MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ", @@ -1039,7 +1039,7 @@ "start": 8381, "end": 8474, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPTSQPRGDPTGPKEPETKVESKTETDPLT", @@ -1051,7 +1051,7 @@ "start": 8382, "end": 8658, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.42982806877249113, "protein": "DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE", @@ -1063,7 +1063,7 @@ "start": 8801, "end": 9425, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.459649710885779, "protein": "MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN", @@ -1077,7 +1077,7 @@ "start": 579, "end": 2079, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1089,7 +1089,7 @@ "start": 1871, "end": 4883, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1101,7 +1101,7 @@ "start": 4827, "end": 5406, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1113,7 +1113,7 @@ "start": 5345, "end": 5636, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -1123,33 +1123,33 @@ { "name": "tat_exon1", "start": 5616, - "end": 5832, + "end": 5835, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5755, - "end": 5833, + "end": 5836, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5847, "end": 6093, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5278361344537815, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -1161,7 +1161,7 @@ "start": 6010, "end": 8551, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4585964351370794, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -1173,7 +1173,7 @@ "start": 8132, "end": 8228, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1185,7 +1185,7 @@ "start": 8133, "end": 8409, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1197,7 +1197,7 @@ "start": 8552, "end": 9167, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1211,7 +1211,7 @@ "start": 584, "end": 2084, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1223,7 +1223,7 @@ "start": 1876, "end": 4888, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19298018391400085, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1235,7 +1235,7 @@ "start": 4832, "end": 5411, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1247,7 +1247,7 @@ "start": 5350, "end": 5641, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -1259,9 +1259,9 @@ "start": 5621, "end": 5840, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -1269,21 +1269,21 @@ { "name": "rev_exon1", "start": 5760, - "end": 5838, + "end": 5841, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5852, "end": 6098, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5278361344537815, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -1295,7 +1295,7 @@ "start": 6015, "end": 8556, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.46021436825118545, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -1307,7 +1307,7 @@ "start": 8137, "end": 8233, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1319,7 +1319,7 @@ "start": 8138, "end": 8414, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1331,7 +1331,7 @@ "start": 8557, "end": 9172, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1345,7 +1345,7 @@ "start": 707, "end": 2207, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1357,7 +1357,7 @@ "start": 1999, "end": 5011, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19298018391400085, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1369,7 +1369,7 @@ "start": 4955, "end": 5534, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1381,7 +1381,7 @@ "start": 5473, "end": 5764, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -1393,9 +1393,9 @@ "start": 5744, "end": 5963, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -1403,21 +1403,21 @@ { "name": "rev_exon1", "start": 5883, - "end": 5961, + "end": 5964, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5975, "end": 6221, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5278361344537815, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -1429,7 +1429,7 @@ "start": 6138, "end": 8679, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.46021436825118545, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -1441,7 +1441,7 @@ "start": 8260, "end": 8356, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1453,7 +1453,7 @@ "start": 8261, "end": 8537, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1465,7 +1465,7 @@ "start": 8680, "end": 9295, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1479,7 +1479,7 @@ "start": 176, "end": 1685, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.27304152847199525, "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", @@ -1491,7 +1491,7 @@ "start": 1477, "end": 4489, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.26443159013103534, "protein": "FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", @@ -1503,7 +1503,7 @@ "start": 4433, "end": 5012, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3903081914030819, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", @@ -1515,7 +1515,7 @@ "start": 4951, "end": 5242, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", @@ -1527,9 +1527,9 @@ "start": 5222, "end": 5441, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.5060292850990527, + "distance": 0.48954161103693805, "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" @@ -1537,21 +1537,21 @@ { "name": "rev_exon1", "start": 5361, - "end": 5439, + "end": 5442, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", "start": 5453, "end": 5699, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5682727709300466, "protein": "MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM", @@ -1563,7 +1563,7 @@ "start": 5616, "end": 8217, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.5091484869809996, "protein": "MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", @@ -1575,7 +1575,7 @@ "start": 7798, "end": 7891, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", @@ -1587,7 +1587,7 @@ "start": 7799, "end": 8075, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", @@ -1599,7 +1599,7 @@ "start": 8218, "end": 8860, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5432971135257649, "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", @@ -1613,7 +1613,7 @@ "start": 663, "end": 2163, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1625,7 +1625,7 @@ "start": 1955, "end": 4967, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1637,7 +1637,7 @@ "start": 4911, "end": 5490, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1649,7 +1649,7 @@ "start": 5429, "end": 5720, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -1659,33 +1659,33 @@ { "name": "tat_exon1", "start": 5700, - "end": 5916, + "end": 5919, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5839, - "end": 5917, + "end": 5920, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5931, "end": 6177, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -1697,7 +1697,7 @@ "start": 6094, "end": 8635, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4581222593756883, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -1709,7 +1709,7 @@ "start": 8216, "end": 8312, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1721,7 +1721,7 @@ "start": 8217, "end": 8493, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1733,7 +1733,7 @@ "start": 8636, "end": 9251, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1747,7 +1747,7 @@ "start": 250, "end": 1753, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.25132972351334526, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", @@ -1759,7 +1759,7 @@ "start": 1545, "end": 4557, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.2540106951871657, "protein": "FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED", @@ -1771,7 +1771,7 @@ "start": 4501, "end": 5083, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.40472673559822736, "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH", @@ -1783,7 +1783,7 @@ "start": 5022, "end": 5313, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3808935556928146, "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", @@ -1793,33 +1793,33 @@ { "name": "tat_exon1", "start": 5293, - "end": 5509, + "end": 5512, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.48719691819623834, + "distance": 0.4691531785127845, "protein": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGA" }, { "name": "rev_exon1", "start": 5432, - "end": 5510, + "end": 5513, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5698711595639246, - "protein": "MAGRSGDSDEELLRITRTIKFLYQNS", - "aminoacids": "MAGRSGDSDEELLRITRTIKFLYQNS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGT" + "distance": 0.564475664826894, + "protein": "MAGRSGDSDEELLRITRTIKFLYQNSE", + "aminoacids": "MAGRSGDSDEELLRITRTIKFLYQNSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGAG" }, { "name": "vpu", "start": 5524, "end": 5794, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5776066350710902, "protein": "MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL", @@ -1831,7 +1831,7 @@ "start": 5687, "end": 8198, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.6638103647692366, "protein": "MHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", @@ -1843,7 +1843,7 @@ "start": 7779, "end": 7872, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4464285714285715, "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", @@ -1855,7 +1855,7 @@ "start": 7780, "end": 8056, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.4267425320056898, "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", @@ -1867,7 +1867,7 @@ "start": 8199, "end": 8823, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5093153589821267, "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC", @@ -1881,7 +1881,7 @@ "start": 316, "end": 1819, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.24298292552415257, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP", @@ -1893,7 +1893,7 @@ "start": 1611, "end": 4623, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.25132972351334504, "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", @@ -1905,7 +1905,7 @@ "start": 4567, "end": 5149, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.42180774748923966, "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH", @@ -1917,7 +1917,7 @@ "start": 5088, "end": 5379, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.36116422349691957, "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", @@ -1927,33 +1927,33 @@ { "name": "tat_exon1", "start": 5359, - "end": 5575, + "end": 5578, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.48719691819623834, + "distance": 0.4691531785127845, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGA" }, { "name": "rev_exon1", "start": 5498, - "end": 5576, + "end": 5579, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5929978118161925, - "protein": "MAGRSGDSDEELLKITRTIKFLYQNS", - "aminoacids": "MAGRSGDSDEELLKITRTIKFLYQNS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGT" + "distance": 0.5884955752212389, + "protein": "MAGRSGDSDEELLKITRTIKFLYQNSE", + "aminoacids": "MAGRSGDSDEELLKITRTIKFLYQNSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGAG" }, { "name": "vpu", "start": 5590, "end": 5860, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5680096940321115, "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL", @@ -1965,7 +1965,7 @@ "start": 5753, "end": 8315, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4938011995318903, "protein": "MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", @@ -1977,7 +1977,7 @@ "start": 7896, "end": 7989, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4464285714285715, "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", @@ -1989,7 +1989,7 @@ "start": 7897, "end": 8173, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.44369946756063905, "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE", @@ -2001,7 +2001,7 @@ "start": 8316, "end": 8940, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5036770338593534, "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", @@ -2015,7 +2015,7 @@ "start": 532, "end": 2047, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.27270615563298484, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ", @@ -2027,7 +2027,7 @@ "start": 1839, "end": 4851, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.21944123990570308, "protein": "FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2039,7 +2039,7 @@ "start": 4795, "end": 5374, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH", @@ -2051,7 +2051,7 @@ "start": 5313, "end": 5598, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.39401343921808174, "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS", @@ -2061,33 +2061,33 @@ { "name": "tat_exon1", "start": 5578, - "end": 5794, + "end": 5797, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.5284227381905524, + "distance": 0.5138258357408172, "protein": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ", - "nucleotides": "ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ*", + "nucleotides": "ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGTAA" }, { "name": "rev_exon1", "start": 5717, - "end": 5795, + "end": 5798, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5989458552946814, - "protein": "MAGRSGDRDEDLLETVRFIKFLYQNS", - "aminoacids": "MAGRSGDRDEDLLETVRFIKFLYQNS", - "nucleotides": "ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGT" + "distance": 0.5940834141610087, + "protein": "MAGRSGDRDEDLLETVRFIKFLYQNSK", + "aminoacids": "MAGRSGDRDEDLLETVRFIKFLYQNSK", + "nucleotides": "ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGTAAG" }, { "name": "vpu", "start": 5809, "end": 6055, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5727186311787074, "protein": "MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL", @@ -2099,7 +2099,7 @@ "start": 5972, "end": 8549, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.5021538905210794, "protein": "MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL", @@ -2109,21 +2109,21 @@ { "name": "tat_exon2", "start": 8130, - "end": 8223, + "end": 8226, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.6074396517609815, "protein": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD", - "aminoacids": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD*", - "nucleotides": "AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAG" + "aminoacids": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD**", + "nucleotides": "AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGA" }, { "name": "rev_exon2", "start": 8131, "end": 8407, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.47688921496698455, "protein": "DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE", @@ -2135,7 +2135,7 @@ "start": 8550, "end": 8985, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.6017710152157637, "protein": "MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK", @@ -2149,7 +2149,7 @@ "start": 120, "end": 2022, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.7511039743075072, "protein": "MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC", @@ -2161,7 +2161,7 @@ "start": 1787, "end": 4826, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7637180771917039, "protein": "ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI", @@ -2173,7 +2173,7 @@ "start": 4425, "end": 5349, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7566838361540349, "protein": "MIVWQVDRMKIRTWKSLVKYHMYISKKAKK", @@ -2185,7 +2185,7 @@ "start": 5033, "end": 5579, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.7334692430420138, "protein": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS", @@ -2195,33 +2195,33 @@ { "name": "tat_exon1", "start": 5486, - "end": 5777, + "end": 5780, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", "distance": 0.7619181418001311, "protein": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS", - "aminoacids": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAV", - "nucleotides": "ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTA" + "aminoacids": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAVN", + "nucleotides": "ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAAT" }, { "name": "rev_exon1", "start": 5698, - "end": 5776, + "end": 5779, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT" + "distance": 0.524971623155505, + "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAA" }, { "name": "vpu", "start": 5790, "end": 6039, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.7407973896196594, "protein": "MQPLKILAIVALVVAAIIAIVV", @@ -2233,7 +2233,7 @@ "start": 5953, "end": 8521, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.7529994904340572, "protein": "MTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD", @@ -2245,7 +2245,7 @@ "start": 7916, "end": 9176, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.7587548638132295, "protein": "MSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", @@ -2257,7 +2257,7 @@ "start": 8102, "end": 8198, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.7134052388289676, "protein": "RPSSQPQEDQTGPKE", @@ -2269,7 +2269,7 @@ "start": 8103, "end": 8379, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.6888374145157732, "protein": "ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE", @@ -2283,7 +2283,7 @@ "start": 302, "end": 1715, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.7484174646972894, "protein": "MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC", @@ -2295,7 +2295,7 @@ "start": 1613, "end": 4625, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7535201229073285, "protein": "AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", @@ -2307,7 +2307,7 @@ "start": 4289, "end": 4988, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7624714704923379, "protein": "MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", @@ -2319,7 +2319,7 @@ "start": 5087, "end": 5378, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.6737199868435482, "protein": "MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT", @@ -2329,33 +2329,33 @@ { "name": "tat_exon1", "start": 5235, - "end": 5574, + "end": 5577, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.6412492073557388, + "distance": 0.6325105553751218, "protein": "MTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ", - "aminoacids": "MEILRQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPSLEP*KHPGSQPMTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ", - "nucleotides": "ATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAG" + "aminoacids": "MEILRQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPSLEP*KHPGSQPMTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ*", + "nucleotides": "ATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" }, { "name": "rev_exon1", "start": 5497, - "end": 5575, + "end": 5578, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5678884873515746, - "protein": "MAGRSRDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSRDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.564475664826894, + "protein": "MAGRSRDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSRDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", "start": 5589, "end": 5835, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.6256860592755216, "protein": "MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP", @@ -2367,7 +2367,7 @@ "start": 5752, "end": 8353, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.7518870380010406, "protein": "MKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD", @@ -2379,7 +2379,7 @@ "start": 7748, "end": 8996, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.7328145265888457, "protein": "MTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR", @@ -2391,7 +2391,7 @@ "start": 7934, "end": 8027, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.5633802816901408, "protein": "RPSSQPREDPTGPKEQKKEVERKTEAHPRD", @@ -2403,7 +2403,7 @@ "start": 7935, "end": 8211, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.6781884553958476, "protein": "ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE", @@ -2417,7 +2417,7 @@ "start": 292, "end": 1795, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.25442849599155104, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", @@ -2429,7 +2429,7 @@ "start": 1587, "end": 4599, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.17509882471546434, "protein": "FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2441,7 +2441,7 @@ "start": 4543, "end": 5122, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.34158090650317496, "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH", @@ -2453,7 +2453,7 @@ "start": 5061, "end": 5352, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", @@ -2463,33 +2463,33 @@ { "name": "tat_exon1", "start": 5332, - "end": 5548, + "end": 5551, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.40192926045016075, + "distance": 0.37688442211055273, "protein": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGA" }, { "name": "rev_exon1", "start": 5471, - "end": 5549, + "end": 5552, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGAG" }, { "name": "vpu", "start": 5563, "end": 5809, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5560235888795282, "protein": "MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL", @@ -2501,7 +2501,7 @@ "start": 5726, "end": 8288, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.5276672448389619, "protein": "MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL", @@ -2511,21 +2511,21 @@ { "name": "tat_exon2", "start": 7869, - "end": 7962, + "end": 7965, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.6995153473344102, "protein": "RPSSQLRGEPTGPKE", - "aminoacids": "RPSSQLRGEPTGPKE*KKEVERETKADPVD*", - "nucleotides": "AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG" + "aminoacids": "RPSSQLRGEPTGPKE*KKEVERETKADPVD**", + "nucleotides": "AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGA" }, { "name": "rev_exon2", "start": 7870, "end": 8146, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.32366339007432277, "protein": "DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE", @@ -2537,7 +2537,7 @@ "start": 8289, "end": 8940, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4631394979345408, "protein": "MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", @@ -2551,7 +2551,7 @@ "start": 521, "end": 2021, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -2563,7 +2563,7 @@ "start": 1813, "end": 4825, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2575,7 +2575,7 @@ "start": 4769, "end": 5348, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -2587,7 +2587,7 @@ "start": 5287, "end": 5578, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -2599,9 +2599,9 @@ "start": 5558, "end": 5777, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -2609,21 +2609,21 @@ { "name": "rev_exon1", "start": 5697, - "end": 5775, + "end": 5778, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5789, "end": 6035, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -2635,7 +2635,7 @@ "start": 5952, "end": 8493, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4581222593756883, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -2647,7 +2647,7 @@ "start": 8074, "end": 8170, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -2659,7 +2659,7 @@ "start": 8075, "end": 8351, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -2671,7 +2671,7 @@ "start": 8494, "end": 9109, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -2685,7 +2685,7 @@ "start": 488, "end": 1601, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.7372881355932204, "protein": "MSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ", @@ -2697,7 +2697,7 @@ "start": 1393, "end": 4405, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.2413273001508296, "protein": "FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED", @@ -2709,7 +2709,7 @@ "start": 4349, "end": 4928, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.41532297468972923, "protein": "MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH", @@ -2721,7 +2721,7 @@ "start": 4867, "end": 5158, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.4489646325820048, "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP", @@ -2733,9 +2733,9 @@ "start": 5138, "end": 5357, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.5378373277976978, + "distance": 0.5243736647892796, "protein": "MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ", "aminoacids": "MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ*", "nucleotides": "ATGGAGCCAGTAGACCCTAGCCTAGCGCCCTGGAAGCACCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGCTATTGTAAAAAGTGCTGCTTACATTGCCAAGTTTGTTTCACAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGTAA" @@ -2743,21 +2743,21 @@ { "name": "rev_exon1", "start": 5277, - "end": 5355, + "end": 5358, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDSDEDLLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEDLLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGT" + "distance": 0.524971623155505, + "protein": "MAGRSGDSDEDLLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEDLLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGTAAG" }, { "name": "vpu", "start": 5369, "end": 5615, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5568484042553192, "protein": "MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL", @@ -2769,7 +2769,7 @@ "start": 5532, "end": 8073, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.48179770175870373, "protein": "MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL", @@ -2781,7 +2781,7 @@ "start": 7654, "end": 7750, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4464285714285715, "protein": "RPASQPRGDPTGPKESKKTVERETETDPHA", @@ -2793,7 +2793,7 @@ "start": 7655, "end": 7940, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.46126825660935467, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC", @@ -2805,7 +2805,7 @@ "start": 8074, "end": 8695, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.48762337458875116, "protein": "MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC", @@ -2819,7 +2819,7 @@ "start": 683, "end": 2183, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -2831,7 +2831,7 @@ "start": 1975, "end": 4987, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2843,7 +2843,7 @@ "start": 4931, "end": 5510, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -2855,7 +2855,7 @@ "start": 5449, "end": 5740, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -2867,9 +2867,9 @@ "start": 5720, "end": 5939, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -2877,21 +2877,21 @@ { "name": "rev_exon1", "start": 5859, - "end": 5937, + "end": 5940, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5951, "end": 6197, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -2903,7 +2903,7 @@ "start": 6114, "end": 8655, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4581222593756883, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -2915,7 +2915,7 @@ "start": 8236, "end": 8332, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -2927,7 +2927,7 @@ "start": 8237, "end": 8513, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -2939,7 +2939,7 @@ "start": 8656, "end": 9271, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -2953,7 +2953,7 @@ "start": 210, "end": 1719, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.27936962750716343, "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP", @@ -2965,7 +2965,7 @@ "start": 1511, "end": 4523, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.26443159013103534, "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", @@ -2977,7 +2977,7 @@ "start": 4467, "end": 5046, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3903081914030819, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", @@ -2989,7 +2989,7 @@ "start": 4985, "end": 8207, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.6657028690558922, "protein": "MHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", @@ -3001,7 +3001,7 @@ "start": 4985, "end": 5276, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3120665742024965, "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", @@ -3011,33 +3011,33 @@ { "name": "tat_exon1", "start": 5256, - "end": 5472, + "end": 5475, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.48719691819623834, + "distance": 0.4691531785127845, "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", - "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAG" + "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" }, { "name": "rev_exon1", "start": 5395, - "end": 5473, + "end": 5476, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", "start": 5487, "end": 5733, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5568484042553192, "protein": "MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", @@ -3049,7 +3049,7 @@ "start": 7788, "end": 7881, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", @@ -3061,7 +3061,7 @@ "start": 7789, "end": 8065, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", @@ -3073,7 +3073,7 @@ "start": 8208, "end": 8850, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5432971135257649, "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC", @@ -3087,7 +3087,7 @@ "start": 739, "end": 2239, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3099,7 +3099,7 @@ "start": 2031, "end": 5043, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3111,7 +3111,7 @@ "start": 4987, "end": 5566, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3123,7 +3123,7 @@ "start": 5505, "end": 5796, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -3133,33 +3133,33 @@ { "name": "tat_exon1", "start": 5776, - "end": 5992, + "end": 5995, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5915, - "end": 5993, + "end": 5996, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6007, "end": 6253, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -3171,7 +3171,7 @@ "start": 6170, "end": 8711, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4583799776107468, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -3181,21 +3181,21 @@ { "name": "tat_exon2", "start": 8292, - "end": 8385, + "end": 8388, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD*", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG" + "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD**", + "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA" }, { "name": "rev_exon2", "start": 8293, "end": 8569, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3207,7 +3207,7 @@ "start": 8712, "end": 9327, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3221,7 +3221,7 @@ "start": 221, "end": 1730, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2880084183556756, "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", @@ -3233,7 +3233,7 @@ "start": 1522, "end": 4534, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.25117173416656646, "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", @@ -3245,7 +3245,7 @@ "start": 4478, "end": 5057, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3903081914030819, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", @@ -3257,7 +3257,7 @@ "start": 4996, "end": 5287, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3351206434316354, "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", @@ -3269,9 +3269,9 @@ "start": 5267, "end": 5486, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.5060292850990527, + "distance": 0.48954161103693805, "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" @@ -3279,21 +3279,21 @@ { "name": "rev_exon1", "start": 5406, - "end": 5484, + "end": 5487, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", "start": 5498, "end": 5744, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5568484042553192, "protein": "MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", @@ -3305,7 +3305,7 @@ "start": 5661, "end": 8262, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.5052314240113498, "protein": "MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", @@ -3317,7 +3317,7 @@ "start": 7843, "end": 7936, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", @@ -3329,7 +3329,7 @@ "start": 7844, "end": 8120, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", @@ -3341,7 +3341,7 @@ "start": 8263, "end": 8905, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5339772804571117, "protein": "MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", @@ -3355,7 +3355,7 @@ "start": 527, "end": 2297, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.706855791962175, "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", @@ -3367,7 +3367,7 @@ "start": 2089, "end": 5101, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7518376924488996, "protein": "AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED", @@ -3379,7 +3379,7 @@ "start": 5045, "end": 5624, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7067546928117459, "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY", @@ -3391,7 +3391,7 @@ "start": 5563, "end": 5854, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.37688442211055273, "protein": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS", @@ -3403,7 +3403,7 @@ "start": 5563, "end": 6052, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", "distance": 0.7571801566579635, "protein": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS", @@ -3413,21 +3413,21 @@ { "name": "rev_exon1", "start": 5973, - "end": 6051, + "end": 6054, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGAG" }, { "name": "vpu", "start": 6065, "end": 6311, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.7521692502681095, "protein": "MQSLYILTIVALVVAAILAIVV", @@ -3439,7 +3439,7 @@ "start": 6228, "end": 8799, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.7471048806788873, "protein": "MTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST", @@ -3451,7 +3451,7 @@ "start": 8194, "end": 9451, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.6514772974147296, "protein": "MTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", @@ -3463,7 +3463,7 @@ "start": 8380, "end": 8476, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.6995153473344102, "protein": "RPSSQPREEPTGPKE", @@ -3475,7 +3475,7 @@ "start": 8381, "end": 8657, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.5716671727907683, "protein": "RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE", @@ -3489,7 +3489,7 @@ "start": 746, "end": 2246, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3501,7 +3501,7 @@ "start": 2038, "end": 5050, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3513,7 +3513,7 @@ "start": 4994, "end": 5573, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3525,7 +3525,7 @@ "start": 5512, "end": 5803, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -3537,9 +3537,9 @@ "start": 5783, "end": 6002, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -3547,21 +3547,21 @@ { "name": "rev_exon1", "start": 5922, - "end": 6000, + "end": 6003, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6014, "end": 6260, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5278361344537815, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -3573,7 +3573,7 @@ "start": 6177, "end": 8718, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.46021436825118545, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -3585,7 +3585,7 @@ "start": 8299, "end": 8395, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -3597,7 +3597,7 @@ "start": 8300, "end": 8576, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3609,7 +3609,7 @@ "start": 8719, "end": 9334, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3623,7 +3623,7 @@ "start": 695, "end": 2195, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3635,7 +3635,7 @@ "start": 1986, "end": 5004, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.6033592883813991, "protein": "YGKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3647,7 +3647,7 @@ "start": 4948, "end": 5527, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3659,7 +3659,7 @@ "start": 5466, "end": 5757, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -3669,33 +3669,33 @@ { "name": "tat_exon1", "start": 5737, - "end": 5953, + "end": 5956, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5876, - "end": 5954, + "end": 5957, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5968, "end": 6214, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5278361344537815, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -3707,7 +3707,7 @@ "start": 6131, "end": 8672, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4585964351370794, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -3717,21 +3717,21 @@ { "name": "tat_exon2", "start": 8253, - "end": 8346, + "end": 8349, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD*", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG" + "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD**", + "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA" }, { "name": "rev_exon2", "start": 8254, "end": 8530, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3743,7 +3743,7 @@ "start": 8673, "end": 9288, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3757,7 +3757,7 @@ "start": 817, "end": 2317, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3769,7 +3769,7 @@ "start": 2109, "end": 5121, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3781,7 +3781,7 @@ "start": 5065, "end": 5644, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3793,7 +3793,7 @@ "start": 5583, "end": 5874, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -3803,33 +3803,33 @@ { "name": "tat_exon1", "start": 5854, - "end": 6070, + "end": 6073, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5993, - "end": 6071, + "end": 6074, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6085, "end": 6331, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -3841,7 +3841,7 @@ "start": 6248, "end": 8789, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4581222593756883, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -3851,21 +3851,21 @@ { "name": "tat_exon2", "start": 8370, - "end": 8463, + "end": 8466, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD*", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG" + "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD**", + "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA" }, { "name": "rev_exon2", "start": 8371, "end": 8647, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3877,7 +3877,7 @@ "start": 8790, "end": 9405, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3891,7 +3891,7 @@ "start": 687, "end": 2187, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2967573174581697, "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3903,7 +3903,7 @@ "start": 1979, "end": 4991, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19298018391400085, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3915,7 +3915,7 @@ "start": 4935, "end": 5514, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3927,7 +3927,7 @@ "start": 5453, "end": 5744, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -3937,33 +3937,33 @@ { "name": "tat_exon1", "start": 5724, - "end": 5940, + "end": 5943, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5863, - "end": 5941, + "end": 5944, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5955, "end": 6201, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5278361344537815, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -3975,7 +3975,7 @@ "start": 6118, "end": 8659, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4620723847297967, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -3987,7 +3987,7 @@ "start": 8240, "end": 8336, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -3999,7 +3999,7 @@ "start": 8241, "end": 8517, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4011,7 +4011,7 @@ "start": 8660, "end": 9275, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4025,7 +4025,7 @@ "start": 555, "end": 2055, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -4037,7 +4037,7 @@ "start": 1847, "end": 4859, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4049,7 +4049,7 @@ "start": 4803, "end": 5382, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -4061,7 +4061,7 @@ "start": 5321, "end": 5612, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -4071,33 +4071,33 @@ { "name": "tat_exon1", "start": 5592, - "end": 5808, + "end": 5811, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5731, - "end": 5809, + "end": 5812, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5823, "end": 6069, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -4109,7 +4109,7 @@ "start": 5986, "end": 8527, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4581222593756883, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -4121,7 +4121,7 @@ "start": 8108, "end": 8204, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -4133,7 +4133,7 @@ "start": 8109, "end": 8385, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4145,7 +4145,7 @@ "start": 8528, "end": 9143, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4159,7 +4159,7 @@ "start": 246, "end": 1782, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.35613851839948674, "protein": "MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ", @@ -4171,7 +4171,7 @@ "start": 1544, "end": 4586, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.24526399193257942, "protein": "FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4183,7 +4183,7 @@ "start": 4530, "end": 5109, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4195,7 +4195,7 @@ "start": 5048, "end": 5339, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS", @@ -4205,33 +4205,33 @@ { "name": "tat_exon1", "start": 5319, - "end": 5535, + "end": 5538, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.5375823324292911, + "distance": 0.5237430167597767, "protein": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ", - "aminoacids": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ", - "nucleotides": "ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAG" + "aminoacids": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ*", + "nucleotides": "ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5458, - "end": 5536, + "end": 5539, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5657327586206896, - "protein": "MAGRSGDGDEDLLKAVRLIKTLYQSS", - "aminoacids": "MAGRSGDGDEDLLKAVRLIKTLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGT" + "distance": 0.5622384937238494, + "protein": "MAGRSGDGDEDLLKAVRLIKTLYQSSK", + "aminoacids": "MAGRSGDGDEDLLKAVRLIKTLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5550, "end": 5808, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.6039004707464695, "protein": "MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND", @@ -4243,7 +4243,7 @@ "start": 5716, "end": 8257, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.5196110433453024, "protein": "MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE", @@ -4255,7 +4255,7 @@ "start": 7874, "end": 7970, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPASQSRGDPTGPKEPKKKVERETETDPTD", @@ -4267,7 +4267,7 @@ "start": 7875, "end": 8151, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.3471418653089562, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE", @@ -4279,7 +4279,7 @@ "start": 8275, "end": 8896, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4680383864671438, "protein": "MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC", @@ -4293,7 +4293,7 @@ "start": 2, "end": 1697, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.7478034493979825, "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC", @@ -4305,7 +4305,7 @@ "start": 1489, "end": 4501, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7624366800883231, "protein": "KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK", @@ -4317,7 +4317,7 @@ "start": 4445, "end": 5024, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7494633160752622, "protein": "MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR", @@ -4329,7 +4329,7 @@ "start": 4963, "end": 5254, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.4464285714285715, "protein": "MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS", @@ -4341,9 +4341,9 @@ "start": 5111, "end": 5453, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.6491818320844167, + "distance": 0.6409453748630458, "protein": "MTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ", "aminoacids": "MEILGQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPNLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ*", "nucleotides": "ATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAACTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" @@ -4351,21 +4351,21 @@ { "name": "rev_exon1", "start": 5373, - "end": 5451, + "end": 5454, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", "start": 5465, "end": 5711, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.7521692502681095, "protein": "MHALEIAAIVRLVVAAIIAIVV", @@ -4377,7 +4377,7 @@ "start": 5628, "end": 8229, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.7513561129398668, "protein": "MTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD", @@ -4389,7 +4389,7 @@ "start": 7624, "end": 8872, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5833488893037148, "protein": "MRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", @@ -4401,7 +4401,7 @@ "start": 7810, "end": 7903, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.5303030303030303, "protein": "RPSSQPRGDPTGPKEQKKEVERKTEAHPRD", @@ -4413,7 +4413,7 @@ "start": 7811, "end": 8087, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.6667847862036381, "protein": "ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE", @@ -4427,7 +4427,7 @@ "start": 549, "end": 2049, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -4439,7 +4439,7 @@ "start": 1841, "end": 4853, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4451,7 +4451,7 @@ "start": 4797, "end": 5376, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -4463,7 +4463,7 @@ "start": 5315, "end": 5606, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -4473,33 +4473,33 @@ { "name": "tat_exon1", "start": 5586, - "end": 5802, + "end": 5805, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5725, - "end": 5803, + "end": 5806, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5817, "end": 6063, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5278361344537815, "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -4511,7 +4511,7 @@ "start": 5980, "end": 8521, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.46021436825118545, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -4523,7 +4523,7 @@ "start": 8102, "end": 8198, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -4535,7 +4535,7 @@ "start": 8103, "end": 8379, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4547,7 +4547,7 @@ "start": 8522, "end": 9137, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4561,7 +4561,7 @@ "start": 468, "end": 1968, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -4573,7 +4573,7 @@ "start": 1760, "end": 4772, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4585,7 +4585,7 @@ "start": 4716, "end": 5295, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -4597,7 +4597,7 @@ "start": 5234, "end": 5525, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.31767642387111417, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", @@ -4607,33 +4607,33 @@ { "name": "tat_exon1", "start": 5505, - "end": 5721, + "end": 5724, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5644, - "end": 5722, + "end": 5725, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 5736, "end": 5982, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5399181166837258, "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", @@ -4645,7 +4645,7 @@ "start": 5899, "end": 8440, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4583799776107468, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", @@ -4657,7 +4657,7 @@ "start": 8021, "end": 8117, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -4669,7 +4669,7 @@ "start": 8022, "end": 8298, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4681,7 +4681,7 @@ "start": 8441, "end": 9056, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.4624277456647399, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4695,7 +4695,7 @@ "start": 825, "end": 2361, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2559303794507086, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ", @@ -4707,7 +4707,7 @@ "start": 2147, "end": 5165, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19193360134872262, "protein": "FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4719,7 +4719,7 @@ "start": 5109, "end": 5688, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.24677296886864086, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4731,7 +4731,7 @@ "start": 5627, "end": 5918, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", @@ -4741,33 +4741,33 @@ { "name": "tat_exon1", "start": 5898, - "end": 6114, + "end": 6117, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.42503863987635226, + "distance": 0.40192926045016075, "protein": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 6037, - "end": 6115, + "end": 6118, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDDELLKTVRLIKVLYQSS", - "aminoacids": "MAGRSGDSDDELLKTVRLIKVLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDDELLKTVRLIKVLYQSSK", + "aminoacids": "MAGRSGDSDDELLKTVRLIKVLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGTAAG" }, { "name": "vpu", "start": 6129, "end": 6375, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5077548005908421, "protein": "MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL", @@ -4779,7 +4779,7 @@ "start": 6292, "end": 8875, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.48559709145147756, "protein": "MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL", @@ -4791,7 +4791,7 @@ "start": 8456, "end": 8552, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.5303030303030303, "protein": "RSTPQLRGDPTGPKESKEKVERETETDPVH", @@ -4803,7 +4803,7 @@ "start": 8457, "end": 8733, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.3690449563855961, "protein": "DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE", @@ -4815,7 +4815,7 @@ "start": 8876, "end": 9509, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.411658382286489, "protein": "MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC", @@ -4829,7 +4829,7 @@ "start": 801, "end": 2313, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.23466145196773402, "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ", @@ -4841,7 +4841,7 @@ "start": 2105, "end": 5117, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.15051124744376299, "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4853,7 +4853,7 @@ "start": 5061, "end": 5640, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3120665742024965, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4865,7 +4865,7 @@ "start": 5579, "end": 5870, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.34013605442176864, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS", @@ -4875,33 +4875,33 @@ { "name": "tat_exon1", "start": 5850, - "end": 6066, + "end": 6069, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.28735632183908044, + "distance": 0.2515090543259557, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "nucleotides": "ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ*", + "nucleotides": "ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5989, - "end": 6067, + "end": 6070, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.2712477396021701, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT" + "distance": 0.27969018932874357, + "protein": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6081, "end": 6330, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.4674390731122653, "protein": "MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", @@ -4913,7 +4913,7 @@ "start": 6244, "end": 8827, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4447986679391638, "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL", @@ -4925,7 +4925,7 @@ "start": 8408, "end": 8504, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4971219256933542, "protein": "RPTPQPRGDPTGQKESEKKVERETETDPDH", @@ -4937,7 +4937,7 @@ "start": 8409, "end": 8685, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.3471418653089562, "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", @@ -4949,7 +4949,7 @@ "start": 8828, "end": 9461, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.3499750374438344, "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", @@ -4963,7 +4963,7 @@ "start": 855, "end": 2358, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.24608716590416602, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -4975,7 +4975,7 @@ "start": 2150, "end": 5162, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.1574036511156186, "protein": "FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4987,7 +4987,7 @@ "start": 5106, "end": 5685, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.34607778510217535, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4999,7 +4999,7 @@ "start": 5624, "end": 5915, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.2936340145642471, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS", @@ -5009,33 +5009,33 @@ { "name": "tat_exon1", "start": 5895, - "end": 6111, + "end": 6114, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.28735632183908044, + "distance": 0.2515090543259557, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 6034, - "end": 6112, + "end": 6115, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.2712477396021701, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT" + "distance": 0.27969018932874357, + "protein": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6126, "end": 6375, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.4993502877297198, "protein": "MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", @@ -5047,7 +5047,7 @@ "start": 6289, "end": 8881, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4503018671784417, "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", @@ -5059,7 +5059,7 @@ "start": 8462, "end": 8558, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4542873367404884, "protein": "RPTSQPRGDPTGQKESKEKVERETETDPDH", @@ -5071,7 +5071,7 @@ "start": 8463, "end": 8739, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.3471418653089562, "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", @@ -5083,7 +5083,7 @@ "start": 8882, "end": 9515, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.3499750374438344, "protein": "MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", @@ -5097,7 +5097,7 @@ "start": 767, "end": 2270, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.316486214000789, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ", @@ -5109,7 +5109,7 @@ "start": 2062, "end": 5074, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.1943357603710517, "protein": "FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -5121,7 +5121,7 @@ "start": 5018, "end": 5597, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH", @@ -5133,7 +5133,7 @@ "start": 5536, "end": 5827, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS", @@ -5145,9 +5145,9 @@ "start": 5807, "end": 6026, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.40192926045016075, + "distance": 0.37688442211055273, "protein": "MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ", "aminoacids": "MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ*", "nucleotides": "ATGGATCCAGTAGATCCTAGCCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGTTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGTAA" @@ -5155,21 +5155,21 @@ { "name": "rev_exon1", "start": 5946, - "end": 6024, + "end": 6027, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGTAAG" }, { "name": "vpu", "start": 6038, "end": 6284, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.5464940306036659, "protein": "MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL", @@ -5181,7 +5181,7 @@ "start": 6201, "end": 8778, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.5083167934238491, "protein": "MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL", @@ -5193,7 +5193,7 @@ "start": 8359, "end": 8455, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4034761018001243, "protein": "RPASQPRGDPTGPKEPTKKVERETETDPDH", @@ -5205,7 +5205,7 @@ "start": 8360, "end": 8636, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE", @@ -5217,7 +5217,7 @@ "start": 8779, "end": 9406, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.5049429657794677, "protein": "MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC", @@ -5231,7 +5231,7 @@ "start": 773, "end": 2276, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.22891685660489547, "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ", @@ -5243,7 +5243,7 @@ "start": 2068, "end": 5080, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.1363825363825364, "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -5255,7 +5255,7 @@ "start": 5024, "end": 5603, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.27432333577176293, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -5267,7 +5267,7 @@ "start": 5542, "end": 5833, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.2678354029705382, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS", @@ -5279,9 +5279,9 @@ "start": 5813, "end": 6032, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.3496503496503496, + "distance": 0.31992687385740404, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA" @@ -5289,21 +5289,21 @@ { "name": "rev_exon1", "start": 5952, - "end": 6030, + "end": 6033, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.35828025477707015, - "protein": "MAGRSGDSDEELLQTVRLIKLLYQSS", - "aminoacids": "MAGRSGDSDEELLQTVRLIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT" + "distance": 0.3620426829268293, + "protein": "MAGRSGDSDEELLQTVRLIKLLYQSSK", + "aminoacids": "MAGRSGDSDEELLQTVRLIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6044, "end": 6290, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.47888975762314323, "protein": "MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL", @@ -5315,7 +5315,7 @@ "start": 6207, "end": 8790, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.42511905011642803, "protein": "MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", @@ -5327,7 +5327,7 @@ "start": 8371, "end": 8467, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4034761018001243, "protein": "RPTSQPRGDPTGQKESKKKVERETETDPDH", @@ -5339,7 +5339,7 @@ "start": 8372, "end": 8648, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.29843322556577967, "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE", @@ -5351,7 +5351,7 @@ "start": 8791, "end": 9412, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.3303271812080537, "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", @@ -5365,7 +5365,7 @@ "start": 808, "end": 2308, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2880084183556755, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ", @@ -5377,7 +5377,7 @@ "start": 2100, "end": 5112, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.1880239208210378, "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -5389,7 +5389,7 @@ "start": 5056, "end": 5635, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.41532297468972923, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH", @@ -5401,7 +5401,7 @@ "start": 5574, "end": 5865, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3120665742024965, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", @@ -5411,33 +5411,33 @@ { "name": "tat_exon1", "start": 5845, - "end": 6061, + "end": 6064, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5984, - "end": 6062, + "end": 6065, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKQLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKQLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKQLYQSSK", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKQLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6076, "end": 6340, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.4955621301775147, "protein": "MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL", @@ -5449,7 +5449,7 @@ "start": 6239, "end": 8807, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4703403690377699, "protein": "MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL", @@ -5459,21 +5459,21 @@ { "name": "tat_exon2", "start": 8388, - "end": 8481, + "end": 8484, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPASQLRGDQTGPKEQKKKVERETETDPGN", - "aminoacids": "RPASQLRGDQTGPKEQKKKVERETETDPGN*", - "nucleotides": "AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAG" + "aminoacids": "RPASQLRGDQTGPKEQKKKVERETETDPGN**", + "nucleotides": "AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGA" }, { "name": "rev_exon2", "start": 8389, "end": 8674, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.4123815285339786, "protein": "DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC", @@ -5485,7 +5485,7 @@ "start": 8808, "end": 9417, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.484764542936288, "protein": "MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC", diff --git a/tests/expected-results-large/errors.json b/tests/expected-results-large/errors.json index a8ce147..7ec8e1b 100644 --- a/tests/expected-results-large/errors.json +++ b/tests/expected-results-large/errors.json @@ -3,52 +3,47 @@ { "sequence_name": "KX505501.1", "error": "DeletionInOrf", - "message": "ORF pol at 2084-5095 can have maximum deletions 30, got 2721" + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 2721" }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6323" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6323" }, { "sequence_name": "KX505501.1", "error": "InsertionInOrf", - "message": "Smaller ORF vif at 5040-5618 can have maximum insertions 90, got 909" + "message": "Smaller ORF vif at 5040-5619 can have maximum insertions 90, got 909" }, { "sequence_name": "KX505501.1", - "error": "DeletionInOrf", - "message": "Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 84" + "error": "FrameshiftInOrf", + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 195 positions." }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893" }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon1 at 5969-6044 contains an internal stop codon at 6005" - }, - { - "sequence_name": "KX505501.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" + "message": "Smaller ORF rev_exon1 at 5969-6047 contains an internal stop codon at 6005" }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 71 positions." + "message": "Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 40 positions." }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 96" + "message": "Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 96" }, { "sequence_name": "KX505501.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1116" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1116" }, { "sequence_name": "KX505501.1", @@ -70,64 +65,59 @@ { "sequence_name": "MN691959", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MN691959", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" - }, - { - "sequence_name": "MN691959", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 249" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 249" } ], "MN692074": [ { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "ORF pol at 2084-5095 can have maximum deletions 30, got 981" + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 981" }, { "sequence_name": "MN692074", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6551" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6551" }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF vif at 5040-5618 can have maximum deletions 30, got 81" + "message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81" }, { "sequence_name": "MN692074", "error": "InsertionInOrf", - "message": "Smaller ORF vpr at 5558-5849 can have maximum insertions 90, got 234" + "message": "Smaller ORF vpr at 5558-5843 can have maximum insertions 90, got 261" }, { "sequence_name": "MN692074", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893" }, { "sequence_name": "MN692074", - "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6061-6309 contains out of frame indels that impact 168 positions." + "error": "InsertionInOrf", + "message": "Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 108" }, { "sequence_name": "MN692074", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 76 positions." + "message": "Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 204" + "message": "Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 204" }, { "sequence_name": "MN692074", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1131" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1131" }, { "sequence_name": "MN692074", @@ -144,39 +134,29 @@ { "sequence_name": "MN692145", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MN692145", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MN692145", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 231" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 231" } ], "MN090335": [ { "sequence_name": "MN090335", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 822" + "message": "ORF gag at 789-2292 contains an internal stop codon at 822" }, { "sequence_name": "MN090335", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MN090335", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" - }, - { - "sequence_name": "MN090335", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 282" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 282" }, { "sequence_name": "MN090335", @@ -203,22 +183,17 @@ { "sequence_name": "MN090376", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 822" + "message": "ORF gag at 789-2292 contains an internal stop codon at 822" }, { "sequence_name": "MN090376", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MN090376", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 219" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MN090376", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 252" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 252" }, { "sequence_name": "MN090376", @@ -245,34 +220,24 @@ { "sequence_name": "MK115581.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115581.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115581.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK115690.1": [ { "sequence_name": "MK115690.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MK115690.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK115690.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" }, { "sequence_name": "MK115690.1", @@ -289,17 +254,12 @@ { "sequence_name": "MK115571.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115571.1", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" - }, - { - "sequence_name": "MK115571.1", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" }, { "sequence_name": "MK115571.1", @@ -316,17 +276,12 @@ { "sequence_name": "MK115514.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115514.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115514.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" }, { "sequence_name": "MK115514.1", @@ -338,17 +293,12 @@ { "sequence_name": "MK115488.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115488.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115488.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" }, { "sequence_name": "MK115488.1", @@ -360,17 +310,12 @@ { "sequence_name": "MK115030.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MK115030.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK115030.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 270" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 270" }, { "sequence_name": "MK115030.1", @@ -387,39 +332,29 @@ { "sequence_name": "MK115498.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115498.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115498.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK115211.1": [ { "sequence_name": "MK115211.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6890" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6890" }, { "sequence_name": "MK115211.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MK115211.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 219" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK115211.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 252" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 252" }, { "sequence_name": "MK115211.1", @@ -458,64 +393,59 @@ { "sequence_name": "MK114705.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 77 positions." - }, - { - "sequence_name": "MK114705.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 74 positions." }, { "sequence_name": "MK114705.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF nef at 8796-9416 contains out of frame indels that impact 122 positions." + "message": "Smaller ORF nef at 8796-9417 contains out of frame indels that impact 122 positions." } ], "MK114856.1": [ { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 873" + "message": "ORF gag at 789-2292 contains an internal stop codon at 873" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2213" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2213" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6326" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6326" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5172" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5172" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon at 5594" + "message": "Smaller ORF vpr at 5558-5843 contains an internal stop codon at 5594" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5920" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5920" }, { "sequence_name": "MK114856.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 111" + "error": "FrameshiftInOrf", + "message": "Smaller ORF vpu at 6259-6310 contains out of frame indels that impact 67 positions." }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434" + "message": "Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434" }, { "sequence_name": "MK114856.1", "error": "InternalStopInOrf", - "message": "Smaller ORF nef at 8796-9416 contains an internal stop codon at 8853" + "message": "Smaller ORF nef at 8796-9417 contains an internal stop codon at 8853" }, { "sequence_name": "MK114856.1", @@ -527,47 +457,47 @@ { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 834" + "message": "ORF gag at 789-2292 contains an internal stop codon at 834" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2183" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2183" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6350" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6350" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5373" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5373" }, { "sequence_name": "MK115009.1", - "error": "InternalStopInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon at 5717" + "error": "DeletionInOrf", + "message": "Smaller ORF vpr at 5558-5843 can have maximum deletions 30, got 75" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5860" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5860" }, { "sequence_name": "MK115009.1", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 177" + "message": "Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 177" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434" + "message": "Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434" }, { "sequence_name": "MK115009.1", "error": "InternalStopInOrf", - "message": "Smaller ORF nef at 8796-9416 contains an internal stop codon at 8874" + "message": "Smaller ORF nef at 8796-9417 contains an internal stop codon at 8874" }, { "sequence_name": "MK115009.1", @@ -589,41 +519,31 @@ { "sequence_name": "MK115387.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MK115387.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK115387.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 279" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 279" } ], "MK115491.1": [ { "sequence_name": "MK115491.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115491.1", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" - }, - { - "sequence_name": "MK115491.1", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK116110.1": [ { "sequence_name": "MK116110.1", "error": "InternalStopInOrf", - "message": "ORF gag at 140-1642 contains an internal stop codon at 185" + "message": "ORF gag at 140-1643 contains an internal stop codon at 185" }, { "sequence_name": "MK116110.1", @@ -640,73 +560,53 @@ { "sequence_name": "MK115527.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115527.1", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" - }, - { - "sequence_name": "MK115527.1", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK114997.1": [ { "sequence_name": "MK114997.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6512" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6512" }, { "sequence_name": "MK114997.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MK114997.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK114997.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 270" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 270" } ], "MK115518.1": [ { "sequence_name": "MK115518.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115518.1", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" - }, - { - "sequence_name": "MK115518.1", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK115065.1": [ { "sequence_name": "MK115065.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MK115065.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK115065.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 270" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 270" }, { "sequence_name": "MK115065.1", @@ -733,47 +633,42 @@ { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 924" + "message": "ORF gag at 789-2292 contains an internal stop codon at 924" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2183" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2183" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6425" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6425" }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5247" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5247" }, { "sequence_name": "MK115464.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK115464.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains out of frame indels that impact 104 positions." - }, - { - "sequence_name": "MK115464.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 108" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains out of frame indels that impact 104 positions." }, { "sequence_name": "MK115464.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434" + "message": "Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434" }, { "sequence_name": "MK115464.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF nef at 8796-9416 contains out of frame indels that impact 393 positions." + "message": "Smaller ORF nef at 8796-9417 contains out of frame indels that impact 393 positions." }, { "sequence_name": "MK115464.1", @@ -785,39 +680,29 @@ { "sequence_name": "MK115530.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115530.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115530.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK115520.1": [ { "sequence_name": "MK115520.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2198" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2198" }, { "sequence_name": "MK115520.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115520.1", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" - }, - { - "sequence_name": "MK115520.1", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" }, { "sequence_name": "MK115520.1", @@ -834,34 +719,24 @@ { "sequence_name": "MK115503.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115503.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115503.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK115570.1": [ { "sequence_name": "MK115570.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115570.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115570.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" }, { "sequence_name": "MK115570.1", @@ -878,34 +753,24 @@ { "sequence_name": "MK115509.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115509.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115509.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK115702.1": [ { "sequence_name": "MK115702.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 77 positions." - }, - { - "sequence_name": "MK115702.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 204" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 74 positions." }, { "sequence_name": "MK115702.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 249" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 249" }, { "sequence_name": "MK115702.1", @@ -932,47 +797,47 @@ { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 927" + "message": "ORF gag at 789-2292 contains an internal stop codon at 927" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "ORF pol at 2084-5095 contains an internal stop codon at 2183" + "message": "ORF pol at 2084-5096 contains an internal stop codon at 2183" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6551" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6551" }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF vif at 5040-5618 contains an internal stop codon at 5151" + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon at 5151" }, { "sequence_name": "MK115095.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5860" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5860" }, { "sequence_name": "MK115095.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 108" + "error": "FrameshiftInOrf", + "message": "Smaller ORF vpu at 6259-6310 contains out of frame indels that impact 43 positions." }, { "sequence_name": "MK115095.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 contains an internal stop codon at 8434" + "message": "Smaller ORF rev_exon2 at 8377-8653 contains an internal stop codon at 8434" }, { "sequence_name": "MK115095.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 213" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 213" }, { "sequence_name": "MK115095.1", @@ -984,51 +849,41 @@ { "sequence_name": "MK115490.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." - }, - { - "sequence_name": "MK115490.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115490.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "MK115576.1": [ { "sequence_name": "MK115576.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 79 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MK115576.1", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" - }, - { - "sequence_name": "MK115576.1", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 243" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 243" } ], "OQ092466": [ { "sequence_name": "OQ092466", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "OQ092466", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 195" }, { "sequence_name": "OQ092466", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 261" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 261" }, { "sequence_name": "OQ092466", @@ -1059,17 +914,17 @@ { "sequence_name": "OQ092462", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "OQ092462", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 195" + "message": "Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 195" }, { "sequence_name": "OQ092462", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 255" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 255" }, { "sequence_name": "OQ092462", @@ -1098,17 +953,12 @@ { "sequence_name": "OQ092467", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "OQ092467", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 213" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "OQ092467", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 237" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 237" }, { "sequence_name": "OQ092467", diff --git a/tests/expected-results-large/holistic.json b/tests/expected-results-large/holistic.json index b31a2d7..a84c4dc 100644 --- a/tests/expected-results-large/holistic.json +++ b/tests/expected-results-large/holistic.json @@ -6,9 +6,9 @@ "blast_matched_qlen": 1997, "blast_sseq_coverage": 0.2498199403230785, "blast_qseq_coverage": 1.2158237356034052, - "blast_sseq_orfs_coverage": 0.17663960024984385, + "blast_sseq_orfs_coverage": 0.17661753684736448, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN691959": { @@ -20,7 +20,7 @@ "blast_qseq_coverage": 1.1086063415148004, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN692074": { @@ -30,9 +30,9 @@ "blast_matched_qlen": 4178, "blast_sseq_coverage": 0.5041670953801831, "blast_qseq_coverage": 1.1728099569171853, - "blast_sseq_orfs_coverage": 0.4114928169893816, + "blast_sseq_orfs_coverage": 0.41144141893579816, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN692145": { @@ -44,7 +44,7 @@ "blast_qseq_coverage": 1.1271545051088863, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN090335": { @@ -54,9 +54,9 @@ "blast_matched_qlen": 9069, "blast_sseq_coverage": 0.9842576396748637, "blast_qseq_coverage": 1.0603153600176425, - "blast_sseq_orfs_coverage": 1.0003747657713928, + "blast_sseq_orfs_coverage": 1.0003747189607795, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN090376": { @@ -66,9 +66,9 @@ "blast_matched_qlen": 8985, "blast_sseq_coverage": 0.9784957300133759, "blast_qseq_coverage": 1.0604340567612687, - "blast_sseq_orfs_coverage": 0.9943785134291068, + "blast_sseq_orfs_coverage": 0.9943792155883088, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115581.1": { @@ -80,7 +80,7 @@ "blast_qseq_coverage": 1.0046340179041602, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115690.1": { @@ -92,7 +92,7 @@ "blast_qseq_coverage": 0.9949427185468056, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115571.1": { @@ -104,7 +104,7 @@ "blast_qseq_coverage": 1.0113902490951672, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115514.1": { @@ -116,7 +116,7 @@ "blast_qseq_coverage": 1.0173736943082499, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115488.1": { @@ -128,7 +128,7 @@ "blast_qseq_coverage": 1.0325262392185388, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 6 }, "MK115030.1": { @@ -140,7 +140,7 @@ "blast_qseq_coverage": 1.0655270655270654, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115498.1": { @@ -152,7 +152,7 @@ "blast_qseq_coverage": 1.0080329774865235, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115211.1": { @@ -164,7 +164,7 @@ "blast_qseq_coverage": 1.0598981399468557, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115158.1": { @@ -176,7 +176,7 @@ "blast_qseq_coverage": 0.9699223449633599, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 234, - "orfs_end": 8211, + "orfs_end": 8212, "blast_n_conseqs": 1 }, "MK114705.1": { @@ -188,7 +188,7 @@ "blast_qseq_coverage": 1.122622463075125, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 6 }, "MK114856.1": { @@ -200,7 +200,7 @@ "blast_qseq_coverage": 1.0812493405085997, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MK115009.1": { @@ -212,7 +212,7 @@ "blast_qseq_coverage": 1.0590854784403172, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115387.1": { @@ -224,7 +224,7 @@ "blast_qseq_coverage": 1.040936952714536, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115491.1": { @@ -236,7 +236,7 @@ "blast_qseq_coverage": 1.0299299511780937, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK116110.1": { @@ -246,9 +246,9 @@ "blast_matched_qlen": 8967, "blast_sseq_coverage": 0.9957759004001778, "blast_qseq_coverage": 0.9972119995539199, - "blast_sseq_orfs_coverage": 0.9986220719027934, + "blast_sseq_orfs_coverage": 0.998622244488978, "orfs_start": 140, - "orfs_end": 8123, + "orfs_end": 8124, "blast_n_conseqs": 3 }, "MK115527.1": { @@ -260,7 +260,7 @@ "blast_qseq_coverage": 1.0056956017297753, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK114997.1": { @@ -272,7 +272,7 @@ "blast_qseq_coverage": 1.0516841524019878, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115518.1": { @@ -284,7 +284,7 @@ "blast_qseq_coverage": 0.9996854356715948, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115065.1": { @@ -296,7 +296,7 @@ "blast_qseq_coverage": 1.069459518124593, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 6 }, "MK115464.1": { @@ -308,7 +308,7 @@ "blast_qseq_coverage": 0.9893407844354756, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115530.1": { @@ -320,7 +320,7 @@ "blast_qseq_coverage": 0.9992665549036044, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115520.1": { @@ -330,9 +330,9 @@ "blast_matched_qlen": 9589, "blast_sseq_coverage": 0.9787015125012861, "blast_qseq_coverage": 0.987902805297737, - "blast_sseq_orfs_coverage": 1.0121174266083697, + "blast_sseq_orfs_coverage": 1.012115913065201, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115503.1": { @@ -344,7 +344,7 @@ "blast_qseq_coverage": 0.9953207861079338, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115570.1": { @@ -356,7 +356,7 @@ "blast_qseq_coverage": 1.0057986294148655, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115509.1": { @@ -368,7 +368,7 @@ "blast_qseq_coverage": 1.0197797498128942, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115702.1": { @@ -378,9 +378,9 @@ "blast_matched_qlen": 9098, "blast_sseq_coverage": 0.987447268237473, "blast_qseq_coverage": 1.0596834469114091, - "blast_sseq_orfs_coverage": 1.0198625858838226, + "blast_sseq_orfs_coverage": 1.019860104921309, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MK115095.1": { @@ -392,7 +392,7 @@ "blast_qseq_coverage": 1.060085367188355, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 2 }, "MK115490.1": { @@ -404,7 +404,7 @@ "blast_qseq_coverage": 1.0204343639670483, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MK115576.1": { @@ -416,7 +416,7 @@ "blast_qseq_coverage": 1.0342110943233327, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "OQ092466": { @@ -428,7 +428,7 @@ "blast_qseq_coverage": 1.1192442700805285, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "OQ092463": { @@ -440,7 +440,7 @@ "blast_qseq_coverage": 0.9884435190005205, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 140, - "orfs_end": 8123, + "orfs_end": 8124, "blast_n_conseqs": 2 }, "OQ092465": { @@ -452,7 +452,7 @@ "blast_qseq_coverage": 0.9620043482762191, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 200, - "orfs_end": 8207, + "orfs_end": 8208, "blast_n_conseqs": 2 }, "OQ092462": { @@ -464,7 +464,7 @@ "blast_qseq_coverage": 1.1301214741610048, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "OQ092464": { @@ -476,7 +476,7 @@ "blast_qseq_coverage": 0.9678735872750105, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 200, - "orfs_end": 8207, + "orfs_end": 8208, "blast_n_conseqs": 2 }, "OQ092467": { @@ -488,7 +488,7 @@ "blast_qseq_coverage": 1.0962157809983897, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 } } \ No newline at end of file diff --git a/tests/expected-results-large/orfs.json b/tests/expected-results-large/orfs.json index 5c54b42..54fc619 100644 --- a/tests/expected-results-large/orfs.json +++ b/tests/expected-results-large/orfs.json @@ -5,7 +5,7 @@ "start": 0, "end": 1824, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7626080297560442, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -17,7 +17,7 @@ "start": 0, "end": 1824, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7647696476964769, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -29,7 +29,7 @@ "start": 0, "end": 1824, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.7645782478980201, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -41,7 +41,7 @@ "start": 1, "end": 1750, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", "distance": 0.7680130480667754, "protein": "MRKLQNGIDCIQCMQGLLHQAR", @@ -53,7 +53,7 @@ "start": 336, "end": 1824, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3997973809613161, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -65,7 +65,7 @@ "start": 1306, "end": 1750, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", "distance": 0.758082497212932, "protein": "MRTQIVKLF", @@ -75,21 +75,21 @@ { "name": "vpr", "start": 1599, - "end": 1749, + "end": 1824, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.7677189534455227, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR", - "aminoacids": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR", - "nucleotides": "ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGA" + "distance": 0.7638478800047243, + "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", + "aminoacids": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*", + "nucleotides": "ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG" }, { "name": "pol", "start": 1627, "end": 1927, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7724330674761569, "protein": "GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", @@ -101,36 +101,36 @@ "start": 1746, "end": 1824, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.7616257781032589, "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", "aminoacids": "RCIRSTTRTADTELFTRDFPLGTFQ*", "nucleotides": "AGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG" }, - { - "name": "vpu", - "start": 1747, - "end": 1927, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.769715460635405, - "protein": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", - "aminoacids": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*", - "nucleotides": "GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA" - }, { "name": "rev_exon2", "start": 1747, "end": 1927, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.7620186257236345, "protein": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", "aminoacids": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*", "nucleotides": "GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA" + }, + { + "name": "vpu", + "start": 1748, + "end": 1778, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.7688723205964585, + "protein": "MHPEYYKDC", + "aminoacids": "MHPEYYKDC*", + "nucleotides": "ATGCATCCGGAGTACTACAAGGACTGCTGA" } ], "MN691959": [ @@ -139,7 +139,7 @@ "start": 639, "end": 2142, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.0801186943620179, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ", @@ -151,7 +151,7 @@ "start": 1934, "end": 4946, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.054722889368558514, "protein": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -163,7 +163,7 @@ "start": 4890, "end": 5469, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.09157509157509158, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -173,69 +173,69 @@ { "name": "vpr", "start": 5408, - "end": 5702, + "end": 5699, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5562531391260666, + "distance": 0.5391891891891883, "protein": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS", - "aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*T", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5679, - "end": 5895, + "end": 5898, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5818, - "end": 5896, + "end": 5899, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5910, - "end": 6156, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.53246275519588, - "protein": "SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL", - "aminoacids": "MQPIQIAIVALVVAIIIAIVV*SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL*", - "nucleotides": "ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6070, "end": 8656, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.13638128518734216, "protein": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL", "aminoacids": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL*", "nucleotides": "ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" }, + { + "name": "vpu", + "start": 6105, + "end": 6156, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.2321981424148607, + "protein": "MGVEMGHHAPWDIDDL", + "aminoacids": "MGVEMGHHAPWDIDDL*", + "nucleotides": "ATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8237, "end": 8333, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.6672629695885509, "protein": "RPTSQTRGDPTGPKE", @@ -247,7 +247,7 @@ "start": 8238, "end": 8514, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.210025203024363, "protein": "DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE", @@ -259,7 +259,7 @@ "start": 8657, "end": 9278, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.08588605782994552, "protein": "MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC", @@ -273,7 +273,7 @@ "start": 0, "end": 4059, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.763072203234748, "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -285,19 +285,31 @@ "start": 2, "end": 4115, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7604257801108195, "protein": "MNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", "aminoacids": "EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*", "nucleotides": "GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA" }, + { + "name": "vpu", + "start": 2, + "end": 2084, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.7659115426105717, + "protein": "MGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ", + "aminoacids": "EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*", + "nucleotides": "GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAA" + }, { "name": "gag", "start": 789, "end": 2292, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.19470123431286457, "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -309,7 +321,7 @@ "start": 2084, "end": 4115, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.5617851221088768, "protein": "FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -321,7 +333,7 @@ "start": 3617, "end": 4115, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7631664499349805, "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -331,21 +343,21 @@ { "name": "vpr", "start": 3617, - "end": 4085, + "end": 4115, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.7632679688399402, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT", - "aminoacids": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT", - "nucleotides": "ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACC" + "distance": 0.7635778016363703, + "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", + "aminoacids": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*", + "nucleotides": "ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA" }, { "name": "tat_exon1", "start": 3823, "end": 4084, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", "distance": 0.7659115426105717, "protein": "MVPVRERTHSRSRNLLCRWGS", @@ -357,31 +369,19 @@ "start": 3823, "end": 4084, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.7645569620253164, + "distance": 0.7610789980732178, "protein": "MVPVRERTHSRSRNLLCRWGS", "aminoacids": "MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY", "nucleotides": "ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC" }, - { - "name": "vpu", - "start": 4080, - "end": 4164, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.7708418891170431, - "protein": "IPSGEPSDAAYKQLLFACTGSLWLDQI", - "aminoacids": "IPSGEPSDAAYKQLLFACTGSLWLDQI*", - "nucleotides": "ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA" - }, { "name": "tat_exon2", "start": 4080, "end": 4164, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.7699443413729128, "protein": "IPSGEPSDAAYKQLLFACTGSLWLDQI", @@ -393,7 +393,7 @@ "start": 4081, "end": 4153, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.7667894365645325, "protein": "YPVASPQMLHISSCFLPVLGLSG", @@ -407,7 +407,7 @@ "start": 775, "end": 2281, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.20784453738651432, "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", @@ -419,7 +419,7 @@ "start": 2070, "end": 5085, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.14843087362171337, "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", @@ -431,7 +431,7 @@ "start": 5029, "end": 5608, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", @@ -443,9 +443,9 @@ "start": 5547, "end": 5838, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6002510555745751, + "distance": 0.587876570313453, "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", "aminoacids": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*", "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG" @@ -453,69 +453,69 @@ { "name": "tat_exon1", "start": 5818, - "end": 6034, + "end": 6037, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5957, - "end": 6035, + "end": 6038, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.4267425320056898, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6049, - "end": 6298, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5326633165829145, - "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", - "aminoacids": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL*", - "nucleotides": "ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" + "distance": 0.4274965800273598, + "protein": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6212, "end": 8783, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.47520309038232134, "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", "aminoacids": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 6253, + "end": 6298, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.3649167733674775, + "protein": "MEMGHHAPWDVDDL", + "aminoacids": "MEMGHHAPWDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8364, - "end": 8457, + "end": 8460, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.3921568627450981, "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH*", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG" + "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH**", + "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA" }, { "name": "rev_exon2", "start": 8365, "end": 8641, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.29843322556577967, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", @@ -527,7 +527,7 @@ "start": 8784, "end": 9387, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4049958673891082, "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", @@ -541,7 +541,7 @@ "start": 315, "end": 1665, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.596665989022159, "protein": "MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ", @@ -553,7 +553,7 @@ "start": 1427, "end": 4469, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.27887169154684477, "protein": "FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -565,7 +565,7 @@ "start": 4413, "end": 4992, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH", @@ -575,69 +575,69 @@ { "name": "vpr", "start": 4931, - "end": 5225, + "end": 5222, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6187165775401071, + "distance": 0.6083541998634192, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS", - "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*T", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAGACT" + "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG" }, { "name": "tat_exon1", "start": 5202, - "end": 5418, + "end": 5421, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.42503863987635226, + "distance": 0.40192926045016075, "protein": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ", - "aminoacids": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ", - "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAG" + "aminoacids": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ*", + "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5341, - "end": 5419, + "end": 5422, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5433, - "end": 5682, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5368311327310633, - "protein": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL", - "aminoacids": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL*", - "nucleotides": "ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG" + "distance": 0.524971623155505, + "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAG" }, { "name": "env", "start": 5596, "end": 8158, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.5139610675592354, "protein": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ", "aminoacids": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ*", "nucleotides": "ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA" }, + { + "name": "vpu", + "start": 5643, + "end": 5682, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.5690703735881842, + "protein": "MGHDAPWDVDDL", + "aminoacids": "MGHDAPWDVDDL*", + "nucleotides": "ATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 7739, "end": 7835, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.6842105263157894, "protein": "RPSSQPRGDQTGPKE", @@ -649,7 +649,7 @@ "start": 7740, "end": 8016, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.4267425320056898, "protein": "DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE", @@ -661,7 +661,7 @@ "start": 8159, "end": 8813, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.49485619884358334, "protein": "MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", @@ -675,7 +675,7 @@ "start": 522, "end": 1590, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.6828741441147701, "protein": "MYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", @@ -687,7 +687,7 @@ "start": 1382, "end": 4394, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.23966680468616797, "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGEDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNIPPVVAKEIVACCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", @@ -699,7 +699,7 @@ "start": 4338, "end": 4920, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.42479043044174425, "protein": "MENRWQVMVVWQVDRMRISMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRKKPPLPSVRKLTEDRWKEPQRTKGHRESHTMNGH", @@ -709,69 +709,69 @@ { "name": "vpr", "start": 4859, - "end": 5153, + "end": 5150, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5940302029259086, + "distance": 0.580763397371082, "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*T", - "nucleotides": "ATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCTTGGCTTCATGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*", + "nucleotides": "ATGGAAAGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCTTGGCTTCATGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAATTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5130, - "end": 5346, + "end": 5349, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.484764542936288, + "distance": 0.46628407460545196, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDSRNHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5269, - "end": 5347, + "end": 5350, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.5797101449275363, - "protein": "MAGRSGDSDEELLRIAGTIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLRIAGTIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5361, - "end": 5631, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5776066350710902, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL", - "aminoacids": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLDMGHHAPWDVNDL*", - "nucleotides": "ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTACTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA" + "distance": 0.5763097949886105, + "protein": "MAGRSGDSDEELLRIAGTIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLRIAGTIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAGCCGGAACCATCAAGTTTCTTTATCAAAGCAGTAAG" }, { "name": "env", "start": 5524, "end": 8110, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4972760674014952, "protein": "MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", "aminoacids": "MRVKGTKKNWQPSWRWGTMLIWIWATMLLGMSMTCNAEDSWVTVYYGVPVWKEAATTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNINSTNINNTNSIEREMTNCSFNVTTVIRDKVQKQYALFYKLDVVQIKDDNTSYNTSYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCKNVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRRSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIRQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAATGCAGAGGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAGCCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACACAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTAACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTAACATCAATAGCACTAATATAAACAATACCAATAGTATAGAAAGAGAAATGACAAACTGCTCTTTTAATGTCACCACAGTCATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAAACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGCTATAGGTTGATAAATTGTAACTCCTCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAATTTCAATGGAACAGGACTATGTAAAAATGTTAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAACACAGAAGTAAATATTATCACACTCCCATGCAAGATAAGGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACATTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGAAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 5568, + "end": 5631, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.6063492063492064, + "protein": "MGNHAHLDMGHHAPWDVNDL", + "aminoacids": "MGNHAHLDMGHHAPWDVNDL*", + "nucleotides": "ATGGGGAACCATGCTCATTTGGATATGGGCCACCATGCTCCTTGGGATGTCAATGACCTGTAA" + }, { "name": "tat_exon2", "start": 7691, "end": 7784, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.4464285714285715, "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", @@ -783,7 +783,7 @@ "start": 7692, "end": 7968, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.4267425320056898, "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", @@ -795,7 +795,7 @@ "start": 8111, "end": 8735, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.5093153589821267, "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTSANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", @@ -809,7 +809,7 @@ "start": 680, "end": 2180, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -821,7 +821,7 @@ "start": 1972, "end": 4984, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -833,7 +833,7 @@ "start": 4928, "end": 5507, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -843,23 +843,23 @@ { "name": "vpr", "start": 5446, - "end": 5740, + "end": 5737, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5717, "end": 5936, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -867,45 +867,45 @@ { "name": "rev_exon1", "start": 5856, - "end": 5934, + "end": 5937, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5948, - "end": 6194, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6111, "end": 8652, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4564898680537425, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6149, + "end": 6194, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8233, "end": 8329, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -917,7 +917,7 @@ "start": 8234, "end": 8510, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -929,7 +929,7 @@ "start": 8653, "end": 9268, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -943,7 +943,7 @@ "start": 777, "end": 2286, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.24076694150363465, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQIMGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKVEEEQNKSKKKAQQAAAGAGNSSQTSTSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATNSAAIMMQRGNFRNQRKSVKCFNCGKDGHIAKNCRAPRRKGCWKCGKEGHQMKDCPERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTTPSQKQEKIDQDLYPLASLKSLFGNDPSSQ", @@ -955,7 +955,7 @@ "start": 2078, "end": 5090, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.185747174550021, "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRGELQVWGGDNNSLSEAGEDRPGPVSFSFPQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPYRKQNPNIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIALPEKESWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVIPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQEQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKSGKAGYVTNRGRQKVIPLTDTTNQKTELHAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQEAAYFILKLAGRWPVATIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDVIATDLQTKELQKQITKIQHFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -967,7 +967,7 @@ "start": 5034, "end": 5613, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.39766435115272314, "protein": "MENRWQVMIVWQVDRMRIRRWHSLVKHHIYISGKARGWVYKHHYENTHPRISSKVYIPLGEARLAVTTYWGLHTGERDWHLGQGVSIEWRKKEYSTQVDPNLADQLIHLYYFDCFSESAIRNVILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSITKLTEDRWNKPQRTKGHRGNHTMNGH", @@ -977,69 +977,69 @@ { "name": "vpr", "start": 5552, - "end": 5846, + "end": 5843, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6098398169336385, + "distance": 0.5980919765166341, "protein": "MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS", - "aminoacids": "MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS*S", - "nucleotides": "ATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACACATCTATGAGACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAAGTCTGCAACAACTGCTGTTCATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGAATTACTCCACAGAGGAGAACAAGAAATGGAGCCAGTAGATCCTAATCT" + "aminoacids": "MEQAPENQGPPREPYNEWALELLEELKSEAVRHFPRMWLHGLGQHIYETYGDTWTGVEALIRSLQQLLFIHFRIGCRHSRIGITPQRRTRNGASRS*", + "nucleotides": "ATGGAACAAGCCCCAGAGAACCAAGGGCCACCGAGGGAACCATACAATGAATGGGCACTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACACATCTATGAGACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAAGTCTGCAACAACTGCTGTTCATTCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGAATTACTCCACAGAGGAGAACAAGAAATGGAGCCAGTAGATCCTAA" }, { "name": "tat_exon1", "start": 5823, - "end": 6039, + "end": 6042, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.42503863987635226, + "distance": 0.40192926045016075, "protein": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ", - "aminoacids": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPNLEPWKHPGSQPKTACTTCYCKQCCYHCQVCFITKGLGISYGRKKRRQRRGSPQDSQIHQASLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAATCTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCACTTGCTATTGTAAACAGTGTTGCTATCATTGCCAAGTTTGTTTCATAACCAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5962, - "end": 6040, + "end": 6043, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDNDEDLLKTVRFIKLLYQSS", - "aminoacids": "MAGRSGDNDEDLLKTVRFIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6054, - "end": 6300, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.4999062089664228, - "protein": "MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL", - "aminoacids": "MQSLAILAIVALVVAAIIAIVVWTIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDHEELSALMEMGHHAPWDVDDL*", - "nucleotides": "ATGCAATCTTTAGCAATATTAGCAATAGTAGCATTAGTAGTAGCAGCCATAATAGCAATAGTTGTGTGGACCATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGATAGTGGCAATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAG" + "distance": 0.524971623155505, + "protein": "MAGRSGDNDEDLLKTVRFIKLLYQSSK", + "aminoacids": "MAGRSGDNDEDLLKTVRFIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAACGACGAGGATCTCCTCAAGACAGTCAGATTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6217, "end": 8800, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4988789237668163, "protein": "MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ", "aminoacids": "MRVKGIMKNYQHLWRWGIMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYDTEAHNVWATHACVPTDPNPQEIGLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTKLEINSTKKTNSTNNGTNINATDDSWGEMKNCSFNTTASIRDKVQREFALFYKLDIVPIDNDDINYRLISCNTSVLTQACPKVSFEPIPIHYCAPAGFAILKCRDKNFNGTGQCKNVSTVQCTHGIRPVVSTQLLFNGSLAEEEVVIKSENITDNTKTIIVQLNASVAIVCTRPNNNTRKSIPIGPGRAFYAAGDIIGDIRRAHCILNKTTWDNTIEQVAKKLREQFENKTIVFSESSGGDPEITMISFNCGGEFFYCNSVQLFNSTWHNNGSSTTGSSSSEGNITLPCKIKQIINMWQEVGKAMYAPPIRGPISCESNITGLLLTRDGGNDANGNNTETFRPGGGNMRDNWRSELYRYKVVKIEPLGVAPTRAQRRVVQREKRAVGLGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGLWGCSGKLICNTAVPWNTSWSNKSLDDIWHNMTWMEWEREIDNYTNIIYSLIEASQTQQEKNEQELLELDKWASLWNWFSISNWLWYIKIFIMIVGGLVGLRIVFTILSIVNRVRKGYSPLSFQTHLPAQRGPDRPEGTGDEGGEQDRDRSTHLVDGFLAIIWVDLRNLCLFLYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWRRELKNSAVSLLNATAIAVAEGTDRVIEGLRRAFRAIIHIPRRIRQGLERALQ*", "nucleotides": "ATGAGAGTGAAGGGGATCATGAAGAATTATCAGCACTTATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTTACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAAACACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCTACAGACCCCAACCCACAAGAAATAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTACATTGCACTAAGTTGGAGATTAATAGCACTAAGAAGACTAATAGCACTAATAATGGTACTAACATCAATGCCACTGATGATAGTTGGGGGGAAATGAAAAACTGCTCTTTCAATACCACTGCAAGCATAAGAGATAAGGTACAGAGAGAATTTGCGCTTTTTTATAAACTTGATATAGTACCAATAGATAATGATGATATCAACTATAGGTTAATAAGTTGTAACACCTCAGTCCTTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAGAGATAAAAATTTCAATGGAACAGGACAATGTAAAAATGTCAGCACAGTGCAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTCAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAAATCTGAAAATATCACAGACAATACTAAAACTATAATAGTACAGCTGAATGCATCTGTAGCAATTGTTTGTACAAGACCCAACAACAATACAAGAAAAAGTATACCTATAGGGCCAGGGAGAGCGTTTTATGCAGCAGGAGATATAATAGGAGACATAAGACGAGCACACTGTATCCTTAACAAAACAACATGGGATAACACAATAGAACAGGTAGCTAAAAAATTAAGAGAACAATTTGAGAATAAGACAATAGTCTTTAGTGAATCCTCGGGAGGGGACCCAGAAATTACAATGATTAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAGTACAACTGTTTAATAGTACTTGGCATAATAATGGGAGTAGTACTACAGGGTCAAGTAGCAGTGAAGGCAATATCACACTCCCATGCAAAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACCAATTAGCTGCGAGTCAAATATTACAGGGTTGCTACTAACAAGAGATGGTGGGAATGACGCTAACGGGAACAACACCGAGACCTTCAGACCTGGAGGAGGAAATATGAGGGACAATTGGCGAAGTGAATTATATAGATACAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAGGGCACAGAGAAGAGTGGTACAGAGAGAAAAAAGAGCAGTGGGTCTCGGAGCCTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGCTTTGGGGTTGCTCTGGAAAACTCATCTGCAACACTGCTGTGCCTTGGAATACTAGTTGGAGTAACAAATCTCTGGATGATATTTGGCATAACATGACCTGGATGGAGTGGGAAAGAGAAATTGACAATTACACAAACATAATATACAGCTTAATTGAGGCATCGCAAACCCAGCAAGAAAAGAATGAACAAGAATTACTAGAATTAGACAAATGGGCAAGTCTGTGGAATTGGTTTAGCATATCAAACTGGCTGTGGTACATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTACTATACTTTCTATAGTGAATAGAGTTAGGAAGGGATACTCACCATTATCGTTTCAGACCCACCTCCCAGCCCAGAGGGGACCCGACAGGCCCGAAGGAACCGGAGACGAAGGTGGAGAGCAAGACAGAGACAGATCCACTCACTTAGTGGATGGATTCTTAGCAATTATCTGGGTCGACCTGCGGAACCTGTGCCTCTTCCTCTACCACCGCTTGAGAGACTTACTCTTGATTGCAGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATACTGGTGGAACCTTCTGCAGTATTGGAGGCGGGAACTAAAGAATAGTGCTGTTAGCTTGCTTAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGAGTTATAGAAGGATTGCGCAGAGCTTTTAGAGCTATTATCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGAGCTTTGCAATAA" }, + { + "name": "vpu", + "start": 6255, + "end": 6300, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.3649167733674775, + "protein": "MEMGHHAPWDVDDL", + "aminoacids": "MEMGHHAPWDVDDL*", + "nucleotides": "ATGGAGATGGGGCATCATGCTCCTTGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8381, "end": 8474, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.5303030303030303, "protein": "RPTSQPRGDPTGPKEPETKVESKTETDPLT", @@ -1051,7 +1051,7 @@ "start": 8382, "end": 8658, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.42982806877249113, "protein": "DPPPSPEGTRQARRNRRRRWRARQRQIHSLSGWILSNYLGRPAEPVPLPLPPLERLTLDCSEDCGTSGTQGVGSPQILVEPSAVLEAGTKE", @@ -1063,7 +1063,7 @@ "start": 8801, "end": 9425, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.46635769262600346, "protein": "MGGKWSKCSIVGWPTVRERIRRAEPAAEGVGAVSRDLEKHGAITSSNANNADCTWLEAQKEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGIIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVEPERVEEENKRENRCLLHPMSQHGMDDPEKEVLQWRFDSRLAFHHVARELHPEYYKN", @@ -1077,7 +1077,7 @@ "start": 579, "end": 2079, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1089,7 +1089,7 @@ "start": 1871, "end": 4883, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEAFLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1101,7 +1101,7 @@ "start": 4827, "end": 5406, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1111,69 +1111,69 @@ { "name": "vpr", "start": 5345, - "end": 5639, + "end": 5636, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5616, - "end": 5832, + "end": 5835, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5755, - "end": 5833, + "end": 5836, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5847, - "end": 6093, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5399181166837258, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6010, "end": 8551, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4569687738004571, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6048, + "end": 6093, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8132, "end": 8228, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1185,7 +1185,7 @@ "start": 8133, "end": 8409, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1197,7 +1197,7 @@ "start": 8552, "end": 9167, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4756067663643049, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1211,7 +1211,7 @@ "start": 584, "end": 2084, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1223,7 +1223,7 @@ "start": 1876, "end": 4888, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19298018391400085, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1235,7 +1235,7 @@ "start": 4832, "end": 5411, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1245,23 +1245,23 @@ { "name": "vpr", "start": 5350, - "end": 5644, + "end": 5641, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5621, "end": 5840, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -1269,45 +1269,45 @@ { "name": "rev_exon1", "start": 5760, - "end": 5838, + "end": 5841, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5852, - "end": 6098, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5399181166837258, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6015, "end": 8556, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4585964351370794, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6053, + "end": 6098, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8137, "end": 8233, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1319,7 +1319,7 @@ "start": 8138, "end": 8414, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1331,7 +1331,7 @@ "start": 8557, "end": 9172, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4756067663643049, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1345,7 +1345,7 @@ "start": 707, "end": 2207, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1357,7 +1357,7 @@ "start": 1999, "end": 5011, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19298018391400085, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1369,7 +1369,7 @@ "start": 4955, "end": 5534, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGDIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1379,23 +1379,23 @@ { "name": "vpr", "start": 5473, - "end": 5767, + "end": 5764, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5744, "end": 5963, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -1403,45 +1403,45 @@ { "name": "rev_exon1", "start": 5883, - "end": 5961, + "end": 5964, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5975, - "end": 6221, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5399181166837258, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6138, "end": 8679, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4585964351370794, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNITNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIVFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATATTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAGTCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTAAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6176, + "end": 6221, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8260, "end": 8356, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1453,7 +1453,7 @@ "start": 8261, "end": 8537, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1465,7 +1465,7 @@ "start": 8680, "end": 9295, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4756067663643049, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDGEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1479,7 +1479,7 @@ "start": 176, "end": 1685, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.27304152847199525, "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRKNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", @@ -1491,7 +1491,7 @@ "start": 1477, "end": 4489, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.26443159013103534, "protein": "FFREDLAFPQGEAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", @@ -1503,7 +1503,7 @@ "start": 4433, "end": 5012, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3903081914030819, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", @@ -1513,23 +1513,23 @@ { "name": "vpr", "start": 4951, - "end": 5245, + "end": 5242, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6069164265129684, + "distance": 0.594871162618666, "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "aminoacids": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*L", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGACATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTT" + "aminoacids": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEDIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*", + "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGACATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5222, "end": 5441, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5060292850990527, + "distance": 0.48954161103693805, "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" @@ -1537,45 +1537,45 @@ { "name": "rev_exon1", "start": 5361, - "end": 5439, + "end": 5442, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" - }, - { - "name": "vpu", - "start": 5453, - "end": 5699, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5783961231827419, - "protein": "MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM", - "aminoacids": "MHILEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSAIVEMGHLVPWDGDDM*", - "nucleotides": "ATGCATATCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "env", "start": 5616, "end": 8217, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.510440428145289, "protein": "MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", "aminoacids": "MKVTGTRRNYQQLWRWGILFLGMVMICSARNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNNTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAMGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWNPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*", "nucleotides": "ATGAAAGTGACGGGGACCAGGAGGAATTATCAGCAATTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAGAAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGGGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAATACTTGGAATGGTACTGATAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGAAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATGGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAACCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACTTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTAAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 5660, + "end": 5699, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.6901936289818864, + "protein": "MGHLVPWDGDDM", + "aminoacids": "MGHLVPWDGDDM*", + "nucleotides": "ATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG" + }, { "name": "tat_exon2", "start": 7798, "end": 7891, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", @@ -1587,7 +1587,7 @@ "start": 7799, "end": 8075, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", @@ -1599,7 +1599,7 @@ "start": 8218, "end": 8860, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.5478186258332784, "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNTDVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", @@ -1613,7 +1613,7 @@ "start": 663, "end": 2163, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -1625,7 +1625,7 @@ "start": 1955, "end": 4967, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -1637,7 +1637,7 @@ "start": 4911, "end": 5490, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -1647,69 +1647,69 @@ { "name": "vpr", "start": 5429, - "end": 5723, + "end": 5720, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5700, - "end": 5916, + "end": 5919, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5839, - "end": 5917, + "end": 5920, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5931, - "end": 6177, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6094, "end": 8635, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4564898680537425, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6132, + "end": 6177, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8216, "end": 8312, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -1721,7 +1721,7 @@ "start": 8217, "end": 8493, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -1733,7 +1733,7 @@ "start": 8636, "end": 9251, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -1747,7 +1747,7 @@ "start": 250, "end": 1753, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.25132972351334526, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQKIEIKDTKEALDKIEEEQNKSKKKTQQAAADTGHNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNAVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPLSSLKSLFGNDP", @@ -1759,7 +1759,7 @@ "start": 1545, "end": 4557, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.2540106951871657, "protein": "FFRENLAFPQGKARELSSKQDRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPSVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISRIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNLPPVIAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDGCVASRQDED", @@ -1771,7 +1771,7 @@ "start": 4501, "end": 5083, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.40472673559822736, "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKLTEDRWNEPQRTKGHRESHTMNGH", @@ -1783,9 +1783,9 @@ "start": 5022, "end": 5313, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6010897287271042, + "distance": 0.5885636318531382, "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRTLQQLVFIHFRIGCQHSRIGILTHRRARNGASRS*", "nucleotides": "ATGGAACGAGCCCCAGAGGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAACTTTTAGAAGAGCTTAAAAGAGAAGCTGTTAGGCACTTTCCTAGGGCATGGCTTCACGGCCTAGGACAACATATCTATGAAACTTATGGAGATACTTGGGCAGGAGTGGAAGCTCTAATAAGAACTCTGCAACAACTGGTGTTTATTCATTTCAGAATTGGATGTCAGCACAGCAGGATAGGCATTCTTACCCATAGAAGGGCAAGAAATGGAGCCAGTAGATCCTAG" @@ -1793,57 +1793,57 @@ { "name": "tat_exon1", "start": 5293, - "end": 5509, + "end": 5512, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48719691819623834, + "distance": 0.4691531785127845, "protein": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTSCYCKRCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAAGACTGCTTGCACCAGTTGCTATTGTAAAAGGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGA" }, { "name": "rev_exon1", "start": 5432, - "end": 5510, + "end": 5513, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.5698711595639246, - "protein": "MAGRSGDSDEELLRITRTIKFLYQNS", - "aminoacids": "MAGRSGDSDEELLRITRTIKFLYQNS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGT" - }, - { - "name": "vpu", - "start": 5524, - "end": 5794, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5867864387134165, - "protein": "MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL", - "aminoacids": "MQSLEILAIVALVVAFIIAIVVWSIVFIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWEVNDL*", - "nucleotides": "ATGCAATCTTTAGAAATATTAGCAATAGTAGCATTAGTAGTAGCATTTATAATAGCAATAGTCGTTTGGTCTATAGTATTCATAGAATATAGGAAAATAAGAAGACAAAAGAGAATAGACAGGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAA" + "distance": 0.564475664826894, + "protein": "MAGRSGDSDEELLRITRTIKFLYQNSE", + "aminoacids": "MAGRSGDSDEELLRITRTIKFLYQNSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAGGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGAG" }, { "name": "env", "start": 5687, "end": 8198, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.6631820277358986, "protein": "MHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", "aminoacids": "MRVKGTKKNWQPSWRWGTMLIWGWATMLLGRSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDPEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTTINNTSSIEEGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSATITQACPKVSFEPIPIH*VQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRKAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARHLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEVIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGACCAAGAAGAATTGGCAGCCCTCGTGGAGATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAATGCAGAAGACTCGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCCAAAGCATATGACCCAGAGGTACACAATGTCTGGGCCACACATGCTTGTGTACCCACAGACCCCAACCCACAAGAAGTAGAATTAAATGTGACAGAAAATTTTAATGCATGGAAAAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAACTGCACTGAGTTGAAGAATAGCACTACTATAAACAATACCAGTAGTATAGAAGAAGGAGAAATGAAAAACTGTTCTTTTAATGCCACCACAGCAATAAGAGATAAGGTGCAAAAACAATATGCACTTTTTTATAGACTTGATGTAGTACAAATTAAGGATGATAATACCAGCTATAATACCAGATATAGGTTGATAAATTGTAACTCCGCAACCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTAGGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGGCAGTCTAGCAGAAGGAGATGTAGTAATTAGATCTAAAAATTTCTCTAACAGTGCTCAAACCATAATAGTACAATTAAACGAAACTGTAGAAATTAATTGTACAAGACCCAACAATAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGCAACAGGAGACATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGAGACTTTAAAAAAGATAGCTGATAAATTAAGGGAACAATTTAAAAGAGAGACAATAGTCTTTAACCAATCCTCAGGAGGGGACCCAGAGATTGTAATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACACAACTGTTTAATAGTACTTGGAATAGTACTACAGGAGTAAATGGTACAGGAGCAAATAATACAGAAGTAAATATTATCACACTCCCATGCAAGATAAAGCAAATTGTAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGTTGTTCATCAAATATTACAGGGCTACTACTACTAAGAGATGGTGGTAATAGCAATGACACGGATAACACGACTGAAATCTTCAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAGGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCATCAATAACGCTGACGGTACAAGCCAGACACTTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATATGTTGCAGCTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTTAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTAATGTGCCTTGGAATAGTAGTTGGAGTAATAAAAATCTGAGTTACATTTGGAATAACATGACCTGGATGCAGTGGGAAAAAGAAATTGACAATTACACAGAGGTAATATACAACTTAATTGAACAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGACTTGGTTTGACATAACAAAGTGGCTGTGGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAACAATTATCTGGGTCGACCTGCGGAGCCTGTTCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTAGCGAGGGGTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAGTATTGGTGGAATCTCCTGCAGTATTGGAGCCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGTTTATAGAAGTATTGCAAAGAATTTTTAGAGCTATTATCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 5731, + "end": 5794, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.6405797101449275, + "protein": "MGNHAHLGMGHHAPWEVNDL", + "aminoacids": "MGNHAHLGMGHHAPWEVNDL*", + "nucleotides": "ATGGGGAACCATGCTCATTTGGGGATGGGCCACCATGCTCCTTGGGAGGTCAATGACCTGTAA" + }, { "name": "tat_exon2", "start": 7779, "end": 7872, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.4464285714285715, "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", @@ -1855,7 +1855,7 @@ "start": 7780, "end": 8056, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.4267425320056898, "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGCGTSGTQGVGSPQVLVESPAVLEPGTKE", @@ -1867,7 +1867,7 @@ "start": 8199, "end": 8823, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.5148270181219111, "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDPDKEVLVWKFDSRLAFRHVAREIHPEYYKNC", @@ -1881,7 +1881,7 @@ "start": 316, "end": 1819, "subtype_start": 234, - "subtype_end": 1730, + "subtype_end": 1731, "orientation": "forward", "distance": 0.38499312512276596, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFNTVAVLYCVHQRIEIKDTKEALDKIEEEQNKSKKKTQQAAADPGNSNQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASVMMQKGNFRSQRKMVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRTEPTAPPEESFSFGEKTTTPSQKQEPIDQELYPMSSLKSLFGNDP", @@ -1893,7 +1893,7 @@ "start": 1611, "end": 4623, "subtype_start": 1526, - "subtype_end": 4534, + "subtype_end": 4535, "orientation": "forward", "distance": 0.30843043180260443, "protein": "FFRENLAFPQGEARELSSKQNRTNSPTRGELQFWGKDNNSFSEAGADRPGTIPYVFPQITLWQRPLVTVRIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVKQYDQISVEICGHKAIGTVLIGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICKEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDMVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGAKALTEVVPLTEEAELELAENREILKEPVHGVYYDPTKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWESWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKSGKAGYVTDRGRQKVISLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIQKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEDHEKYHSNWRAMASDFNIPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVTTIHTDNGPNFTSNAVKAACWWAGVKQEFGIPYNPQSQGVVESMNKQLKQIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDGCVASRQDED", @@ -1905,7 +1905,7 @@ "start": 4567, "end": 5149, "subtype_start": 4479, - "subtype_end": 5060, + "subtype_end": 5061, "orientation": "forward", "distance": 0.48399487836107546, "protein": "MENRWQVMVVWQVDRMRINMWKSLVKYHIHNSGKARGWCYRHHYESRHPRISSEVHIPLGGEARLVVTTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADQLIHLYYFNCFSESAIRQAILGHRVSPRCEYQAGHNKVGSLQYLALIALIPPKRRKPPLPSVRKRTEDRWNEPQRTKGHRESHTMNGH", @@ -1917,7 +1917,7 @@ "start": 5088, "end": 5379, "subtype_start": 5000, - "subtype_end": 5290, + "subtype_end": 5291, "orientation": "forward", "distance": 0.43053960964408733, "protein": "MERAPEDQGPQREPYNEWTLELLEELKREAVRHFPRAWLHGLGQHIYETYGDTWAGVEALIRILQQLVFIHFRIGCQHSRIGILTHRRARNGASRS", @@ -1927,33 +1927,33 @@ { "name": "tat_exon1", "start": 5359, - "end": 5575, + "end": 5578, "subtype_start": 5271, - "subtype_end": 5485, + "subtype_end": 5487, "orientation": "forward", - "distance": 0.46628407460545196, + "distance": 0.4464285714285715, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKKCCYHCPRCFLTKGLGISYGRKKRRQRRRTPQDNQNHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTGGAGCCCTGGAAGCATCCAGGGAGTCAGCCTAGGACTGCGTGCACCAGTTGCTATTGTAAAAAGTGTTGCTATCATTGCCCGCGATGCTTCTTAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGA" }, { "name": "rev_exon1", "start": 5498, - "end": 5576, + "end": 5579, "subtype_start": 5410, - "subtype_end": 5485, + "subtype_end": 5488, "orientation": "forward", - "distance": 0.5678884873515746, - "protein": "MAGRSGDSDEELLKITRTIKFLYQNS", - "aminoacids": "MAGRSGDSDEELLKITRTIKFLYQNS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGT" + "distance": 0.5622384937238494, + "protein": "MAGRSGDSDEELLKITRTIKFLYQNSE", + "aminoacids": "MAGRSGDSDEELLKITRTIKFLYQNSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAACTCCTCAAGATAACCAGAACCATCAAGTTTCTTTATCAAAACAGTGAG" }, { "name": "vpu", "start": 5590, "end": 5860, "subtype_start": 5502, - "subtype_end": 5747, + "subtype_end": 5748, "orientation": "forward", "distance": 0.5374618963580942, "protein": "MQSLEILAIVALVVAFIIAIVVWSIVLIEYRKIRRQKRIDRILDRIRERAEDSGNESEGDQEELAALVEMGNHAHLGMGHHAPWDVNDL", @@ -1965,7 +1965,7 @@ "start": 5753, "end": 8315, "subtype_start": 5665, - "subtype_end": 8211, + "subtype_end": 8212, "orientation": "forward", "distance": 0.5078662118966413, "protein": "MRVKGTKKNWQPSWRWGTMLIWGWATMLLGMSMTCNAEDSWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVELNVTENFNAWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTELKNSTGEMKNCSFNATTAIRDKVQKQYALFYRLDVVQIKDDNTSYNTRYRLINCNSSTITQACPKVSFEPIPIHYCAPAGFAILKCKDKNFNGTGLCENVSTVQCTHGIRPVVSTQLLLNGSLAEGDVVIRSKNFSNSAQTIIVQLNETVEINCTRPNNNTRKSIHIGPGRAFYATGDIIGDIRQAHCNISRAKWNETLKKIADKLREQFKRETIVFNQSSGGDPEIVMHSFNCRGEFFYCNTTQLFNSTWNSTTGVNGTGANNTEVNIITLPCKIKQIVNMWQEVGKAMYAPPISGQISCSSNITGLLLLRDGGNSNDTDNTTEIFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTKARRRVVQREKRAVGIGALFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKNLSYIWNNMTWMQWEKEIDNYTEEIYNLIEQSQNQQEKNEQELLELDKWASLWTWFDITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTLLPAARGPDRPEGIGEEGGERDRDRSSHLAHGFLTIIWVDLRSLFLFSYHHLRDLLLIVARGVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRFIEVLQRIFRAIIHIPTRIRQGLERALL", @@ -1975,21 +1975,21 @@ { "name": "tat_exon2", "start": 7896, - "end": 7989, + "end": 8004, "subtype_start": 7793, - "subtype_end": 7885, + "subtype_end": 7901, "orientation": "forward", - "distance": 0.5279960707269156, + "distance": 0.5826923076923078, "protein": "RPSSQPRGDPTGPKESEKKVERETETDPVT", - "aminoacids": "RPSSQPRGDPTGPKESEKKVERETETDPVT*", - "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAG" + "aminoacids": "RPSSQPRGDPTGPKESEKKVERETETDPVT*RMDS*", + "nucleotides": "AGACCCTCCTCCCAGCCGCGAGGGGACCCGACAGGCCCGAAGGAATCGGAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCAGTCACTTAGCGCATGGATTCTTAA" }, { "name": "rev_exon2", "start": 7897, "end": 8173, "subtype_start": 7794, - "subtype_end": 8069, + "subtype_end": 8070, "orientation": "forward", "distance": 0.45968205324650446, "protein": "DPPPSREGTRQARRNRRRRWRERQRQIQSLSAWILNNYLGRPAEPVPLQLPPLERLTLDCSEGSGTSGTQGVGSPQVLVESPAVLEPGTKE", @@ -2001,7 +2001,7 @@ "start": 8316, "end": 8940, "subtype_start": 8213, - "subtype_end": 8833, + "subtype_end": 8834, "orientation": "forward", "distance": 0.5254870367657829, "protein": "MGGIFSKRNGGGWPAIRERMRRAQPAADGVGAASRDLERHGALTSNNTPANNADCAWLEAQEEGEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIYSQQRQDILDLWVYNTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPMEPEDVEKATEGEDNILLHPVNQHGMDDSEREVLVWKFDSRLAFRHVAREIHPEYYKNC", @@ -2015,7 +2015,7 @@ "start": 532, "end": 2047, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.27270615563298484, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNTIAVLYCVHQRIDVKDTKEALNKIEEEQNKSKKKAQQAAADTGNSSQSSQVSQNYPIVQNHQGQMVYQALSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIIMGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQATGGATNIMMQKGNFRNQGKPIKCFNCGKEGHLARNCRAPRKKGCWKCGKEGHQMKDCSERQANFLGKIWPSHKGRPGNFLQNRPEPTAPPEESFRFGEETTAPPQKQEPTDKELYPFSSLKSLFGNDPSSQ", @@ -2027,7 +2027,7 @@ "start": 1839, "end": 4851, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.21944123990570308, "protein": "FFRENLAFPQGKAREFPPEQTRANSPTRRELQVWGRDNSSPSEAGADRQGTVSLLFPQITLWQRPLVTVKIGGQLKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTKIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDESFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIEQHRAKVENLREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVITLTEEAELELAENREILKEPVHGVYYDPSKDLVAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIATESIVVWGKIPKFRLPIQKETWETWWMEYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVIPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQEDHEKYHSNWKAMASDFNIPPVVAKEIIASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGEYCAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2039,7 +2039,7 @@ "start": 4795, "end": 5374, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWVYRHHYESTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYNTQVDPGLADQLIHMYYFDCFSESAIRQAILGHRVSPSCEYQAGHNKVGSLQYLALAVLVAPKKIKPPLPSVARLTEDRWNKPRKIKGHRESHTMNGH", @@ -2051,9 +2051,9 @@ "start": 5313, "end": 5598, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.599565614997714, + "distance": 0.5871212121212123, "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS", "aminoacids": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPREWLHSLGQHIYETYGDTWAGVGAIIRILQQLLFIHFRIGCHHSRIGILRRTRNGARRS*", "nucleotides": "ATGGAACAAGCCCCGGAAGATCAAGGGCCACAGAGAGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAGCTTAAGAGGGAAGCTGTTAGACATTTTCCTAGGGAATGGCTCCATAGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGGCAGGAGTAGGAGCCATAATAAGAATACTGCAACAATTACTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGGATAGGCATACTGAGGAGAACAAGAAATGGAGCCCGTAGATCCTAG" @@ -2061,69 +2061,69 @@ { "name": "tat_exon1", "start": 5578, - "end": 5794, + "end": 5797, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5284227381905524, + "distance": 0.5138258357408172, "protein": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ", - "nucleotides": "ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTPCNNCYCKKCCFHCQVCFTKKGLGISYGRKKRRQRRGPPRDSQIHQVPLPKQ*", + "nucleotides": "ATGGAGCCCGTAGATCCTAGACTGGAACCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTAACAATTGCTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGCTTCACAAAAAAGGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGTAA" }, { "name": "rev_exon1", "start": 5717, - "end": 5795, + "end": 5798, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.5989458552946814, - "protein": "MAGRSGDRDEDLLETVRFIKFLYQNS", - "aminoacids": "MAGRSGDRDEDLLETVRFIKFLYQNS", - "nucleotides": "ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGT" - }, - { - "name": "vpu", - "start": 5809, - "end": 6055, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5826369545032498, - "protein": "MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL", - "aminoacids": "MQPLEISAIVALVVVAIIAIVVWTIVLLEYRKILRQKKIDRLINRISERAEDSGNESDGDQEELSALMEMGRLAPWNVDDL*", - "nucleotides": "ATGCAACCTTTAGAGATATCAGCAATAGTAGCATTAGTAGTAGTAGCAATAATAGCAATAGTTGTGTGGACCATAGTACTCTTAGAGTATAGGAAAATATTAAGGCAAAAGAAAATAGACAGATTAATTAATAGAATAAGTGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAG" + "distance": 0.5940834141610087, + "protein": "MAGRSGDRDEDLLETVRFIKFLYQNSK", + "aminoacids": "MAGRSGDRDEDLLETVRFIKFLYQNSK", + "nucleotides": "ATGGCAGGAAGAAGCGGCGACAGAGACGAGGACCTCCTCGAGACAGTCAGATTCATCAAGTTCCTCTACCAAAACAGTAAG" }, { "name": "env", "start": 5972, "end": 8549, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.49969138043715056, "protein": "MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL", "aminoacids": "MRVTGIKKNCQLLWRWGALLLGMLMICSATNMWVTVYYGVPVWKDATTTLFCASDAKAYDTEIHNVWATHACVPTDPDPQEVVLENVTENYNMGKNNMVEQMHEDIISLWDQSLKPCVLLTPFCVTLNCTDANITSTNNSRDKKEGESTLEETKGEIKNCSFNMTSSMSDKSQKQRALFYKLDVVQIDETNNNSYRLISCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKKFNGTGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENFTNNAKTIIVQLKTPVQINCTRPNNNTRKRISMGPGRVIYATGQIIGDIRKAHCNISRAEWNTTLKQIVTQLRKQWNRTIIFNSSSGGDPEIVMHSFNCRGEFFYCNTTKLFNSTWPRNSTWNNTEGSNDTEIITLPCRIKQIVNRWQEVGKAMYAPPIQGQISCSSNITGLLLVRDGGINTSESNETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGMLGAMFLGLLGAAGSTMGAASVTLTVQTRLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARLLAVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNRSYEDIWNNMTWMEWEKEIDNYTGLIYTLIEKSQNQQEINEQELLSLDKWASLWNWFNITNWLWYIKIFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSLQTPRPAPRGPDRPEEIEEGGGERDRDRSVRLVTGFFALFWDDLRSLCLFSYHHLRDLILIVVRVVEILGRRGWEALKYWWNLLQYWSQEIKNSAISLLNATAIAVAEGTDRIIGVVQRTWRAFIHIPRRIRQGFERALL*", "nucleotides": "ATGAGAGTGACGGGGATCAAGAAGAATTGTCAGCTCTTATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAGTGCTACAAACATGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGATGCAACCACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGATACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCGACCCACAAGAAGTAGTACTGGAAAATGTGACAGAAAATTATAATATGGGAAAAAATAACATGGTGGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTACTCTTAACCCCATTCTGTGTCACTTTAAATTGCACTGATGCTAACATCACCAGCACTAATAATAGTAGAGATAAGAAGGAAGGAGAAAGTACATTGGAGGAGACGAAAGGAGAAATAAAAAACTGCTCTTTCAATATGACTTCAAGCATGAGCGATAAGTCTCAGAAACAACGTGCACTTTTTTATAAGCTTGATGTGGTACAAATAGATGAGACTAATAATAATAGTTATAGGTTGATAAGTTGTAACACCTCAGTCGTCACACAGGCTTGTCCAAAGGTATCCTTTGATCCAATCCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGAAATTCAATGGAACAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAACCTGTAGTGTCAACCCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAAGTAATGATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTGCAGCTGAAGACACCTGTACAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGGATAAGTATGGGACCAGGGAGAGTAATTTATGCAACAGGACAAATAATAGGAGATATAAGAAAAGCACATTGCAACATTAGTAGAGCAGAATGGAATACAACTTTAAAGCAGATAGTTACACAATTAAGAAAGCAGTGGAATAGAACCATAATCTTTAACTCATCCTCAGGAGGGGACCCAGAAATTGTGATGCACAGTTTTAATTGTAGAGGGGAATTTTTCTACTGTAATACAACAAAACTATTTAATAGTACTTGGCCACGTAATAGTACTTGGAATAATACTGAAGGGTCAAATGACACTGAAATAATCACACTCCCGTGCAGAATAAAACAAATTGTAAACAGGTGGCAGGAAGTAGGCAAAGCAATGTATGCCCCTCCCATCCAAGGACAAATTAGTTGTTCATCAAATATTACAGGGCTGCTACTAGTTAGAGATGGTGGAATTAACACCAGTGAGAGCAACGAGACCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAGGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATGCTGGGAGCTATGTTCCTTGGGCTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGTTGACGGTACAGACCAGACTATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGACTCCTAGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAACACTAGTTGGAGTAATAGATCTTATGAAGATATTTGGAACAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAGGCTTAATATACACCTTAATTGAAAAATCGCAGAACCAGCAGGAAATAAATGAACAAGAACTATTGTCATTGGATAAGTGGGCAAGCCTGTGGAATTGGTTTAATATAACAAATTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGACTGGATTCTTCGCACTTTTCTGGGACGATCTACGAAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTGATTGTAGTGAGGGTTGTGGAAATTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAAATAAAGAATAGTGCTATCAGCTTGCTCAACGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGAATCATAGGAGTAGTACAAAGAACTTGGAGAGCTTTTATCCACATACCTAGGAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 6010, + "end": 6055, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.5970755483346872, + "protein": "MEMGRLAPWNVDDL", + "aminoacids": "MEMGRLAPWNVDDL*", + "nucleotides": "ATGGAGATGGGGCGCCTTGCTCCTTGGAATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8130, - "end": 8223, + "end": 8226, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.6074396517609815, "protein": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD", - "aminoacids": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD*", - "nucleotides": "AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAG" + "aminoacids": "RPPAQPQGDPTGPKKSKKEVEKETETDQCD**", + "nucleotides": "AGACCCCCCGCCCAGCCCCAAGGGGACCCGACAGGCCCGAAGAAATCGAAGAAGGAGGTGGAGAAAGAGACAGAGACAGATCAGTGCGATTAGTGA" }, { "name": "rev_exon2", "start": 8131, "end": 8407, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.47688921496698455, "protein": "DPPPSPKGTRQARRNRRRRWRKRQRQISAISDWILRTFLGRSTKPVPLQLPPLERLNLDCSEGCGNSGTQGVGSPQILVESPAVLESGNKE", @@ -2135,7 +2135,7 @@ "start": 8550, "end": 8985, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.6054615099468668, "protein": "MGGKWSKKSGGGWPAVREKMKRTEPAAEGVGAASRDLDKYGAITSSNTAQTNPDCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDMSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGGCFK", @@ -2149,7 +2149,7 @@ "start": 120, "end": 2022, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.7511039743075072, "protein": "MTKTLLVQNANPDCKTILKALGPAATLEEMMTACQKVRRPGHKAKVLAEAMSQATGAANIMMQRGNFKNQRKPVKCFNCGKEKHIAKNCKAPKKKGC", @@ -2161,7 +2161,7 @@ "start": 1787, "end": 4826, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7637180771917039, "protein": "ASQIYAKIKVKQLCKLLKRTKALTEVVPLTEEAELELAENRKILKDPVHRAYYDPAKDLIAELQKQREGQWTYQIYQKPFKNLKTEKYARTKGAHTNDVKQLTEAVQKISTESIVI", @@ -2173,7 +2173,7 @@ "start": 4425, "end": 5349, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7566838361540349, "protein": "MIVWQVDRMKIRTWKSLVKYHMYISKKAKK", @@ -2183,57 +2183,57 @@ { "name": "vpr", "start": 5033, - "end": 5582, + "end": 5579, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.7667502687208886, + "distance": 0.7661862444925697, "protein": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS", - "aminoacids": "MEKKEI*HTSRP*LSRPSNSCALF*LFFRICYKKCHIRT*S*P*V*ISSRT*QSKISTILGTNSINNTKEDKATFA*CKETDRR*IEQAPEDQRPQKEPYNE*TLELLEELKREAVRHFPRPWLQNLRQYIYETYKDTWTRVEAIIRILQQMLFIHFRIKCHHSRIGIVLQRRARNRASRS*T", - "nucleotides": "ATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAAACCTAAGACAATATATCTATGAAACTTATAAAGATACTTGGACAAGAGTAGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACT" + "aminoacids": "MEKKEI*HTSRP*LSRPSNSCALF*LFFRICYKKCHIRT*S*P*V*ISSRT*QSKISTILGTNSINNTKEDKATFA*CKETDRR*IEQAPEDQRPQKEPYNE*TLELLEELKREAVRHFPRPWLQNLRQYIYETYKDTWTRVEAIIRILQQMLFIHFRIKCHHSRIGIVLQRRARNRASRS*", + "nucleotides": "ATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAAGTAAGATCTCTACAATACTTGGCACTAACAGCATTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAAGATAGATAGAACAAGCCCCAGAAGACCAAAGGCCACAGAAAGAGCCATACAATGAATAGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAAACCTAAGACAATATATCTATGAAACTTATAAAGATACTTGGACAAGAGTAGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAG" }, { "name": "tat_exon1", "start": 5486, - "end": 5777, + "end": 5780, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", "distance": 0.7619181418001311, "protein": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS", - "aminoacids": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAV", - "nucleotides": "ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTA" + "aminoacids": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAVN", + "nucleotides": "ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAAT" }, { - "name": "rev_exon1", - "start": 5698, - "end": 5776, - "subtype_start": 5969, - "subtype_end": 6044, + "name": "vpu", + "start": 5486, + "end": 6206, + "subtype_start": 6259, + "subtype_end": 6310, "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT" + "distance": 0.7713277742142399, + "protein": "MRYLCKEMQIPLYSVHQMLKHMIQKYIMFRPHMPVYPQTLTHMK", + "aminoacids": "MLFIHFRIKCHHSRIGIVLQRRARNRASRS*TRALEASRKSA*DSLYQLLL*KVLLSLPSVFYTKSLRHLLWQEEAETETKISSRQSDSSSSSTKAVNNTCNATFKDISNSSISSSSNNSNSCVDHSRHKI*ENIKTKKNR*NN**NKKKSRRQWQ*ERKRSGRIVSTCSRNKARCSLRC**SVVLRKICKSQSIMRYLCKEMQIPLYSVHQMLKHMIQKYIMFRPHMPVYPQTLTHMK*", + "nucleotides": "ATGCTGTTTATTCATTTCAGAATTAAGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATAGAGCCAGTAGATCATAGACTAGAGCCCTAGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAATAATACATGTAATGCAACCTTTAAAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGACCATAGTAGGCATAAAATATAAGAAAATATTAAGACAAAGAAAAATAGATAGAATAATTAATAGAATAAGAAAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTAAGTCACAGTCTATTATGAGGTACCTGTGTAAAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAG" }, { - "name": "vpu", - "start": 5790, - "end": 6039, - "subtype_start": 6061, - "subtype_end": 6309, + "name": "rev_exon1", + "start": 5698, + "end": 5779, + "subtype_start": 5969, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.6962169553327257, - "protein": "TIVGIKYKKILRQRKIDRIINRIRKRAEDSGNESEKDQEELSALVVEIRHDAP", - "aminoacids": "MQPLKILAIVALVVAAIIAIVV*TIVGIKYKKILRQRKIDRIINRIRKRAEDSGNESEKDQEELSALVVEIRHDAP*DVDDL*", - "nucleotides": "ATGCAACCTTTAAAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGACCATAGTAGGCATAAAATATAAGAAAATATTAAGACAAAGAAAAATAGATAGAATAATTAATAGAATAAGAAAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAAAAGATCAGGAAGAATTGTCAGCACTTGTAGTAGAAATAAGGCACGATGCTCCTTAAGATGTTGATGATCTGTAG" + "distance": 0.524971623155505, + "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAA" }, { "name": "env", "start": 5953, "end": 8521, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7529994904340572, "protein": "MTNCSFNITTEIRDKVRKEYALFYKLDVMPIDKDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPASFAILKCKDEMFNRTRPCKNVSTVQCTHRIRPVVSTQLLLNSSLAEKKIVLRSENFTDNTKNIIVQLNRSIVINCTRPNNNTRKSISVAKRAIYATRQIIEDIRQAHCNISETD", @@ -2245,7 +2245,7 @@ "start": 7916, "end": 9176, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.7587548638132295, "protein": "MSLHRMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", @@ -2257,7 +2257,7 @@ "start": 8102, "end": 8198, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.7134052388289676, "protein": "RPSSQPQEDQTGPKE", @@ -2269,7 +2269,7 @@ "start": 8103, "end": 8379, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.6888374145157732, "protein": "ILTTHLRRPAKPVPFQLPPLKRLTLDCAEDCANSRTQEVRDPQVLVESPAVLNSGTKE", @@ -2283,7 +2283,7 @@ "start": 302, "end": 1715, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.7484174646972894, "protein": "MTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVRGPGHKARVLAEAMSQVTKSASIIVQGGNFKNQRKNVKCFNCGKERHTAKNCRAPKKKGC", @@ -2295,7 +2295,7 @@ "start": 1613, "end": 4625, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7535201229073285, "protein": "AEIKQEFSIPYNPQSQRVVKSMNNKLKKIIGQVKDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", @@ -2307,7 +2307,7 @@ "start": 4289, "end": 4988, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7624714704923379, "protein": "MAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFQVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYRKQMAGDDCVASRQDED", @@ -2317,47 +2317,47 @@ { "name": "vpr", "start": 5087, - "end": 5381, + "end": 5249, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6769116239498034, + "distance": 0.6703246973639554, "protein": "MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT", - "aminoacids": "MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT*AEVEAIIRTLQQLLFIHFRIRCQHSRIGIIRQRRARNRASRS*L", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTT" + "aminoacids": "MEQVPEDQRPQKEPYNKWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDT*", + "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATAAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTAG" }, { "name": "tat_exon1", "start": 5235, - "end": 5574, + "end": 5577, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.6412492073557388, + "distance": 0.6325105553751218, "protein": "MTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ", - "aminoacids": "MEILRQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPSLEP*KHPGSQPMTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ", - "nucleotides": "ATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAG" + "aminoacids": "MEILRQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPSLEP*KHPGSQPMTACNNCYCKRCCFHCQVCFTTKSLGISYGRKKQRQRRRASHSSQNHQAALPEQ*", + "nucleotides": "ATGGAGATACTTAGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGCTTAGAGCCCTAGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAACAAAAAGCTTAGGCATCTCCTATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" }, { "name": "rev_exon1", "start": 5497, - "end": 5575, + "end": 5578, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5678884873515746, - "protein": "MAGRSRDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSRDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.564475664826894, + "protein": "MAGRSRDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSRDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCAGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", "start": 5589, "end": 5835, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_start": 6259, + "subtype_end": 6310, "orientation": "forward", - "distance": 0.6428836863619473, + "distance": 0.7710195148641255, "protein": "MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP", "aminoacids": "MHALEIAAIAGLVVAAIIAIVVWSIVLIEYKKILRQRKIDRLINRIRERAEDSGNESDEDQEELSALVEMRHLVP*DSNDM*", "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGCAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGACGAGGACCAAGAGGAATTATCCGCACTTGTGGAGATGAGGCATCTTGTTCCTTAAGATAGTAATGATATGTAG" @@ -2367,7 +2367,7 @@ "start": 5752, "end": 8353, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7518870380010406, "protein": "MKEKGEIKNCSFNVTTGIRDKVTKEHALFYKLNVVPIDEDSKNTTGKYKMINCNTSVITQACPKVSFKPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEKIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYRTDIIRDIRQAHYNISKKD", @@ -2379,7 +2379,7 @@ "start": 7748, "end": 8996, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.7344808947652905, "protein": "MTYKRALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFR", @@ -2391,7 +2391,7 @@ "start": 7934, "end": 8027, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.5633802816901408, "protein": "RPSSQPREDPTGPKEQKKEVERKTEAHPRD", @@ -2403,7 +2403,7 @@ "start": 7935, "end": 8211, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.6781884553958476, "protein": "ILSTHLGRPAEPVPLQLPPLERLTLNCGENCRTSRTQKVRSTEVLVESPAVLESGNKE", @@ -2417,7 +2417,7 @@ "start": 292, "end": 1795, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.25442849599155104, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLETTEGCKQILEQLQPSLPTGSEELRSLFNTVATLYCVHKRIEVQDTKEALEKIEEEQNKSKKKAQQAVADTGSTSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKVLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", @@ -2429,7 +2429,7 @@ "start": 1587, "end": 4599, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.17509882471546434, "protein": "FFREDLAFLQGKARELSSEQTRANSPTRGELQVWGGDSNSSSEAGAGGQGSVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVQLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSIPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMIKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPDKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLKGAKALTEVIQLTEEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARTRGTHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTSRGRQKVVSLTDTTNQKTELQAICLALQDSGLEVNIVTDSQYALGIIQAQPDRSESEIVNQIIEQLIKKERVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHNNWRAMASDFNLPPVVAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKAIHTDNGTNFTSATVKAACWWAGIKQECGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2441,7 +2441,7 @@ "start": 4543, "end": 5122, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.34158090650317496, "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTYWGLHTGERDWHLGQGVSIECRERKYSTQVTPDLADQLIHLYYFDCFAESAIREAILGHRVSPRCEYQAGHNKVGSLQYLALTALVTPKKTKPPLPSVTKLTEDRWNKPQKTKGHRGSQTMNGH", @@ -2453,9 +2453,9 @@ "start": 5061, "end": 5352, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5777089783281733, + "distance": 0.5625083211290105, "protein": "MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", "aminoacids": "MEQAPEDQGPQREPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS*", "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" @@ -2463,69 +2463,69 @@ { "name": "tat_exon1", "start": 5332, - "end": 5548, + "end": 5551, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.40192926045016075, + "distance": 0.37688442211055273, "protein": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTKCYCKKCCFHCQVCFITKGLGISYGRKKRRQRRRPPQDSQTHQVPLPKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGA" }, { "name": "rev_exon1", "start": 5471, - "end": 5549, + "end": 5552, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5563, - "end": 5809, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5669679539852097, - "protein": "MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL", - "aminoacids": "MQSLYILTIVALVVAAILAIVVWAIVLIEYKKILKQRRIDRLIDRIIDRAEDSGNESEGDQEELSALVEMGHHAPWNVDDL*", - "nucleotides": "ATGCAATCCTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAGGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGAG" }, { "name": "env", "start": 5726, "end": 8288, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.52645935624659, "protein": "MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL", "aminoacids": "MRVKEIKRSYQHLWRWGIMLLGMLMIYSTADQWWVTVYYGVPVWREANTTLFCASDAKAYSTEAHNVWATHACVPTDPNPQEIVIGNVTEDFNMWKNNMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTTWGEMTNCTFNITTSIKDKMKKEAALFYKIDLVEIDEEKNNSSTRYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNNKKFSGKGPCTNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIQIFCIRPNNNTRKSINIGPGRAFYTTGDIIGDIRQAHCNISGNWNNTLKQIATQLGKQLNQTQQIIFNSSAGGDPEIVTHSFNCGGEFFYCNSSSLFNSTWTKNGTGSWQSNDTQNGNITLQCRIKQIINLWQGVGKAMYAPPISGQINCTSNITGLVLTRDGGKVINETETFRPGGGNMKDNWRSELYKYKVVRIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGLWGCSGKLICTTTVPWNRSWGGHNKNLDDIWGNMTWMEWEKEIDNYTSLIYTLITESHSQQEKNEQELLALDKWASLWNWFDISQWLWYIKIFIMIVGGLVGLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPEGIEEGGGERDKGRSGRLVNGFLALIWDDLRSLCLFSYHRLSDLLLIVIRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNTTAIVVAEGTDRIIEILQRIGRAFLHIPRRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAGCATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGACTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAGATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGCTGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGCATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAAAGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAAGAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 5770, + "end": 5809, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.5690703735881842, + "protein": "MGHHAPWNVDDL", + "aminoacids": "MGHHAPWNVDDL*", + "nucleotides": "ATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAG" + }, { "name": "tat_exon2", "start": 7869, - "end": 7962, + "end": 7965, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.6995153473344102, "protein": "RPSSQLRGEPTGPKE", - "aminoacids": "RPSSQLRGEPTGPKE*KKEVERETKADPVD*", - "nucleotides": "AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAG" + "aminoacids": "RPSSQLRGEPTGPKE*KKEVERETKADPVD**", + "nucleotides": "AGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAGAAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGA" }, { "name": "rev_exon2", "start": 7870, "end": 8146, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.32366339007432277, "protein": "DPPPSSEGSRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDCDQDCGTSGTQGVGSPQILVESPAVLESGTKE", @@ -2537,7 +2537,7 @@ "start": 8289, "end": 8940, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.46946145391741245, "protein": "MGGKWSKSSRVGWNAVRERMRRAQPTADRERAEPAADGVGAASRDLEKYGALTSRNTAATNADCAWLEAQEEEDEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHGMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", @@ -2551,7 +2551,7 @@ "start": 521, "end": 2021, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -2563,7 +2563,7 @@ "start": 1813, "end": 4825, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2575,7 +2575,7 @@ "start": 4769, "end": 5348, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -2585,23 +2585,23 @@ { "name": "vpr", "start": 5287, - "end": 5581, + "end": 5578, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5558, "end": 5777, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -2609,45 +2609,45 @@ { "name": "rev_exon1", "start": 5697, - "end": 5775, + "end": 5778, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5789, - "end": 6035, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 5952, "end": 8493, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4564898680537425, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 5990, + "end": 6035, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8074, "end": 8170, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -2659,7 +2659,7 @@ "start": 8075, "end": 8351, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -2671,7 +2671,7 @@ "start": 8494, "end": 9109, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -2685,7 +2685,7 @@ "start": 62, "end": 1601, "subtype_start": 140, - "subtype_end": 1642, + "subtype_end": 1643, "orientation": "forward", "distance": 0.7362754920106639, "protein": "MSQVNSTTVMMQKGNFRNQKKTVKCFNCGKIGHIAKNCRAPRRKGCWKCGQEGHQMKDCSERQANFLGKLWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETATPPQKQEPIDRELYSLASLKSLFGNDPSSQ", @@ -2697,7 +2697,7 @@ "start": 1393, "end": 4405, "subtype_start": 1435, - "subtype_end": 4446, + "subtype_end": 4447, "orientation": "forward", "distance": 0.2475474244944199, "protein": "FFRETLAFPQGEAREFPSEQTRANSPTRGELQVWGRDSNSPSEAGADRQGTIFLSFPQITLWQRPLVSIKVGGQLKEALLDTGADDTVLEEMCLPGKWKPKMIGGIGGFIKVRQYDQIPIEIYGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVRLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDMVIYQYMDDLYIGSDLELGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPITLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKSLTEVVPLTREAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARIKGTHTNDVKQLTQAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKDPIVGAETFYVDGAANRDTKLGKAGYVTDRGRQKIVPLTDTTNQKTELQAIYLALQDSGSEVNIVSDSQYAIGILQAQPDKSESELVNQIIEQLIKKEKVYLTWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVNTIHTDNGSNFTSTAVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGRYSAGERIVDMIASDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSEIKVVPRRKAKIIRDYGKQMAGDDCVAGRQDED", @@ -2709,7 +2709,7 @@ "start": 4349, "end": 4928, "subtype_start": 4391, - "subtype_end": 4969, + "subtype_end": 4970, "orientation": "forward", "distance": 0.4686663095875737, "protein": "MENRWQVMIVWQVDRMRINAWKSLVKHHMHVSRKVERWVYKHHYESTNPRISSEVHIPLGDARLKITTYWGLHTGERDWHLGQGVSIEWRKKSYNTQVDPEVADQLIHLYYFDCFSESAIRKAIVGHRVSPSCEYQAGHNKVGSLQYLALAALVKSKKTKPPLPSVTKLTEDRWNKPQRTKGRRGNHIMNGH", @@ -2721,7 +2721,7 @@ "start": 4867, "end": 5158, "subtype_start": 4909, - "subtype_end": 5199, + "subtype_end": 5200, "orientation": "forward", "distance": 0.4534005037783373, "protein": "MEQAPEDQGPQREPYNEWALELLEELKREAVRHFPRIWLQSLGQYVYETYGDTWTGVEAIIRILQQMLFIHFRIGCQHSRIGIIRRGRTRNGASRP", @@ -2733,9 +2733,9 @@ "start": 5138, "end": 5357, "subtype_start": 5180, - "subtype_end": 5394, + "subtype_end": 5399, "orientation": "forward", - "distance": 0.5040543502081963, + "distance": 0.48719691819623834, "protein": "MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ", "aminoacids": "MEPVDPSLAPWKHPGSQPKTACTNCYCKKCCLHCQVCFTKKGLGISYGRKKRRQRRRPPQSSKAHQNPLPKQ*", "nucleotides": "ATGGAGCCAGTAGACCCTAGCCTAGCGCCCTGGAAGCACCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGCTATTGTAAAAAGTGCTGCTTACATTGCCAAGTTTGTTTCACAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGTAA" @@ -2743,21 +2743,21 @@ { "name": "rev_exon1", "start": 5277, - "end": 5355, + "end": 5358, "subtype_start": 5319, - "subtype_end": 5394, + "subtype_end": 5397, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEDLLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEDLLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAGCAGTAAGGCTCATCAAAATCCTCTACCAAAGCAGTAAG" }, { "name": "vpu", "start": 5369, "end": 5615, "subtype_start": 5411, - "subtype_end": 5656, + "subtype_end": 5657, "orientation": "forward", "distance": 0.46373488953730724, "protein": "MQSLQIGAIVALVVGTIIAIVVWSIVLIEYRKILRQKKIDRIIDRIVERAEDSGNESEGDQEELSALVERGHDAPWNVNDL", @@ -2769,7 +2769,7 @@ "start": 5532, "end": 8073, "subtype_start": 5574, - "subtype_end": 8123, + "subtype_end": 8124, "orientation": "forward", "distance": 0.47450452559300893, "protein": "MRVKETRKSYQHWWKGGMMLLGMLMICSAATNLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVLLGNVTEDFNAWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILHCTDVNNTRNGMTGELKNCSFNITTKITNKVQKEYALFYKLDVVPINNKDNDTSFNNNSYRLISCNTSVITQACPKVSFEPIPIHYCTPAGYAILRCNNETFSGKGPCTNVSSIQCTHGIRPVVSTQLLLNGSLAKQEVVIRSQNFSDNVKTIIVQLKTPVKINCTRPNNNTRKSIHAGPGKVIYATGEIIGDIRQAHCNISAAEWNDTLGQIVTKLQEQFGNKTIVFNQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWNNNGTNTWNSTGNITLPCKIRQIVNMWQKVGKAMYAPPIRGQIKCSSNITGLLLTRDGGNESESETFRPGGGDMRDNWRSELYKYKVVRIEPLGLAPTKAKRRVVQREKRAIGTLGAVFLGFLGTAGSTMGAASMTLTVQARQLLSGIVQQQNNLLKAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNASWSNKSLNEIWDNMTWMEWEKEISNYTQLIYTLIEESQSQQEKNEQELLALDKWDSLWSWFSITKWLWYIKIFIMIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEDGGERDRDRSTRLVTGFLPLFWDDLRSLCLFSYHRLRDLLLIAARIVELLGHRGWEILKHWWSLLQYWSQELKKSAVSLLNATAIAVAEGTDRIIEVVQRACRAILHIPVRLRQGLERALL", @@ -2781,7 +2781,7 @@ "start": 7654, "end": 7750, "subtype_start": 7705, - "subtype_end": 7797, + "subtype_end": 7798, "orientation": "forward", "distance": 0.4464285714285715, "protein": "RPASQPRGDPTGPKESKKTVERETETDPHA", @@ -2793,7 +2793,7 @@ "start": 7655, "end": 7940, "subtype_start": 7706, - "subtype_end": 7981, + "subtype_end": 7982, "orientation": "forward", "distance": 0.46126825660935467, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIHTLSNRILTTFLGRPEEPVPLQLPPLERLTLDCSEDCGTSGTQGVGNPQTLVESPTILESGTKKKCC", @@ -2805,7 +2805,7 @@ "start": 8074, "end": 8695, "subtype_start": 8125, - "subtype_end": 8751, + "subtype_end": 8752, "orientation": "forward", "distance": 0.437094682230869, "protein": "MGGKWSKSSVVGWPAVRERIRRAGPAAEGVGAVSRDLDKHGAITSNNTPATNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGMIYSQQRQDILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPLETEQVEAATGGENNCLLHPLNQHGMDDPEREVLMWKFDSSLAFHHRAKELHPEYYKDC", @@ -2819,7 +2819,7 @@ "start": 683, "end": 2183, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -2831,7 +2831,7 @@ "start": 1975, "end": 4987, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -2843,7 +2843,7 @@ "start": 4931, "end": 5510, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -2853,23 +2853,23 @@ { "name": "vpr", "start": 5449, - "end": 5743, + "end": 5740, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5720, "end": 5939, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -2877,45 +2877,45 @@ { "name": "rev_exon1", "start": 5859, - "end": 5937, + "end": 5940, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5951, - "end": 6197, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6114, "end": 8655, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4564898680537425, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6152, + "end": 6197, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8236, "end": 8332, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -2927,7 +2927,7 @@ "start": 8237, "end": 8513, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -2939,7 +2939,7 @@ "start": 8656, "end": 9271, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -2953,7 +2953,7 @@ "start": 210, "end": 1719, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.27936962750716343, "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPSLQTGSEELRSLYNTVAVLYCVHQRINVKDTKEALDTIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPISPSTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHTPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSASIMAQGGNFRNQKRNVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDKELYPLASLRSLFGNDP", @@ -2965,7 +2965,7 @@ "start": 1511, "end": 4523, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.26443159013103534, "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGQGTVSFSFPQITLWQRPIISIRIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQVPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEDKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKRKKSVTVLDVGDAYFSVPLDQDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSNGIRKILFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", @@ -2977,7 +2977,7 @@ "start": 4467, "end": 5046, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3903081914030819, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPSLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", @@ -2989,7 +2989,7 @@ "start": 4985, "end": 8207, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.6663239775063792, "protein": "MHSFNCGGEFFYCNTTQLFNSTWNGTDNWNGTESNNTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAVGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWRQWEKEIDNYTDTIYNLIELSQNQQEQNEQDLLALDKWASLWSWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", @@ -2999,57 +2999,57 @@ { "name": "vpr", "start": 4985, - "end": 5279, + "end": 5276, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5928358208955226, + "distance": 0.5793112277557293, "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "aminoacids": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*L", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTT" + "aminoacids": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*", + "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAGCTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5256, - "end": 5472, + "end": 5475, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.48719691819623834, + "distance": 0.4691531785127845, "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", - "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAG" + "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTTKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACCACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" }, { "name": "rev_exon1", "start": 5395, - "end": 5473, + "end": 5476, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", - "start": 5487, + "start": 5694, "end": 5733, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_start": 6259, + "subtype_end": 6310, "orientation": "forward", - "distance": 0.5852080576385825, - "protein": "MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", - "aminoacids": "MHALKIAAIVGLVVATIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM*", - "nucleotides": "ATGCATGCCTTAAAAATAGCAGCAATAGTAGGATTAGTAGTAGCAACAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGAGACCAGGAGGAATTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG" + "distance": 0.6901936289818864, + "protein": "MGHLVPWDGDDM", + "aminoacids": "MGHLVPWDGDDM*", + "nucleotides": "ATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG" }, { "name": "tat_exon2", "start": 7788, "end": 7881, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", @@ -3061,7 +3061,7 @@ "start": 7789, "end": 8065, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", @@ -3073,7 +3073,7 @@ "start": 8208, "end": 8850, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.5478186258332784, "protein": "MGNKLSRGLRAGWPAIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTFKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHMARELHPEYFKDC", @@ -3087,7 +3087,7 @@ "start": 739, "end": 2239, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3099,7 +3099,7 @@ "start": 2031, "end": 5043, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3111,7 +3111,7 @@ "start": 4987, "end": 5566, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3123,9 +3123,9 @@ "start": 5505, "end": 5796, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" @@ -3133,69 +3133,69 @@ { "name": "tat_exon1", "start": 5776, - "end": 5992, + "end": 5995, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5915, - "end": 5993, + "end": 5996, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6007, - "end": 6253, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6170, "end": 8711, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.45675101255163, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6208, + "end": 6253, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8292, - "end": 8385, + "end": 8388, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD*", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG" + "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD**", + "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA" }, { "name": "rev_exon2", "start": 8293, "end": 8569, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3207,7 +3207,7 @@ "start": 8712, "end": 9327, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3221,7 +3221,7 @@ "start": 221, "end": 1730, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2880084183556756, "protein": "MGARASVLSGGELDRWEKIYLRPGGKKKYRLKHIVWASRELERFAVNPGLLESSEGCRQILGQLQPALQTGSEELRSLYNTVAVLYCVHPRINVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSNSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINAEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNFRNQRRNVKCFNCGKEGHTAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEEATAPPQKQETKDQELYPLASLRSLFGNDP", @@ -3233,7 +3233,7 @@ "start": 1522, "end": 4534, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.25117173416656646, "protein": "FFREDLAFPQGKAREFSPEQTRANSPASRELQVWGRGNSSPSEAGDEGPGTVSFSFPQITLWQRPIISIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALIEICAEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNEAPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKENPDIVIYQYMDDLYVGSDLEIEQHRTKIEELRQHLLGWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIQLPDKDSWTINDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGAYYDPSKDLIAEVQKQGGDQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKIALEAIVIWGKTPKFKLPIQKETWEMWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLMDTTNQRTELHAIHLALQDSGSTVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLAWVPAHKGIGGNEQVDKLVSSGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEPMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIAAETGQETAYFILKLAGRWPVKIIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQISKIQNFRVYYRDSREPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKILRDYGKQMAGDDCVASRQDED", @@ -3245,7 +3245,7 @@ "start": 4478, "end": 5057, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3903081914030819, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMHISKKAQRWVYRHHYESHNPKTSSEVHIPLGEARLVIKTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTALITPKRRKPPLPSVTKLTEDRWNKSQRTKGHKGSHTMNGH", @@ -3255,23 +3255,23 @@ { "name": "vpr", "start": 4996, - "end": 5290, + "end": 5287, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6000000000000001, + "distance": 0.5872377841979652, "protein": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "aminoacids": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*L", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGCTT" + "aminoacids": "MEQVPEDQGPQREPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRTLQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*", + "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTGGGTGTCAACATAGCAGGATAGGAATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5267, "end": 5486, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5060292850990527, + "distance": 0.48954161103693805, "protein": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ", "aminoacids": "MEPVDPSLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISYGRKKRRQRRRASHSSQNHQAALPEQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGCTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" @@ -3279,45 +3279,45 @@ { "name": "rev_exon1", "start": 5406, - "end": 5484, + "end": 5487, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" - }, - { - "name": "vpu", - "start": 5498, - "end": 5744, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5852080576385825, - "protein": "MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM", - "aminoacids": "MHALEIAAIVGLVVAAIIAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESDGDQEELSALVEMGHLVPWDGDDM*", - "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGTCTATAGTATTAATAGAATATAGGAAAATTTTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "env", "start": 5661, "end": 8262, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.5065440396179699, "protein": "MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL", "aminoacids": "MKVTGTRRSYQHLWRWGILFLGMVMICSANNLWVTVYYGVPVWKEATTTLFCASDAKAYETEKHNVWATHACVPTDPSPQEVALENVTETFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDNLNLNRPNNNTCSNNTNYNITEKGEIKNCSFNVTTGIRDRVTKEHALFYKLDVVPIDEGSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPAGFAILKCKDKKFNGTGECRNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPGRAFYGTDIIGDIRQAHCNISGKDWNDTLKQIVIKLKEKFENKTIVFTQSSGGDPEIVMHSFNCGGEFFYCNTTQLFNSTWNNSTWNGTDNWNGTESNSTITLPCRIKQIINLWQEVGRAMYAPPIQGQIRCSSNITGLLLVRDGGSNNSSNDTETFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTHAKRRVVQREKRAIGLGAFFLGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTNVPWNISWSPRWNRSLDEIWTNMTWKQWEKEIDNYTDIIYNLIEQSQNQQEQNEQDLLALDKWASLWNWFDITQWLWYIKIFIMIVGGLIGLRIVFTILSIVNRVRQGYSPLSLQTLLPTQRGPDRPEGTEEGGGERDRGTSTRLVHGFLALIWDDLRSLFLFSYHRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQEIKNSAVSLLNTTAIAVAEGTDRIIEVLQRGFRAILHIPTRIRQGLERALL*", "nucleotides": "ATGAAAGTGACGGGGACCAGGAGGAGTTATCAGCACTTGTGGAGATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAGTGCCAACAACTTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACTACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAGAAGCATAATGTTTGGGCAACCCATGCCTGTGTACCCACAGACCCCAGCCCACAGGAAGTAGCATTGGAAAATGTGACAGAAACATTTAACATGTGGAAAAATGACATGGTAGAGCAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGCACTGATAATTTAAATCTAAATCGCCCTAACAATAATACTTGTAGTAATAATACTAATTATAATATAACGGAAAAAGGAGAAATAAAAAACTGCTCTTTCAATGTCACCACAGGAATAAGAGATAGGGTGACAAAAGAACATGCACTTTTCTATAAACTTGATGTAGTACCAATAGATGAAGGTAGTGGAAATACTACGGGCAAATATAGGATGATAAATTGTAACACCTCAGTCATTACACAAGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAAGATAAGAAATTCAATGGAACAGGAGAATGTAGAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGCTGAACGGCAGTCTAGCAGAAGAAGAGATAGTAATTAGATCTGCCAATCTCTCGAACAATGCTAAAACCATAATAGTACAGCTGAATAAATCTGTAAAAATTAATTGTACAAGACCCAACAATAATACAAGAAGAAGTATACACATAGGACCAGGGAGGGCATTTTATGGAACAGACATAATAGGAGATATAAGACAAGCGCATTGTAACATTAGTGGGAAAGATTGGAATGACACTTTAAAACAGATAGTTATAAAATTAAAAGAAAAATTTGAGAATAAAACAATAGTCTTTACTCAATCCTCAGGAGGGGACCCAGAGATAGTGATGCATAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAATAGTACTTGGAATGGTACTGACAATTGGAATGGTACTGAATCAAATAGCACTATCACACTCCCATGCAGAATAAAACAAATTATAAACTTGTGGCAGGAAGTAGGAAGAGCAATGTATGCCCCTCCCATCCAAGGGCAAATTAGATGTTCATCAAATATTACAGGGCTGCTGCTAGTAAGAGATGGTGGGAGCAACAATAGTAGTAATGATACAGAGACCTTCAGGCCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTCAAAATTGAACCATTAGGAATAGCACCCACCCATGCAAAGAGAAGAGTGGTGCAAAGGGAAAAAAGAGCAATAGGACTTGGAGCTTTCTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTTTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTAATGTGCCCTGGAATATTAGTTGGAGCCCTAGATGGAATAGATCTCTAGATGAGATTTGGACTAACATGACCTGGAAGCAGTGGGAAAAGGAAATTGACAATTACACAGACATAATATATAACCTAATTGAACAATCACAGAACCAACAAGAACAGAATGAACAAGACTTATTGGCATTAGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATTACACAGTGGCTATGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAGTTTTTACTATACTGTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTGTCATTGCAGACCCTCCTCCCAACCCAGAGGGGACCCGACAGGCCCGAAGGAACAGAAGAAGGAGGTGGAGAGAGAGACAGAGGCACATCCACGAGATTAGTGCATGGATTCTTAGCACTCATCTGGGACGACCTGCGGAGCCTGTTCCTCTTCAGTTACCACCGCTTGAGAGACTTACTCTTGATTGCGGCGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTGAAGTATTGGTGGAATCTCCTGCAGTATTGGAGTCAGGAAATAAAGAATAGTGCAGTTAGCTTGCTCAACACCACAGCAATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGGCTTTAGAGCTATTCTCCACATACCTACACGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 5705, + "end": 5744, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.6901936289818864, + "protein": "MGHLVPWDGDDM", + "aminoacids": "MGHLVPWDGDDM*", + "nucleotides": "ATGGGGCATCTTGTTCCTTGGGATGGTGATGATATGTAG" + }, { "name": "tat_exon2", "start": 7843, "end": 7936, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPSSQPRGDPTGPKEQKKEVERETEAHPRD", @@ -3329,7 +3329,7 @@ "start": 7844, "end": 8120, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPNPEGTRQARRNRRRRWRERQRHIHEISAWILSTHLGRPAEPVPLQLPPLERLTLDCGEDCGTSGTQGVGSTEVLVESPAVLESGNKE", @@ -3341,7 +3341,7 @@ "start": 8263, "end": 8905, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.5386842636859471, "protein": "MGNKLSRGLRAGWPTIRERMRRARPVREPEPAAAGVGAASRDLERHGALTSSNTAATNADVACLEAQQEEEEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPGPGVRFPLCFGWCFKLVPVDPDKVEEASVGENNCLLSPENLHGMEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", @@ -3355,7 +3355,7 @@ "start": 527, "end": 2297, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.706855791962175, "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSNIIMMQRGNFRNQRKTVKCFNCGKERHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLRKIWPSSKGRPRNFLQSRPEPTAPPEESFRFREETATPPQKQEPVDKEVYPLASLKSLFGNDPSSQ", @@ -3367,7 +3367,7 @@ "start": 2089, "end": 5101, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7518376924488996, "protein": "AKIKQECGIPYNPQSQEVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDNRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIKDYGKQMAGDDCVASRQDED", @@ -3379,7 +3379,7 @@ "start": 5045, "end": 5624, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7067546928117459, "protein": "MENRWQVMIVWQVDRMRISTWKSLVKHHMYISKKAQGWFYRHHYENPHPRISSEVHIPLGDARLVITTY", @@ -3389,21 +3389,21 @@ { "name": "vpr", "start": 5563, - "end": 5857, + "end": 5854, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6021542339864789, + "distance": 0.5898566703417862, "protein": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS", - "aminoacids": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS*I", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAGATT" + "aminoacids": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAAGGAGCCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTTCCTAGGACATGGCTTCATGGCTTAGGACAACATATCTATGAAACTTATGAGGATACTTGGACAGGAGTAGGAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTAGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5563, "end": 6052, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", "distance": 0.7571801566579635, "protein": "MEQAPEDQGPQKEPNNEWTLELLEELKREAVRHFPRTWLHGLGQHIYETYEDTWTGVGALIRILQQLLFIHFRIRCQHSRIGITRQRRARNRASRS", @@ -3413,45 +3413,45 @@ { "name": "rev_exon1", "start": 5973, - "end": 6051, + "end": 6054, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6065, - "end": 6311, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.6838632273545291, - "protein": "AIVLIEYKKILKQRRIDRLIDRIIDKAEDSGNESEGDQEELSAFVEIGHHAP", - "aminoacids": "MQSLYILTIVALVVAAILAIVV*AIVLIEYKKILKQRRIDRLIDRIIDKAEDSGNESEGDQEELSAFVEIGHHAP*NVDDL*", - "nucleotides": "ATGCAATCTTTATATATATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTAGGCCATAGTACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAATAATAGATAAGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKYLYQSSE", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTACCTCTACCAAAGCAGTGAG" }, { "name": "env", "start": 6228, "end": 8799, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7471048806788873, "protein": "MTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST", "aminoacids": "MRVKEIKRSYQHL*R*GIMLLRMLMIYSTADQWWVTVYYKVPVWREANTTLFCASDAKAYSTEAHNV*ATHACVPTDPNPQEIVIENVTEDFNMWKNNMVDQMHEDIISL*DQSLKPCVKLTPLCVTLNCTDIRNSTVTSNTTSNTT*GEMTNCTFNITTSIKDKIKKEAALFYKIDLVEIDEKKNNSSTRYRLINCNTSAITQACPKVSFKPIPIHFCAPASFAILKCNNKKFSGKGPCTNVSTVQCTHRIKPVVSTQLLLNGSLAEEEVMIRSDNITDNTKNIIVQLKEAIRIFCIRPNNNTRKSINIRPGRAFYTTGDIIRDIRQAHCNISGNWSNTLKQIATQLGKQLNQTQQIIFNSSAGKDPEIVTHSFNCGKKFFYCNSSSLFNST*TKNGTDSWQSNDTQNSNITLQCRIKQIINLWQEVRKAMYAPPISRQINCTSNITGLVLTRDRRNETKTFRPGRENMKDNWRSKLYKYKVVRIEPLRIAPTKAKRRVVQREKRAVRLGAMFLKFLGAARSTIGAASIALTEQARRVLSGIVQQQNNLLRAIEAQQHLLQLTV*GIKQLQARVLAVERYLQDQQLLGL*GCSRKLICTTTVP*NRS*GRHNKNYKSLDDI*DNMT*IE*EKEIDNYTSLIYTLITESHSQQKKNEQELLALDK*ASL*N*FDISQWLWYIKIFIMIVGGLVSLRIVFAVLSIVNKVRQGYSPLSFQTLLPAPRGADRPERIEERGRERDKGRSGRLVNGFLALI*DDLRSLCLFSYHRLSDLLLIVIKIVELLRRKR*EALKY**NLLQY*SQELKNSAVSLLNTTAIVVAERTDKIIEILQRISRAFLHIPRRIRQGLEKALL*", "nucleotides": "ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAGGGCATCATGCTCCTTAGAATGTTGATGATCTATAGTACTGCAGACCAGTGGTGGGTCACAGTCTATTATAAGGTACCTGTGTGGAGAGAAGCAAACACCACTCTATTTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTAGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGAAAATGTAACAGAAGATTTTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATAGGATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTGATATTAGGAATAGTACCGTTACTTCTAATACTACTTCTAATACTACTTAGGGAGAAATGACAAACTGCACTTTCAATATCACCACAAGCATAAAGGATAAGATAAAAAAAGAAGCGGCACTTTTTTATAAAATTGATTTAGTAGAAATAGATGAAAAGAAAAATAACAGTAGTACCAGATATAGGTTGATAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTAAGCCAATTCCCATACATTTTTGTGCCCCGGCTAGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCAGTGGAAAAGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATAGAATTAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGATCTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAAGAAGCCATACGAATTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAAGACCAGGAAGAGCATTTTATACAACAGGAGATATAATAAGAGACATAAGGCAAGCACATTGTAACATTAGTGGAAATTGGAGTAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAACACAACAAATAATCTTTAATTCATCCGCAGGAAAGGACCCAGAGATTGTAACACACAGTTTTAATTGTGGAAAGAAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTAGACTAAAAATGGTACTGATAGTTGGCAGTCTAATGATACTCAGAATAGTAATATCACACTCCAATGCAGAATAAAACAAATTATAAACCTGTGGCAGGAAGTAAGAAAAGCAATGTATGCCCCTCCCATCAGTAGACAAATTAACTGTACATCAAATATTACAGGGCTAGTTTTAACAAGAGATAGGAGGAATGAAACTAAGACCTTTAGACCTGGAAGAGAAAATATGAAGGATAATTGGAGAAGTAAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAAGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGCAGAGAGAGAAAAGAGCAGTAAGACTAGGAGCTATGTTCCTTAAGTTCTTAGGAGCAGCCAGAAGCACTATAGGCGCAGCGTCGATAGCGCTGACGGAACAGGCCAGACGAGTCTTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTAAGGCATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTAGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGAGGTTGCTCTAGAAAACTCATTTGCACCACTACTGTGCCTTAGAATCGTAGTTGAGGTAGGCATAACAAAAATTACAAAAGTCTAGATGACATTTAGGATAACATGACCTAGATAGAGTAGGAAAAAGAAATTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAAAAAAGAATGAACAAGAATTATTGGCATTAGATAAATAGGCAAGTTTGTAGAATTAGTTTGACATATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAAGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAAGGATACTCACCATTATCATTTCAGACCCTCCTCCCAGCCCCGAGAGGAGCCGACAGGCCCGAAAGAATAGAAGAAAGAGGTAGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCACTTATCTAGGACGATCTACGGAGCCTATGCCTCTTCAGCTACCACCGCTTGAGCGACTTACTCTTGATTGTGATCAAGATTGTGGAACTTCTAAGACGCAAGAGGTAAGAAGCCCTCAAATATTAGTAGAATCTCCTGCAGTACTAGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGCTCAACACCACAGCTATAGTAGTAGCTGAAAGGACAGATAAGATAATAGAAATATTACAAAGAATTAGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAAGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 6228, + "end": 6276, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.7611885546588408, + "protein": "MRVKEIKRSYQHL", + "aminoacids": "MRVKEIKRSYQHL*R*", + "nucleotides": "ATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGCATTTGTAGAGATAG" + }, { "name": "nef", "start": 8194, "end": 9451, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.6542937183493158, "protein": "MTYKAALDLSHFLKEKGGLEGLVYSQKRQDILDLWIYHTQGYFPDWQNYTPGPRIRYPLTFGWCFKLVPLEPDQVEEANEGENNSLLHPLSQHRMDDPEKEVLVWRFDSRLAFHHMAREKHPEFYKDC", @@ -3463,7 +3463,7 @@ "start": 8380, "end": 8476, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.6995153473344102, "protein": "RPSSQPREEPTGPKE", @@ -3475,7 +3475,7 @@ "start": 8381, "end": 8657, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.5716671727907683, "protein": "RERQRQIRSISERILSTYLGRSTEPMPLQLPPLERLTLDCDQDCGTSKTQEVRSPQILVESPAVLESGTKE", @@ -3489,7 +3489,7 @@ "start": 746, "end": 2246, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3501,7 +3501,7 @@ "start": 2038, "end": 5050, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3513,7 +3513,7 @@ "start": 4994, "end": 5573, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3523,23 +3523,23 @@ { "name": "vpr", "start": 5512, - "end": 5806, + "end": 5803, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5783, "end": 6002, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" @@ -3547,45 +3547,45 @@ { "name": "rev_exon1", "start": 5922, - "end": 6000, + "end": 6003, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6014, - "end": 6260, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5399181166837258, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6177, "end": 8718, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4585964351370794, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6215, + "end": 6260, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8299, "end": 8395, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -3597,7 +3597,7 @@ "start": 8300, "end": 8576, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3609,7 +3609,7 @@ "start": 8719, "end": 9334, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3623,7 +3623,7 @@ "start": 695, "end": 2195, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3635,7 +3635,7 @@ "start": 1986, "end": 5004, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.6033592883813991, "protein": "YGKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3647,7 +3647,7 @@ "start": 4948, "end": 5527, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3659,9 +3659,9 @@ "start": 5466, "end": 5757, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" @@ -3669,69 +3669,69 @@ { "name": "tat_exon1", "start": 5737, - "end": 5953, + "end": 5956, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGCAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5876, - "end": 5954, + "end": 5957, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5968, - "end": 6214, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5399181166837258, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6131, "end": 8672, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4569687738004571, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEVHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRDKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLDQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAGATAAGATGCAGAAAGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAAATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGGATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTTAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6169, + "end": 6214, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8253, - "end": 8346, + "end": 8349, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD*", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG" + "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD**", + "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA" }, { "name": "rev_exon2", "start": 8254, "end": 8530, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3743,7 +3743,7 @@ "start": 8673, "end": 9288, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4756067663643049, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDNEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3757,7 +3757,7 @@ "start": 817, "end": 2317, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3769,7 +3769,7 @@ "start": 2109, "end": 5121, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3781,7 +3781,7 @@ "start": 5065, "end": 5644, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3793,9 +3793,9 @@ "start": 5583, "end": 5874, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" @@ -3803,69 +3803,69 @@ { "name": "tat_exon1", "start": 5854, - "end": 6070, + "end": 6073, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5993, - "end": 6071, + "end": 6074, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6085, - "end": 6331, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6248, "end": 8789, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4564898680537425, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6286, + "end": 6331, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8370, - "end": 8463, + "end": 8466, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", - "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD*", - "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAG" + "aminoacids": "RPTSQPRGDPTGPKESKKKVEKETETDQFD**", + "nucleotides": "AGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGA" }, { "name": "rev_exon2", "start": 8371, "end": 8647, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -3877,7 +3877,7 @@ "start": 8790, "end": 9405, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -3891,7 +3891,7 @@ "start": 687, "end": 2187, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2967573174581697, "protein": "MGARASILSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -3903,7 +3903,7 @@ "start": 1979, "end": 4991, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19298018391400085, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -3915,7 +3915,7 @@ "start": 4935, "end": 5514, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -3925,69 +3925,69 @@ { "name": "vpr", "start": 5453, - "end": 5747, + "end": 5744, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5724, - "end": 5940, + "end": 5943, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5863, - "end": 5941, + "end": 5944, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5955, - "end": 6201, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5399181166837258, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6118, "end": 8659, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4604674291397314, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTSEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDKDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCTCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAGGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6156, + "end": 6201, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8240, "end": 8336, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -3999,7 +3999,7 @@ "start": 8241, "end": 8517, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4011,7 +4011,7 @@ "start": 8660, "end": 9275, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4025,7 +4025,7 @@ "start": 555, "end": 2055, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -4037,7 +4037,7 @@ "start": 1847, "end": 4859, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4049,7 +4049,7 @@ "start": 4803, "end": 5382, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -4059,69 +4059,69 @@ { "name": "vpr", "start": 5321, - "end": 5615, + "end": 5612, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5592, - "end": 5808, + "end": 5811, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5731, - "end": 5809, + "end": 5812, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5823, - "end": 6069, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAKDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 5986, "end": 8527, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4564898680537425, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVEKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6024, + "end": 6069, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8108, "end": 8204, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -4133,7 +4133,7 @@ "start": 8109, "end": 8385, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4145,7 +4145,7 @@ "start": 8528, "end": 9143, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPMSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4159,7 +4159,7 @@ "start": 246, "end": 1782, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.35613851839948674, "protein": "MGARASVLSGGELDKWEKIRLRPGGRKRYKLKHIVWASRELERFAVNPGLLETSEGCKQIMGQLQPALQTGSEELRSLYNTVAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKTQQAAAADTGNNSQVSQNYPIVQNMQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVQAGPVAPGQIREPRGSDIAGTTSTLQEQIAWMTHNPPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMSQATGAHAIMMQRGNFKNQRKTVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESLRPTAPPVESFRFGEETAAPFQKQEPRDKEMSPLASLKSLFGNDQ", @@ -4171,7 +4171,7 @@ "start": 1544, "end": 4586, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.24526399193257942, "protein": "FFRENLAFPQGKAGEFPSEQTRANSPTRGEPQANSPTSRELQVWGRDSSPLSEAGTERQGDVSLSFPQITLWQRPVVTIKIGGQIKEALLDTGADDTVLEEMALPGRWKPKMIGGIGGFIKVRQYDQIAIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKIKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSITVLDVGDAYFSVPLDEEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRGHLLKWGFTTPDKKHQKEPPFLWMGYELHPDRWTVQPIKLPEKEIWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPTKELIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKISTESIVIWGKTPKFKLPIQKETWEIWWTDYWQATWIPEWEFVSTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYITDRGRQKVVTLNDTTNQKTELQAILLALQDSGLEANIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKIYLTWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDRAQEEHERYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQIDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGVKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQISKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4183,7 +4183,7 @@ "start": 4530, "end": 5109, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMYVSKKTKGWFYRHHYESTHPKISSEVHIPLGDAELVVTTYWGLQPGERDWHLGQGVSIEWRKGRYRTHVDPNLADQLIHLHYFDCFSESAIRHAILGHRVSPRCEYQAGHNKVGSLQYLALAALRAPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4193,69 +4193,69 @@ { "name": "vpr", "start": 5048, - "end": 5342, + "end": 5339, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5873733056539261, + "distance": 0.5731147540983608, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS", - "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS*P", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAACTTAAGCAGGAAGCTGTTAGGCATTTTCCTAGGCCATGGCTTCATAGCTTAGGGCAATATATCTATGAAACTTATGGGGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATGCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATCCCACAGAGGAGAGCAAGAAATGGATCCAGTAGATCCTAACCT" + "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKQEAVRHFPRPWLHSLGQYIYETYGDTWAGVEAIIRMLQQLLFIHFRIGCQHSRIGIIPQRRARNGSSRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAACTTAAGCAGGAAGCTGTTAGGCATTTTCCTAGGCCATGGCTTCATAGCTTAGGGCAATATATCTATGAAACTTATGGGGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATGCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATCCCACAGAGGAGAGCAAGAAATGGATCCAGTAGATCCTAA" }, { "name": "tat_exon1", "start": 5319, - "end": 5535, + "end": 5538, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.5375823324292911, + "distance": 0.5237430167597767, "protein": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ", - "aminoacids": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ", - "nucleotides": "ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAG" + "aminoacids": "MDPVDPNLEPWKHPGSQPKTACNNCYCKKCCLHCQVCFTRKGLGISYGRKKRRRRRGSLKGRQAHQDSLSKQ*", + "nucleotides": "ATGGATCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTAACAATTGCTATTGTAAAAAGTGTTGCCTTCATTGCCAAGTTTGTTTCACACGAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5458, - "end": 5536, + "end": 5539, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.5657327586206896, - "protein": "MAGRSGDGDEDLLKAVRLIKTLYQSS", - "aminoacids": "MAGRSGDGDEDLLKAVRLIKTLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5550, - "end": 5808, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.6128024980483997, - "protein": "MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND", - "aminoacids": "MLSLEVIVAITALVVAGIIAIVVWTIVLIEYRKILRQRKIDKILDRIRERAEDSGNESEGDQEELSALVEMGHNAHHAPWDIND**", - "nucleotides": "ATGTTATCTTTAGAAGTAATAGTAGCAATAACAGCATTAGTAGTAGCAGGAATAATAGCAATAGTTGTGTGGACCATAGTACTTATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAAGATACTTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAA" + "distance": 0.5622384937238494, + "protein": "MAGRSGDGDEDLLKAVRLIKTLYQSSK", + "aminoacids": "MAGRSGDGDEDLLKAVRLIKTLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACGGAGACGAGGATCTCTTAAAGGCCGTCAGGCTCATCAAGACTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 5716, "end": 8257, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.5208458282639616, "protein": "MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE", "aminoacids": "MRVRGIRKNCQRLWRWGTMLTMLLGILMISNATEQLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEMVLINVTENFNMWKNDMVDQMQEDIVSLWDQSLKPCVKLTPLCVTLNCTNLTIEPNNATKANISGRLEGKGEMTNCSFNVTTSLRDKRKKEYALFYKLDVVATGENNNSFRLISCNTSEITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGKCNNVSIVQCTHGIRPVVSTQLLLNGSLAEEEVVVRSANFSDNTKTIIVQLNKTVVINCTRPNNNTRRSIHIAPGRAFYATGDIIGDIRKAHCNISKEDWNTTLNQVAKKLQEQFENATIDFKPSSGGDPEIVMHSFNCGGEFFYCNTTELFSWNATTKLFTWNATNSNNGTIILPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGTNGTGNRNETFRPGGGNMKDNWRSELYKYKVVEIKPLGVAPTKAKRRVVQREKRAVTIGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNISWSNRTLNNIWDNLTWMQWDTEINNYTNKIYQLLEEAQNQQEKNEQELLELDKWANLWNWFDISNWLWYIKIFILIVGGLIGLRIVFTVLSIVNRVRQGYSPLSFQTRFPVPRGPDRPEGTEEEGGERDRDRSDRLVNGFLTLIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNVLQYWSQELKNSAVSLLNATAIVVAEGTDRIIELAQRICRAE*DRA*", "nucleotides": "ATGAGAGTGAGGGGGATCAGGAAGAATTGTCAGCGCTTGTGGAGATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAATGCTACAGAACAATTGTGGGTCACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAACAACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTATTAATAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAAATGCAAGAGGACATAGTCAGCTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACCTTAAATTGCACTAATTTGACCATTGAGCCAAACAATGCTACTAAAGCCAATATTAGTGGGAGGTTAGAGGGGAAAGGAGAAATGACAAACTGCTCTTTCAATGTCACCACAAGCCTAAGAGATAAGAGGAAGAAAGAATATGCACTCTTTTATAAACTTGATGTAGTAGCAACAGGTGAAAATAATAACAGCTTTAGGTTGATAAGTTGTAATACCTCAGAGATTACACAGGCCTGTCCAAAGGTATCATTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAATGATAAAAAGTTCAATGGAACAGGAAAATGTAACAATGTCAGCATAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAGTTAGATCTGCCAATTTCTCAGACAATACTAAGACCATAATAGTACAGCTGAACAAAACTGTAGTAATTAATTGTACAAGACCCAACAACAATACAAGGAGAAGTATACATATAGCACCAGGGAGAGCATTTTATGCAACAGGAGATATAATAGGAGATATAAGAAAAGCACATTGTAACATTAGTAAAGAAGATTGGAATACCACTTTAAACCAGGTGGCTAAAAAATTACAAGAACAATTTGAGAATGCAACAATAGACTTTAAACCATCCTCAGGAGGGGACCCAGAAATTGTAATGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACGGAACTATTTTCTTGGAATGCTACAACAAAACTGTTTACTTGGAATGCTACAAATAGCAATAATGGAACCATCATACTCCCATGTAGAATAAAACAAATTATAAACATGTGGCAAGAGGTAGGAAAAGCAATGTATGCCCCTCCCATTCGTGGACAAATTAGATGTTCGTCAAATATTACAGGACTGCTATTAACAAGAGATGGTGGGACTAACGGGACAGGGAACAGGAATGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAGAAATTAAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGACCATAGGAGCTATGTTCCTTGGGTTCCTGGGGGCAGCAGGAAGCACTATGGGCGCAGCATCACTGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCGATTGAGGCGCAGCAGCATCTGTTGCAACTCACAGTCTGGGGCATAAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGGTACCTAAGAGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCTTGGAATATTAGTTGGAGTAATAGAACTCTGAATAACATTTGGGACAATTTGACTTGGATGCAGTGGGATACAGAAATTAACAATTACACAAACAAAATATACCAATTACTTGAGGAAGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAATTTGTGGAATTGGTTTGACATATCAAACTGGCTGTGGTACATAAAAATATTCATATTAATAGTAGGAGGCTTAATAGGTTTAAGAATAGTTTTTACTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGTCCCGAGGGGACCCGACAGGCCCGAAGGAACCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGACAGATTAGTGAACGGATTCTTGACACTTATCTGGGTCGATCTGAGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCACTCAAATATTGGTGGAATGTCCTGCAATATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTAAATGCCACAGCCATAGTAGTAGCTGAGGGGACAGATAGGATTATAGAATTAGCACAAAGAATTTGTAGAGCAGAATAAGACAGGGCTTGA" }, + { + "name": "vpu", + "start": 5760, + "end": 5808, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.70010183299389, + "protein": "MGHNAHHAPWDIND", + "aminoacids": "MGHNAHHAPWDIND**", + "nucleotides": "ATGGGGCACAATGCTCACCATGCTCCTTGGGATATTAATGATTAGTAA" + }, { "name": "tat_exon2", "start": 7874, "end": 7970, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.3921568627450981, "protein": "RPASQSRGDPTGPKEPKKKVERETETDPTD", @@ -4267,7 +4267,7 @@ "start": 7875, "end": 8151, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.3471418653089562, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRQISERILDTYLGRSEEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSTQILVECPAILESGTKE", @@ -4279,7 +4279,7 @@ "start": 8275, "end": 8896, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.47444962236863253, "protein": "MGGKWSKHSKSEWADVRERMAQTEAAADGVGAVSRDLERHGAITSSNTATNNAACAWLEAQEEEEVGFPVKPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRFPLCFGWCFKLVPVDPDKVEEANKGENNSLLHPMSLHGMEDTEREVLMWKFDSRLAFHHVAREKHPEYFKDC", @@ -4293,7 +4293,7 @@ "start": 2, "end": 1697, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.7478034493979825, "protein": "MTETLLVQNANPDCKTILKALRPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTKSASIMVQGGNLKNQRKNVKCFNCGKEGHTAKNCRAPKKKGC", @@ -4305,7 +4305,7 @@ "start": 1489, "end": 4501, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7624366800883231, "protein": "KPKIIEGIRGFIKVRQYDQVPIKICRHKAISTVLIRPTPVNIIRRNLLTQLSCTLNFPISPIETVPVKLKPGIDSPKVKQWPLTEEKIKALIEICAEIEKERKITKIRPKNPYNTPVFAIKKKDSTK", @@ -4317,7 +4317,7 @@ "start": 4445, "end": 5024, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7494633160752622, "protein": "MENRWQVMIVWQVDKMRIRTWNSLVKHHMHISKKAQR", @@ -4327,23 +4327,23 @@ { "name": "vpr", "start": 4963, - "end": 5257, + "end": 5254, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6216628527841345, + "distance": 0.6112852664576804, "protein": "MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS", - "aminoacids": "MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS*L", - "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAACTT" + "aminoacids": "MEQVPEDQRPQKEPYNEWTLELLEELKSEAVRHFPRPWLHSLGQYIYETYGDTWAEVEAIIRTLQQLLFIHFRIRCQHSRIRIIRQRRARNRASRS*", + "nucleotides": "ATGGAACAAGTCCCAGAGGACCAAAGGCCACAAAAGGAGCCATACAATGAATGGACATTAGAGCTCTTAGAAGAACTTAAGAGTGAAGCTGTTAGGCATTTTCCTAGACCATGGCTTCATAGCTTAGGACAATATATCTATGAAACTTATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAA" }, { "name": "tat_exon1", "start": 5111, "end": 5453, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.6491818320844167, + "distance": 0.6409453748630458, "protein": "MTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ", "aminoacids": "MEILGQR*KP**EPCNNCCSFISELGVNIAG*ELFDRGEQEIEPVDPNLEPWKHPGSQPMTACNNCYCKRCCFHCQVCFTRKGLGISHGRKKRRQRRRASHSSQNHQAALPEQ*", "nucleotides": "ATGGAGATACTTGGGCAGAGGTAGAAGCCATAATAAGAACCCTGCAACAACTGTTGTTCATTCATTTCAGAATTAGGTGTCAACATAGCAGGATAAGAATTATTCGACAGAGGAGAGCAAGAAATAGAGCCAGTAGATCCTAACTTAGAGCCCTGGAAACATCCAGGAAGTCAACCTATGACTGCTTGTAACAATTGCTATTGTAAACGGTGTTGCTTTCATTGCCAAGTTTGCTTCACAAGAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAA" @@ -4351,24 +4351,24 @@ { "name": "rev_exon1", "start": 5373, - "end": 5451, + "end": 5454, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.531617235590375, - "protein": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGT" + "distance": 0.5290287574606619, + "protein": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "aminoacids": "MAGRSGDSDEELLTAVRIIKRLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCACAGCAGTCAGAATCATCAAGCGGCTCTACCAGAGCAGTAAG" }, { "name": "vpu", "start": 5465, "end": 5711, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_start": 6259, + "subtype_end": 6310, "orientation": "forward", - "distance": 0.6918418647166362, - "protein": "SIVLIEYKKILRQKKIDRLIDRIRERAEDSGNESDEDQEELSAIVEIGHLVP", + "distance": 0.7683007254341614, + "protein": "MHALEIAAIVRLVVAAIIAIVV", "aminoacids": "MHALEIAAIVRLVVAAIIAIVV*SIVLIEYKKILRQKKIDRLIDRIRERAEDSGNESDEDQEELSAIVEIGHLVP*DSDDM*", "nucleotides": "ATGCATGCCTTAGAAATAGCAGCAATAGTAAGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTAGTCTATAGTATTAATAGAATATAAGAAAATTTTAAGACAAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGACGAGGACCAGGAAGAATTATCAGCAATTGTAGAGATAGGGCATCTTGTTCCTTAGGATAGTGATGATATGTAG" }, @@ -4377,7 +4377,7 @@ "start": 5628, "end": 8229, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7513561129398668, "protein": "MTEEGEIKNCSFNVTTGIRDKVTKEHALFYKLDVVPIDESSGNTTGKYRMINCNTSVITQACPKVSFEPIPIHFCAPASFAILKCKDKKFNRTGECRNVSTVQCTHRIRPVVSTQLLLNGSLAEEEIVIRSANLSNNAKTIIVQLNKSVKINCTRPNNNTRRSIHIGPRRAFYRTDIIGDIRQAHCNISRKD", @@ -4389,7 +4389,7 @@ "start": 7624, "end": 8872, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.5872017754762344, "protein": "MRQARPVRKPEPAATKVRAASRDLERHGALTSSNTAATNADVACLEAQQEEKEVGFPVRPQVPLRPMTYKGALDISHFLKEKGGLDGLIYSKRRQDILDLWLYNTQGYFPDWQNYTPRPGVRFPLCFRWCFKLVPVDPDKVEEASVGENNCLLSPENLHRIEDEHREVLQWRFDSRLAFHHVARELHPEYYKDC", @@ -4401,7 +4401,7 @@ "start": 7810, "end": 7903, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.5303030303030303, "protein": "RPSSQPRGDPTGPKEQKKEVERKTEAHPRD", @@ -4413,7 +4413,7 @@ "start": 7811, "end": 8087, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.6667847862036381, "protein": "ILSTHLGRPAEPVPLQLPPLERLTLDCGEDCRTSKTQKVRSTEVLVESPAVLESGNKE", @@ -4427,7 +4427,7 @@ "start": 549, "end": 2049, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDSWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSVNVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -4439,7 +4439,7 @@ "start": 1841, "end": 4853, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVHQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4451,7 +4451,7 @@ "start": 4797, "end": 5376, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.378905844492889, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVNPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -4461,69 +4461,69 @@ { "name": "vpr", "start": 5315, - "end": 5609, + "end": 5606, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5586, - "end": 5802, + "end": 5805, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5725, - "end": 5803, + "end": 5806, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5817, - "end": 6063, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5399181166837258, - "protein": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLVVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 5980, "end": 8521, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4585964351370794, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTTESHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSKEGKMAEEMRNCSFNITTEIRNKMQKEYALFYKLDVVPIDDDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYGNKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSGTETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAGTCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTAAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAGAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATGATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAGTGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGGGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGGGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTATTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGTCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 6018, + "end": 6063, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8102, "end": 8198, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -4535,7 +4535,7 @@ "start": 8103, "end": 8379, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.40871934604904625, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVSLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4547,7 +4547,7 @@ "start": 8522, "end": 9137, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGVEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4561,7 +4561,7 @@ "start": 468, "end": 1968, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3014827756125966, "protein": "MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELKSLFNAVAVLYCVHQRIQVQDTKEALEKVEEEQNKSKKKAQQVAAADTDTGNSSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWTTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYRTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVNSANVMMQRGNFRNQRKIVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETITPSQKQEPRDKELYPLSSLKSLFGSDPSSE", @@ -4573,7 +4573,7 @@ "start": 1760, "end": 4772, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19610372855115465, "protein": "FFRENLAFPQRKARELPSEQTRANSPTRRELQVWGRDNNSLSEAGAKRQGTVSLVFPQITLWQRPLVRIKIGGQFKEALLDTGADDTVLEEMSLPGKWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLIGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKEFRKYTAFTIPSTNNETPGIRYEYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIILPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAELELAENREILREPVHGVYYDPSKDLIAEVQKQGNGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWAEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVISLTDTTNQKTELQAIYLALQDSGSEVNIVTDSQYTLGIIQAQPDKSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSTGVRRVLFLDGIDKAQEDHEKYHSNWRAMAGDFNIPPVVAKEIVASCDKCQQKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDLQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4585,7 +4585,7 @@ "start": 4716, "end": 5295, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.36908151428872715, "protein": "MANRWQVMIVWQVDRMRIRTWNSLVKHHMHVSKKTKGWFYRHHYESTHPRISSEVHIPLGDARLVVTTYWGLNTGEREWHLGQGVSIEWRKRKYSTQVDPNLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALKALTTPKRRKPPLPSVRKLTEDRWNEHQKTKGHRGSHTMNGH", @@ -4595,69 +4595,69 @@ { "name": "vpr", "start": 5234, - "end": 5528, + "end": 5525, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5994382022471912, + "distance": 0.5872120921305184, "protein": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS", - "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*T", - "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MERAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHGLGQHIYETYGDTWTGVEAIIRTLQQLLFIHFRLGCRHSRIGIVPQRRARNGASRS*", + "nucleotides": "ATGGAACGAGCACCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACATATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCAACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCCACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5505, - "end": 5721, + "end": 5724, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRAPQGSQTDQDSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAGGAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5644, - "end": 5722, + "end": 5725, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKILYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5736, - "end": 5982, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5513972055888224, - "protein": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL", - "aminoacids": "MQPLHIAAIVGLIVAAIIAIVVWTIVLIEYRKILRQKRIDRLIDRIRERAEDSGNESEGDQEELSALMEMGHHAPGDVDDL*", - "nucleotides": "ATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAATAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAAGGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKILYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 5899, "end": 8440, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.45675101255163, "protein": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL", "aminoacids": "MRVKGIRKNCQRLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEANTTLFCASDAKAYTPEAHNVWATHACVPTDPNPQEVGLENVTENFNMWKNNMVEQMHEDVINLWDQSLKPCVKLTPLCVTLNCTNLNVTNTNSSSEEGKMAEEMRNCSFNITTKIRNKMQKEYALFYKLDVVPIDNDNTSYTLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGSGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRSENFTNNAKTIIVHLKEPIQINCTRPNNNTMKSIPLGPGRAFYATGAIIGDIRQAHCNLSRKAWNNTLKQVVEKLKEQYENKTIIFNHSSGGDPEIIMHTFNCGGEFFYCNTTQLFNSTWNSTWTGNATELDGNITLPCRIKQIINRWQEVGKAMYAPPIRGKIRCSSNITGLLLVRDGGNTSETETFRPGGGDMKDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLQDQQRLGIWGCSGKLICTTAVPWNASWSNKSLNQIWDNMTWMEWEREINNYTDTIYTLIEKAQNQQEKNEQELLELDKWASLWNWFNITKWLWYIKLFIMIVGGLIGLRIVFAVLAIVNRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSVRLVNGFLALFWDDLRSLCLFLCHRLRDLLLIVTRIVELLGRRGWEILKHWWSLLQYWIQELKSSAISLFNVTAIAVAEGTDRVIEVVQRACRALLHIPTRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTATGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCCCAGAGGCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAGGAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGAAATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAATGATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCAGAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGTATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGACAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAAAAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGACCCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACACAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGAAACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAAGCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTACTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGAGATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTGGGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTGGGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGTTGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGAGAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAGGAAAAGAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATAGGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTGGCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGACTTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTCAAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGCTTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGGGCATTGTTATAA" }, + { + "name": "vpu", + "start": 5937, + "end": 5982, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.46723952738990326, + "protein": "MEMGHHAPGDVDDL", + "aminoacids": "MEMGHHAPGDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTGGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8021, "end": 8117, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.32608695652173914, "protein": "RPTSQPRGDPTGPKESKKKVEKETETDQFD", @@ -4669,7 +4669,7 @@ "start": 8022, "end": 8298, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSPEGTRQARRNRRRRWRKRQRQISSISERILGTFLGRPAEPVPLPLPPLERLTLDCNKDCGTSGTQGVGNPQTLVESPAVLDSGAKE", @@ -4681,7 +4681,7 @@ "start": 8441, "end": 9056, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4690990320178705, "protein": "MGGKWSKSSGGGWPAVRERMRRTEPAAEGVGAVSRDLERHGAVTSSNTAATNADCAWLEAQEEDSEVGFPVRPQVPLRPMTFKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVYNTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPEEANQGENNCLLHPTSLHGMEDPEKEVLMWKFDSRLAFHHMAREKHPEYYKDC", @@ -4695,7 +4695,7 @@ "start": 825, "end": 2361, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2559303794507086, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPSLLETAEGCRQILGQLQPSLQTGSEELKSLYNTLATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVRHTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSHVTNSSAIMMQRGNFRNQRKAVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPPEEIFRFVEETTTPSQKQEPIDKELYPPLASLKSLFGNDPSSQ", @@ -4707,7 +4707,7 @@ "start": 2147, "end": 5165, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.19193360134872262, "protein": "FFRENLAFPQRKAREFSPEQTRANSPTTRRDLQVCGRDNNSLSETGANRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGRWKPKMIGGIGGFIKVRQYDQIPIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPYRTRNPEMVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPNKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGEGQWTFQIYQEPFKNLKTGKYARARGAHTNDVKQLTEAVQKIATEGIVIWGKIPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKKGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAIHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQIIKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4719,7 +4719,7 @@ "start": 5109, "end": 5688, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.24677296886864086, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHMYVSRKAKGWFYRHHFESNHPKISSEVHIPLEDARLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPDLADQLIHLYYFDCFSESAIRNAILGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKRKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4729,47 +4729,47 @@ { "name": "vpr", "start": 5627, - "end": 5921, + "end": 5918, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.577708978328173, + "distance": 0.5625083211290103, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS", - "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*A", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAGCTTAGGGCAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATTCTGCAACAACTGTTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGGCT" + "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHSLGQYIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRARNGASRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGAGCTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAGCTTAGGGCAATATATCTATGAAACTTATGGAGATACTTGGGCAGGGGTGGAAGCCATAATAAGAATTCTGCAACAACTGTTGTTTATTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5898, - "end": 6114, + "end": 6117, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.42503863987635226, + "distance": 0.40192926045016075, "protein": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTTCYCKKCCFHCQVCFTTKGLGISYGRKKRRQRRRAPQDSQTDQGPLPKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGGCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCACTTGTTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCACAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 6037, - "end": 6115, + "end": 6118, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDDELLKTVRLIKVLYQSS", - "aminoacids": "MAGRSGDSDDELLKTVRLIKVLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDDELLKTVRLIKVLYQSSK", + "aminoacids": "MAGRSGDSDDELLKTVRLIKVLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGACGAGCTCCTCAAGACAGTCAGACTGATCAAGGTCCTCTACCAAAGCAGTAAG" }, { "name": "vpu", "start": 6129, "end": 6375, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_start": 6259, + "subtype_end": 6310, "orientation": "forward", - "distance": 0.5210204814947899, + "distance": 0.7548091006737623, "protein": "MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL", "aminoacids": "MQPLTILAIVALVVAAILAIVVWSIVLIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSALVERGHLAPWNVDDL*", "nucleotides": "ATGCAACCTTTAACAATATTAGCAATAGTAGCACTAGTAGTAGCAGCAATACTAGCAATAGTTGTGTGGTCCATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAAAGTGAGGGGGATCAGGAAGAATTATCAGCACTGGTGGAGAGGGGGCATCTTGCTCCTTGGAATGTTGATGATCTGTAA" @@ -4779,7 +4779,7 @@ "start": 6292, "end": 8875, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4841544358231281, "protein": "MKVRGIRKNYQHWWRGGILLLGMLMICNATEQQLWVTVYYGVPVWKEANTTLFCASDAKAYSTEVHNVWATHACVPTDPNPQEVVLKNVTENFNMWENNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNTTRSSGNTTNEMKNCSFYTETDIRDKKRKEYALFYELDIVPIDEDNKNKSNNISYSRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEDEVVIKSSNFTNNAKTIIVQLNETVKINCTRPNNNTRKSIPIGPGRAFYATGDIIGDIRQAHCNISRANWTNTLKQIAEKLGKQFEENKTIVFNPSSGGDPEVVMHSFNCRGEFFYCNSTPLFNSTWKETNGIWTRIGESNDSATITLNDSDTITLQCKIRQIINLWQEVGKAMYAPPIKGQISCLSNITGLLLVRDGGNNTNGTEIFRPVGGEMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVQREKRATLGALFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTTVPWNTSWSNKSLEKIWNNMTWMEWEREIDNYTSLIYTLLEESQNQQEKNEKELLELDTWASLWNWFDITNWLWYIKIFIMIIGGLVGLRIVFTVLSIVNRVRQGYSPLSFQIHPPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLALFWVDLRSLCLFSYHRLRDLLLIVARIVELLGRRGWEALKYGWSLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEILQRACRAILHIPTRIRQGLERALL", @@ -4791,7 +4791,7 @@ "start": 8456, "end": 8552, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.5303030303030303, "protein": "RSTPQLRGDPTGPKESKEKVERETETDPVH", @@ -4803,7 +4803,7 @@ "start": 8457, "end": 8733, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.3690449563855961, "protein": "DPPPSSEGTRQARRNRRRRWRERQRQIRSISERILSTFLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGSSQIRVESPTILEPGTKE", @@ -4815,7 +4815,7 @@ "start": 8876, "end": 9509, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4195274186357557, "protein": "MGGKWSKSKLFGWPAVRERMRRAEPAAEPAADGVGAASRDLERHGAITSSNTPTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSPKRQEILDLWVYHTQGFFPDWDNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNCLLHPMSQHGMEDPEREVLEWRFDSRLAFRHVARELHPEYYKDC", @@ -4829,7 +4829,7 @@ "start": 801, "end": 2313, "subtype_start": 140, - "subtype_end": 1642, + "subtype_end": 1643, "orientation": "forward", "distance": 0.2589641434262949, "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKTQQAAADTGNNSQTSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNFRNQRKNVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPVDKELYPLASLRSLFGNDPSSQ", @@ -4841,7 +4841,7 @@ "start": 2105, "end": 5117, "subtype_start": 1435, - "subtype_end": 4446, + "subtype_end": 4447, "orientation": "forward", "distance": 0.14797358397500882, "protein": "FFRENLAFPQGEAREFSSEQTRANSPTRRELQVWGGDNNSLSEAGAGRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVTTIHTDNGSNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4853,7 +4853,7 @@ "start": 5061, "end": 5640, "subtype_start": 4391, - "subtype_end": 4969, + "subtype_end": 4970, "orientation": "forward", "distance": 0.32379004771642805, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSIHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKERYSTQVDPGLADQLIHRYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4865,7 +4865,7 @@ "start": 5579, "end": 5870, "subtype_start": 4909, - "subtype_end": 5199, + "subtype_end": 5200, "orientation": "forward", "distance": 0.34013605442176864, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAITRILQQLLFIHFRIGCQHSRIGIIQRRRARNGTSRS", @@ -4875,33 +4875,33 @@ { "name": "tat_exon1", "start": 5850, - "end": 6066, + "end": 6069, "subtype_start": 5180, - "subtype_end": 5394, + "subtype_end": 5399, "orientation": "forward", - "distance": 0.41088854648176687, + "distance": 0.38728632478632474, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "nucleotides": "ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ*", + "nucleotides": "ATGGAACCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5989, - "end": 6067, + "end": 6070, "subtype_start": 5319, - "subtype_end": 5394, + "subtype_end": 5397, "orientation": "forward", - "distance": 0.35828025477707015, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT" + "distance": 0.3620426829268293, + "protein": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6081, "end": 6330, "subtype_start": 5411, - "subtype_end": 5656, + "subtype_end": 5657, "orientation": "forward", "distance": 0.44033465433729635, "protein": "MQPLEISAIVALVVAIIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", @@ -4913,7 +4913,7 @@ "start": 6244, "end": 8827, "subtype_start": 5574, - "subtype_end": 8123, + "subtype_end": 8124, "orientation": "forward", "distance": 0.44564023273792597, "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFYASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNTTSTKNTTPSTTASSGERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKKFNGSGPCTNVSTIQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKTIIVHLKDSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSSWKDESNGTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLITRDGGKNNESNTTEIFRPEGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKTLEQIWDNMTWMQWEREIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHPPAPRGPDRPEGIGEEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLNIPRRIRQGLERALL", @@ -4925,7 +4925,7 @@ "start": 8408, "end": 8504, "subtype_start": 7705, - "subtype_end": 7797, + "subtype_end": 7798, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPTPQPRGDPTGQKESEKKVERETETDPDH", @@ -4937,7 +4937,7 @@ "start": 8409, "end": 8685, "subtype_start": 7706, - "subtype_end": 7981, + "subtype_end": 7982, "orientation": "forward", "distance": 0.3471418653089562, "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", @@ -4949,7 +4949,7 @@ "start": 8828, "end": 9461, "subtype_start": 8125, - "subtype_end": 8751, + "subtype_end": 8752, "orientation": "forward", "distance": 0.33768732280275404, "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", @@ -4963,7 +4963,7 @@ "start": 855, "end": 2358, "subtype_start": 200, - "subtype_end": 1699, + "subtype_end": 1700, "orientation": "forward", "distance": 0.38383718162342295, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPSLQTGSEELKSLYNTVATLYCVHQQIDVKDTKEALDKIEEEQNKSKKKVQQAATDTGNNSQASQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGAIMMQRGNFRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPAESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -4975,7 +4975,7 @@ "start": 2150, "end": 5162, "subtype_start": 1492, - "subtype_end": 4503, + "subtype_end": 4504, "orientation": "forward", "distance": 0.21863141758600757, "protein": "FFREDLAFPQGEAREFSSEQTRANSPTSRELQVRGGDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIDELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLKGTKALTEVVPLTREAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQRETWDTWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -4987,7 +4987,7 @@ "start": 5106, "end": 5685, "subtype_start": 4448, - "subtype_end": 5026, + "subtype_end": 5027, "orientation": "forward", "distance": 0.3589413907639558, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISKKAKGWIYKHHYDSINPKISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPGLADRLIHLYYFDCFSESAIRNAILGRIVSPSCDYQAGHNKVGSLQYLALTALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -4999,7 +4999,7 @@ "start": 5624, "end": 5915, "subtype_start": 4966, - "subtype_end": 5256, + "subtype_end": 5257, "orientation": "forward", "distance": 0.2400808693454637, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCQHSRIGILQRRRARNGASRS", @@ -5009,33 +5009,33 @@ { "name": "tat_exon1", "start": 5895, - "end": 6111, + "end": 6114, "subtype_start": 5237, - "subtype_end": 5451, + "subtype_end": 5456, "orientation": "forward", - "distance": 0.28735632183908044, + "distance": 0.2515090543259557, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAHQDSQTHQASLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 6034, - "end": 6112, + "end": 6115, "subtype_start": 5376, - "subtype_end": 5451, + "subtype_end": 5454, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "aminoacids": "MAGRSGDSDEELIKTVRLIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6126, "end": 6375, "subtype_start": 5468, - "subtype_end": 5707, + "subtype_end": 5708, "orientation": "forward", "distance": 0.3726554787759131, "protein": "MQPLVISAIVALVVVAIIAIVVWSIVLIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVNDL", @@ -5047,7 +5047,7 @@ "start": 6289, "end": 8881, "subtype_start": 5631, - "subtype_end": 8207, + "subtype_end": 8208, "orientation": "forward", "distance": 0.48107374453325313, "protein": "MRARGIRKNYQHLLWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTGANNTTSTNTTTPSTTVSSEERMGEGEIKNCSFNITTSLRDKMQKEYALFYRPDIVPIDNTSYRLISCNTSVITQACPKVTFEPIPIHYCAPAGFAILKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEIVIRSENFSDNAKNIIVHLNKSVEITCIRPNNNTRKSIPMGPGKAFFATGAIIGDIRQAHCKINKTKWNNTLEQVFKKLREQFGNNKTIIFNHSSGGDPEIVTHNFNCGGEFFYCNTSKLFNSTWNETSYWKGEGSNNDTITLPCRIKQIINLWQEVGKAMYAPPIRGLIKCSSNITGLLLTRDGGNESETTDTETFRPVGGNMKDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGLGAMFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKNLTQIWDNMTWMQWEKEIDNYTSLIYTLIEESQNQQEKNELELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEGEGGERDRDRSGPLVNGFLAIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRGGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", @@ -5059,7 +5059,7 @@ "start": 8462, "end": 8558, "subtype_start": 7789, - "subtype_end": 7881, + "subtype_end": 7885, "orientation": "forward", "distance": 0.5337214944201844, "protein": "RPTSQPRGDPTGQKESKEKVERETETDPDH", @@ -5071,7 +5071,7 @@ "start": 8463, "end": 8739, "subtype_start": 7790, - "subtype_end": 8065, + "subtype_end": 8066, "orientation": "forward", "distance": 0.41220115416323155, "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILSNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTRGVGTPQVLVESPAVLESGTKE", @@ -5083,7 +5083,7 @@ "start": 8882, "end": 9515, "subtype_start": 8209, - "subtype_end": 8841, + "subtype_end": 8842, "orientation": "forward", "distance": 0.3463855421686747, "protein": "MGGKWSKSSVVGWPKIRERMRRAEPAAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEEEEVGFPVRPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIYSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", @@ -5097,7 +5097,7 @@ "start": 767, "end": 2270, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.316486214000789, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKKYQLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELKSLFHTVATLYCVHQKIEVKDTKEALEKIEEEQKKSKKKAQQAAADTGNNSQVSQNYPIVQNMQGQMVHQPLSPRTLNAWVKVIEDKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQLREPRGSDIAGTTSNLQEQIAWMTHNPPVPVGEIYKRWILLGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQVANPASIMMQRGNFRNQRKPIKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKVWPSRKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKPEPIDKELYPLASLRSLFGNDPSSQ", @@ -5109,7 +5109,7 @@ "start": 2062, "end": 5074, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.1943357603710517, "protein": "FFREGLAFPQGEAREFPSEQTRANSPTRRELQVWGRDDNSLSETGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPRVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPILLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPSKDLIAELQKQGQGQWTYQIYQEPYKNLKTGKYARMRGTHTNDIKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWIEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDKGRQKVVPLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLKGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSNVVKAACWWAGIKQEFGIPYNPQSQEVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFVHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -5121,7 +5121,7 @@ "start": 5018, "end": 5597, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMHISRKAKNWLYRHHYESIHPRISSEVHIPLGDARLVITTYWGLLTGERDWHLGQGVSIEWRERRYRTQVDPDLADQLIHLYYFDCFSESAIRNALLGRVVSPRCEYQAGHNQVGSLQYLALTALITPKRRKPPLPSVRKLTEDRWNKPQKTKGHKGSHTMNGH", @@ -5131,23 +5131,23 @@ { "name": "vpr", "start": 5536, - "end": 5830, + "end": 5827, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5799857380556208, + "distance": 0.5656050955414005, "protein": "MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS", - "aminoacids": "MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS*P", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAACTTAAAAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTTCATGGATTGGGACAGCATATCTATGAAACATATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCGACATAGCAGAATAGGCATTAATCTACAGAGGAGAGCAAGGAATGGATCCAGTAGATCCTAGCCT" + "aminoacids": "MEQAPEDQGPQREPYNEWALELLEELKSEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGINLQRRARNGSSRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAAAGGGAGCCATACAATGAATGGGCACTAGAACTTTTAGAGGAACTTAAAAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTTCATGGATTGGGACAGCATATCTATGAAACATATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCGACATAGCAGAATAGGCATTAATCTACAGAGGAGAGCAAGGAATGGATCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5807, "end": 6026, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.40192926045016075, + "distance": 0.37688442211055273, "protein": "MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ", "aminoacids": "MDPVDPSLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFIKKGLGISYGRKKRRQRRRASQGSETHQVSLPKQ*", "nucleotides": "ATGGATCCAGTAGATCCTAGCCTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTGCTTGTACCAATTGTTATTGTAAAAAGTGCTGCTTTCATTGCCAAGTTTGTTTCATAAAAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGTAA" @@ -5155,23 +5155,23 @@ { "name": "rev_exon1", "start": 5946, - "end": 6024, + "end": 6027, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLKAVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLKAVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLKAVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLKAVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGGCAGTGAGACTCATCAAGTTTCTCTACCAAAGCAGTAAG" }, { "name": "vpu", "start": 6038, "end": 6284, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_start": 6259, + "subtype_end": 6310, "orientation": "forward", - "distance": 0.5576513039199607, + "distance": 0.7548091006737623, "protein": "MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL", "aminoacids": "MRPLEIAAIVALVVAVLIAIVVWSIVLIEYRKILRQRKIDRIVDRIRERAEDSGNESEGDQEELSALVERGHLAPWDIDDL*", "nucleotides": "ATGCGACCTTTAGAAATAGCAGCAATAGTAGCACTAGTAGTAGCAGTACTAATAGCAATAGTTGTGTGGTCTATAGTACTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGATAGTGGATAGAATAAGAGAAAGAGCAGAAGATAGTGGAAATGAGAGTGAAGGGGATCAGGAGGAATTATCAGCACTGGTGGAGAGGGGGCACCTTGCTCCTTGGGATATTGATGATCTGTAG" @@ -5181,7 +5181,7 @@ "start": 6201, "end": 8778, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.5096088152339575, "protein": "MRVKGIRRNYQHWWRGGTLLLGILMICSATENLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPSPQEVVLKNVTEKFNMWKNNMVEQMHQDIISLWDESLKPCVKLTPLCVTLNCTNATISGNATEEIKNCSFNVNTKIGGKKQKERALFYKLDVVPIDDDSTNRTNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCGDKEFNGTGLCRNVSTVQCTHGIRPVVSTQLLLNGSLAEGEVVIKSENITNNVKTIIVQLNETVSINCTRPSNNTRRSIHMGPGRAFYATGEIIGDIRKAQCILNKTDWSDTLTRIAKKLHKQFHGPIAFEQSSGGDPEITMHTFNCGGEFFYCNTSALFSGTWNGTAWTNATWGNIAGNNITLQCRIKQFINRWQEVGKAMYAPPIRGEIRCSSNITGLLLTRDGGSNTTNGGENGTQIGENVTQIFRPGGGDMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVTFGALFLGFLGAAGSTMGAASMTLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLRDQQLLGIWGCSGKLICTTTVPWNASWSNKTLEKIWGNMTWMEWEREIDNYTDLIYTLIEQSQNQQEKNEQELLELDKWAGLWNWFDITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRLPASRGPDRPEGTDEEGGERDRDRSGSLVNGFLALIWIDLRSLFLFSYLRLRDLLLIAARIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNAIAISVAEGTDRIIEAIQRICRAILHIPTRIRQGLERALL", @@ -5193,7 +5193,7 @@ "start": 8359, "end": 8455, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.4542873367404884, "protein": "RPASQPRGDPTGPKEPTKKVERETETDPDH", @@ -5205,7 +5205,7 @@ "start": 8360, "end": 8636, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.38952607660679506, "protein": "DPPPSLEGTRQARRNRRRRWRERQRQIRIISERILSTHLDRPAESVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKE", @@ -5217,7 +5217,7 @@ "start": 8779, "end": 9406, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.5105263157894737, "protein": "MGGKGSKMRGWVAVREKMRRTKPEDEPAANGVGAASRDLEKYGALTSSNTVATNADLAWLEAQEEEEVGFPVRPQVPLRPMTYKGALDLSHFLKEKGGLEGLIYSQKRQDILDLWVHHTQGYLPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDQVEEANAGENNSLLHPMSLHGIEDPEKEVLMWKFDSHLAFRHMARELHPEYYKDC", @@ -5231,7 +5231,7 @@ "start": 773, "end": 2276, "subtype_start": 200, - "subtype_end": 1699, + "subtype_end": 1700, "orientation": "forward", "distance": 0.3910844507174782, "protein": "MGARASVLSGGELDRWERIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSKGCRQILEQLQPALQTGSEELKSLYNTVATLYCVHQKIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQASQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQCTQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTTPGTIMMQRGNLRSQRKTVKCFNCGKEGHIAKNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKEMYPLASLRSLFGNDPSSQ", @@ -5243,7 +5243,7 @@ "start": 2068, "end": 5080, "subtype_start": 1492, - "subtype_end": 4503, + "subtype_end": 4504, "orientation": "forward", "distance": 0.21863141758600757, "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQRNVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMTLPGRWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTREAELELAENREILKEPVHGVYYDPTKDLIVEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFRLPIQRETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLIKKDKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDDHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGGNFISTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVYIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKLQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -5255,7 +5255,7 @@ "start": 5024, "end": 5603, "subtype_start": 4448, - "subtype_end": 5026, + "subtype_end": 5027, "orientation": "forward", "distance": 0.3264662839130924, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYISRKAKGWVYKHHYDSTHPRISSEVHIPLGDAKLVITTYWGLHTGERDWHLGQGVSIEWRKKRYSTQVDPGLADQLIHLYYFDCFSESAIRNAILGHIVSPSCDYQAGHNKVGSLQYLALAALITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -5267,7 +5267,7 @@ "start": 5542, "end": 5833, "subtype_start": 4966, - "subtype_end": 5256, + "subtype_end": 5257, "orientation": "forward", "distance": 0.2678354029705382, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKNEAVRHFPRPWLHSLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGIIQRRRARNGASRS", @@ -5279,9 +5279,9 @@ "start": 5813, "end": 6032, "subtype_start": 5237, - "subtype_end": 5451, + "subtype_end": 5456, "orientation": "forward", - "distance": 0.28735632183908044, + "distance": 0.2515090543259557, "protein": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ", "aminoacids": "MEPVDPRLEPWKHPGSQPRTACTSCYCKQCCFHCQVCFITKGLGISYGRKKRRQRRRAPPDSQTHQASLSKQ*", "nucleotides": "ATGGAGCCAGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTGCTTGTACCTCTTGTTATTGTAAGCAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAA" @@ -5289,21 +5289,21 @@ { "name": "rev_exon1", "start": 5952, - "end": 6030, + "end": 6033, "subtype_start": 5376, - "subtype_end": 5451, + "subtype_end": 5454, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEELLQTVRLIKLLYQSS", - "aminoacids": "MAGRSGDSDEELLQTVRLIKLLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGT" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEELLQTVRLIKLLYQSSK", + "aminoacids": "MAGRSGDSDEELLQTVRLIKLLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCCTCCAGACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6044, "end": 6290, "subtype_start": 5468, - "subtype_end": 5707, + "subtype_end": 5708, "orientation": "forward", "distance": 0.4384802297327147, "protein": "MQPLHIAAIVALVVAIIIAIVVWSIVFIEYRKILRQRKIDRIIDRIRERAEDSGNESEGDQEELSALVEMGHCAPWDVNDL", @@ -5315,7 +5315,7 @@ "start": 6207, "end": 8790, "subtype_start": 5631, - "subtype_end": 8207, + "subtype_end": 8208, "orientation": "forward", "distance": 0.4684887834843129, "protein": "MRVRGIKKNYQHLWRWGIVLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTNYNNTNNTTSTNDTTSTTTSSGEKMKEGEIKNCSFNITTSIRDKVQKEYALFYKPDIVPIDNDNTSYRLISCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGTGSCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSENFSDNAKTIIVHLNESVEITCIRPNNNTRKSIPIGPGRAFYATGAIIGDIRQAHCKINKTKWNNTLDQIFKKLREQFGNKTIIFTHSSGGDPEVVTHNFNCGGEFFYCNTTKLFNSTWNETSYWKDERSNDNDTITLPCRIKQIINLWQEVGKAMYAPPIRGYIKCSSNITGLLITRDGGKNESNTTETFRPGGGNMRDNWRSELYKYKVVKIEPLGIAPTKAKRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARLLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWDNMTWMEWEREIDNYTSLIYTLIEKSQNQQEKNEQELLELDKWASLWNWFDITKWLWYIKIFIMIVGGLVGLRIVFAVLSIANRVRQGYSPLSFQTHLPAPRGPDRPEGIEEEGGERDRDRSGPLVNGFLTIIWVDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWELLKYWWNLLQYWSQELKNSAISLLNATAIAVAEGTDRIIEVLQRAGRAFLHIPRRIRQGLERALL", @@ -5327,7 +5327,7 @@ "start": 8371, "end": 8467, "subtype_start": 7789, - "subtype_end": 7881, + "subtype_end": 7885, "orientation": "forward", "distance": 0.4971219256933542, "protein": "RPTSQPRGDPTGQKESKKKVERETETDPDH", @@ -5339,7 +5339,7 @@ "start": 8372, "end": 8648, "subtype_start": 7790, - "subtype_end": 8065, + "subtype_end": 8066, "orientation": "forward", "distance": 0.3934495959166312, "protein": "DPPPSPEGTRQARRNRRRRWRERQRQIRTISERILNNYLGRPAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGTPQILVESPAVLESGTKE", @@ -5351,7 +5351,7 @@ "start": 8791, "end": 9412, "subtype_start": 8209, - "subtype_end": 8841, + "subtype_end": 8842, "orientation": "forward", "distance": 0.36363636363636354, "protein": "MGGKWSKSSVVGWPAIRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNADCAWLEAQEDEEVGFPVRPQVPLRPMTYKGAVDLSHFLKEKGGLEGLIHSQKRQDILDLWVYHTQGYFPDWQNYTPGPGIRYPLTFGWCFKLVPVEPDKVEEANEGENNSLLHPMSLHGMDDPEKEVLVWRFDSRLAFHHMARELHPEYYKDC", @@ -5365,7 +5365,7 @@ "start": 808, "end": 2308, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.2880084183556755, "protein": "MGARASVLSGGELDKWEKIRLRPGGKKQYKLKHLVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEEFRSLYNTVATLYCVHQKIEVKDTKEALEKIEEEQNQSKKKAAAAAADTGNRSQVSQNYPIVQNLQGQMVHQPLSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTIGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSNLQEQIGWMTHNPPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTVMMQRGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSKPEPTAPPEESFRFGEETTTPSQKQGPIDKELYPLASLKSLFGNDPSSQ", @@ -5377,7 +5377,7 @@ "start": 2100, "end": 5112, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.1880239208210378, "protein": "FFREDLAFPQGEAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEDMNLPGKWKPKMIGGIGGFIKVRQYDQILVEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSIPLDKDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTAEAELELAENREILKEPVHGVYYDPTKDLIAELQKQGQSQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIAIESIVIWGKTPKFKLPIQKETWETWWTDYWQATWIPDWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETRSGKAGYVTDRGRQKVVPLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSSGIRKILFLDGITKAQDDHERYHSNWRAMASDFNLPPIIAKEIVASCDKCQLKGEATHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSAAVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVREQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIIDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -5389,7 +5389,7 @@ "start": 5056, "end": 5635, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.41532297468972923, "protein": "MENRWQVMIVWQVDRMRIRTWNSLVKHHIHVSKKAKGWVYRHHYESTHPRISSEVHIPLGEARLVIATYWGLHTGERDWHLGQGVSIEWREKKYITQVDPDLADQLIHLHYFDCFSESAIRNAIVGRLVSPQCEYQTGHNKVGSLQYLALVALITPKKRKPPLPSVRKLTEDRWNKPQKTKDHRGSHIMSGH", @@ -5401,9 +5401,9 @@ "start": 5574, "end": 5865, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5928358208955226, + "distance": 0.5793112277557293, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS", "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKSEAVRHFPRIWLHNLGQYIYATYGDTWTGVEALIRILQQLLFIHFRIGCQHSRIGITRQRRARNGASRS*", "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGACCACAGAGGGAGCCATATAATGAGTGGACACTAGAGCTTTTAGAGGAACTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATATGGCTCCATAACTTAGGACAATACATCTATGCAACTTATGGGGATACTTGGACAGGAGTGGAAGCTTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGAATTGGATGTCAACATAGCAGAATAGGCATTACTCGACAGAGGAGAGCAAGGAATGGAGCCAGTAGATCCTAG" @@ -5411,69 +5411,69 @@ { "name": "tat_exon1", "start": 5845, - "end": 6061, + "end": 6064, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTPCTNCYCKKCCFHCQVCFIKKALGISYGRKKRRQRRRPPQNSETHQAALSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTCTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5984, - "end": 6062, + "end": 6065, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKQLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKQLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6076, - "end": 6340, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5091759625764664, - "protein": "MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL", - "aminoacids": "MQPLVILAIVALVVAAIIAIVVWTIVLIEYRKILRQRKIDSIINRIRERAEDSGNESEGDQEELSALVEMGHHVEMGHHAPWNVDDL*", - "nucleotides": "ATGCAACCCTTAGTAATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAGAAAGATAGATAGCATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKQLYQSSK", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKQLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAAACAGTGAGACTCATCAAGCAGCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6239, "end": 8807, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.4718646278993922, "protein": "MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL", "aminoacids": "MRVKEIRKNCRHLWRWGTMWKWGTMLLGMLMICSAKEQLWVTVYYGVPVWKEATTTLFCASNAKAYDPEVHNVWATHACVPTDPNPQEVPLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVILNCTNVNVTTNNNSSSEEQMEVGEIKNCSFNIATRIKNKIKKEYALFNRLDVVPIEDDNTSYMLINCNTSVTTQACPKVTFEPIPIHYCAPAGFAILKCNDKKFNGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEEEIVVRSENFTNNAKTIIVQLNKTIEINCIRPNNNTRKSISLRPGQAIYATEDIIGNIRQAHCNIRRKDWDKALEQVVAKLREQFKNKTIVFNQSSGGDPEIVMHSFNCAGEFFYCNTTKLFNSTWNVNNTRNNTTDNSTITLPCRIKQIINRWQEVGKAMYAPPIKGQIKCSSNITGLLLTRDGGVREDNAPEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQRGKRAVTLGAMFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLAVERYLQDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLEKIWKNMTWMEWEKEINNYTRTIYTLIEESQNQQEKNEQELLELDKWASLWNWFDITNWLWYIKIFIMIVGGIVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGTEEEGGERDRDRSGQLVNGFFALIWDDLQSLCLFSYRRLRDLLLIVARIVELLGHRGWEALKYWWNLLQYWSQELKKSAVSLLNATAIAVAEGTDRVIEVVQRIGRAILHIPRRIRQGLERALL*", "nucleotides": "ATGAGAGTGAAGGAGATCAGGAAGAATTGTCGGCACTTGTGGAGATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAGTGCTAAAGAACAATTGTGGGTCACAGTTTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTTTATTTTGTGCATCAAATGCTAAAGCATATGACCCAGAGGTGCATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACAAGAAGTACCATTGGAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTAGAACAGATGCATGAGGACATAATCAGTTTATGGGATCAAAGCTTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTATTTTAAATTGCACTAATGTGAATGTTACTACTAACAATAATAGTAGTAGTGAGGAACAGATGGAGGTAGGAGAAATAAAAAACTGCTCTTTCAATATTGCCACAAGAATAAAAAATAAGATAAAGAAAGAATATGCACTTTTTAATAGACTTGATGTAGTACCAATAGAGGATGATAATACAAGCTATATGTTGATAAATTGTAATACCTCAGTCACTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATACTAAAATGTAATGATAAAAAGTTCAATGGAACAGGACCATGTAACAATGTCAGCACAGTACAATGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGATAGTAGTTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTACAGCTGAATAAAACTATAGAAATTAATTGTATAAGACCCAACAATAATACAAGAAAAAGTATATCTTTAAGACCGGGGCAAGCAATTTATGCAACAGAAGACATAATAGGAAATATAAGACAAGCACATTGTAACATTAGGAGAAAAGACTGGGATAAAGCTTTAGAACAGGTAGTTGCAAAATTAAGAGAACAATTTAAGAATAAAACAATAGTCTTTAATCAATCCTCAGGAGGAGACCCAGAAATTGTAATGCATAGTTTTAATTGTGCAGGGGAATTTTTCTACTGTAACACAACAAAGCTGTTTAATAGTACTTGGAATGTTAATAACACTCGGAATAATACTACTGATAATAGCACCATCACTCTCCCGTGCAGAATAAAACAAATTATAAACAGATGGCAGGAAGTAGGAAAAGCAATGTATGCTCCTCCCATCAAAGGGCAAATTAAATGTTCATCAAATATTACAGGGTTATTATTAACAAGAGATGGTGGTGTCCGCGAGGACAACGCCCCTGAGATCTTTAGACCTGGAGGAGGAGATATGAGGGATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAGCCATTAGGAGTGGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAAAGAGGAAAAAGAGCAGTAACGCTGGGAGCTATGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGGCAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACTTACAGGATCAACAGCTCCTAGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTACCTTGGAATACTAGTTGGAGTAATAAATCTTTGGAAAAGATTTGGAAAAACATGACCTGGATGGAGTGGGAGAAAGAAATTAACAATTACACAAGGACAATATACACCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAGGAATTATTGGAATTGGATAAGTGGGCAAGTTTGTGGAATTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATCATGATAGTAGGAGGTATAGTAGGGTTAAGAATAGTTTTTACTGTGCTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTGTCATTCCAGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGAACGGATTCTTTGCGCTTATCTGGGACGACCTGCAGAGCCTGTGCCTCTTCAGCTACCGCCGCTTGAGAGACTTACTCTTGATTGTAGCGAGGATTGTGGAACTTCTGGGACACAGGGGGTGGGAGGCCCTCAAATATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAAGAGTGCTGTTAGTTTGCTTAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAAGAATTGGTAGAGCTATTCTCCACATCCCTAGAAGAATAAGACAGGGCTTAGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 6283, + "end": 6340, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.4380664652567977, + "protein": "MGHHVEMGHHAPWNVDDL", + "aminoacids": "MGHHVEMGHHAPWNVDDL*", + "nucleotides": "ATGGGGCACCATGTGGAAATGGGGCACCATGCTCCTTGGAATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8388, - "end": 8481, + "end": 8484, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.4918032786885247, "protein": "RPASQLRGDQTGPKEQKKKVERETETDPGN", - "aminoacids": "RPASQLRGDQTGPKEQKKKVERETETDPGN*", - "nucleotides": "AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAG" + "aminoacids": "RPASQLRGDQTGPKEQKKKVERETETDPGN**", + "nucleotides": "AGACCCGCTTCCCAGCTCCGAGGGGACCAGACAGGCCCGAAGGAACAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGGGCAATTAGTGA" }, { "name": "rev_exon2", "start": 8389, "end": 8674, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.4123815285339786, "protein": "DPLPSSEGTRQARRNRRRRWRERQRQIRAISERILCAYLGRPAEPVPLQLPPLERLTLDCSEDCGTSGTQGVGGPQILVESPAVLESGTKEECC", @@ -5485,7 +5485,7 @@ "start": 8808, "end": 9417, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.49080954243253805, "protein": "MGGKWSKCSLVGWPAIRERMRRAEPAPAAEGVGAASRDLEKHGALTTSNTAANNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKGAMDLGHFLKEKGGLEGLIYSPKRQEILDLWVYHTQGYFPDWQNYTPGPGTRYPLTFGWCFKLVPVEPDEGENSCLLHPMNQHGADDTEREVLMWKFDSSLAFHHKARELHPEYYKNC", diff --git a/tests/expected-results-single-csv/errors.csv b/tests/expected-results-single-csv/errors.csv index 7e0e377..8a1c252 100644 --- a/tests/expected-results-single-csv/errors.csv +++ b/tests/expected-results-single-csv/errors.csv @@ -1,4 +1,3 @@ sequence_name,error,message -MN692145,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN692145,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" -MN692145,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 231" +MN692145,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MN692145,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 231" diff --git a/tests/expected-results-single-csv/holistic.csv b/tests/expected-results-single-csv/holistic.csv index be02c30..d3d8de7 100644 --- a/tests/expected-results-single-csv/holistic.csv +++ b/tests/expected-results-single-csv/holistic.csv @@ -1,2 +1,2 @@ seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs -MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8794,3 +MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8795,3 diff --git a/tests/expected-results-single-csv/orfs.csv b/tests/expected-results-single-csv/orfs.csv index b918a41..e41779e 100644 --- a/tests/expected-results-single-csv/orfs.csv +++ b/tests/expected-results-single-csv/orfs.csv @@ -1,12 +1,12 @@ seqid,name,start,end,subtype_start,subtype_end,orientation,distance,protein,aminoacids,nucleotides -MN692145,gag,775,2281,789,2291,forward,0.20784453738651432,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692145,pol,2070,5085,2084,5095,forward,0.14843087362171337,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN692145,vif,5029,5608,5040,5618,forward,0.2608047690014903,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG -MN692145,vpr,5547,5838,5558,5849,forward,0.6002510555745751,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG -MN692145,tat_exon1,5818,6034,5830,6044,forward,0.31992687385740404,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG -MN692145,rev_exon1,5957,6035,5969,6044,forward,0.4267425320056898,MAGRSGDSDEELLKTVRLIKFLYQSS,MAGRSGDSDEELLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT -MN692145,vpu,6049,6298,6061,6309,forward,0.5326633165829145,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL*,ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN692145,env,6212,8783,6224,8794,forward,0.47520309038232134,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA -MN692145,tat_exon2,8364,8457,8376,8468,forward,0.3921568627450981,RPASQPRGDPTGPKESKKKVERETETDPLH,RPASQPRGDPTGPKESKKKVERETETDPLH*,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG -MN692145,rev_exon2,8365,8641,8377,8652,forward,0.29843322556577967,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG -MN692145,nef,8784,9387,8796,9416,forward,0.4049958673891082,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA +MN692145,gag,775,2281,789,2292,forward,0.20784453738651432,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MN692145,pol,2070,5085,2084,5096,forward,0.14843087362171337,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN692145,vif,5029,5608,5040,5619,forward,0.2608047690014903,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG +MN692145,vpr,5547,5838,5558,5843,forward,0.587876570313453,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG +MN692145,tat_exon1,5818,6037,5830,6046,forward,0.28735632183908044,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA +MN692145,rev_exon1,5957,6038,5969,6047,forward,0.4274965800273598,MAGRSGDSDEELLKTVRLIKFLYQSSK,MAGRSGDSDEELLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG +MN692145,env,6212,8783,6224,8795,forward,0.47520309038232134,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA +MN692145,vpu,6253,6298,6259,6310,forward,0.3649167733674775,MEMGHHAPWDVDDL,MEMGHHAPWDVDDL*,ATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG +MN692145,tat_exon2,8364,8460,8376,8469,forward,0.3921568627450981,RPASQPRGDPTGPKESKKKVERETETDPLH,RPASQPRGDPTGPKESKKKVERETETDPLH**,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA +MN692145,rev_exon2,8365,8641,8377,8653,forward,0.29843322556577967,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG +MN692145,nef,8784,9387,8796,9417,forward,0.4049958673891082,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA diff --git a/tests/expected-results-single-hxb2/holistic.json b/tests/expected-results-single-hxb2/holistic.json index 43acd43..9a1c5bf 100644 --- a/tests/expected-results-single-hxb2/holistic.json +++ b/tests/expected-results-single-hxb2/holistic.json @@ -8,7 +8,7 @@ "blast_qseq_coverage": 1.1271545051088863, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8793, + "orfs_end": 8794, "blast_n_conseqs": 3 } } \ No newline at end of file diff --git a/tests/expected-results-single-hxb2/orfs.json b/tests/expected-results-single-hxb2/orfs.json index bd5db77..3c10e8a 100644 --- a/tests/expected-results-single-hxb2/orfs.json +++ b/tests/expected-results-single-hxb2/orfs.json @@ -5,7 +5,7 @@ "start": 775, "end": 2281, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.20784453738651432, "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", @@ -17,7 +17,7 @@ "start": 2070, "end": 5085, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.14843087362171337, "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", @@ -29,7 +29,7 @@ "start": 5029, "end": 5608, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", @@ -41,7 +41,7 @@ "start": 5547, "end": 5838, "subtype_start": 5558, - "subtype_end": 5848, + "subtype_end": 5849, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", @@ -51,33 +51,33 @@ { "name": "tat_exon1", "start": 5818, - "end": 6034, + "end": 6037, "subtype_start": 5829, - "subtype_end": 6043, + "subtype_end": 6045, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5957, - "end": 6035, + "end": 6038, "subtype_start": 5968, - "subtype_end": 6043, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.4267425320056898, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" + "distance": 0.4274965800273598, + "protein": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "vpu", "start": 6049, "end": 6298, "subtype_start": 6060, - "subtype_end": 6308, + "subtype_end": 6309, "orientation": "forward", "distance": 0.520618556701031, "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", @@ -89,7 +89,7 @@ "start": 6212, "end": 8783, "subtype_start": 6223, - "subtype_end": 8793, + "subtype_end": 8794, "orientation": "forward", "distance": 0.4766895145301081, "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", @@ -99,21 +99,21 @@ { "name": "tat_exon2", "start": 8364, - "end": 8457, + "end": 8460, "subtype_start": 8375, - "subtype_end": 8467, + "subtype_end": 8471, "orientation": "forward", "distance": 0.3921568627450981, "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH*", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG" + "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH**", + "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA" }, { "name": "rev_exon2", "start": 8365, "end": 8641, "subtype_start": 8376, - "subtype_end": 8651, + "subtype_end": 8652, "orientation": "forward", "distance": 0.29843322556577967, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", @@ -125,7 +125,7 @@ "start": 8784, "end": 9387, "subtype_start": 8795, - "subtype_end": 9415, + "subtype_end": 9416, "orientation": "forward", "distance": 0.3966849799795139, "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", diff --git a/tests/expected-results-single/errors.json b/tests/expected-results-single/errors.json index 77c3b08..292cbf8 100644 --- a/tests/expected-results-single/errors.json +++ b/tests/expected-results-single/errors.json @@ -3,17 +3,12 @@ { "sequence_name": "MN692145", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MN692145", "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" - }, - { - "sequence_name": "MN692145", - "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 231" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 231" } ] } \ No newline at end of file diff --git a/tests/expected-results-single/holistic.json b/tests/expected-results-single/holistic.json index d2da5f4..060aa5a 100644 --- a/tests/expected-results-single/holistic.json +++ b/tests/expected-results-single/holistic.json @@ -8,7 +8,7 @@ "blast_qseq_coverage": 1.1271545051088863, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 } } \ No newline at end of file diff --git a/tests/expected-results-single/orfs.json b/tests/expected-results-single/orfs.json index 29cf7d9..3da162b 100644 --- a/tests/expected-results-single/orfs.json +++ b/tests/expected-results-single/orfs.json @@ -5,7 +5,7 @@ "start": 775, "end": 2281, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.20784453738651432, "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", @@ -17,7 +17,7 @@ "start": 2070, "end": 5085, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.14843087362171337, "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", @@ -29,7 +29,7 @@ "start": 5029, "end": 5608, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", @@ -41,9 +41,9 @@ "start": 5547, "end": 5838, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6002510555745751, + "distance": 0.587876570313453, "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", "aminoacids": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*", "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG" @@ -51,69 +51,69 @@ { "name": "tat_exon1", "start": 5818, - "end": 6034, + "end": 6037, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5957, - "end": 6035, + "end": 6038, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.4267425320056898, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6049, - "end": 6298, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5326633165829145, - "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", - "aminoacids": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL*", - "nucleotides": "ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" + "distance": 0.4274965800273598, + "protein": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6212, "end": 8783, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.47520309038232134, "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", "aminoacids": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 6253, + "end": 6298, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.3649167733674775, + "protein": "MEMGHHAPWDVDDL", + "aminoacids": "MEMGHHAPWDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8364, - "end": 8457, + "end": 8460, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.3921568627450981, "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH*", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG" + "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH**", + "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA" }, { "name": "rev_exon2", "start": 8365, "end": 8641, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.29843322556577967, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", @@ -125,7 +125,7 @@ "start": 8784, "end": 9387, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4049958673891082, "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", diff --git a/tests/expected-results-small-csv/errors.csv b/tests/expected-results-small-csv/errors.csv index 4b4b70f..6f92714 100644 --- a/tests/expected-results-small-csv/errors.csv +++ b/tests/expected-results-small-csv/errors.csv @@ -1,38 +1,34 @@ sequence_name,error,message -KX505501.1,DeletionInOrf,"ORF pol at 2084-5095 can have maximum deletions 30, got 2721" -KX505501.1,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6323 -KX505501.1,InsertionInOrf,"Smaller ORF vif at 5040-5618 can have maximum insertions 90, got 909" -KX505501.1,DeletionInOrf,"Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 84" -KX505501.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893 -KX505501.1,InternalStopInOrf,Smaller ORF rev_exon1 at 5969-6044 contains an internal stop codon at 6005 -KX505501.1,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" -KX505501.1,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 71 positions. -KX505501.1,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 96" -KX505501.1,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1116" +KX505501.1,DeletionInOrf,"ORF pol at 2084-5096 can have maximum deletions 30, got 2721" +KX505501.1,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6323 +KX505501.1,InsertionInOrf,"Smaller ORF vif at 5040-5619 can have maximum insertions 90, got 909" +KX505501.1,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 195 positions. +KX505501.1,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893 +KX505501.1,InternalStopInOrf,Smaller ORF rev_exon1 at 5969-6047 contains an internal stop codon at 6005 +KX505501.1,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 40 positions. +KX505501.1,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 96" +KX505501.1,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1116" KX505501.1,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions. KX505501.1,LongDeletion,Query sequence contains a long deletion. KX505501.1,Scramble,Sequence is plus-scrambled. -MN691959,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN691959,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" -MN691959,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 249" -MN692074,DeletionInOrf,"ORF pol at 2084-5095 can have maximum deletions 30, got 981" -MN692074,InternalStopInOrf,ORF env at 6224-8794 contains an internal stop codon at 6551 -MN692074,DeletionInOrf,"Smaller ORF vif at 5040-5618 can have maximum deletions 30, got 81" -MN692074,InsertionInOrf,"Smaller ORF vpr at 5558-5849 can have maximum insertions 90, got 234" -MN692074,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893 -MN692074,FrameshiftInOrf,Smaller ORF vpu at 6061-6309 contains out of frame indels that impact 168 positions. -MN692074,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 76 positions. -MN692074,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 204" -MN692074,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1131" +MN691959,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MN691959,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 249" +MN692074,DeletionInOrf,"ORF pol at 2084-5096 can have maximum deletions 30, got 981" +MN692074,InternalStopInOrf,ORF env at 6224-8795 contains an internal stop codon at 6551 +MN692074,DeletionInOrf,"Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81" +MN692074,InsertionInOrf,"Smaller ORF vpr at 5558-5843 can have maximum insertions 90, got 261" +MN692074,InternalStopInOrf,Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893 +MN692074,InsertionInOrf,"Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 108" +MN692074,FrameshiftInOrf,Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 76 positions. +MN692074,DeletionInOrf,"Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 204" +MN692074,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1131" MN692074,RevResponseElementDeletion,Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions. MN692074,LongDeletion,Query sequence contains a long deletion. -MN692145,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN692145,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" -MN692145,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 231" -MN090335,InternalStopInOrf,ORF gag at 789-2291 contains an internal stop codon at 822 -MN090335,FrameshiftInOrf,Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions. -MN090335,InsertionInOrf,"Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" -MN090335,InsertionInOrf,"Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 282" +MN692145,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions. +MN692145,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 231" +MN090335,InternalStopInOrf,ORF gag at 789-2292 contains an internal stop codon at 822 +MN090335,FrameshiftInOrf,Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions. +MN090335,InsertionInOrf,"Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 282" MN090335,PackagingSignalDeletion,Query Sequence exceeds maximum deletion tolerance in PSI. Contains 42 deletions with max tolerance of 10 deletions. MN090335,MajorSpliceDonorSiteMutated,"Query sequence has a mutated splice donor site, AT." MN090335,Scramble,Sequence is minus-scrambled. diff --git a/tests/expected-results-small-csv/holistic.csv b/tests/expected-results-small-csv/holistic.csv index 8c4b982..0dd3a51 100644 --- a/tests/expected-results-small-csv/holistic.csv +++ b/tests/expected-results-small-csv/holistic.csv @@ -1,6 +1,6 @@ seqid,qlen,hypermutation_probablility,inferred_subtype,blast_matched_qlen,blast_sseq_coverage,blast_qseq_coverage,blast_sseq_orfs_coverage,orfs_start,orfs_end,blast_n_conseqs -KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,0.17663960024984385,789,8794,4 -MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,1.0,789,8794,3 -MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,0.4114928169893816,789,8794,4 -MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8794,3 -MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,1.0003747657713928,789,8794,3 +KX505501.1,1997,0.7087072014754221,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,1997,0.2498199403230785,1.2158237356034052,0.17661753684736448,789,8795,4 +MN691959,9493,0.19593905853945925,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9493,1.0816956477003807,1.1086063415148004,1.0,789,8795,3 +MN692074,4178,0.36378645339477633,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,4178,0.5041670953801831,1.1728099569171853,0.41144141893579816,789,8795,4 +MN692145,9689,0.1661041079701131,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9689,1.1304660973351168,1.1271545051088863,1.0,789,8795,3 +MN090335,9069,0.1754017863888554,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,9069,0.9842576396748637,1.0603153600176425,1.0003747189607795,789,8795,3 diff --git a/tests/expected-results-small-csv/orfs.csv b/tests/expected-results-small-csv/orfs.csv index a6cf8d8..4880772 100644 --- a/tests/expected-results-small-csv/orfs.csv +++ b/tests/expected-results-small-csv/orfs.csv @@ -1,56 +1,56 @@ seqid,name,start,end,subtype_start,subtype_end,orientation,distance,protein,aminoacids,nucleotides -KX505501.1,env,0,1824,6224,8794,forward,0.7626080297560442,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,vif,0,1824,5040,5618,forward,0.7647696476964769,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,nef,0,1824,8796,9416,forward,0.7645782478980201,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,tat_exon1,1,1750,5830,6044,forward,0.7680130480667754,MRKLQNGIDCIQCMQGLLHQAR,VSLVRPDLSLGALWLTREPTA*ASIKLALSASSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKGKPEELSRRRTRLAERAQQEARGGDW*VRRKFFD*RRLEGERWVRERQY*AGENWIDGKKFG*GQEEIKDIN*NI*YGQAES*NDSQLILAC*KHQKAVDKYWDSYNRLLRQDQKNLSLCIIQ*QSSIVYIKK*M*KTPRKP*TR*KKSKTKLGNKHSKPQLQEVAVRSATITL*CRIIRGKWYIRPCHQEL*MHG*K**RKRLSAQK*YPCFQHYQKEPPHKT*TPC*IQWGDIKQPCKC*KRPLMRKLQNGIDCIQCMQGLLHQAR*ENQGEVT*QELLVPFRNK*HG*QIIHLSQ*ERFIKDG*S*G*IK**ECIALSAFWT*DKDQRNLLETM*TGSIKP*EPNKPHRK*KIG*QKPCWSRMRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,GTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT -KX505501.1,gag,336,1824,789,2291,forward,0.3997973809613161,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,rev_exon1,1306,1750,5969,6044,forward,0.758082497212932,MRTQIVKLF,MRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,ATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT -KX505501.1,vpr,1599,1749,5558,5849,forward,0.7677189534455227,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR,ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGA -KX505501.1,pol,1627,1927,2084,5095,forward,0.7724330674761569,GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,IL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,ATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA -KX505501.1,tat_exon2,1746,1824,8376,8468,forward,0.7616257781032589,RCIRSTTRTADTELFTRDFPLGTFQ,RCIRSTTRTADTELFTRDFPLGTFQ*,AGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG -KX505501.1,vpu,1747,1927,6061,6309,forward,0.769715460635405,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA -KX505501.1,rev_exon2,1747,1927,8377,8652,forward,0.7620186257236345,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA -MN691959,gag,639,2142,789,2291,forward,0.0801186943620179,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN691959,pol,1934,4946,2084,5095,forward,0.054722889368558514,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN691959,vif,4890,5469,5040,5618,forward,0.09157509157509158,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG -MN691959,vpr,5408,5702,5558,5849,forward,0.5562531391260666,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*T,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT -MN691959,tat_exon1,5679,5895,5830,6044,forward,0.31992687385740404,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG -MN691959,rev_exon1,5818,5896,5969,6044,forward,0.48200514138817474,MAGRSGDSDEDLLKTVRLIKFLYQSS,MAGRSGDSDEDLLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT -MN691959,vpu,5910,6156,6061,6309,forward,0.53246275519588,SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL,MQPIQIAIVALVVAIIIAIVV*SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL*,ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG -MN691959,env,6070,8656,6224,8794,forward,0.13638128518734216,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA -MN691959,tat_exon2,8237,8333,8376,8468,forward,0.6672629695885509,RPTSQTRGDPTGPKE,RPTSQTRGDPTGPKE*KKKVERETETDPFD**,AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGA -MN691959,rev_exon2,8238,8514,8377,8652,forward,0.210025203024363,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE*,GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG -MN691959,nef,8657,9278,8796,9416,forward,0.08588605782994552,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA -MN692074,nef,0,4059,8796,9416,forward,0.763072203234748,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,WKG*FGPRKDKISLICGSTTHKATSLIGRTTHQGRGPDFH*PLDGASS*YQLIQRR*KRPMQERTTACYTL*ACMGWRTRRKKC*CGSLTAA*HFITWPESCIRSTTRIADFELSTRDFPLGTFQGGVAWAGLGSGEPSDAAYKQLLFACTGSLWLDQI*AWELSG*LGNPLLKPQ*SLP*VL*VVCARLLCDSGN*RSLRPF*SVWKISSSGARTGT*KRKRNQRSSLDAGLGLLKRARQEARGGDW*VRQKF*LAEARRREMGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*G*GGN*RKLY*IQEQMIQY*KK*ICQEDGNQK**GELEVLSK*DSMIRYS*KSVDIKL*VQY**DLHLST*LEEIC*LRLVAL*IFPLVLLKLYQ*N*SQEWMAQKLNNGH*QKKK*KH**KFVQKWKRKGKFQKLGLKIHTILQYLP*RKKTVLNGEN**ISENLIRELKTSGKFN*EYHIPQG*ERKNQ*QYWMWGMHIFQFP*IKNLGNILHLPYPVETMRHQGLDISTMCFHRDGKDHQQYSKVA*QKF*SLLENKIQK*LSINTWMIYM*DLI*K*GSIE*K*RN*DNIC*DGDLPHQTKNIRKNPHSFGWVMNSILINGQYSL*CCQKKTAGLSMTYRS*WEN*IGQVRFTQGLK*GNYVNSLGEPKH*QK*YH*QKKQSWNWQKTGKF*KNQYMEYIMTHQKT**QKYRSRGKVNGHIKFIKSHLKI*KQENMQK*GVPTLMM*NN*QRQCKK*PQKA**YGERLLNLNYPYKKKHGKHGGQSIGKPPGFLSGSLSIPLP**NYGTS*RKNP**EQKPSM*MGQLTGRLN*EKQDMLLTEEDKKLSP*LTQQIRRLNYKQFI*LCRIRD*K*T**QTHNMH*,TGGAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAG -MN692074,env,2,4115,6224,8794,forward,0.7604257801108195,MNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA -MN692074,gag,789,2292,789,2291,forward,0.19470123431286457,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692074,pol,2084,4115,2084,5095,forward,0.5617851221088768,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA -MN692074,vif,3617,4115,5040,5618,forward,0.7631664499349805,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA -MN692074,vpr,3617,4085,5558,5849,forward,0.7632679688399402,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACC -MN692074,tat_exon1,3823,4084,5830,6044,forward,0.7659115426105717,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC -MN692074,rev_exon1,3823,4084,5969,6044,forward,0.7645569620253164,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC -MN692074,vpu,4080,4164,6061,6309,forward,0.7708418891170431,IPSGEPSDAAYKQLLFACTGSLWLDQI,IPSGEPSDAAYKQLLFACTGSLWLDQI*,ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA -MN692074,tat_exon2,4080,4164,8376,8468,forward,0.7699443413729128,IPSGEPSDAAYKQLLFACTGSLWLDQI,IPSGEPSDAAYKQLLFACTGSLWLDQI*,ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA -MN692074,rev_exon2,4081,4153,8377,8652,forward,0.7667894365645325,YPVASPQMLHISSCFLPVLGLSG,YPVASPQMLHISSCFLPVLGLSG*,TACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAG -MN692145,gag,775,2281,789,2291,forward,0.20784453738651432,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA -MN692145,pol,2070,5085,2084,5095,forward,0.14843087362171337,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN692145,vif,5029,5608,5040,5618,forward,0.2608047690014903,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG -MN692145,vpr,5547,5838,5558,5849,forward,0.6002510555745751,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG -MN692145,tat_exon1,5818,6034,5830,6044,forward,0.31992687385740404,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG -MN692145,rev_exon1,5957,6035,5969,6044,forward,0.4267425320056898,MAGRSGDSDEELLKTVRLIKFLYQSS,MAGRSGDSDEELLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT -MN692145,vpu,6049,6298,6061,6309,forward,0.5326633165829145,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL,MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL*,ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG -MN692145,env,6212,8783,6224,8794,forward,0.47520309038232134,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA -MN692145,tat_exon2,8364,8457,8376,8468,forward,0.3921568627450981,RPASQPRGDPTGPKESKKKVERETETDPLH,RPASQPRGDPTGPKESKKKVERETETDPLH*,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG -MN692145,rev_exon2,8365,8641,8377,8652,forward,0.29843322556577967,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG -MN692145,nef,8784,9387,8796,9416,forward,0.4049958673891082,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA -MN090335,gag,315,1665,789,2291,forward,0.596665989022159,MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ,MYTIEGGYCII**SKFF*SCLKGWL*LFQYLFTAS*CF*QARINCESF*FPACPYYMF*SIFFLSPWP*PNFLPSI*FSPAQY*RSRTHLSPSSLR*SKMAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ*,ATGTACACAATAGAGGGTGGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTTCCAATATCTGTTTACAGCCTCCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGTTCCCTGCTTGCCCATACTATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA -MN090335,pol,1427,4469,2084,5095,forward,0.27887169154684477,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG -MN090335,vif,4413,4992,5040,5618,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG -MN090335,vpr,4931,5225,5558,5849,forward,0.6187165775401071,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*T,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAGACT -MN090335,tat_exon1,5202,5418,5830,6044,forward,0.42503863987635226,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ,ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAG -MN090335,rev_exon1,5341,5419,5969,6044,forward,0.5275498241500586,MAGRSGDRDEDLLKTVRLIKFLYQSS,MAGRSGDRDEDLLKTVRLIKFLYQSS,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT -MN090335,vpu,5433,5682,6061,6309,forward,0.5368311327310633,MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL,MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL*,ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG -MN090335,env,5596,8158,6224,8794,forward,0.5139610675592354,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ*,ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA -MN090335,tat_exon2,7739,7835,8376,8468,forward,0.6842105263157894,RPSSQPRGDQTGPKE,RPSSQPRGDQTGPKE*KKKVERETEADPED**,AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGA -MN090335,rev_exon2,7740,8016,8377,8652,forward,0.4267425320056898,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE*,GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG -MN090335,nef,8159,8813,8796,9416,forward,0.49485619884358334,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA +KX505501.1,env,0,1824,6224,8795,forward,0.7626080297560442,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,vif,0,1824,5040,5619,forward,0.7647696476964769,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,nef,0,1824,8796,9417,forward,0.7645782478980201,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,GLSG*TRSEPGSSLAN*GTHCLSLNKACLECFK*CVPVCCVTLVTRDPSDPFSQCGKSLAVAPEQGPESERETRGALSTQDSAC*ARAARGEGRRLVSTPKIF*LAEARRREMGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,tat_exon1,1,1750,5830,6046,forward,0.7680130480667754,MRKLQNGIDCIQCMQGLLHQAR,VSLVRPDLSLGALWLTREPTA*ASIKLALSASSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKGKPEELSRRRTRLAERAQQEARGGDW*VRRKFFD*RRLEGERWVRERQY*AGENWIDGKKFG*GQEEIKDIN*NI*YGQAES*NDSQLILAC*KHQKAVDKYWDSYNRLLRQDQKNLSLCIIQ*QSSIVYIKK*M*KTPRKP*TR*KKSKTKLGNKHSKPQLQEVAVRSATITL*CRIIRGKWYIRPCHQEL*MHG*K**RKRLSAQK*YPCFQHYQKEPPHKT*TPC*IQWGDIKQPCKC*KRPLMRKLQNGIDCIQCMQGLLHQAR*ENQGEVT*QELLVPFRNK*HG*QIIHLSQ*ERFIKDG*S*G*IK**ECIALSAFWT*DKDQRNLLETM*TGSIKP*EPNKPHRK*KIG*QKPCWSRMRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,GTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAGCGCGCGCAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAATTTTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT +KX505501.1,gag,336,1824,789,2292,forward,0.3997973809613161,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTGGATAGATGGGAAAAAATTCGGCTAAGGCCAGGAGGAAATAAAAGATATAAACTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGCTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCGACTCTTAAGACAGGATCAGAAGAACTTAAGTCTTTGTATAATACAATAGCAGTCCTCTATTGTGTACATCAAAAAATAGATGTGAAAGACACCAAGGAAGCCTTAGACAAGATAGAAGAAGAGCAAAACAAAATTAGGAAACAAGCACAGCAAACCGCAGCTGCAGGAAGTAGCAGTCAGGTCAGCCACAATTACCCTGTAGTGCAGAATCATCAGGGGCAAATGGTATATCAGGCCCTGTCACCAAGAACTTTAAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGACTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATTAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGATGACAAATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATCCTAGGGTTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAACCCTAAGAGCCGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,rev_exon1,1306,1750,5969,6047,forward,0.758082497212932,MRTQIVKLF,MRTQIVKLF*KH*DQQPH*KK**QHAREWEDPAIKQEFWLKQ*AK*QIQLQ**CRKAILGTKEKLLSASIVAKKGT*PEIAGPLEKRAVGNVEGKDIK*KTVLRDRLIL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLD,ATGCGAACCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCCACACTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAATGACAAATTCAGCTACAGTAATGATGCAGAAAGGCAATTTTAGGAACCAAAGAAAAACTGTTAAGTGCTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACATCAAATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGAT +KX505501.1,vpr,1599,1824,5558,5843,forward,0.7638478800047243,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ,MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*,ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,pol,1627,1927,2084,5096,forward,0.7724330674761569,GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,IL*GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,ATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA +KX505501.1,tat_exon2,1746,1824,8376,8469,forward,0.7616257781032589,RCIRSTTRTADTELFTRDFPLGTFQ,RCIRSTTRTADTELFTRDFPLGTFQ*,AGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG +KX505501.1,rev_exon2,1747,1927,8377,8653,forward,0.7620186257236345,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI,DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*,GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA +KX505501.1,vpu,1748,1778,6259,6310,forward,0.7688723205964585,MHPEYYKDC,MHPEYYKDC*,ATGCATCCGGAGTACTACAAGGACTGCTGA +MN691959,gag,639,2142,789,2292,forward,0.0801186943620179,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ,MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAGATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MN691959,pol,1934,4946,2084,5096,forward,0.054722889368558514,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAGTAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACAAAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN691959,vif,4890,5469,5040,5619,forward,0.09157509157509158,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGAATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAG +MN691959,vpr,5408,5699,5558,5843,forward,0.5391891891891883,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS,MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG +MN691959,tat_exon1,5679,5898,5830,6046,forward,0.28735632183908044,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ,MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ*,ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA +MN691959,rev_exon1,5818,5899,5969,6047,forward,0.4807692307692307,MAGRSGDSDEDLLKTVRLIKFLYQSSK,MAGRSGDSDEDLLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG +MN691959,env,6070,8656,6224,8795,forward,0.13638128518734216,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL,MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL*,ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA +MN691959,vpu,6105,6156,6259,6310,forward,0.2321981424148607,MGVEMGHHAPWDIDDL,MGVEMGHHAPWDIDDL*,ATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG +MN691959,tat_exon2,8237,8333,8376,8469,forward,0.6672629695885509,RPTSQTRGDPTGPKE,RPTSQTRGDPTGPKE*KKKVERETETDPFD**,AGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGA +MN691959,rev_exon2,8238,8514,8377,8653,forward,0.210025203024363,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE,DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE*,GACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAG +MN691959,nef,8657,9278,8796,9417,forward,0.08588605782994552,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC,MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC*,ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGA +MN692074,nef,0,4059,8796,9417,forward,0.763072203234748,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,WKG*FGPRKDKISLICGSTTHKATSLIGRTTHQGRGPDFH*PLDGASS*YQLIQRR*KRPMQERTTACYTL*ACMGWRTRRKKC*CGSLTAA*HFITWPESCIRSTTRIADFELSTRDFPLGTFQGGVAWAGLGSGEPSDAAYKQLLFACTGSLWLDQI*AWELSG*LGNPLLKPQ*SLP*VL*VVCARLLCDSGN*RSLRPF*SVWKISSSGARTGT*KRKRNQRSSLDAGLGLLKRARQEARGGDW*VRQKF*LAEARRREMGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*G*GGN*RKLY*IQEQMIQY*KK*ICQEDGNQK**GELEVLSK*DSMIRYS*KSVDIKL*VQY**DLHLST*LEEIC*LRLVAL*IFPLVLLKLYQ*N*SQEWMAQKLNNGH*QKKK*KH**KFVQKWKRKGKFQKLGLKIHTILQYLP*RKKTVLNGEN**ISENLIRELKTSGKFN*EYHIPQG*ERKNQ*QYWMWGMHIFQFP*IKNLGNILHLPYPVETMRHQGLDISTMCFHRDGKDHQQYSKVA*QKF*SLLENKIQK*LSINTWMIYM*DLI*K*GSIE*K*RN*DNIC*DGDLPHQTKNIRKNPHSFGWVMNSILINGQYSL*CCQKKTAGLSMTYRS*WEN*IGQVRFTQGLK*GNYVNSLGEPKH*QK*YH*QKKQSWNWQKTGKF*KNQYMEYIMTHQKT**QKYRSRGKVNGHIKFIKSHLKI*KQENMQK*GVPTLMM*NN*QRQCKK*PQKA**YGERLLNLNYPYKKKHGKHGGQSIGKPPGFLSGSLSIPLP**NYGTS*RKNP**EQKPSM*MGQLTGRLN*EKQDMLLTEEDKKLSP*LTQQIRRLNYKQFI*LCRIRD*K*T**QTHNMH*,TGGAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAG +MN692074,env,2,4115,6224,8795,forward,0.7604257801108195,MNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,vpu,2,2084,6259,6310,forward,0.7659115426105717,MGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ,EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*,GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAA +MN692074,gag,789,2292,789,2292,forward,0.19470123431286457,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ,MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MN692074,pol,2084,4115,2084,5096,forward,0.5617851221088768,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,TTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,vif,3617,4115,5040,5619,forward,0.7631664499349805,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,vpr,3617,4115,5558,5843,forward,0.7635778016363703,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI,MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*,ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA +MN692074,tat_exon1,3823,4084,5830,6046,forward,0.7659115426105717,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC +MN692074,rev_exon1,3823,4084,5969,6047,forward,0.7610789980732178,MVPVRERTHSRSRNLLCRWGS,MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY,ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC +MN692074,tat_exon2,4080,4164,8376,8469,forward,0.7699443413729128,IPSGEPSDAAYKQLLFACTGSLWLDQI,IPSGEPSDAAYKQLLFACTGSLWLDQI*,ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA +MN692074,rev_exon2,4081,4153,8377,8653,forward,0.7667894365645325,YPVASPQMLHISSCFLPVLGLSG,YPVASPQMLHISSCFLPVLGLSG*,TACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAG +MN692145,gag,775,2281,789,2292,forward,0.20784453738651432,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ,MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ*,ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATCAATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATATAGTATGGGCAAGCAAGGAACTAGAACGATTTGCAGTTAATCCTGGCCTGTTAGAAACAACAGAAGGATGTAGACAAATACTGGGACAGCTACAACCATCTCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCATTAGATAAGATAGAGGAAGAGCAAAACAAGAGTAAGGAAAAAGCAAAACAAGCAGCAGCTGACACAGGAAACAGCAACCAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTTCAGGGCCAAATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACCACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGATATAAGACAAGGACCAAAAGAATCCTTTAGAGATTATGTAGACCGGTTCTACAAAACTCTAAGAGCTGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCAAACCCAGATTGTAAGACTATCTTAAAAGCATTGGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTCTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAGCCAAAGGAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAGGGGCATATAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAAAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA +MN692145,pol,2070,5085,2084,5096,forward,0.14843087362171337,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED,FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCCCACAAGGGGAGGCCAGGAAATTTCCTTCAGAGCAGGCCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGTCGACAGGCAAGGAACTGTATCCTCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATTAAAGTAAGACAGTATGATCAGATATCCATAGAAATCTGTGGACATAAAGCTATAGGGACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAAATTGGTTGTACTTTAAATTTCCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAAGATTTCTGGGAAGTTCAATTAGGGATACCACATCCAGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAGGATTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAATGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAAAGCTGGACTGTCAATGACATACAAAAGTTAGTGGGTAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATCAAAGTAAAACAATTATGTAAACTTCTTAGGGGAGCCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTTTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAGCCACTGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAGGTTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCATCTAATAGAGAGACTAAATTAGGAAAAGCAGGATATGTTACTGACAGAGGAAGACAGAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTCATCTAGCTTTGCAGGATTCAGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAACAGTTAATAAACAAGGAAAAGGTCTACTTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTTAGTACTGGAATTAGAAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAAATACCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTGGTAGCAAGAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGACAGGAAACAGCATACTTTCTCTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGTAATTTCACTAGTACTACAGTTAAGGCCGCCTGTTGGTGGGCGGGGATCAAGCAGGAGTTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGACCAGGCTGAACATCTTAAAACAGCAGTACAAATGGCAGTATTCATTCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTGATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGTAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN692145,vif,5029,5608,5040,5619,forward,0.2608047690014903,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH,MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAAGGGATGGTTTTATAGACATCACTATGAAAGCCATCATCCAAGAATAAGTTCAGAAGTACATGTCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGATTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAGGAGATATAGCACACAAGTAGACCCCAACCTAGCAGACCAACTGATTCATCTGTATTACTTTGATTGTTTTTCAGAATCTGCTATAAGAAGTGCCATATTAGGACATATAGTTAGTCCTAGTTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAGTACTTGGCATTAGCAGCATTAATAACACCAAAAAGGAGAAAGCCCCCTTTGCCTAGTGTTGCAAAGCTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAG +MN692145,vpr,5547,5838,5558,5843,forward,0.587876570313453,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS,MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG +MN692145,tat_exon1,5818,6037,5830,6046,forward,0.28735632183908044,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ,MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*,ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA +MN692145,rev_exon1,5957,6038,5969,6047,forward,0.4274965800273598,MAGRSGDSDEELLKTVRLIKFLYQSSK,MAGRSGDSDEELLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG +MN692145,env,6212,8783,6224,8795,forward,0.47520309038232134,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL,MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*,ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA +MN692145,vpu,6253,6298,6259,6310,forward,0.3649167733674775,MEMGHHAPWDVDDL,MEMGHHAPWDVDDL*,ATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG +MN692145,tat_exon2,8364,8460,8376,8469,forward,0.3921568627450981,RPASQPRGDPTGPKESKKKVERETETDPLH,RPASQPRGDPTGPKESKKKVERETETDPLH**,AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA +MN692145,rev_exon2,8365,8641,8377,8653,forward,0.29843322556577967,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE,DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE*,GACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAG +MN692145,nef,8784,9387,8796,9417,forward,0.4049958673891082,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC,MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC*,ATGGGGGGCAAGTGGTCAAAAAGTAGTTTGGTTGGATGGCCTAATGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCGAGACTTGGAAAGACATGGAGCAATCACAAGTAGTAATACAGCAACTAACAATGCTGATTGTGCCTGGCTAGAAGCACAAAAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGAGCCCACTGACAGAGAGAATGACAGATTGCTGCACCCTGCAAGCCTGCAGGGGATGGAAGACCCGGAGGGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTGGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAACTGCTGA +MN090335,gag,315,1665,789,2292,forward,0.596665989022159,MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ,MYTIEGGYCII**SKFF*SCLKGWL*LFQYLFTAS*CF*QARINCESF*FPACPYYMF*SIFFLSPWP*PNFLPSI*FSPAQY*RSRTHLSPSSLR*SKMAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ*,ATGTACACAATAGAGGGTGGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTTCCAATATCTGTTTACAGCCTCCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGTTCCCTGCTTGCCCATACTATATGTTTTAATCTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTCTCCCATCTATCTAATTCTCCCCCGCTCAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAAATGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCACCCCACAAGATTTGAACACTATGCTAAACACAGTGGGTGGACACCAAGCAGCTATGCAAATGTTAAAAGAGGTCATCAATGAGGAAGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACACATAATCCACCTATCCCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGACTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTTTGGACATAAAGCAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTTTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAGAATGCAAATCCAGATTGTAAAACTATTTTAAAAGCATTAGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGCAACAGGTACAGCCAACATACTGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGCCTGTTAAATGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAATGAAGGATTGCACTGAGAGACAGGCTAATTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAA +MN090335,pol,1427,4469,2084,5096,forward,0.27887169154684477,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED*,TTTTTTAGGGAAAATCTGGCCTTCTCACAAGGGAAGGCCAGGGAACTTTCCTCAGAGCCGACCAGAAATTCCTCAGAGCAGACCAGAGCCAACAGCCCCACCAGCCCCTCCAGAAGAGAGCTTCAGGTTTGGGGAAGCAACAACTCCCTCTCAGAAGCAGGAGACAATGGACAAGGAGCTGTATCCTCCTCTAACTTCCCTCAGATCACTCTTTGGCAACGACCCATCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTTTATTAGATACAGGAGCAGATGATACAGTATTAGAGGACATAGATTTGCCAGGAAAATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGAACAGGTACCCATAGAAATTAGCGGACACAAAGCTATAGGTACAGTGTTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGCTTGGTTGCACTCTAAATTTTCCCATCAGCCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGACGGCCCAAAGGTTAAACAATGGCCATTAACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTGTTTGCCATAAAGAAAAAAGATGGCACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGAATTCTGGGAAATTCAATTAGGTATACCACATCCTGCAGGGCTAAAAAAGAAAAAATCAGTCACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTATAATGTGCTTCCACAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAGATCCTGGAGCCTTTTAGAAAGACAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAACATAGAACAAAAGTAGAGGAACTGAGGCAACATCTGATGAGGTGGGGATTTACCACACCAGACAAAAAGCACCAGAAAGAACCCCCATTCCTTTGGATGGGGTATGAACTCCATCCTAATAAATGGACAGTACAGCCTATAGTACTGCCAGAAAAGGAAAGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTGAAGCAATTATGTAAGCTCCTTAGGGGAACCAAAGCATTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGATCCGGTACATGGAGCATATTATGACCCAGCAAAGGACTTAATAGCAGAACTACAGAAGCAGGGAGAAGGTCAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCCATACTAATGATGTAAAGCAATTAACAGAGGCAGTGCAAAAAATATCCACAGAAAGTATAGTAATATGGGGAAGGACTCCTAAATTTAGATTACCCATACAAAAAGAAACATGGGAAACATGGTGGACAGATTATTGGCAAGCCACCTGGATCCCTGAGTGGGAGTTTGTCAACACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAAAAAGAACCCATAATAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAATAGGGATAATAAATCAGGAAAAGCAGGATATGTTACTGATAGAGGAAGACAAAAGGTTGTCACCATAAATGACACAACAAATCAGAAAACTGAATTACAAGCCATTCATCTAGCCTTGCAGGATTCGGGGCTAGAAGTAAACATAGTAACAGACTCACAGTATGCATTAGGGATCATTCAAGCACAACCAGATAAAAGTGAATCAGAGTTAGTCAGTCAAATAATAGAACAGTTAATAAAAAAGGAAAAGGTCTACCTGGCATGGGTGCCAGCCCACAAAGGAATTGGAGGAAATGAACAGGTAGATAAACTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGAAAAAGCCCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAACAATGGCTAGTGATTTTAACCTACCACCTATAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCTATGCATGGGCAAGTAGACTGTAGTCCAGGTATATGGCAGCTAGATTGTACACATTTAGAAGAAAAAATTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTGATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTTATTTTAAAACTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCCGCAATTTCACCAGTAATACAGTGAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGTATTCCCTACAATCCCCAGAGTCAAGGAGTAGTAGAATCTATGAATAATGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGATACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGAAATACAGACTAAAGAATTACAAAAACAAATTACAAAAATTCCAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAACAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAAATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG +MN090335,vif,4413,4992,5040,5619,forward,0.3566796368352788,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH,MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH*,ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAATACCATATGTATATTTCAAAGAAAGCCAAAAAATGGGCTTATAGACACCATTACGAAACCACTCATCCAAGAATAAGTTCAGAAGTACATATCCCACTAGGGGATGATAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAGAGAGACTGGCATTTGGGCCAGGGAGTCTCCATAGAATGGAGAAAAAGGAGATATAGCACACAAGTAGACCCTGACTTAGCAGACCATCTAATTCATGTGCATTATTTTGATTGTTTTTCAGAATCTGCTATAAGAAATGCCATATTAGGACATAGAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAACAGCAGTAATAACACCAAAGAAGATAAAGCCACCTTTGCCTAGTGTAAAGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAG +MN090335,vpr,4931,5222,5558,5843,forward,0.6083541998634192,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS,MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*,ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG +MN090335,tat_exon1,5202,5421,5830,6046,forward,0.40192926045016075,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ,MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ*,ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAA +MN090335,rev_exon1,5341,5422,5969,6047,forward,0.524971623155505,MAGRSGDRDEDLLKTVRLIKFLYQSSK,MAGRSGDRDEDLLKTVRLIKFLYQSSK,ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAG +MN090335,env,5596,8158,6224,8795,forward,0.5139610675592354,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ,MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ*,ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA +MN090335,vpu,5643,5682,6259,6310,forward,0.5690703735881842,MGHDAPWDVDDL,MGHDAPWDVDDL*,ATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG +MN090335,tat_exon2,7739,7835,8376,8469,forward,0.6842105263157894,RPSSQPRGDQTGPKE,RPSSQPRGDQTGPKE*KKKVERETEADPED**,AGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGA +MN090335,rev_exon2,7740,8016,8377,8653,forward,0.4267425320056898,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE,DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE*,GACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAG +MN090335,nef,8159,8813,8796,9417,forward,0.49485619884358334,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC,MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC*,ATGGGTGGCAAGTGGTCAAAAGGTTGTATGGCTGGATGGCCTACTGTAAGGGAAAGAATGGAAAGAATTGATCCAAGGCCTGCTGCAAGGAGGGAACAAGCTGAGCCAGCAGCAGCTGGGGTAGGAGCAGCATCTCGAGACTTGGAAAAATATGGAGCAATCACAAGTAGCAATACATCAACTACTAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAAGTAGGCTTTCCAGTCAGACCTCAGGTACCTTTAAGACCAATGACTTACAAAGCAGCTTTGGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTTGGTCCCAGAGAAGACAAGAAATCCTTGATTTGTGGGTTTACCACACACAAGGATACTTCCCTGATTGGCAAAACTACACACCAGGACCAGGGGTCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGACCAAGTAGAGAAGGCCAATGAAGGAGAGAACAACTGCTTGCTACACCCTATGAGCCTGCATGGGATGGAAGACTCAGAGGGAGAAGTGTTAATGTGGAAGTTTGACAGCAGCCTGGCGTTTCATCATAGAGCCCGAGAACAACATCCGGAGTACTACAAAGACTGCTGA diff --git a/tests/expected-results-small/errors.json b/tests/expected-results-small/errors.json index 741e4e6..d1facf1 100644 --- a/tests/expected-results-small/errors.json +++ b/tests/expected-results-small/errors.json @@ -3,52 +3,47 @@ { "sequence_name": "KX505501.1", "error": "DeletionInOrf", - "message": "ORF pol at 2084-5095 can have maximum deletions 30, got 2721" + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 2721" }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6323" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6323" }, { "sequence_name": "KX505501.1", "error": "InsertionInOrf", - "message": "Smaller ORF vif at 5040-5618 can have maximum insertions 90, got 909" + "message": "Smaller ORF vif at 5040-5619 can have maximum insertions 90, got 909" }, { "sequence_name": "KX505501.1", - "error": "DeletionInOrf", - "message": "Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 84" + "error": "FrameshiftInOrf", + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 195 positions." }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893" }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon1 at 5969-6044 contains an internal stop codon at 6005" - }, - { - "sequence_name": "KX505501.1", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" + "message": "Smaller ORF rev_exon1 at 5969-6047 contains an internal stop codon at 6005" }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 71 positions." + "message": "Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 40 positions." }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 96" + "message": "Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 96" }, { "sequence_name": "KX505501.1", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1116" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1116" }, { "sequence_name": "KX505501.1", @@ -70,64 +65,59 @@ { "sequence_name": "MN691959", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MN691959", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 129" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MN691959", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 249" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 249" } ], "MN692074": [ { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "ORF pol at 2084-5095 can have maximum deletions 30, got 981" + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 981" }, { "sequence_name": "MN692074", "error": "InternalStopInOrf", - "message": "ORF env at 6224-8794 contains an internal stop codon at 6551" + "message": "ORF env at 6224-8795 contains an internal stop codon at 6551" }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF vif at 5040-5618 can have maximum deletions 30, got 81" + "message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81" }, { "sequence_name": "MN692074", "error": "InsertionInOrf", - "message": "Smaller ORF vpr at 5558-5849 can have maximum insertions 90, got 234" + "message": "Smaller ORF vpr at 5558-5843 can have maximum insertions 90, got 261" }, { "sequence_name": "MN692074", "error": "InternalStopInOrf", - "message": "Smaller ORF tat_exon1 at 5830-6044 contains an internal stop codon at 5893" + "message": "Smaller ORF tat_exon1 at 5830-6046 contains an internal stop codon at 5893" }, { "sequence_name": "MN692074", - "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6061-6309 contains out of frame indels that impact 168 positions." + "error": "InsertionInOrf", + "message": "Smaller ORF vpu at 6259-6310 can have maximum insertions 90, got 108" }, { "sequence_name": "MN692074", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon2 at 8376-8468 contains out of frame indels that impact 76 positions." + "message": "Smaller ORF tat_exon2 at 8376-8469 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8377-8652 can have maximum deletions 30, got 204" + "message": "Smaller ORF rev_exon2 at 8377-8653 can have maximum deletions 30, got 204" }, { "sequence_name": "MN692074", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 1131" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 1131" }, { "sequence_name": "MN692074", @@ -144,39 +134,29 @@ { "sequence_name": "MN692145", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MN692145", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 75 positions." }, { "sequence_name": "MN692145", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 231" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 231" } ], "MN090335": [ { "sequence_name": "MN090335", "error": "InternalStopInOrf", - "message": "ORF gag at 789-2291 contains an internal stop codon at 822" + "message": "ORF gag at 789-2292 contains an internal stop codon at 822" }, { "sequence_name": "MN090335", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpr at 5558-5849 contains out of frame indels that impact 78 positions." - }, - { - "sequence_name": "MN090335", - "error": "InsertionInOrf", - "message": "Smaller ORF vpu at 6061-6309 can have maximum insertions 90, got 198" + "message": "Smaller ORF vpr at 5558-5843 contains out of frame indels that impact 76 positions." }, { "sequence_name": "MN090335", "error": "InsertionInOrf", - "message": "Smaller ORF nef at 8796-9416 can have maximum insertions 90, got 282" + "message": "Smaller ORF nef at 8796-9417 can have maximum insertions 90, got 282" }, { "sequence_name": "MN090335", diff --git a/tests/expected-results-small/holistic.json b/tests/expected-results-small/holistic.json index 129ed68..f80f6bf 100644 --- a/tests/expected-results-small/holistic.json +++ b/tests/expected-results-small/holistic.json @@ -6,9 +6,9 @@ "blast_matched_qlen": 1997, "blast_sseq_coverage": 0.2498199403230785, "blast_qseq_coverage": 1.2158237356034052, - "blast_sseq_orfs_coverage": 0.17663960024984385, + "blast_sseq_orfs_coverage": 0.17661753684736448, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN691959": { @@ -20,7 +20,7 @@ "blast_qseq_coverage": 1.1086063415148004, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN692074": { @@ -30,9 +30,9 @@ "blast_matched_qlen": 4178, "blast_sseq_coverage": 0.5041670953801831, "blast_qseq_coverage": 1.1728099569171853, - "blast_sseq_orfs_coverage": 0.4114928169893816, + "blast_sseq_orfs_coverage": 0.41144141893579816, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 4 }, "MN692145": { @@ -44,7 +44,7 @@ "blast_qseq_coverage": 1.1271545051088863, "blast_sseq_orfs_coverage": 1.0, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 }, "MN090335": { @@ -54,9 +54,9 @@ "blast_matched_qlen": 9069, "blast_sseq_coverage": 0.9842576396748637, "blast_qseq_coverage": 1.0603153600176425, - "blast_sseq_orfs_coverage": 1.0003747657713928, + "blast_sseq_orfs_coverage": 1.0003747189607795, "orfs_start": 789, - "orfs_end": 8794, + "orfs_end": 8795, "blast_n_conseqs": 3 } } \ No newline at end of file diff --git a/tests/expected-results-small/orfs.json b/tests/expected-results-small/orfs.json index 07186e6..acad930 100644 --- a/tests/expected-results-small/orfs.json +++ b/tests/expected-results-small/orfs.json @@ -5,7 +5,7 @@ "start": 0, "end": 1824, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7626080297560442, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -17,7 +17,7 @@ "start": 0, "end": 1824, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7647696476964769, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -29,7 +29,7 @@ "start": 0, "end": 1824, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.7645782478980201, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -41,7 +41,7 @@ "start": 1, "end": 1750, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", "distance": 0.7680130480667754, "protein": "MRKLQNGIDCIQCMQGLLHQAR", @@ -53,7 +53,7 @@ "start": 336, "end": 1824, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.3997973809613161, "protein": "MGARASVLSGGELDRWEKIRLRPGGNKRYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPTLKTGSEELKSLYNTIAVLYCVHQKIDVKDTKEALDKIEEEQNKIRKQAQQTAAAGSSSQVSHNYPVVQNHQGQMVYQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMSQMTNSATVMMQKGNFRNQRKTVKCFNCGKEGHIARNCRAPRKKGCWKCGREGHQMKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", @@ -65,7 +65,7 @@ "start": 1306, "end": 1750, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", "distance": 0.758082497212932, "protein": "MRTQIVKLF", @@ -75,21 +75,21 @@ { "name": "vpr", "start": 1599, - "end": 1749, + "end": 1824, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.7677189534455227, - "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR", - "aminoacids": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTR", - "nucleotides": "ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGA" + "distance": 0.7638478800047243, + "protein": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ", + "aminoacids": "MKDCTERQANFVGKIWPSHKGRPGNFLQNRPEPTAPPAESFRFGEETPTRCIRSTTRTADTELFTRDFPLGTFQ*", + "nucleotides": "ATGAAAGACTGTACTGAGAGACAGGCTAATTTTGTAGGGAAAATCTGGCCTTCCCACAAGGGAAGGCCGGGGAATTTCCTTCAGAACAGACCAGAGCCAACAGCCCCACCAGCAGAGAGCTTCAGGTTTGGGGAAGAGACACCAACTAGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG" }, { "name": "pol", "start": 1627, "end": 1927, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.7724330674761569, "protein": "GKSGLPTREGRGISFRTDQSQQPHQQRASGLGKRHQLDASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", @@ -101,36 +101,36 @@ "start": 1746, "end": 1824, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.7616257781032589, "protein": "RCIRSTTRTADTELFTRDFPLGTFQ", "aminoacids": "RCIRSTTRTADTELFTRDFPLGTFQ*", "nucleotides": "AGATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAG" }, - { - "name": "vpu", - "start": 1747, - "end": 1927, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.769715460635405, - "protein": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", - "aminoacids": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*", - "nucleotides": "GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA" - }, { "name": "rev_exon2", "start": 1747, "end": 1927, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.7620186257236345, "protein": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI", "aminoacids": "DASGVLQGLLTLSFSQGTFRWGLSSRGVAWAGLGSGEPSDAAYKQLLSACTRSLWLDQI*", "nucleotides": "GATGCATCCGGAGTACTACAAGGACTGCTGACACTGAGCTTTTCACAAGGGACTTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCAGATCTGA" + }, + { + "name": "vpu", + "start": 1748, + "end": 1778, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.7688723205964585, + "protein": "MHPEYYKDC", + "aminoacids": "MHPEYYKDC*", + "nucleotides": "ATGCATCCGGAGTACTACAAGGACTGCTGA" } ], "MN691959": [ @@ -139,7 +139,7 @@ "start": 639, "end": 2142, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.0801186943620179, "protein": "MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALEKIEEEQNKSKKKAQQAAADTGHSNQISQNYPIVQNIQGQVVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNXXXPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKECTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPSQKQEPIDKELYPLTSLRSLFGNDPSSQ", @@ -151,7 +151,7 @@ "start": 1934, "end": 4946, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.054722889368558514, "protein": "FFREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFNFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTRALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKITTESIVIWGKTPRFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNKGRQKVVPLTNTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKILFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSATVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -163,7 +163,7 @@ "start": 4890, "end": 5469, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.09157509157509158, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKARGWFYRHHYERTHPRISSEVYIPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPELADQLIHLYYFDCFSDSAIRKALLGHIVSPRCEYQAGHNKVGSLQYLALAALITPKKIKPPLPSVTKLTEDRWNKPQKTKGHRGSHTMNGH", @@ -173,69 +173,69 @@ { "name": "vpr", "start": 5408, - "end": 5702, + "end": 5699, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.5562531391260666, + "distance": 0.5391891891891883, "protein": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS", - "aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*T", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACT" + "aminoacids": "MEQAPEDQGPQREPHNEWTLELLEELKNEAVRHFPRIWLHGLGQHIYETYGDTWAGVEAIIRILQQLLFIHFRIGCRHSRIGVTQQKRARNGASRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAG" }, { "name": "tat_exon1", "start": 5679, - "end": 5895, + "end": 5898, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ", - "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPVDPRLEPWKHPGSQPKTACTTCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRPPQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5818, - "end": 5896, + "end": 5899, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.48200514138817474, - "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5910, - "end": 6156, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.53246275519588, - "protein": "SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL", - "aminoacids": "MQPIQIAIVALVVAIIIAIVV*SIVIIEYRKILRQRKIDRLIDRLIERAEDSGNESEGEVSALVEMGVEMGHHAPWDIDDL*", - "nucleotides": "ATGCAACCTATACAAATAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG" + "distance": 0.4807692307692307, + "protein": "MAGRSGDSDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6070, "end": 8656, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.13638128518734216, "protein": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL", "aminoacids": "MRVKEKCQHLWRWGWRWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDAEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRNKVQKEYAFFYKLDIVPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPNPGGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGAYRAIRHIPRRIRQGLERILL*", "nucleotides": "ATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGCTACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATACGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAA" }, + { + "name": "vpu", + "start": 6105, + "end": 6156, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.2321981424148607, + "protein": "MGVEMGHHAPWDIDDL", + "aminoacids": "MGVEMGHHAPWDIDDL*", + "nucleotides": "ATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATATTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8237, "end": 8333, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.6672629695885509, "protein": "RPTSQTRGDPTGPKE", @@ -247,7 +247,7 @@ "start": 8238, "end": 8514, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.210025203024363, "protein": "DPPPKPGGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE", @@ -259,7 +259,7 @@ "start": 8657, "end": 9278, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.08588605782994552, "protein": "MGGKWSKSSVIGWPTVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAATNAACAWLEAQEEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQRRQDILDLWIYHTQGYFPDWQNYTPGPGIRYPLTFGWCYKLVPVEPDKVEEANKGENTSLLHPVSLHGMDDPEREVLEWRFDSRLAFHHVARELHPEYFKNC", @@ -273,7 +273,7 @@ "start": 0, "end": 4059, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.763072203234748, "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -285,19 +285,31 @@ "start": 2, "end": 4115, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.7604257801108195, "protein": "MNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", "aminoacids": "EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*", "nucleotides": "GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCCACAAGGGAAGGCCAGGGAATTTCCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCGCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAGCTTCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAGGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAGAAAGAAAAAATCAGTAACAGTACTGGATGTGGGGGATGCATATTTTTCAGTTCCCTTAGATAAAGAATTTAGGAAATATACTGCATTTACCATACCCAGTAGAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATTTTAGAGCCTTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACATGGATGATTTATATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATAAAAATAGAGGAACTGAGACAACATCTGTTGAGATGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCCCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGCACTAACAGAAATAGTACCACTAACAAAAGAAGCAGAGCTGGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGTCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAAAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA" }, + { + "name": "vpu", + "start": 2, + "end": 2084, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.7659115426105717, + "protein": "MGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ", + "aminoacids": "EGLIWSQKRQDILDLWIYHTQGYFPDWQNYTPGPGTRFPLTFGWCFKLVPVDPEKVEEANAGENNSLLHPISLHGMEDPEKEVLMWKFDSRLAFHHMARELHPEYYKNC*LRAIYKGLSAGDFPGRRGLGGTGEWRALRCCI*AAAFCLYWVSLVRPDLSLGALWLAREPTA*ASIKLALSALSSVCPSVV*LW*LEIPQTLLVSVENL*QWRPNRDLKAKEKPEELSRRRTRLAEARTARGEGRRLVSTPKILTSGG*KERDGCESVSIKCGRIRQMGKNSVKARGKEKI*IKTYSMGKQGARTIRS*SWPVRNIRRL*TNTGTATTIPSDRIRRT*III*YNSNPLLCASKDRGKRHQGSFREGRGRAKQK*EKGTASSS*HRKQQPGQPKLPYSAEHPGANGTSGHIT*NFKCMGKSSRREGFQPRSNTHVFSIIRRSHPTRFKHHAKHSGGTSSSHANVKRDHQ*GSCRMG*IAPSACRAYCTRPDERAKGK*HSRNY*YPSGTNRMDDK*STYPSRRNL*KMDNHGIK*NSKDV*SYQHSGHKTRTKGTL*RLCRSVL*NSKSRASFTGGKKLDDRNLVGPKCEPRL*DYFESIGTSRYIRRNDDSMSGSGRTRP*SKSFGGSNEPSNKFSYHNDAERQF*EPKKEC*VFQLWQRRAHSQKLQGP*EKGLLEMWKGRTPNERLY*ETG*", + "nucleotides": "GAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCACTGACCTTTGGATGGTGCTTCAAGCTAGTACCAGTTGATCCAGAGAAGGTAGAAGAGGCCAATGCAGGAGAGAACAACAGCTTGTTACACCCTATAAGCCTGCATGGGATGGAGGACCCGGAGAAAGAAGTGTTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAGAATTGCTGACTTCGAGCTATCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTTAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGAGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGCGGGAGAATTAGACAAATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAACCCTCTATTGTGTGCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGATTGCACCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAGCCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCATGGGATTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTGAAAGCATTGGGACCAGCCGCTACATTAGAAGAAATGATGACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCGGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAAAGGCAATTTTAGGAACCAAAAAAGGAATGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCCCCTAGGAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAA" + }, { "name": "gag", "start": 789, "end": 2292, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.19470123431286457, "protein": "MGARASVLSAGELDKWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIATLYCVHQKIEVKDTKEALEKVEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQKGNFRNQKRNVKCFNCGKEGHIARNCRAPRKRGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPAQKQEPIDKELYPLASLRSLFGNDPSSQ", @@ -309,7 +321,7 @@ "start": 2084, "end": 4115, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.5617851221088768, "protein": "FFREDLAFPQGKAREFPSEQTRANSPTRRELQVWGRDNNSRSEAGADRQGTVSFSFPQITLWQRPLVTIRIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLRKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEIVPLTKEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYAKMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -321,7 +333,7 @@ "start": 3617, "end": 4115, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.7631664499349805, "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", @@ -331,21 +343,21 @@ { "name": "vpr", "start": 3617, - "end": 4085, + "end": 4115, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.7632679688399402, - "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT", - "aminoacids": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDT", - "nucleotides": "ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACC" + "distance": 0.7635778016363703, + "protein": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI", + "aminoacids": "MRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDTQWRALRCCI*", + "nucleotides": "ATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATACCCAGTGGCGAGCCCTCAGATGCTGCATATAA" }, { "name": "tat_exon1", "start": 3823, "end": 4084, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", "distance": 0.7659115426105717, "protein": "MVPVRERTHSRSRNLLCRWGS", @@ -357,31 +369,19 @@ "start": 3823, "end": 4084, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.7645569620253164, + "distance": 0.7610789980732178, "protein": "MVPVRERTHSRSRNLLCRWGS", "aminoacids": "MVPVRERTHSRSRNLLCRWGS*QGD*IRKSRICY*QRKTKSCLPN*HNKSED*ITSNSSSFAGFGIRSKHSNRLTICIRNHSSTTRY", "nucleotides": "ATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAACAGAGGAAGACAAAAAGTTGTCTCCCTAACTGACACAACAAATCAGAAGACTGAATTACAAGCAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATAC" }, - { - "name": "vpu", - "start": 4080, - "end": 4164, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.7708418891170431, - "protein": "IPSGEPSDAAYKQLLFACTGSLWLDQI", - "aminoacids": "IPSGEPSDAAYKQLLFACTGSLWLDQI*", - "nucleotides": "ATACCCAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGA" - }, { "name": "tat_exon2", "start": 4080, "end": 4164, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.7699443413729128, "protein": "IPSGEPSDAAYKQLLFACTGSLWLDQI", @@ -393,7 +393,7 @@ "start": 4081, "end": 4153, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.7667894365645325, "protein": "YPVASPQMLHISSCFLPVLGLSG", @@ -407,7 +407,7 @@ "start": 775, "end": 2281, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.20784453738651432, "protein": "MGARASVLSGGKLDQWEKIRLRPGGKKKYQLKHIVWASKELERFAVNPGLLETTEGCRQILGQLQPSLQTGSEELKSLYNTVATLYCVHQRIDVKDTKEALDKIEEEQNKSKEKAKQAAADTGNSNQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRSQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQESTGKELYPPLASLRSLFGNDPSSQ", @@ -419,7 +419,7 @@ "start": 2070, "end": 5085, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.14843087362171337, "protein": "FFRENLAFPQGEARKFPSEQARANSPTRRELQVWGRDNNSLSEAGVDRQGTVSSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQISIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARVRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFRLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGASNRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVNQIIEQLINKEKVYLAWVPAHKGIGGNEQVDKLVSTGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAREIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKVKIIRDYGKQMAGDDCVASRQDED", @@ -431,7 +431,7 @@ "start": 5029, "end": 5608, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.2608047690014903, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKHHMYVSGKAKGWFYRHHYESHHPRISSEVHVPLGDARLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPNLADQLIHLYYFDCFSESAIRSAILGHIVSPSCEYQAGHNKVGSLQYLALAALITPKRRKPPLPSVAKLTEDRWNKPQKTKGHRGSHIRNGH", @@ -443,9 +443,9 @@ "start": 5547, "end": 5838, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6002510555745751, + "distance": 0.587876570313453, "protein": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS", "aminoacids": "MEQAPEDQGPPREPYQEWTLELLEELKNEAVRHFPRMWLHGLGQHIYDTYGDTWTGVEAIIRILQQLLFIHFRIGCQHSRIGIIRQRRTRNGANRS*", "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACCGAGGGAGCCATATCAGGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATGTGGCTCCATGGCTTAGGGCAACATATCTATGACACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTCATTCATTTCAGGATTGGGTGTCAACATAGCAGAATAGGCATTATTCGCCAGAGGAGAACAAGAAATGGAGCCAATAGATCCTAG" @@ -453,69 +453,69 @@ { "name": "tat_exon1", "start": 5818, - "end": 6034, + "end": 6037, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.31992687385740404, + "distance": 0.28735632183908044, "protein": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ", - "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAG" + "aminoacids": "MEPIDPRLEPWKHPGSQPKTACTSCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRASQDSQTHQVSLSKQ*", + "nucleotides": "ATGGAGCCAATAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGCCAGCCTAAGACTGCTTGTACCTCTTGCTATTGTAAAAAGTGTTGCTTTCATTGTCAAGTCTGTTTCATGACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5957, - "end": 6035, + "end": 6038, "subtype_start": 5969, - "subtype_end": 6044, - "orientation": "forward", - "distance": 0.4267425320056898, - "protein": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGT" - }, - { - "name": "vpu", - "start": 6049, - "end": 6298, - "subtype_start": 6061, - "subtype_end": 6309, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5326633165829145, - "protein": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL", - "aminoacids": "MNSLQISAIVAIVVAIILAIVVWSIVFIEYRKILRQRKIDRLIDRIRERAEDSGNESEGDQEELSTALMEMGHHAPWDVDDL*", - "nucleotides": "ATGAACTCTTTACAAATATCAGCAATAGTAGCAATAGTAGTAGCAATAATACTAGCAATAGTTGTGTGGTCTATAGTATTCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" + "distance": 0.4274965800273598, + "protein": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDSDEELLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTTCTCAAGACAGTCAGACTCATCAAGTTTCTCTATCAAAGCAGTAAG" }, { "name": "env", "start": 6212, "end": 8783, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.47520309038232134, "protein": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL", "aminoacids": "MRVKGIRKNYQQHLWRWGTMLLGMLMICSAAGNLWVTVYYGVPVWKEATTTLFCASDAKAYGTEVHNVWATHACVPTDPNPQEVRLENVTENFNMWRNPMVEQMHEDIISLWDQSLKPCVKLTPLCVTLNCTDLGQESTNNNITGSIEKGEMKNCSFNISSSKGSKGHKEYASFYKLDVVPIGNNKTNGNYSSYRLISCNTSVITQACPKVSFEPIPIHFCAPAGYAILKCNDKQFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEITCTRPGNNTIRGVHMGPGKAFYAGAITGDIRKAHCIINKAKWNSTLRLVVAKLSEQFRNKTINFTQPSGGDPEIVTHSFNCGGEFFYCNTTKLFNSTWNATHMINGTWNDTEGNITLPCRIKQIINMWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGKTNESNTTEIFRPGGGDMRDNWRSELYKYKVVRIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGATSMALTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGVKQLQARVLALERYLKDQQLLGFWGCSGKLICTTDVPWNASWSNKSLGQIWENMTWMEWEREIDNYTSLIYTLITESQIQQEQNEKELLELDKWANLWNWFGITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTRFPAPRGPDRPEGIEEEGGERDRDRSAPLVKGFLALIWVDLRSLCLFSYHRLRDLLLIITRIVELLGRRGWEVLKYLWSLLQYWTQELKNSAVSLLNATAIAVAEGTDRIIEVLQRVFRAILHIPTRIRQGFERALL*", "nucleotides": "ATGAGAGTGAAGGGGATCAGGAAGAACTATCAACAGCACTTATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCAGGCAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAAGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGGTACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTGAGATTGGAAAATGTGACAGAAAATTTTAATATGTGGAGAAATCCAATGGTAGAACAGATGCATGAGGATATAATTAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAGTTGACCCCACTCTGTGTCACTTTAAATTGCACTGATTTGGGACAGGAGAGTACCAATAATAATATCACAGGAAGTATAGAGAAAGGAGAAATGAAAAACTGCTCTTTCAATATCTCCTCAAGCAAAGGAAGTAAGGGGCATAAAGAATATGCATCTTTTTATAAACTTGATGTAGTACCAATAGGTAATAATAAGACTAATGGTAATTATTCTAGCTATAGGTTGATAAGTTGTAATACCTCAGTCATTACACAGGCCTGTCCAAAAGTATCATTTGAGCCAATTCCCATACATTTTTGTGCCCCGGCTGGGTATGCGATTCTAAAGTGTAATGATAAACAATTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAGTGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGTAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGCCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGTAGAAATTACTTGTACAAGACCCGGTAACAATACAATAAGAGGGGTTCACATGGGACCAGGGAAGGCATTTTATGCAGGAGCAATAACAGGAGATATAAGAAAAGCACATTGTATCATTAATAAAGCAAAATGGAACAGTACTTTAAGACTGGTAGTTGCAAAATTAAGTGAGCAATTTAGGAATAAAACAATAAACTTTACTCAACCCTCAGGAGGGGACCCAGAAATTGTAACACACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATACAACAAAACTGTTTAATAGTACTTGGAATGCAACACACATGATTAATGGTACTTGGAATGATACTGAGGGAAATATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGAGGACAAATTAGATGTTCATCCAATATTACAGGGTTGCTATTAACAAGAGATGGTGGTAAGACTAACGAGAGCAACACCACCGAGATCTTTAGACCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGGGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTCTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAACGTCGATGGCGTTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGAGAGCTATTGAGGCGCAACAACATTTGTTGCAACTCACAGTCTGGGGCGTCAAGCAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGTTTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGATGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGGACAAATTTGGGAGAACATGACCTGGATGGAGTGGGAAAGAGAAATTGATAATTACACAAGCTTAATATACACTTTAATTACAGAATCGCAGATCCAACAAGAACAGAATGAAAAAGAATTGTTGGAATTGGATAAATGGGCAAATTTGTGGAATTGGTTTGGCATAACAAACTGGCTGTGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCGTTTCAGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGAAAGGATTCTTAGCACTTATCTGGGTCGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATAATAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGTCCTCAAATACTTGTGGAGTCTCCTACAGTATTGGACACAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGATTATAGAAGTATTACAAAGAGTTTTTAGAGCTATCCTCCATATCCCTACAAGAATAAGACAGGGCTTTGAAAGGGCTTTGCTATAA" }, + { + "name": "vpu", + "start": 6253, + "end": 6298, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.3649167733674775, + "protein": "MEMGHHAPWDVDDL", + "aminoacids": "MEMGHHAPWDVDDL*", + "nucleotides": "ATGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 8364, - "end": 8457, + "end": 8460, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.3921568627450981, "protein": "RPASQPRGDPTGPKESKKKVERETETDPLH", - "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH*", - "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAG" + "aminoacids": "RPASQPRGDPTGPKESKKKVERETETDPLH**", + "nucleotides": "AGACCCGCTTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATCGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCGCTCCATTAGTGA" }, { "name": "rev_exon2", "start": 8365, "end": 8641, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.29843322556577967, "protein": "DPLPSPEGTRQARRNRRRRWRERQRQIRSISERILSTYLGRSTEPVPLQLPPLERLTLDNNEDCGTSGTQGVGSPQILVESPTVLDTGTKE", @@ -527,7 +527,7 @@ "start": 8784, "end": 9387, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.4049958673891082, "protein": "MGGKWSKSSLVGWPNVRERMRRAEPAADGVGAVSRDLERHGAITSSNTATNNADCAWLEAQKEEEVGFPVTPQVPLRPMTYKAAVDLSHFLKEKGGLEGLIHSQKRQDILDLWIYHTQGYFPDWQNYTPGPGVRYPLTFGWCFKLVPVEPTDRENDRLLHPASLQGMEDPEGEVLEWRFDSRLAFHHMARELHPEYYKNC", @@ -541,7 +541,7 @@ "start": 315, "end": 1665, "subtype_start": 789, - "subtype_end": 2291, + "subtype_end": 2292, "orientation": "forward", "distance": 0.596665989022159, "protein": "MAYSPVAAPRLLPCALQQATPQDLNTMLNTVGGHQAAMQMLKEVINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQATGTANILMQRGNFRNQRKPVKCFNCGKEGHIAKNCRAPRKKGCWKCGREGHQMKDCTERQANFLGKIWPSHKGRPGNFPQSRPEIPQSRPEPTAPPAPPEESFRFGEATTPSQKQETMDKELYPPLTSLRSLFGNDPSSQ", @@ -553,7 +553,7 @@ "start": 1427, "end": 4469, "subtype_start": 2084, - "subtype_end": 5095, + "subtype_end": 5096, "orientation": "forward", "distance": 0.27887169154684477, "protein": "FFRENLAFSQGKARELSSEPTRNSSEQTRANSPTSPSRRELQVWGSNNSLSEAGDNGQGAVSSSNFPQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDIDLPGKWKPKMIGGIGGFIKVRQYEQVPIEISGHKAIGTVLVGPTPVNIIGRNLLTQLGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFRELNKRTQEFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKTNPDIVIYQYMDDLYVGSDLEIGQHRTKVEELRQHLMRWGFTTPDKKHQKEPPFLWMGYELHPNKWTVQPIVLPEKESWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKDPVHGAYYDPAKDLIAELQKQGEGQWTYQIYQEPFKNLKTGKYARTRGAHTNDVKQLTEAVQKISTESIVIWGRTPKFRLPIQKETWETWWTDYWQATWIPEWEFVNTPPLVKLWYQLEKEPIIGAETFYVDGAANRDNKSGKAGYVTDRGRQKVVTINDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIEKAQEDHEKYHSNWRTMASDFNLPPIVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEEKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKTIHTDNGRNFTSNTVKAACWWAGIKQEFGIPYNPQSQGVVESMNNELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATEIQTKELQKQITKIPNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED", @@ -565,7 +565,7 @@ "start": 4413, "end": 4992, "subtype_start": 5040, - "subtype_end": 5618, + "subtype_end": 5619, "orientation": "forward", "distance": 0.3566796368352788, "protein": "MENRWQVMIVWQVDRMRIRTWKSLVKYHMYISKKAKKWAYRHHYETTHPRISSEVHIPLGDDRLVITTYWGLHTGERDWHLGQGVSIEWRKRRYSTQVDPDLADHLIHVHYFDCFSESAIRNAILGHRVSPRCEYQAGHNKVGSLQYLALTAVITPKKIKPPLPSVKKLTEDRWNKPQKTKGHRESHTMNGH", @@ -575,69 +575,69 @@ { "name": "vpr", "start": 4931, - "end": 5225, + "end": 5222, "subtype_start": 5558, - "subtype_end": 5849, + "subtype_end": 5843, "orientation": "forward", - "distance": 0.6187165775401071, + "distance": 0.6083541998634192, "protein": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS", - "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*T", - "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAGACT" + "aminoacids": "MEQAPEDQGPQREPYNEWTLELLEELKREAVRHFPRPWLQDLGQYIYETYGDTWTGVEAIIRILQQMLFIHFRIGCHHSRIGIVLQRRARNGASRS*", + "nucleotides": "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGAGAGCCATACAATGAATGGACACTAGAGCTTTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACATTTCCCTAGACCATGGCTACAAGACCTAGGACAATATATCTATGAAACTTATGGGGATACTTGGACAGGAGTGGAAGCCATAATAAGAATTCTGCAACAAATGCTGTTTATTCATTTCAGAATTGGGTGTCACCATAGCAGAATAGGCATTGTTCTGCAAAGAAGAGCAAGAAATGGAGCCAGTAGATCATAG" }, { "name": "tat_exon1", "start": 5202, - "end": 5418, + "end": 5421, "subtype_start": 5830, - "subtype_end": 6044, + "subtype_end": 6046, "orientation": "forward", - "distance": 0.42503863987635226, + "distance": 0.40192926045016075, "protein": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ", - "aminoacids": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ", - "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAG" + "aminoacids": "MEPVDHRLEPWKHPGSQPRTPCTNCYCKKCCFHCQVCFIQKALGISYGRKKRRQRRRSPQDSQTHQVPLPKQ*", + "nucleotides": "ATGGAGCCAGTAGATCATAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAGGACTCCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTGTGTTTTATACAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAA" }, { "name": "rev_exon1", "start": 5341, - "end": 5419, + "end": 5422, "subtype_start": 5969, - "subtype_end": 6044, + "subtype_end": 6047, "orientation": "forward", - "distance": 0.5275498241500586, - "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSS", - "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGT" - }, - { - "name": "vpu", - "start": 5433, - "end": 5682, - "subtype_start": 6061, - "subtype_end": 6309, - "orientation": "forward", - "distance": 0.5368311327310633, - "protein": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL", - "aminoacids": "MQSLEILAIVALVVAAIIAIVVWTIVGIEYRKILRQRKIDKIINRIRERAEDSGNESEGDQEELSALVVEMGHDAPWDVDDL*", - "nucleotides": "ATGCAATCTTTAGAGATATTAGCAATAGTAGCATTAGTAGTAGCAGCAATAATAGCAATAGTTGTGTGGACCATAGTAGGCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGATAAAATAATTAATAGAATAAGAGAAAGAGCAGAAGACAGTGGCAATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG" + "distance": 0.524971623155505, + "protein": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "aminoacids": "MAGRSGDRDEDLLKTVRLIKFLYQSSK", + "nucleotides": "ATGGCAGGAAGAAGCGGAGACAGAGACGAAGATCTCCTCAAGACAGTCAGACTCATCAAGTTCCTCTACCAAAGCAGTAAG" }, { "name": "env", "start": 5596, "end": 8158, "subtype_start": 6224, - "subtype_end": 8794, + "subtype_end": 8795, "orientation": "forward", "distance": 0.5139610675592354, "protein": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ", "aminoacids": "MRAKGIRKNCQHLWWKWGTMLLGMLMICSAAENLWVTVYYGVPVWRDANTTLFCASDAKAYDTEVHNVWATHACVPTDPNPHEVELKNVTENFNMWKNNMVDQMHEDIINLWDQSLKPCAKLTPLCVTLNCTDLRNDTVGNQTNLTETNTIQGREMTNCSFNITTEIRDKVRKEHALFYKLDVMPIDRDNTSYTLINCNTSVITQACPKVTFEPIPIHYCTPAGFAILKCKDEMFNGTGPCKNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIVLRSENFTDNGKNIIVQLNRSIVINCTRPNNNTRKSISVAGRAIYATGQIIGDIRQAHCNISETDWNDTLSKIVEKLREKFGKDKTIIFNQSSGGDMEIETHSFNCGGEFFYCNTTRLFNSTWSVNGTSINGTKNITLPCRIKQIINRWQEVGKAMYAPPISGIIRCSSNITGLILTRDGGTNNSTEETETFRPGGGNMKDNWRSELYKYKVVKIEPLGVAPTKAQRRVVQREKRAIGALGAMFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLSVERYLQDQQLLGIWGCSGKLICTTTVPWNTSWSNKSYDTIWDNMTWMQWDREIQNYTGKIYNLLEESQIQQEKNEKELLELDQWANLWNWFSITKWLWYIKIFIMIVGGLVGLKIIFAVFSIVNRVRQGYSPLSLQTLLPTPRGPDRPEGIEEEGGERDRGRSGRLVTGFLPLIWDDLRSLCLFSYHHLRDLLLIVLRTVQVLGHRGWEILKYWWSLLQYWIQELKNSAVSLLNTIAIAVAEGTDRVIEVGQRIGRAFLHIPRRIRQGLERALQ*", "nucleotides": "ATGAGAGCGAAGGGGATCAGGAAGAATTGTCAGCACTTGTGGTGGAAATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAGTGCTGCGGAAAATTTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGATGCAAATACCACTCTATTCTGTGCATCAGATGCTAAAGCATATGATACAGAAGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCTAACCCACATGAAGTAGAATTGAAAAATGTGACAGAAAATTTTAATATGTGGAAAAATAACATGGTGGACCAGATGCATGAGGATATAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGCAAAATTGACCCCACTCTGTGTTACTTTAAATTGCACTGATTTAAGAAATGATACTGTAGGAAATCAAACAAATCTCACTGAAACTAATACAATACAGGGAAGAGAAATGACAAACTGCTCTTTCAATATCACCACAGAGATAAGAGATAAAGTGCGAAAAGAACATGCACTTTTTTATAAACTTGATGTCATGCCAATAGATAGGGATAATACAAGTTATACATTGATAAATTGCAATACCTCAGTCATTACACAGGCCTGTCCAAAGGTAACCTTTGAACCAATTCCCATACATTATTGTACTCCAGCTGGTTTTGCAATTCTAAAGTGTAAGGATGAGATGTTCAATGGAACAGGACCATGTAAGAATGTCAGCACAGTACAATGTACACATGGAATTAGACCAGTAGTGTCAACTCAACTACTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAGTACTTAGATCTGAAAATTTCACAGACAATGGTAAAAATATAATAGTACAGCTAAATAGATCTATAGTAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATAAGTGTAGCAGGGAGAGCAATTTATGCAACAGGGCAGATAATAGGAGATATAAGACAAGCACATTGTAACATCAGTGAGACAGATTGGAATGACACTTTAAGCAAAATAGTTGAAAAATTAAGGGAAAAATTTGGAAAAGATAAAACAATAATCTTTAATCAATCATCAGGAGGGGACATGGAAATTGAAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAACACAACACGACTGTTTAATAGTACTTGGAGTGTTAATGGAACTAGCATAAACGGAACTAAAAATATCACACTCCCGTGCAGAATAAAACAAATTATAAACAGGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCTATCAGTGGGATAATTAGGTGCTCATCAAATATTACAGGACTAATATTAACAAGAGATGGTGGTACAAATAATAGTACAGAGGAGACGGAGACCTTCAGACCTGGAGGGGGAAATATGAAGGACAATTGGAGAAGTGAATTATATAAATACAAAGTAGTAAAAATTGAGCCATTAGGAGTAGCACCCACCAAGGCACAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAATAGGAGCGTTAGGAGCTATGTTCCTCGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCACTGACGCTGACGGTACAAGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAACAATCTGCTGAGGGCTATTGAGGCGCAACAGCATATGTTGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAGTCCTGTCTGTGGAAAGATACCTACAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATACTAGTTGGAGCAATAAATCTTACGATACCATCTGGGATAACATGACCTGGATGCAGTGGGACAGAGAAATTCAAAATTACACAGGGAAAATATACAACTTACTTGAGGAATCGCAAATCCAACAGGAAAAGAATGAAAAGGAATTATTAGAACTAGATCAATGGGCAAATTTGTGGAATTGGTTTAGTATAACAAAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTAGTAGGTTTAAAAATAATTTTTGCTGTATTTTCTATAGTGAATAGAGTTAGGCAGGGATACTCACCATTATCATTGCAGACCCTCCTCCCAACCCCGAGGGGACCAGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGGCAGATCCGGAAGATTAGTGACTGGATTCTTACCACTCATCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTACTCTTGATTGTGCTGAGGACTGTGCAAGTTCTGGGACACAGGGGGTGGGAGATCCTCAAGTACTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATACCATAGCTATCGCAGTAGCTGAGGGAACAGATAGGGTTATAGAGGTAGGACAAAGAATTGGCAGAGCTTTTCTCCACATACCTAGAAGGATAAGACAGGGATTAGAAAGGGCTTTGCAATAA" }, + { + "name": "vpu", + "start": 5643, + "end": 5682, + "subtype_start": 6259, + "subtype_end": 6310, + "orientation": "forward", + "distance": 0.5690703735881842, + "protein": "MGHDAPWDVDDL", + "aminoacids": "MGHDAPWDVDDL*", + "nucleotides": "ATGGGGCACGATGCTCCTTGGGATGTTGATGATCTGTAG" + }, { "name": "tat_exon2", "start": 7739, "end": 7835, "subtype_start": 8376, - "subtype_end": 8468, + "subtype_end": 8469, "orientation": "forward", "distance": 0.6842105263157894, "protein": "RPSSQPRGDQTGPKE", @@ -649,7 +649,7 @@ "start": 7740, "end": 8016, "subtype_start": 8377, - "subtype_end": 8652, + "subtype_end": 8653, "orientation": "forward", "distance": 0.4267425320056898, "protein": "DPPPNPEGTRQARRNRRRRWRERQRQIRKISDWILTTHLGRPAEPVPLQLPPLERLTLDCAEDCASSGTQGVGDPQVLVESPAVLDSGTKE", @@ -661,7 +661,7 @@ "start": 8159, "end": 8813, "subtype_start": 8796, - "subtype_end": 9416, + "subtype_end": 9417, "orientation": "forward", "distance": 0.49485619884358334, "protein": "MGGKWSKGCMAGWPTVRERMERIDPRPAARREQAEPAAAGVGAASRDLEKYGAITSSNTSTTNAACAWLEAQEEEEVGFPVRPQVPLRPMTYKAALDLSHFLKEKGGLEGLIWSQRRQEILDLWVYHTQGYFPDWQNYTPGPGVRFPLTFGWCFKLVPVDPDQVEKANEGENNCLLHPMSLHGMEDSEGEVLMWKFDSSLAFHHRAREQHPEYYKDC", diff --git a/util/expected_orf.py b/util/expected_orf.py index f8630ca..14a2f1d 100644 --- a/util/expected_orf.py +++ b/util/expected_orf.py @@ -1,7 +1,6 @@ from dataclasses import dataclass -from util.reference_index import ReferenceIndex from util.translate_to_aminoacids import translate_to_aminoacids -from util.get_biggest_protein import get_biggest_protein +from util.find_orf import find_orf @dataclass @@ -14,23 +13,23 @@ class ExpectedORF: aminoacids: str protein: str - @staticmethod def subtyped(aligned_sequence, name, start, end, deletion_tolerence): - start_s = ReferenceIndex(start).mapto(aligned_sequence) - end_s = ReferenceIndex(end).mapto(aligned_sequence) - - nucleotides = str(aligned_sequence.this.seq[start_s:(end_s + 1)]) + nucleotides = aligned_sequence.reference.seq[start:(end + 1)] aminoacids = translate_to_aminoacids(nucleotides) - has_start_codon = translate_to_aminoacids(aligned_sequence.reference.seq[start:(end + 1)]).startswith("M") - protein = get_biggest_protein(has_start_codon, aminoacids) + protein = aminoacids.strip("*") + reference_orf = \ + ExpectedORF( + name=name, + start=start, + end=end, + deletion_tolerence=deletion_tolerence, + nucleotides=nucleotides, + aminoacids=aminoacids, + protein=protein, + ) - return ExpectedORF(name=name, - start=start_s, - end=end_s, - deletion_tolerence=deletion_tolerence, - nucleotides=nucleotides, - aminoacids=aminoacids, - protein=protein, - ) + subtype_orf = find_orf(aligned_sequence, reference_orf) + subtype_orf.deletion_tolerence = deletion_tolerence + return subtype_orf