diff --git a/intact/intact.py b/intact/intact.py index 978df3a..6455b85 100644 --- a/intact/intact.py +++ b/intact/intact.py @@ -656,11 +656,9 @@ def find_real_correspondence(e): for e in expected: best_match = find_real_correspondence(e) - aligned_start = query_aligned_mapping[best_match.start] - aligned_end = query_aligned_mapping[best_match.end - 1] + 1 - - insertions = len(re.findall(r"-", str(alignment[0].seq[aligned_start:aligned_end]))) - deletions = len(re.findall(r"-", str(alignment[1].seq[aligned_start:aligned_end]))) + got_protein = best_match.aminoseq.split("*")[0] + exp_protein = best_match.expectedaminoseq.split("*")[0] + deletions = (len(exp_protein) - len(got_protein)) * 3 # Max deletion allowed in ORF exceeded if deletions > e.deletion_tolerence: @@ -682,6 +680,12 @@ def find_real_correspondence(e): + str(deletions) )) + aligned_start = query_aligned_mapping[best_match.start] + aligned_end = query_aligned_mapping[best_match.end - 1] + 1 + + insertions = len(re.findall(r"-", str(alignment[0].seq[aligned_start:aligned_end]))) + deletions = len(re.findall(r"-", str(alignment[1].seq[aligned_start:aligned_end]))) + # Check for frameshift in ORF if (deletions - insertions) % 3 != 0: diff --git a/tests/expected-results-large/errors.json b/tests/expected-results-large/errors.json index 16eee0d..dcf6f99 100644 --- a/tests/expected-results-large/errors.json +++ b/tests/expected-results-large/errors.json @@ -15,11 +15,36 @@ "error": "FrameshiftInOrf", "message": "Smaller ORF vif at 5040-5619 contains an out of frame indel: insertions 1 deletions 7182." }, + { + "sequence_name": "KX505501.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon" + }, + { + "sequence_name": "KX505501.1", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 213" + }, + { + "sequence_name": "KX505501.1", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon1 at 5968-6044 can have maximum deletions 30, got 75" + }, + { + "sequence_name": "KX505501.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", "message": "Smaller ORF tat_exon2 at 8375-8468 contains an out of frame indel: insertions 1 deletions 0." }, + { + "sequence_name": "KX505501.1", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 96" + }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", @@ -51,7 +76,18 @@ "message": "Sequence is plus-scrambled." } ], - "MN691959": [], + "MN691959": [ + { + "sequence_name": "MN691959", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + }, + { + "sequence_name": "MN691959", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon" + } + ], "MN692074": [ { "sequence_name": "MN692074", @@ -61,12 +97,7 @@ { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 5400" - }, - { - "sequence_name": "MN692074", - "error": "DeletionInOrf", - "message": "Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 5400" + "message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81" }, { "sequence_name": "MN692074", @@ -78,11 +109,6 @@ "error": "FrameshiftInOrf", "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 5236." }, - { - "sequence_name": "MN692074", - "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon1 at 5968-6044 contains an internal stop codon" - }, { "sequence_name": "MN692074", "error": "FrameshiftInOrf", @@ -91,22 +117,12 @@ { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF vpu at 6060-6309 can have maximum deletions 30, got 5400" - }, - { - "sequence_name": "MN692074", - "error": "DeletionInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8468 can have maximum deletions 30, got 5400" - }, - { - "sequence_name": "MN692074", - "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 5400" + "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 192" }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 5400" + "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 123" }, { "sequence_name": "MN692074", @@ -136,6 +152,11 @@ "error": "FrameshiftInOrf", "message": "ORF gag at 1175-2291 contains an out of frame indel, deletions 91 insertions 33." }, + { + "sequence_name": "MN090335", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon" + }, { "sequence_name": "MN090335", "error": "PackagingSignalDeletion", @@ -323,6 +344,41 @@ "error": "WrongORFNumber", "message": "Expected 3 forward ORFs, got 0" }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon1 at 5829-6044 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon" + }, { "sequence_name": "MK114856.1", "error": "APOBECHypermutationDetected", @@ -335,6 +391,31 @@ "error": "WrongORFNumber", "message": "Expected 3 forward ORFs, got 0" }, + { + "sequence_name": "MK115009.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon" + }, + { + "sequence_name": "MK115009.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon" + }, + { + "sequence_name": "MK115009.1", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 54" + }, + { + "sequence_name": "MK115009.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon" + }, + { + "sequence_name": "MK115009.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon" + }, { "sequence_name": "MK115009.1", "error": "APOBECHypermutationDetected", @@ -351,7 +432,13 @@ "message": "Sequence contains an internal inversion." } ], - "MK115387.1": [], + "MK115387.1": [ + { + "sequence_name": "MK115387.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon" + } + ], "MK115491.1": [], "MK116110.1": [ { @@ -422,6 +509,36 @@ "error": "WrongORFNumber", "message": "Expected 3 forward ORFs, got 0" }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon" + }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon1 at 5829-6044 contains an internal stop codon" + }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon" + }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon" + }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon" + }, { "sequence_name": "MK115464.1", "error": "FrameshiftInOrf", @@ -508,6 +625,31 @@ "error": "WrongORFNumber", "message": "Expected 3 forward ORFs, got 0" }, + { + "sequence_name": "MK115095.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon" + }, + { + "sequence_name": "MK115095.1", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 54" + }, + { + "sequence_name": "MK115095.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + }, + { + "sequence_name": "MK115095.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an internal stop codon" + }, + { + "sequence_name": "MK115095.1", + "error": "DeletionInOrf", + "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 36" + }, { "sequence_name": "MK115095.1", "error": "APOBECHypermutationDetected", diff --git a/tests/expected-results-large/intact.fasta b/tests/expected-results-large/intact.fasta index 72785f7..2d494f4 100644 --- a/tests/expected-results-large/intact.fasta +++ b/tests/expected-results-large/intact.fasta @@ -1,163 +1,3 @@ ->MN691959 -CAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACC -CTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACA -GCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCT -GACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGG -GCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGT -ACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAAC -CCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTG -TTGTATGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCT -AGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGAC -GCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGAGGCGACTGGTGAGTA -CGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTA -TTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAA -AAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAAT -CCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCC -CTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGT -GTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAG -CAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAG -ATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATA -TCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAA -GTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATG -CTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAG -GAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAG -ATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATA -GGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATC -CTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAA -GGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAG -CAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAAC -CCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATG -ACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATG -AGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGA -AAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCC -CCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACT -GAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAAT -TTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTA -GAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACT -TCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAA -AGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAG -GAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATG -ATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTA -CACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTC -CCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAG -TTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAA -TGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTAT -TTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTA -ATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAA -AAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAG -ATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAG -GGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCC -AAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCT -ATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAA -AAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAAC -ATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAG -TACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAG -TGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTA -AACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGC -TAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACC -CATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAA -TTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCC -ACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAG -TAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACAT -GGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTC -CTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCT -ATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACA -AAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAG -CAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAAT -ATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAA -TAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAG -GAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTAT -TTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAG -CAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTG -ATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATAT -GGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCA -GTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTC -TTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATT -TCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAA -TTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAA -TTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTAT -TCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAG -TAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTC -AAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGC -TCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGC -CAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTG -TGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTAT -GTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGA -ATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGG -GGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGG -AAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTAT -TACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGC -CCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTA -GCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACA -GAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAAT -GGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTT -GGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGG -AAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGAC -ATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGA -CTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGT -AAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTAT -GGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTT -TCTCTATCAAAGCAGTAAGTAGTACATGTAATGCAACCTATACAAATAGCAATAGTAGCA -TTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGG -AAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAA -GACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATG -GGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCAC -AGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGA -TGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCAC -AGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAA -AAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAA -GCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGC -TACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGG -AGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGA -ATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATAC -GTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCC -AATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGAC -GTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAG -GCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAAT -TAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGT -AGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGG -ACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAA -CATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACA -ATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGT -AACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAA -TAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGA -CACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAA -AGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCT -GCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGG -AGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGA -ACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGC -AGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGC -AGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCA -GAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGG -CATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCT -CCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGC -TAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGA -CAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCA -GCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTG -GTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTT -GGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATA -TTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGG -AATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATC -CTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAG -AGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGC -CCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGT -TAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGT -AGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGA -AAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTA -CTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTC -GAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTT -GTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTAC -CTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGG -GGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCT -ACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGAT -ATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAG -AGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATG -ACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGG -CCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGAC -TTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCC -TCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAG -ATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGC -TTGCCTTGAGTGC >MN692145 TGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACA CACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCAC @@ -640,160 +480,6 @@ AAGACTGCTGACATTGAGCTTTCTACAAAGGGACTTTCCGCTGGGGACTTTCCAGGGGAG GTGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGC GTTCTGCCTGTAAGGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCT AACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCT ->MK115387.1 -CGCCACTGCTAGAGATTTGGGGATCTCTAGTTACCAGAGTCGGGCGCCACTGCTAGAGAT -TTAGGGCTCTCTAGTTACCAGAGTCGGGCGCCACTGCTAGAGATTTGGGGATCTCTAGTT -ACCAGAGTCTGTTCGGGCGCCACTGCTAGAGATTGGGACCTGAAAGCGAAAGTAGAACCA -GAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGG -CGGCGACTGGTGAGTACGCCAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGC -GAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCC -AGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACG -ATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAGGGATGTAAACAAATACTGGAACA -GCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGC -AACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAA -GATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACACAGG -AAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGT -ACATCAGGCCATATCGCCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGC -CTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGA -TTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGA -GACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTGT -TGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCT -TCAAGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAA -AAGATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTT -GGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGT -CCTAAGAGCCGAGCAAGCATCGCAGGATGTAAAAAATTGGATGACAGAAACCTTATTGGT -CCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCAACACT -AGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTT -GGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTT -TAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAG -AAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAAT -GAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGG -AAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTT -CAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGT -GTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATA -GGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAA -ATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAA -GTAAGACAGTATGATCAGATACCTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTA -TTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTGGTTGT -ACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAGCCAGGAATG -GATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAA -ATTTGTACAGAAATGGAAAAAGAAGGGAAGATTTCAAAAATTGGGCCTGAAAATCCATAC -AATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGAT -TTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACAC -CCCGCAGGGTTAAAAAAGAAGAAATCAATAACAGTACTGGATGTGGGTGATGCATATTTT -TCAATTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAAT -AATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCA -CCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCA -GACATAGTTATCTATCAATACATGGATGACTTGTATGTAGGATCTGACTTAGAAATAGGG -CAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTGGGGATTGACCACA -CCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATGGGTTATGAACTCCATCCT -GATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGAC -ATACAGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTG -AGACAATTATGTAAACTCCTTAAGGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACA -GAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGA -GTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAG -TGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGA -ACGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACC -ACAGAAAGCATAGTGATATGGGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAA -ACATGGGATACCTGGTGGACAGAATATTGGCAAGCCACCTGGATTCCCGAGTGGGAGTTT -GTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAAAAAGAACCTATTGTAGGA -GCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGA -TATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAG -ACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTA -ACAGACTCACAATATGCACTAGGAATAATTCAAGCACAACCAGATAGGAGTGAATCAGAG -ATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAGGGTCTACCTTGCATGGGTA -CCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATC -AGGAAAGTCCTATTTTTGGATGGAATAGATAAGGCCCAAGAGGAGCATGAGAAATATCAC -AATAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATA -GTAGCCAGTTGTGATAAATGCCAGCTAAAGGGAGAAGCCACGCATGGACAAGTAGACTGT -AGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCA -GTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAA -ACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTGGCCAGTGAAAGCAATACATACAGAC -AATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAG -CAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAA -GAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTC -CAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCA -GGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAA -ATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAA -GGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAC -ATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCA -GGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAA -ACACCATATGTACATTTCAAAGAAAGCCCAGGGATGGTTTTATAGACATCACTATGAAAA -TCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAAT -AACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTC -CATAGAATGCAGGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACT -AATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGG -ACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACA -GTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGT -GACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAG -CCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACAT -TTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACT -TGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGA -ATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCC -AGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTAC -CAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTT -AGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCA -GACTCATCAAGTACCTCTACCAAAGCAGTGAGTAGTATATGTAATGCAATCCTTATATAT -ATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGGCCATAGT -ACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAAT -AATAGATAGGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGC -ACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACC -AGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTAT -TTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATG -CCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATT -TTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGG -ATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTG -ATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCA -CTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATA -AAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGA -TAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTC -CCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCA -GTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAG -TAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGAT -CTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAA -TTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAG -CATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTG -GAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAA -CACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTT -TTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGA -CTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCC -AATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCC -CTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAG -ATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGG -ATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAG -CACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAG -CTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGC -TGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGA -GGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCC -AGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGG -GTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGC -ATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAA -TTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAA -AGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACA -TATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTT -TAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCAT -TATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAG -AAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCAC -TTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTAC -TCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAAT -ATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGC -TCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAA -GAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTT -TGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTAGGGTTGGATGGAATGCAGTGAGG -GAAAGAATGAGACGAGCTCAGCCAACAGCAGATAGGGAACGAGCTGAGCCAGCAGCAGAT -GGGGTGGGAGCAGCATCTAGAGACCTGGAAAAATATGGAGCACTTACAAGTAGGAATACA -GCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGATGAGGTGGGT -TTTCCAGTCAGACCTCAGGTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTG -AGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAA -GATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTAC -ACACCAGGGCCAGGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCA -CTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCT -CTGAGCCAGCATGGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGGTTTGACAGC -CGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA -CATAGAGTTTTCCACCAGGGACTTTCCAGAAGAGGCGTGGCCTGGGCGGGACCGGGGAGT -GGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTGCCTGTACTGGGTCTCTCTGGT -TAGACCAGATCTGAGCCTGGGAGCTCTCTGTCTAACTAGGGAACCCACTGCTTAAGCCTC -AATAAAGCTTGCCTTG >MK115491.1 CTGATTTGCTGTGGCGGCCGCAAGGACTCAAGTAGGTAACTATAGAAACCCCCCACCAAT TCTCTAGCAGTGGCGTCCGACGCTGGCCAAAACTCTCTAGCAGTGGCGCCCGAACAGACT diff --git a/tests/expected-results-large/nonintact.fasta b/tests/expected-results-large/nonintact.fasta index 7a69a61..29434e4 100644 --- a/tests/expected-results-large/nonintact.fasta +++ b/tests/expected-results-large/nonintact.fasta @@ -33,6 +33,166 @@ TTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCC CTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCA GATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAG CTTGCCTTGAGTGCTTC +>MN691959 +CAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACC +CTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACA +GCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCT +GACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGG +GCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGT +ACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAAC +CCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTG +TTGTATGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCT +AGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGAC +GCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGAGGCGACTGGTGAGTA +CGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTA +TTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAA +AAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAAT +CCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCC +CTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGT +GTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAG +CAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAG +ATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATA +TCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAA +GTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATG +CTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAG +GAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAG +ATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATA +GGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATC +CTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAA +GGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAG +CAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAAC +CCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATG +ACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATG +AGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGA +AAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCC +CCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACT +GAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAAT +TTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTA +GAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACT +TCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAA +AGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAG +GAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATG +ATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTA +CACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTC +CCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAG +TTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAA +TGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTAT +TTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTA +ATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAA +AAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAG +ATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAG +GGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCC +AAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCT +ATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAA +AAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAAC +ATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAG +TACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAG +TGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTA +AACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGC +TAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACC +CATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAA +TTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCC +ACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAG +TAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACAT +GGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTC +CTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCT +ATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACA +AAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAG +CAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAAT +ATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAA +TAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAG +GAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTAT +TTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAG +CAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTG +ATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATAT +GGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCA +GTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTC +TTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATT +TCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAA +TTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAA +TTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTAT +TCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAG +TAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTC +AAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGC +TCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGC +CAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTG +TGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTAT +GTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGA +ATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGG +GGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGG +AAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTAT +TACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGC +CCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTA +GCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACA +GAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAAT +GGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTT +GGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGG +AAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGAC +ATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGA +CTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGT +AAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTAT +GGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTT +TCTCTATCAAAGCAGTAAGTAGTACATGTAATGCAACCTATACAAATAGCAATAGTAGCA +TTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGG +AAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAA +GACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATG +GGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCAC +AGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGA +TGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCAC +AGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAA +AAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAA +GCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGC +TACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGG +AGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGA +ATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATAC +GTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCC +AATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGAC +GTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAG +GCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAAT +TAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGT +AGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGG +ACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAA +CATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACA +ATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGT +AACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAA +TAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGA +CACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAA +AGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCT +GCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGG +AGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGA +ACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGC +AGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGC +AGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCA +GAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGG +CATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCT +CCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGC +TAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGA +CAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCA +GCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTG +GTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTT +GGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATA +TTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGG +AATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATC +CTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAG +AGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGC +CCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGT +TAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGT +AGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGA +AAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTA +CTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTC +GAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTT +GTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTAC +CTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGG +GGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCT +ACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGAT +ATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAG +AGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATG +ACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGG +CCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGAC +TTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCC +TCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAG +ATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGC +TTGCCTTGAGTGC >MN692074 TGGAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACA CACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCAC @@ -1981,6 +2141,160 @@ GAGATTTCTACAAAAGACTTTCCGCTAAGGACTTTCCAGGGGAGGCGTGGCCTAGGCAGG ACAGAGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACTAG GTCTCTCTAGTTAGACCAGATCTGAGCCTAGGAGCTCTCTGGCTAACTAAGGAACCCACT GCTTAAGCCTCAATAAAGCTTGCCTTG +>MK115387.1 +CGCCACTGCTAGAGATTTGGGGATCTCTAGTTACCAGAGTCGGGCGCCACTGCTAGAGAT +TTAGGGCTCTCTAGTTACCAGAGTCGGGCGCCACTGCTAGAGATTTGGGGATCTCTAGTT +ACCAGAGTCTGTTCGGGCGCCACTGCTAGAGATTGGGACCTGAAAGCGAAAGTAGAACCA +GAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGG +CGGCGACTGGTGAGTACGCCAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGC +GAGAGCGTCTGTATTAAGCGGGGGAGAATTAGATAAATGGGAAAAAATTCGGTTAAGGCC +AGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACG +ATTCGCAGTTAACCCCGGCCTTTTAGAAACAACAGAGGGATGTAAACAAATACTGGAACA +GCTGCAACCATCCCTTCCGACAGGATCAGAAGAACTTAGATCATTATTTAATACAGTAGC +AACCCTCTATTGTGTGCATAAGAGAATAGAGGTACAAGACACCAAGGAAGCCTTAGAAAA +GATAGAAGAAGAGCAAAACAAAAGTAAGAAAAAGGCACAGCAGGCAGTAGCTGACACAGG +AAGTACCAGCCAGGTCAGCCAAAATTACCCGATAGTGCAGAACCTCCAGGGGCAAATGGT +ACATCAGGCCATATCGCCTAGAACTTTAAATGCATGGGTGAAAGTAGTAGAAGAGAAGGC +CTTTAGCCCAGAGGTAATACCCATGTTTTCAGCATTATCGGAAGGAGCCACCCCACAAGA +TTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGA +GACCATCAATGAGGAGGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTGT +TGCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCT +TCAAGAACAAATAGCATGGATGACAAGTAATCCACCTATCCCAGTAGGAGAAATCTATAA +AAGATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTACCAGCATTTT +GGACATAAAACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAGT +CCTAAGAGCCGAGCAAGCATCGCAGGATGTAAAAAATTGGATGACAGAAACCTTATTGGT +CCAAAATGCAAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCAGCAACACT +AGAAGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTT +GGCTGAAGCAATGAGCCAAGTAACAAATTCAAATATCATAATGATGCAGAGAGGCAATTT +TAGGAACCAAAGAAAGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAG +AAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGGACACCAAAT +GAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTCCAAGGG +AAGGCCAGGGAACTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAGGAGAGCTT +CAGGTTTGGGGAGGAGACAGCAACTCCTCCTCAGAAGCAGGAGCCGGTGGACAAGGAAGT +GTATCCTTTAGCTTCCCTCAAATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATA +GGAGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGAA +ATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAA +GTAAGACAGTATGATCAGATACCTATAGAAATCTGTGGACATAAAGCTATAGGTACAGTA +TTAGTAGGACCTACACCTGTCAACATAATTGGCAGAAATTTGTTGACTCAGATTGGTTGT +ACTTTAAATTTTCCCATTAGTCCTATTGAAACTGTACCAGTACAATTAAAGCCAGGAATG +GATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAA +ATTTGTACAGAAATGGAAAAAGAAGGGAAGATTTCAAAAATTGGGCCTGAAAATCCATAC +AATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGAT +TTCAGGGAACTTAATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACAC +CCCGCAGGGTTAAAAAAGAAGAAATCAATAACAGTACTGGATGTGGGTGATGCATATTTT +TCAATTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAAT +AATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCA +CCAGCAATATTCCAAAGTAGCATGATAAAAATCTTAGAGCCTTTTAGAAAGCAAAATCCA +GACATAGTTATCTATCAATACATGGATGACTTGTATGTAGGATCTGACTTAGAAATAGGG +CAGCATAGAACAAAAATAGAGGAACTGAGACAACACTTGTTGAAGTGGGGATTGACCACA +CCAGATAAAAAACATCAGAAAGAACCCCCATTCCTGTGGATGGGTTATGAACTCCATCCT +GATAAATGGACAGTACAGCCTATAGTGCTGCCAGACAAAGATAGCTGGACTGTCAATGAC +ATACAGAAGCTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTG +AGACAATTATGTAAACTCCTTAAGGGAGCCAAAGCGCTAACAGAAGTGATACAACTAACA +GAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGA +GTGTATTATGACCCATCAAAAGACTTAATAGCAGAGTTACAGAAGCAGGGGCAAGGCCAG +TGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGA +ACGAGGGGTACCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACC +ACAGAAAGCATAGTGATATGGGGAAAGACCCCTAGATTTAAACTACCCATACAGAAAGAA +ACATGGGATACCTGGTGGACAGAATATTGGCAAGCCACCTGGATTCCCGAGTGGGAGTTT +GTCAATACCCCTCCCTTAGTGAAATTATGGTACCAATTAGAAAAAGAACCTATTGTAGGA +GCAGAAACTTTCTATGTAGATGGGGCAGCTAATAGGGAGACTAAATTAGGAAAAGCAGGA +TATGTTACTAGCAGAGGAAGACAAAAGGTTGTCTCCCTAACTGACACAACAAATCAGAAG +ACTGAACTGCAAGCAATTTGTCTAGCATTGCAGGATTCAGGATTAGAAGTAAACATAGTA +ACAGACTCACAATATGCACTAGGAATAATTCAAGCACAACCAGATAGGAGTGAATCAGAG +ATAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAGGGTCTACCTTGCATGGGTA +CCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATC +AGGAAAGTCCTATTTTTGGATGGAATAGATAAGGCCCAAGAGGAGCATGAGAAATATCAC +AATAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATA +GTAGCCAGTTGTGATAAATGCCAGCTAAAGGGAGAAGCCACGCATGGACAAGTAGACTGT +AGTCCAGGAATATGGCAATTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCA +GTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAGACAGGGCAAGAA +ACAGCATACTTCCTCTTAAAATTAGCAGGAAGGTGGCCAGTGAAAGCAATACATACAGAC +AATGGAACCAATTTCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCAGGGATCAAG +CAGGAATGTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAA +GAATTAAAGAAAATTATAGGACAAGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTC +CAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCA +GGGGAAAGAATAGTAGACATAATAGCAACAGACCTACAAACTAAAGAATTACAGAAACAA +ATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAACAGAGATCCACTTTGGAAA +GGACCAGCAAAACTTCTTTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGAC +ATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCA +GGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGCACATGGAAAAGTTTAGTAAA +ACACCATATGTACATTTCAAAGAAAGCCCAGGGATGGTTTTATAGACATCACTATGAAAA +TCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCGCTAGGGGATGCTAGATTGGTAAT +AACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAAGGAGTCTC +CATAGAATGCAGGGAAAGGAAATATAGCACACAAGTAACCCCTGACCTAGCAGACCAACT +AATTCACCTGTATTATTTTGATTGTTTTGCAGAATCTGCTATAAGAGAGGCCATATTAGG +ACATAGAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACA +GTACTTGGCACTAACAGCATTAGTAACACCAAAAAAGACAAAGCCACCTTTGCCTAGTGT +GACAAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAG +CCAAACAATGAATGGACACTAGAGCTCTTAGAAGAGCTTAAGAGAGAAGCTGTTAGACAT +TTTCCTAGGACATGGCTTCATGGCTTGGGACAACATATCTATGAAACTTATGGGGATACT +TGGACAGGAGTGGAAGCCTTAATAAGAATTCTGCAACAACTGCTGTTTATTCATTTCAGA +ATTGGGTGCCAACATAGCAGAATAGGCATCACTCGACAGAGGAGAGCAAGAAATGGAGCC +AGTAGATCCTAGATTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAGACTCCTTGTAC +CAAATGCTATTGTAAAAAATGCTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGGCTT +AGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCA +GACTCATCAAGTACCTCTACCAAAGCAGTGAGTAGTATATGTAATGCAATCCTTATATAT +ATTAACAATAGTAGCATTAGTAGTAGCAGCAATATTAGCAATAGTTGTGTGGGCCATAGT +ACTCATAGAATATAAGAAAATATTAAAACAAAGGAGAATAGATAGGTTAATTGATAGAAT +AATAGATAGGGCAGAAGACAGTGGCAATGAGAGTGAAGGAGATCAAGAGGAGTTATCAGC +ACTTGTGGAGATGGGGCATCATGCTCCTTGGAATGTTGATGATCTATAGTACTGCAGACC +AGTGGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAGAGAAGCAAACACCACTCTAT +TTTGTGCATCAGATGCTAAAGCATATAGTACAGAGGCACATAATGTTTGGGCCACACATG +CCTGTGTACCCACAGACCCCAACCCACAAGAAATAGTAATAGGAAATGTAACAGAAGATT +TTAACATGTGGAAGAATAACATGGTAGATCAGATGCATGAGGATATAATCAGTTTATGGG +ATCAAAGTCTAAAGCCATGTGTGAAACTAACCCCACTCTGTGTTACTTTAAATTGTACTG +ATATTAGGAATAGTACCGTTACTTCTAATACTACTACTTGGGGAGAAATGACAAACTGCA +CTTTCAATATCACCACAAGCATAAAGGATAAGATGAAAAAAGAAGCGGCACTTTTTTATA +AAATTGATTTAGTAGAAATAGATGAAGAGAAAAATAACAGTAGTACCAGATATAGGTTGA +TAAATTGTAACACCTCAGCCATTACACAAGCCTGCCCAAAGGTATCCTTTGAGCCAATTC +CCATACATTTTTGTGCCCCGGCTGGTTTTGCAATTCTAAAGTGTAATAATAAGAAGTTCA +GTGGAAAGGGACCATGTACAAATGTCAGCACAGTGCAATGTACACATGGAATTAAGCCAG +TAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAATGATTAGAT +CTGACAATATCACAGACAATACTAAAAACATAATAGTACAGTTGAAGGAAGCCATACAAA +TTTTCTGTATAAGACCCAACAACAATACAAGAAAAAGTATAAATATAGGACCAGGGAGAG +CATTTTACACAACAGGAGATATAATAGGAGACATAAGGCAAGCACATTGTAACATTAGTG +GAAATTGGAATAACACTTTAAAACAGATAGCTACACAATTAGGAAAACAACTGAATCAAA +CACAACAAATAATCTTTAATTCATCCGCAGGAGGGGACCCAGAGATTGTAACACACAGTT +TTAATTGTGGAGGGGAATTCTTCTATTGTAATTCATCATCACTGTTTAATAGTACCTGGA +CTAAAAATGGTACTGGTAGTTGGCAGTCTAATGATACTCAGAATGGTAATATCACACTCC +AATGCAGAATAAAACAAATTATAAACCTGTGGCAGGGAGTAGGAAAAGCAATGTATGCCC +CTCCCATCAGTGGACAAATTAACTGTACATCAAATATTACAGGGCTGGTTTTAACAAGAG +ATGGGGGGAAGGTGATTAATGAAACTGAGACCTTTAGACCTGGAGGAGGAAATATGAAGG +ATAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAGAATTGAACCATTAGGAATAG +CACCCACCAAGGCAAAAAGAAGAGTGGTGCAAAGAGAGAAAAGAGCAGTAGGACTGGGAG +CTATGTTCCTTGGGTTCTTGGGAGCAGCCGGAAGCACTATGGGCGCAGCGTCAATAGCGC +TGACGGAACAGGCCAGACGAGTATTGTCTGGTATAGTGCAACAGCAGAACAATTTGCTGA +GGGCTATTGAGGCGCAACAGCATTTGTTGCAACTCACAGTCTGGGGCATTAAGCAGCTCC +AGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCTCCTAGGACTTTGGG +GTTGCTCTGGAAAACTCATTTGCACCACTACTGTGCCTTGGAATCGTAGTTGGGGTGGGC +ATAACAAAAATCTAGATGACATTTGGGGTAACATGACCTGGATGGAGTGGGAAAAAGAAA +TTGACAATTACACAAGCTTAATATACACCTTAATTACAGAATCACATTCCCAACAAGAAA +AGAATGAACAAGAATTATTGGCATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTGACA +TATCACAATGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGGTAGGTT +TAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAAAGTTAGGCAGGGATACTCACCAT +TATCATTTCAGACCCTCCTCCCAGCTCCGAGGGGAGCCGACAGGCCCGAAGGAATAGAAG +AAGGAGGTGGAGAGAGAGACAAAGGCAGATCCGGTCGATTAGTGAACGGATTCTTAGCAC +TTATCTGGGACGATCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGCGACTTAC +TCTTGATTGTGATCAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAAT +ATTGGTGGAATCTCCTGCAGTACTGGAGTCAGGAACTAAAGAATAGTGCTGTCAGCTTGC +TCAACACCACAGCTATAGTAGTAGCTGAAGGGACAGATAGGATAATAGAAATATTACAAA +GAATTGGTAGAGCTTTTCTCCACATACCTAGGAGAATAAGACAGGGCTTAGAAAGGGCTT +TGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTAGGGTTGGATGGAATGCAGTGAGG +GAAAGAATGAGACGAGCTCAGCCAACAGCAGATAGGGAACGAGCTGAGCCAGCAGCAGAT +GGGGTGGGAGCAGCATCTAGAGACCTGGAAAAATATGGAGCACTTACAAGTAGGAATACA +GCAGCTACTAATGCTGATTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGATGAGGTGGGT +TTTCCAGTCAGACCTCAGGTACCTTTAAGACCCATGACTTATAAAGCAGCTCTAGATCTG +AGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAGTTTACTCCCAAAAAAGGCAA +GATATCCTTGATCTGTGGATCTACCACACACAAGGCTATTTCCCTGATTGGCAGAACTAC +ACACCAGGGCCAGGGATCAGATATCCCCTGACCTTTGGATGGTGCTTCAAGCTAGTACCA +CTTGAGCCAGATCAGGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCCTGTTACACCCT +CTGAGCCAGCATGGGATGGATGACCCGGAGAAAGAAGTGCTAGTGTGGAGGTTTGACAGC +CGCCTCGCATTTCATCACATGGCCCGAGAGAAGCATCCGGAGTTCTACAAAGACTGCTGA +CATAGAGTTTTCCACCAGGGACTTTCCAGAAGAGGCGTGGCCTGGGCGGGACCGGGGAGT +GGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTGCCTGTACTGGGTCTCTCTGGT +TAGACCAGATCTGAGCCTGGGAGCTCTCTGTCTAACTAGGGAACCCACTGCTTAAGCCTC +AATAAAGCTTGCCTTG >MK116110.1 CCTGAAAGCGAAAGTAGAACCAGAGAAGTTCTCTCGACGCAGGACTCGGCTTGCTGAGCT TTATGGCCGGGTCCCCCCACTCCCTGACATGCTGTCATCATTTCTTCTAGTGTAGCTGCT diff --git a/tests/expected-results-small/errors.json b/tests/expected-results-small/errors.json index 81acd57..397bfb3 100644 --- a/tests/expected-results-small/errors.json +++ b/tests/expected-results-small/errors.json @@ -15,11 +15,36 @@ "error": "FrameshiftInOrf", "message": "Smaller ORF vif at 5040-5619 contains an out of frame indel: insertions 1 deletions 7182." }, + { + "sequence_name": "KX505501.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpr at 5558-5849 contains an internal stop codon" + }, + { + "sequence_name": "KX505501.1", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 213" + }, + { + "sequence_name": "KX505501.1", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon1 at 5968-6044 can have maximum deletions 30, got 75" + }, + { + "sequence_name": "KX505501.1", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", "message": "Smaller ORF tat_exon2 at 8375-8468 contains an out of frame indel: insertions 1 deletions 0." }, + { + "sequence_name": "KX505501.1", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 96" + }, { "sequence_name": "KX505501.1", "error": "FrameshiftInOrf", @@ -51,7 +76,18 @@ "message": "Sequence is plus-scrambled." } ], - "MN691959": [], + "MN691959": [ + { + "sequence_name": "MN691959", + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + }, + { + "sequence_name": "MN691959", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon" + } + ], "MN692074": [ { "sequence_name": "MN692074", @@ -61,12 +97,7 @@ { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 5400" - }, - { - "sequence_name": "MN692074", - "error": "DeletionInOrf", - "message": "Smaller ORF vpr at 5558-5849 can have maximum deletions 30, got 5400" + "message": "Smaller ORF vif at 5040-5619 can have maximum deletions 30, got 81" }, { "sequence_name": "MN692074", @@ -78,11 +109,6 @@ "error": "FrameshiftInOrf", "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 5236." }, - { - "sequence_name": "MN692074", - "error": "InternalStopInOrf", - "message": "Smaller ORF rev_exon1 at 5968-6044 contains an internal stop codon" - }, { "sequence_name": "MN692074", "error": "FrameshiftInOrf", @@ -91,22 +117,12 @@ { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF vpu at 6060-6309 can have maximum deletions 30, got 5400" + "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 192" }, { "sequence_name": "MN692074", "error": "DeletionInOrf", - "message": "Smaller ORF tat_exon2 at 8375-8468 can have maximum deletions 30, got 5400" - }, - { - "sequence_name": "MN692074", - "error": "DeletionInOrf", - "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 5400" - }, - { - "sequence_name": "MN692074", - "error": "DeletionInOrf", - "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 5400" + "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 123" }, { "sequence_name": "MN692074", @@ -136,6 +152,11 @@ "error": "FrameshiftInOrf", "message": "ORF gag at 1175-2291 contains an out of frame indel, deletions 91 insertions 33." }, + { + "sequence_name": "MN090335", + "error": "InternalStopInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon" + }, { "sequence_name": "MN090335", "error": "PackagingSignalDeletion", diff --git a/tests/expected-results-small/intact.fasta b/tests/expected-results-small/intact.fasta index 8793de9..1f7afc5 100644 --- a/tests/expected-results-small/intact.fasta +++ b/tests/expected-results-small/intact.fasta @@ -1,163 +1,3 @@ ->MN691959 -CAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACC -CTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACA -GCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCT -GACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGG -GCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGT -ACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAAC -CCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTG -TTGTATGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCT -AGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGAC -GCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGAGGCGACTGGTGAGTA -CGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTA -TTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAA -AAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAAT -CCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCC -CTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGT -GTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAG -CAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAG -ATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATA -TCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAA -GTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATG -CTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAG -GAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAG -ATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATA -GGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATC -CTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAA -GGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAG -CAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAAC -CCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATG -ACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATG -AGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGA -AAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCC -CCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACT -GAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAAT -TTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTA -GAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACT -TCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAA -AGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAG -GAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATG -ATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTA -CACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTC -CCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAG -TTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAA -TGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTAT -TTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTA -ATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAA -AAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAG -ATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAG -GGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCC -AAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCT -ATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAA -AAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAAC -ATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAG -TACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAG -TGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTA -AACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGC -TAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACC -CATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAA -TTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCC -ACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAG -TAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACAT -GGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTC -CTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCT -ATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACA -AAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAG -CAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAAT -ATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAA -TAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAG -GAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTAT -TTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAG -CAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTG -ATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATAT -GGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCA -GTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTC -TTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATT -TCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAA -TTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAA -TTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTAT -TCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAG -TAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTC -AAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGC -TCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGC -CAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTG -TGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTAT -GTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGA -ATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGG -GGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGG -AAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTAT -TACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGC -CCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTA -GCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACA -GAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAAT -GGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTT -GGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGG -AAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGAC -ATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGA -CTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGT -AAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTAT -GGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTT -TCTCTATCAAAGCAGTAAGTAGTACATGTAATGCAACCTATACAAATAGCAATAGTAGCA -TTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGG -AAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAA -GACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATG -GGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCAC -AGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGA -TGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCAC -AGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAA -AAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAA -GCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGC -TACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGG -AGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGA -ATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATAC -GTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCC -AATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGAC -GTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAG -GCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAAT -TAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGT -AGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGG -ACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAA -CATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACA -ATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGT -AACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAA -TAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGA -CACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAA -AGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCT -GCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGG -AGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGA -ACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGC -AGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGC -AGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCA -GAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGG -CATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCT -CCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGC -TAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGA -CAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCA -GCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTG -GTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTT -GGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATA -TTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGG -AATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATC -CTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAG -AGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGC -CCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGT -TAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGT -AGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGA -AAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTA -CTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTC -GAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTT -GTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTAC -CTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGG -GGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCT -ACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGAT -ATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAG -AGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATG -ACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGG -CCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGAC -TTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCC -TCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAG -ATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGC -TTGCCTTGAGTGC >MN692145 TGGAAGGGCTAATTCACTCCCAAAAAAGACAAGATATTCTTGATCTATGGATCTACCACA CACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCAC diff --git a/tests/expected-results-small/nonintact.fasta b/tests/expected-results-small/nonintact.fasta index c379108..1baa046 100644 --- a/tests/expected-results-small/nonintact.fasta +++ b/tests/expected-results-small/nonintact.fasta @@ -33,6 +33,166 @@ TTTCCGCTGGGGACTTTCCAGTAGGGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCC CTCAGATGCTGCATATAAGCAGCTGCTTTCTGCCTGTACGAGGTCTCTCTGGTTAGACCA GATCTGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAG CTTGCCTTGAGTGCTTC +>MN691959 +CAGTTGAGCCAGATAAGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACC +CTGTGAGCCTGCATGGAATGGATGACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACA +GCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCT +GACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGG +GCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGT +ACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAAC +CCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTG +TTGTATGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCT +AGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGAC +GCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGAGGCGACTGGTGAGTA +CGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTA +TTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAA +AAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAAT +CCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCC +CTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGT +GTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAG +CAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAACCAG +ATCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATA +TCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAA +GTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATG +CTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAG +GAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAG +ATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATA +GGATGGATGACAAATAATCYWYCTWTCCCAGTAGGAGAAATTTATAAAAGATGGATAATC +CTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAA +GGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAG +CAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAAC +CCAGATTGTAAGACTATTTTAAAAGCATTGGGACCWGCAGCTACACTAGAAGAAATGATG +ACAGCATGTCAGGGAGTGGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATG +AGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGA +AAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGAAATTGCAGGGCC +CCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGAGTGTACT +GAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAAT +TTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTA +GAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACT +TCCCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAA +AGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAG +GAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATG +ATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTA +CACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTC +CCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAG +TTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAA +TGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTAT +TTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTA +ATAAAAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAA +AAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAG +ATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAG +GGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCC +AAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCT +ATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAA +AAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAAC +ATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAG +TACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAG +TGGGAAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTA +AACTCCTTAGGGGAACCAGAGCATTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGC +TAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGTGTATTATGACC +CATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAA +TTTATCAAGAGCCATTTAAAAATCTGAAAACAGGGAAATATGCAAGAACGAGGGGTGCCC +ACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGTATAG +TAATATGGGGAAAGACTCCTAGATTTAAACTACCCATACAAAAGGAAACATGGGAAACAT +GGTGGACAGAATATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTC +CTTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCT +ATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGGTATGTTACTAACA +AAGGAAGACAAAAGGTTGTCCCCCTAACTAACACAACAAATCAGAAGACTGAGTTACAAG +CAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAAT +ATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAA +TAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAG +GAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAATACTAT +TTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAG +CAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTG +ATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATAT +GGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCA +GTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTC +TTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACAGACAATGGCAGCAATT +TCACCAGTGCTACGGTTAAGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAA +TTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAA +TTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTAT +TCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAG +TAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTC +AAAATTTTCGGGTTTATTACAGGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGC +TCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGC +CAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTG +TGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTAT +GTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGAACTCATCCAAGA +ATAAGTTCAGAAGTATACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGG +GGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGG +AAAAGGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTAT +TACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACATATAGTTAGC +CCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTA +GCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACAAAACTGACA +GAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAAT +GGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTT +GGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGG +AAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGAC +ATAGCAGAATAGGCGTTACTCAACAGAAGAGAGCAAGAAATGGAGCCAGTAGATCCTAGA +CTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCACTTGCTATTGT +AAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAAAAGCCTTAGGCATCTCCTAT +GGCAGGAAGAAGCGGAGACAGCGACGAAGACCTCCTCAAGACAGTCAGACTCATCAAGTT +TCTCTATCAAAGCAGTAAGTAGTACATGTAATGCAACCTATACAAATAGCAATAGTAGCA +TTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGATCCATAGTAATCATAGAATATAGG +AAAATATTAAGACAAAGGAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAA +GACAGTGGCAATGAGAGTGAAGGAGAAGTGTCAGCACTTGTGGAGATGGGGGTGGAGATG +GGGCACCATGCTCCTTGGGATATTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCAC +AGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGA +TGCTAAAGCATATGATGCAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCAC +AGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAA +AAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAA +GCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGGGGAATGC +TACTAATACCAATAGTAGTAATACCAATAGTAGTAGCGGGGAAATGATGATGGAGAAAGG +AGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAAATAAGGTGCAGAAAGA +ATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATGATACTACCAGCTATAC +GTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCC +AATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGAC +GTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAG +GCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGATGTAGTAAT +TAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAGTACAGCTGAACCAATCTGT +AGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTATCCAGAGAGG +ACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAA +CATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACA +ATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGT +AACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAA +TAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGA +CACAATCACACTCCCATGCAGAATAAAACAATTTATAAACATGTGGCAGGAAGTAGGAAA +AGCAATGTATGCCCCTCCCATCAGCGGACAAATTAGATGTTCATCAAATATTACAGGGCT +GCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGG +AGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGA +ACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGC +AGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGC +AGCGTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCA +GAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGG +CATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCT +CCTGGGGATTTGGGGCTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGC +TAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGA +CAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCA +GCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTG +GTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTT +GGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATA +TTCACCATTATCGTTTCAGACCCACCTCCCAAACCCGGGGGGACCCGACAGGCCCGAAGG +AATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATC +CTTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAG +AGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGC +CCTCAAATATTGGTGGAATCTCCTACAATATTGGAGTCAGGAGCTAAAGAATAGTGCTGT +TAGCTTGCTCAATGCCACAGCTATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGT +AGTACAAGGAGCTTATAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGA +AAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTA +CTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGCATCTC +GAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTT +GTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTAC +CTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGG +GGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCT +ACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGAT +ATCCATTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGGTAGAAG +AGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATG +ACCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGG +CCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGAC +TTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCC +TCAGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAG +ATCTGAGCCTGGGAGTTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGC +TTGCCTTGAGTGC >MN692074 TGGAAGGGCTAATTTGGTCCCAGAAAAGACAAGATATCCTTGATCTGTGGATCTACCACA CACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCGGGGACCAGATTTCCAC