diff --git a/intact/intact.py b/intact/intact.py index 6cb5e05..5089f98 100644 --- a/intact/intact.py +++ b/intact/intact.py @@ -591,10 +591,12 @@ def small_frames( "No ORFs >" + str(length) + " bases found.")] import util.coordinates as coords - coordinates_mapping = coords.map_positions(alignment[0], alignment[1]) - reverse_coordinates_mapping = coords.map_positions(alignment[1], alignment[0]) + coordinates_mapping = coords.map_positions(alignment[0], alignment[1].seq) + reverse_coordinates_mapping = coords.map_positions(alignment[1], alignment[0].seq) reference = alignment[0].seq.replace("-", "") + reference_aligned_mapping = coords.map_nonaligned_to_aligned_positions(reference, alignment[0].seq) + query_aligned_mapping = coords.map_nonaligned_to_aligned_positions(sequence, alignment[1].seq) def translate(seq, frame = 0): for_translation = seq[frame:] @@ -654,8 +656,11 @@ def find_real_correspondence(e): for e in expected: best_match = find_real_correspondence(e) - insertions = len(re.findall(r"-", str(alignment[0].seq[e.start:e.end]))) - deletions = len(re.findall(r"-", str(alignment[1].seq[e.start:e.end]))) + aligned_start = query_aligned_mapping[best_match.start] + aligned_end = query_aligned_mapping[best_match.end - 1] + 1 + + insertions = len(re.findall(r"-", str(alignment[0].seq[aligned_start:aligned_end]))) + deletions = len(re.findall(r"-", str(alignment[1].seq[aligned_start:aligned_end]))) translated = best_match.aminoseq.split("*")[0] adeletions = (len(best_match.expectedaminoseq) - (len(translated) + 1)) * 3 diff --git a/tests/expected-results-large/errors.json b/tests/expected-results-large/errors.json index 46f48a5..4d4a888 100644 --- a/tests/expected-results-large/errors.json +++ b/tests/expected-results-large/errors.json @@ -10,6 +10,11 @@ "error": "InternalStopInOrf", "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon" }, + { + "sequence_name": "KX505501.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF vif at 5040-5619 contains an out of frame indel: insertions 1 deletions 7182." + }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", @@ -20,11 +25,6 @@ "error": "DeletionInOrf", "message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 210" }, - { - "sequence_name": "KX505501.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 215." - }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", @@ -32,24 +32,34 @@ }, { "sequence_name": "KX505501.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF rev_exon1 at 5968-6044 contains an out of frame indel: insertions 0 deletions 76." + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" }, { "sequence_name": "KX505501.1", - "error": "InternalStopInOrf", - "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + "error": "FrameshiftInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an out of frame indel: insertions 1 deletions 0." }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 96" }, + { + "sequence_name": "KX505501.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an out of frame indel: insertions 1 deletions 0." + }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon" }, + { + "sequence_name": "KX505501.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 1 deletions 7182." + }, { "sequence_name": "KX505501.1", "error": "RevResponseElementDeletion", @@ -97,12 +107,12 @@ { "sequence_name": "MN692074", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 215." + "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 5236." }, { "sequence_name": "MN692074", "error": "FrameshiftInOrf", - "message": "Smaller ORF rev_exon1 at 5968-6044 contains an out of frame indel: insertions 0 deletions 76." + "message": "Smaller ORF rev_exon1 at 5968-6044 contains an out of frame indel: insertions 0 deletions 5236." }, { "sequence_name": "MN692074", @@ -114,11 +124,6 @@ "error": "DeletionInOrf", "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 123" }, - { - "sequence_name": "MN692074", - "error": "FrameshiftInOrf", - "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 0 deletions 617." - }, { "sequence_name": "MN692074", "error": "RevResponseElementDeletion", @@ -200,14 +205,13 @@ "message": "Sequence contains an internal inversion." } ], - "MK115581.1": [ + "MK115581.1": [], + "MK115690.1": [ { - "sequence_name": "MK115581.1", + "sequence_name": "MK115690.1", "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6060-6309 contains an out of frame indel: insertions 9 deletions 5." - } - ], - "MK115690.1": [ + "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 16 deletions 6." + }, { "sequence_name": "MK115690.1", "error": "PackagingSignalDeletion", @@ -220,11 +224,6 @@ } ], "MK115571.1": [ - { - "sequence_name": "MK115571.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6060-6309 contains an out of frame indel: insertions 9 deletions 1." - }, { "sequence_name": "MK115571.1", "error": "PackagingSignalDeletion", @@ -237,11 +236,6 @@ } ], "MK115514.1": [ - { - "sequence_name": "MK115514.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6060-6309 contains an out of frame indel: insertions 9 deletions 7." - }, { "sequence_name": "MK115514.1", "error": "MajorSpliceDonorSiteMutated", @@ -333,6 +327,11 @@ "error": "InternalStopInOrf", "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon" }, + { + "sequence_name": "MK114705.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 7 deletions 69." + }, { "sequence_name": "MK114705.1", "error": "Scramble", @@ -540,6 +539,11 @@ "error": "InternalStopInOrf", "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon" }, + { + "sequence_name": "MK115464.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 0 deletions 13." + }, { "sequence_name": "MK115464.1", "error": "APOBECHypermutationDetected", @@ -566,11 +570,6 @@ ], "MK115503.1": [], "MK115570.1": [ - { - "sequence_name": "MK115570.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6060-6309 contains an out of frame indel: insertions 9 deletions 7." - }, { "sequence_name": "MK115570.1", "error": "PackagingSignalDeletion", @@ -599,11 +598,6 @@ "error": "FrameshiftInOrf", "message": "ORF env at 6199-8764 contains an out of frame indel, deletions 50 insertions 37." }, - { - "sequence_name": "MK115702.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 0 deletions 19." - }, { "sequence_name": "MK115702.1", "error": "PackagingSignalDeletion", @@ -684,11 +678,6 @@ } ], "OQ092465": [ - { - "sequence_name": "OQ092465", - "error": "FrameshiftInOrf", - "message": "Smaller ORF vpu at 6060-6309 contains an out of frame indel: insertions 5 deletions 0." - }, { "sequence_name": "OQ092465", "error": "MajorSpliceDonorSiteMutated", diff --git a/tests/expected-results-large/intact.fasta b/tests/expected-results-large/intact.fasta index ebdbc61..2d494f4 100644 --- a/tests/expected-results-large/intact.fasta +++ b/tests/expected-results-large/intact.fasta @@ -161,6 +161,166 @@ CATATAAGCAGCTGCTTCTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTG GGAGCTCTCTGGCTGACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGT GCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACC CTTTTAGTCAGTGTGGAAAATCTCTAGCA +>MK115581.1 +AAATTGGGACGTTCGCCTACGCAAGCGAAAATTTCTCTACGTAGCCACCTTTCCCGATTA +ACGTCAGAACCCTAAAATTAAAATAAATCCTAGCGGCCGACCGACTCTGGTAACTAGAGA +TCCCTCAAAGATAAATCTCTAGCAGTGGCGCCCGAACAGACTCTGGTAACTAGAGATCCC +TCAGATTAAATCTCTAGCAGTGGCGCCCGAACAGAAATCTCTAGCAGTGGCGCCCGAACA +GACTCTGGTAACTAGAGATCCCTCAGATTAAATCTCTAGCAGTGGCGCCCGAATCATCTT +AGAGTGGCGCCCGAACGACTCTGGTAACTAGAGATCCCTCAAAGATAAATCTCTAGCAGT +GGCGCCCGCGACTCTGGTAACTAGAGATCCCTCAGATTAAATCTCTAGCAGTGGCGCCCG +AGACTCTGGTAACTAGAGATCCCTCAGATTAAATCTCTAGCAGTGGCGCCCGAACAGACT +CTGGTAACTAGAGATCCCTCAAAGATAAATCTCTAGCAGTGGCGCCCGACACAGGTACTA +GAGAAAGCGAAAGTAAAACCAGAGGAGCTCTCTCGGCGCAGGACTCGGCTTGCTGAAGCG +CGCACAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAAAAATTTTTGACTAG +CGGAGGCTAGAAGGGGAGAGATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAG +ATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATT +TAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACAT +CAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAG +AACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGG +TACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAA +AGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCA +TAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATG +CATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAG +CATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGAC +ATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATA +GATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAA +GTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATC +CACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAG +TAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTA +GAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAA +AAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTT +TAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGG +GGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTA +ATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATT +GTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGA +AATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAG +GGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGC +CAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGA +AGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCA +GCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGG +AGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGAT +AGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTG +TGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAG +AAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGT +ACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGA +AGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTC +AAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAG +TACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTG +GGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGT +ACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATAC +TGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGT +GCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTT +AGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTA +TGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACA +TCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCT +TTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGA +AAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAG +TCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGC +ACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGA +AATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGA +AGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAA +TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATT +AACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAA +ATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGC +CACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCA +ATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAG +AGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTC +CCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGA +TTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGC +ACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAA +GGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGT +AGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGC +CCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATAT +ACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGA +GGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCT +AGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGT +TATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATG +GCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGC +CGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCA +AGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCA +GGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAA +AGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCT +ACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAG +GGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGC +AGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCAT +TAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGA +TTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGAT +GGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCC +CACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAG +AATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAG +TTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAAT +CTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAG +GACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGA +GAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACC +AGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGA +ACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACA +TATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCA +ACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCC +ACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAG +GAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCC +AAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGC +GACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAGTAG +TATATGTAATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAA +TAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAA +GGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTG +AAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATG +TTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTA +TGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAG +GCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTA +GGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATG +CATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCA +CTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAG +GAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGA +AATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAT +GATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCA +AAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTA +AAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAA +TGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCA +GAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTA +CATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGT +ATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGA +CAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAA +AAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGAC +CCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACA +CAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGA +AACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAA +GCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTA +CTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGA +GATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCA +TTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTG +GGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCG +TCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAAC +AATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATC +AAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTG +GGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGT +TGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGA +GAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAG +GAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTT +AACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATA +GGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCA +CCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATC +GAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTG +GCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGAC +TTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTC +AAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGC +TTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTA +CAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGG +GCATTGTTATAAAATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGT +AAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGA +CCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGC +CTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCC +TTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGG +GGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTA +CAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATA +TCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCA +AGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAA +AGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAA +GCATCCGGAGTACTACAAAGACTGCTGACATTGAGCTTTCTACAAAGGGACTTTCCGCTG +GGGACTTTCCAGGGGAGGTGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGC +TGCATATAAGCAGCTGCGTTCTGCCTGTAAGGGGTCTCTCTGGTTAGACCAGATCTGAGC +CTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTG +CACTCAAGGCAAGCT >MK115498.1 AACACGGAATGGGAGATAAGCAGCATCTCTATCGGCGCGGCGAGATCTGACACTGGTAAC TAGAGATCCCTCCTAAATCTCTAGCAGTGGCGCCCGAACAGACTCTGGTAACTAGAGATC diff --git a/tests/expected-results-large/nonintact.fasta b/tests/expected-results-large/nonintact.fasta index 640c81a..29434e4 100644 --- a/tests/expected-results-large/nonintact.fasta +++ b/tests/expected-results-large/nonintact.fasta @@ -568,166 +568,6 @@ CCGCTAGGGACTTTCCAGGGGAGGCGTGAACTGGGCGGGACAGGGGAGTGGCGAGCCCTC AGATGCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGAT CAGAGCCTGGGAGCTCTCTGGCTAGCTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTT GCCTTGAGTGCTTAAAGTAGTGTGTGCCCGTCTGTTGTGTGACTC ->MK115581.1 -AAATTGGGACGTTCGCCTACGCAAGCGAAAATTTCTCTACGTAGCCACCTTTCCCGATTA -ACGTCAGAACCCTAAAATTAAAATAAATCCTAGCGGCCGACCGACTCTGGTAACTAGAGA -TCCCTCAAAGATAAATCTCTAGCAGTGGCGCCCGAACAGACTCTGGTAACTAGAGATCCC -TCAGATTAAATCTCTAGCAGTGGCGCCCGAACAGAAATCTCTAGCAGTGGCGCCCGAACA -GACTCTGGTAACTAGAGATCCCTCAGATTAAATCTCTAGCAGTGGCGCCCGAATCATCTT -AGAGTGGCGCCCGAACGACTCTGGTAACTAGAGATCCCTCAAAGATAAATCTCTAGCAGT -GGCGCCCGCGACTCTGGTAACTAGAGATCCCTCAGATTAAATCTCTAGCAGTGGCGCCCG -AGACTCTGGTAACTAGAGATCCCTCAGATTAAATCTCTAGCAGTGGCGCCCGAACAGACT -CTGGTAACTAGAGATCCCTCAAAGATAAATCTCTAGCAGTGGCGCCCGACACAGGTACTA -GAGAAAGCGAAAGTAAAACCAGAGGAGCTCTCTCGGCGCAGGACTCGGCTTGCTGAAGCG -CGCACAGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCGAAAAAAATTTTTGACTAG -CGGAGGCTAGAAGGGGAGAGATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAG -ATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATT -TAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCCGTTAACCCTGGCCTGTTAGAAACAT -CAGAGGGATGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAG -AACTTAAATCATTATTTAATGCAGTAGCAGTCCTCTATTGTGTACATCAAAGGATACAGG -TACAAGACACCAAGGAAGCTTTAGAGAAGGTAGAGGAAGAGCAAAACAAAAGTAAGAAAA -AGGCACAGCAAGTAGCAGCAGCTGACACTGACACAGGAAACAGCAGCCAAAATTACCCCA -TAGTGCAGAACCTCCAGGGGCAAATGGTACATCAGCCCCTATCACCTAGAACTTTAAATG -CATGGGTAAAAGTAGTAGAAGAAAAGGCCTTCAGCCCAGAAGTCATACCCATGTTTTCAG -CATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGAC -ATCAAGCAGCTATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATA -GATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAA -GTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGCATGGACGACAAATAATC -CACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCTTGGGATTAAATAAAATAG -TAAGGATGTATAGCCCTACCAGTATTCTGGATATAAAACAAGGACCAAAGGAACCCTTTA -GAGACTATGTAGACCGGTTCTATAGAACTTTAAGAGCTGAGCAAGCTTCACAGGAGGTAA -AAAATTGGATGACAGAAACCTTGTTGGTCCAAAATTCGAATCCAGATTGTAAGACTATTT -TAAAAGCATTAGGACCAGCAGCAACACTAGAGGAAATGATGACAGCATGTCAAGGAGTGG -GGGGACCAGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAAATTCAGCTA -ATGTAATGATGCAGAGAGGCAATTTTAGAAACCAAAGAAAGATTGTTAAGTGTTTCAATT -GTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGA -AATGTGGAAAGGAAGGACATCAAATGAAAGACTGTACAGAGAGACAGGCTAATTTTTTAG -GGAAAATCTGGCCTTCCCACAAAGGAAGGCCAGGGAACTTCCTTCAGAGCAGACCAGAGC -CAACAGCCCCACCAGAAGAGAGCTTCAGGTTTGGGGAAGAGACAATAACTCCCTCTCAGA -AGCAGGAGCCAAGAGACAAGGAACTGTATCCCTTGTCTTCCCTCAAATCACTCTTTGGCA -GCGACCCCTCGTCAGAATAAAGATAGGGGGGCAATTTAAAGAAGCTTTATTAGATACAGG -AGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAAATGGAAACCAAAAATGAT -AGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACCCATAGAAATCTG -TGGACACAAAGCTATAGGTACAGTATTAATAGGACCTACACCTGTCAACATAATTGGAAG -AAATCTGTTGACTCAGCTTGGTTGTACCTTAAATTTTCCCATTAGTCCTATTGAAACTGT -ACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGA -AGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTC -AAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCTATAAAGAAAAAAGACAG -TACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTCAATAAGAGAACTCAAGACTTTTG -GGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGT -ACTGGATGTGGGTGATGCATATTTTTCAATTCCCTTAGATAAAGAATTCAGGAAGTATAC -TGCATTTACCATACCTAGTACAAATAATGAGACACCAGGGATTAGGTATGAGTACAATGT -GCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTT -AGAGCCTTTTAGAAAGCAAAATCCAGATATAGTTATCTATCAATACATGGATGATTTGTA -TGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACA -TCTGTTAAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCT -TTGGATGGGTTATGAACTCCATCCTGATAAGTGGACAGTACAGCCTATAATACTGCCAGA -AAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAG -TCAGATTTACGCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGGGGAACCAAAGC -ACTAACAGAGGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGA -AATTTTAAGAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGA -AGTACAGAAGCAGGGCAATGGACAGTGGACATATCAAATTTATCAAGAGCCATTTAAAAA -TCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATT -AACAGAGGCAGTACAAAAGATAGCCACAGAAAGCATAGTAATATGGGGAAAAACTCCTAA -ATTTAGACTACCTATACAAAAAGAAACATGGGAGACATGGTGGGCAGAGTACTGGCAAGC -CACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAATTATGGTACCA -ATTAGAGAAAGAACCCATAGTAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCTAATAG -AGAAACTAAATTAGGCAAAGCAGGATATGTCACTGACAGAGGAAGACAAAAGGTTATCTC -CCTAACGGACACAACAAATCAAAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGA -TTCGGGATCAGAAGTAAACATAGTAACAGACTCACAATATACATTAGGAATCATTCAAGC -ACAACCAGATAAGAGTGAATCAGAGTTAGTCAATCAGATAATAGAGCAGTTAATAAAAAA -GGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGT -AGACAAACTAGTCAGTACTGGAGTCAGGAGAGTACTATTTCTAGATGGAATAGATAAGGC -CCAAGAAGACCATGAGAAATATCACAGTAATTGGAGAGCAATGGCTGGTGATTTTAATAT -ACCCCCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCAAAAAGGAGA -GGCCATGCATGGACAAGTAGATTGTAGTCCAGGAATATGGCAACTAGATTGTACACATCT -AGAAGGGAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGT -TATTCCAGCAGAGACAGGGCAGGAAACAGCATACTTTCTCCTGAAACTAGCAGGGAGATG -GCCAGTAAAAACAATACATACAGACAATGGCAGCAATTTCACCAGTAATGTGGTTAAGGC -CGCCTGTTGGTGGGCAGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCA -AGGAGTAGTAGAATCCATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCA -GGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAA -AGGGGGGATTGGGGGGTATAGTGCAGGGGAAAGAATAGTAGATATAATAGCAACAGACCT -ACAAACTAAAGAATTACAGAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAG -GGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAGCTTCTCTGGAAAGGTGAAGGGGC -AGTAGTAATACAAGATAATAGTGATATAAAAGTAGTGCCAAGGAGAAAAGCAAAAATCAT -TAGGGATTATGGCAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGA -TTAGAACATGGAACAGTTTAGTAAAACACCATATGCATGTTTCAAAGAAAACTAAAGGAT -GGTTTTATAGACATCACTATGAAAGCACTCATCCAAGAATAAGTTCAGAAGTACACATCC -CACTAGGGGATGCTAGATTGGTAGTAACAACATATTGGGGCCTGAATACAGGAGAAAGAG -AATGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGAAAAAGGAAATATAGCACACAAG -TTGACCCTAACCTAGCAGACCAACTAATTCATCTGTATTACTTTGATTGTTTTTCAGAAT -CTGCTATAAGAAATGCCATATTAGGACATATAGTTAGTCCTAGGTGTGAATATCAAGCAG -GACATAACAAGGTAGGCTCTCTACAATACTTGGCACTAAAAGCATTAACAACACCAAAGA -GAAGAAAGCCACCTTTACCTAGTGTTAGGAAACTGACAGAGGATAGATGGAACGAGCACC -AGAAGACCAAGGGCCACAGAGGGAGCCATACAATGAATGGACATTAGAGCTTTTAGAGGA -ACTTAAGAATGAAGCGGTTAGACATTTTCCTAGGCCGTGGCTCCATGGCTTAGGACAACA -TATCTATGAAACTTATGGGGACACTTGGACAGGAGTGGAGGCCATAATAAGAACCCTGCA -ACAACTGCTGTTTATTCATTTCAGACTTGGGTGTCGACATAGCAGAATAGGCATTGTGCC -ACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCACCCAG -GAAGTCAGCCTAAAACTCCTTGTACTAACTGCTATTGTAAAAAGTGTTGCTTTCATTGCC -AAGTTTGTTTCATAAAAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGC -GACGAAGAGCTCCTCAAGGCAGTCAGACTGATCAAGATTCTCTATCAAAGCAGTAAGTAG -TATATGTAATGCAACCTCTGCACATAGCAGCAATAGTAGGATTAGTAGTAGCAGCAATAA -TAGCAATAGTTGTGTGGACCATAGTGTTAATAGAATATAGGAAAATATTAAGACAAAAAA -GGATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAAAAGACAGTGGCAATGAGAGTG -AAGGGGATCAGGAAGAATTGTCAGCGCTTATGGAGATGGGGCACCATGCTCCTGGGGATG -TTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTGACAGTCTATTATGGGGTACCTGTA -TGGAAAGAAGCAAACACCACTCTATTTTGTGCATCGGATGCTAAAGCATATACCACAGAG -GCACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTA -GGATTGGAAAATGTGACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAACAGATG -CATGAGGATGTAATCAATTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCA -CTCTGTGTTACTTTAAATTGCACTAATCTGAATGTTACTAATACCAATAGCAGTAGTGAG -GAGGGAAAGATGGCGGAAGAAATGAGAAACTGCTCTTTCAATATCACCACAAAAATAAGA -AATAAGATGCAGAAGGAATATGCACTTTTTTATAAACTTGATGTAGTACCAATAGATAAT -GATAATACAAGTTATACATTGATAAATTGTAACACCTCAGTCATTACACAGGCCTGTCCA -AAGGTATCCTTTGAGCCAATTCCTATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTA -AAGTGTAAGGATAAGAAGTTCAATGGATCAGGACCATGTAAAAATGTCAGCACAGTACAA -TGTACACATGGAATTAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGGCAGTTTAGCA -GAAGAAGAGGTAGTAATTAGATCTGAAAATTTCACGAACAATGCTAAAACCATAATAGTA -CATCTGAAGGAACCTATACAAATTAATTGTACAAGACCCAACAACAATACAATGAAAAGT -ATCCCTCTAGGACCAGGAAGAGCATTCTATGCAACAGGAGCCATAATAGGAGATATAAGA -CAAGCACATTGTAACCTTAGTAGAAAAGCATGGAATAATACTTTAAAACAGGTAGTTGAA -AAATTAAAAGAACAATATGAGAATAAAACAATAATCTTTAATCACTCCTCAGGAGGGGAC -CCAGAAATTATAATGCATACTTTTAATTGTGGAGGGGAATTTTTCTATTGTAATACAACA -CAGCTGTTTAATAGTACTTGGAATAGTACTTGGACTGGGAATGCTACCGAATTAGATGGA -AACATCACACTCCCATGCAGAATAAAACAGATTATAAACAGGTGGCAAGAAGTAGGAAAA -GCAATGTATGCCCCTCCCATTAGAGGAAAAATTAGATGTTCATCAAATATTACAGGGTTA -CTATTAGTAAGAGATGGTGGTAACACGAGCGAGACTGAGACCTTCAGACCTGGAGGAGGA -GATATGAAGGACAATTGGAGGAGTGAATTATATAAATATAAAGTAGAAAAAATTGAGCCA -TTAGGAGTAGCACCCACCAGGGCAAAGAGAAGAGTGGTGCAAAGAGAAAAAAGAGCAGTG -GGAATAGGAGCTGTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCG -TCAGTGACGCTGACGGTACAAGCCAGGCTGTTATTGTCTGGTATAGTGCAACAGCAAAAC -AATTTGCTGAGAGCTATTGAGGCGCAACAACATCTGTTGCAACTCACAGTCTGGGGCATC -AAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTACAGGATCAACAGCGCCTG -GGAATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCCTGGAATGCTAGT -TGGAGTAATAAATCTCTGAATCAGATTTGGGATAACATGACCTGGATGGAGTGGGAAAGA -GAAATTAACAATTACACAGACACAATATACACCTTAATTGAAAAGGCGCAGAACCAGCAG -GAAAAAAATGAACAAGAATTATTGGAATTGGATAAATGGGCAAGTTTGTGGAATTGGTTT -AACATAACAAAATGGCTGTGGTACATAAAATTATTCATAATGATAGTAGGAGGATTGATA -GGTTTAAGAATAGTTTTTGCTGTACTTGCTATAGTAAATAGAGTTAGGCAGGGATATTCA -CCATTATCGTTTCAGACCCACCTCCCAGCCCCGAGGGGACCCGACAGGCCCGAAGGAATC -GAAGAAGAAGGTGGAGAAAGAGACAGAGACAGATCAGTTCGATTAGTGAACGGATTCTTG -GCACTTTTCTGGGACGACCTGCGGAGCCTGTGCCTCTTCCTCTGCCACCGCTTGAGAGAC -TTACTCTTGATTGTAACAAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAATCCTC -AAACATTGGTGGAGTCTCCTGCAGTATTGGATTCAGGAGCTAAAGAGTAGTGCTATTAGC -TTATTCAATGTCACAGCCATCGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTA -CAAAGAGCTTGTAGAGCTCTTCTCCACATACCTACAAGAATAAGACAGGGCTTGGAAAGG -GCATTGTTATAAAATGGGGGGCAAGTGGTCAAAAAGTAGTGGGGGTGGATGGCCTGCTGT -AAGGGAAAGAATGAGACGAACTGAGCCAGCAGCAGAAGGGGTGGGAGCAGTATCTCGAGA -CCTGGAAAGACATGGAGCAGTCACAAGTAGCAATACAGCAGCTACCAATGCTGATTGCGC -CTGGCTGGAAGCACAAGAAGAGGACAGTGAAGTGGGTTTTCCAGTCAGACCTCAGGTGCC -TTTAAGACCAATGACTTTCAAAGGAGCTCTCGATCTTAGCCACTTTTTAAAAGAAAAGGG -GGGACTGGAAGGGCTAATTTACTCCCAAAAAAGACAAGATATCCTTGACCTGTGGGTCTA -CAACACACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACCAGATA -TCCACTGACCTTTGGGTGGTGTTTCAAGCTAGTACCAGTGGAGCCAGAAGAGGCCAATCA -AGGAGAGAACAACTGCTTGTTACACCCTATGAGCCTGCATGGGATGGAGGACCCGGAAAA -AGAAGTATTAATGTGGAAGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGAA -GCATCCGGAGTACTACAAAGACTGCTGACATTGAGCTTTCTACAAAGGGACTTTCCGCTG -GGGACTTTCCAGGGGAGGTGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGC -TGCATATAAGCAGCTGCGTTCTGCCTGTAAGGGGTCTCTCTGGTTAGACCAGATCTGAGC -CTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTG -CACTCAAGGCAAGCT >MK115690.1 TGGAAGGGATAATTTACTCCCAAAAAAGACAAGATATTCTTGATCTGTGGGTCTACAACA CACAAGGCTACTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGACTAGATTTCCAC diff --git a/tests/expected-results-small/errors.json b/tests/expected-results-small/errors.json index 58cd196..bf7b42b 100644 --- a/tests/expected-results-small/errors.json +++ b/tests/expected-results-small/errors.json @@ -10,6 +10,11 @@ "error": "InternalStopInOrf", "message": "Smaller ORF vif at 5040-5619 contains an internal stop codon" }, + { + "sequence_name": "KX505501.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF vif at 5040-5619 contains an out of frame indel: insertions 1 deletions 7182." + }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", @@ -20,11 +25,6 @@ "error": "DeletionInOrf", "message": "Smaller ORF tat_exon1 at 5829-6044 can have maximum deletions 30, got 210" }, - { - "sequence_name": "KX505501.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 215." - }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", @@ -32,24 +32,34 @@ }, { "sequence_name": "KX505501.1", - "error": "FrameshiftInOrf", - "message": "Smaller ORF rev_exon1 at 5968-6044 contains an out of frame indel: insertions 0 deletions 76." + "error": "InternalStopInOrf", + "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" }, { "sequence_name": "KX505501.1", - "error": "InternalStopInOrf", - "message": "Smaller ORF vpu at 6060-6309 contains an internal stop codon" + "error": "FrameshiftInOrf", + "message": "Smaller ORF tat_exon2 at 8375-8468 contains an out of frame indel: insertions 1 deletions 0." }, { "sequence_name": "KX505501.1", "error": "DeletionInOrf", "message": "Smaller ORF rev_exon2 at 8376-8652 can have maximum deletions 30, got 96" }, + { + "sequence_name": "KX505501.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF rev_exon2 at 8376-8652 contains an out of frame indel: insertions 1 deletions 0." + }, { "sequence_name": "KX505501.1", "error": "InternalStopInOrf", "message": "Smaller ORF nef at 8795-9416 contains an internal stop codon" }, + { + "sequence_name": "KX505501.1", + "error": "FrameshiftInOrf", + "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 1 deletions 7182." + }, { "sequence_name": "KX505501.1", "error": "RevResponseElementDeletion", @@ -97,12 +107,12 @@ { "sequence_name": "MN692074", "error": "FrameshiftInOrf", - "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 215." + "message": "Smaller ORF tat_exon1 at 5829-6044 contains an out of frame indel: insertions 0 deletions 5236." }, { "sequence_name": "MN692074", "error": "FrameshiftInOrf", - "message": "Smaller ORF rev_exon1 at 5968-6044 contains an out of frame indel: insertions 0 deletions 76." + "message": "Smaller ORF rev_exon1 at 5968-6044 contains an out of frame indel: insertions 0 deletions 5236." }, { "sequence_name": "MN692074", @@ -114,11 +124,6 @@ "error": "DeletionInOrf", "message": "Smaller ORF nef at 8795-9416 can have maximum deletions 30, got 123" }, - { - "sequence_name": "MN692074", - "error": "FrameshiftInOrf", - "message": "Smaller ORF nef at 8795-9416 contains an out of frame indel: insertions 0 deletions 617." - }, { "sequence_name": "MN692074", "error": "RevResponseElementDeletion",