From 401cd473206db9f2ff90e9bfeea1e56d97e85bef Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 23 Jun 2023 16:50:14 -0700 Subject: [PATCH] use small_frames analysis for large frames too --- intact/intact.py | 61 +-- tests/expected-results-large/errors.json | 153 ++++--- tests/expected-results-large/orfs.json | 527 +++++++++++++---------- tests/expected-results-single/orfs.json | 12 +- tests/expected-results-small/errors.json | 32 +- tests/expected-results-small/orfs.json | 61 ++- 6 files changed, 456 insertions(+), 390 deletions(-) diff --git a/intact/intact.py b/intact/intact.py index a47795c..e64a90f 100644 --- a/intact/intact.py +++ b/intact/intact.py @@ -75,6 +75,7 @@ class ReceivedORF: class CandidateORF: start: int end: int + orientation: str distance: float aminoseq: str expectedaminoseq: str @@ -555,41 +556,12 @@ def alignment_score(alignment): return sum([a==b for a, b in zip(alignment[0].seq, alignment[1].seq)]) def small_frames( - alignment, sequence, length, + alignment, sequence, is_small, expected, error_bar, reverse = False ): """ Check for presence of small reading frames """ - frames = reading_frames_single_stranded( - alignment, - sequence, length) - f_type = "forward" - if reverse: - tmp_reference = SeqRecord.SeqRecord(Seq.reverse_complement(alignment[0].seq), - id = alignment[0].id, - name = alignment[0].name - ) - tmp_subtype = SeqRecord.SeqRecord(Seq.reverse_complement(alignment[1].seq), - id = alignment[1].id, - name = alignment[1].name - ) - tmp_sequence = SeqRecord.SeqRecord(Seq.reverse_complement(sequence.seq), - id = sequence.id, - name = sequence.name - ) - - reverse_alignment = [tmp_reference, tmp_subtype] - frames = reading_frames_single_stranded( - reverse_alignment, - tmp_sequence, - length) - f_type = "reverse" - - if len(frames) == 0: - return [IntactnessError( - sequence.id, WRONGORFNUMBER_ERROR, - "No ORFs >" + str(length) + " bases found.")] import util.coordinates as coords coordinates_mapping = coords.map_positions(alignment[0], alignment[1].seq) @@ -650,8 +622,7 @@ def find_candidate_positions(e, q_start, q_end): dist = -1 * jarowinkler_similarity(got_aminoacids, expected_aminoacids) closest_start = min(n, (closest_start_a * 3) + frame) closest_end = min(n, (closest_end_a * 3) + 3 + frame) - yield CandidateORF(closest_start, - closest_end, + yield CandidateORF(closest_start, closest_end, "forward", dist, got_aminoacids, expected_aminoacids) def find_real_correspondence(e): @@ -685,8 +656,10 @@ def get_indel_impact(alignment): return impacted errors = [] + matches = [] for e in expected: best_match = find_real_correspondence(e) + matches.append(best_match) got_protein = best_match.aminoseq.split("*")[0] exp_protein = best_match.expectedaminoseq.split("*")[0] @@ -704,14 +677,16 @@ def get_indel_impact(alignment): if "*" in best_match.aminoseq[1:-1]: errors.append(IntactnessError( sequence.id, INTERNALSTOP_ERROR, - "Smaller ORF " + str(e.name) + " at " + str(e.start) + ("Smaller " if is_small else "") + + "ORF " + str(e.name) + " at " + str(e.start) + "-" + str(e.end) + " contains an internal stop codon" )) else: errors.append(IntactnessError( sequence.id, DELETIONINORF_ERROR, - "Smaller ORF " + str(e.name) + " at " + str(e.start) + ("Smaller " if is_small else "") + + "ORF " + str(e.name) + " at " + str(e.start) + "-" + str(e.end) + " can have maximum deletions " + str(e.deletion_tolerence) + ", got " @@ -725,7 +700,8 @@ def get_indel_impact(alignment): errors.append(IntactnessError( sequence.id, INSERTIONINORF_ERROR, - "Smaller ORF " + str(e.name) + " at " + str(e.start) + ("Smaller " if is_small else "") + + "ORF " + str(e.name) + " at " + str(e.start) + "-" + str(e.end) + " can have maximum insertions " + str(3 * e.deletion_tolerence) + ", got " @@ -741,7 +717,8 @@ def get_indel_impact(alignment): errors.append(IntactnessError( sequence.id, FRAMESHIFTINORF_ERROR, - "Smaller ORF " + str(e.name) + " at " + str(e.start) + ("Smaller " if is_small else "") + + "ORF " + str(e.name) + " at " + str(e.start) + "-" + str(e.end) + " contains out of frame indels that impact " + str(impacted_by_indels) + " positions." @@ -750,7 +727,7 @@ def get_indel_impact(alignment): continue - return errors + return matches, errors def has_reading_frames( @@ -961,14 +938,14 @@ def intact( working_dir, alignment = reverse_alignment sequence = reverse_sequence - sequence_orfs, orf_errors = has_reading_frames( + sequence_orfs, orf_errors = small_frames( alignment, - reference, sequence, min_orf_length, - forward_orfs, reverse_orfs, error_bar) + sequence, False, + forward_orfs, error_bar) sequence_errors.extend(orf_errors) - small_orf_errors = small_frames( - alignment, sequence, const.DEFAULT_SMALL_ORF_LENGTH, + sequence_small_orfs, small_orf_errors = small_frames( + alignment, sequence, True, small_orfs, error_bar, reverse = False) if include_small_orfs: sequence_errors.extend(small_orf_errors) diff --git a/tests/expected-results-large/errors.json b/tests/expected-results-large/errors.json index 50dc010..240495f 100644 --- a/tests/expected-results-large/errors.json +++ b/tests/expected-results-large/errors.json @@ -2,8 +2,13 @@ "KX505501.1": [ { "sequence_name": "KX505501.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 1" + "error": "DeletionInOrf", + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 2892" + }, + { + "sequence_name": "KX505501.1", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "KX505501.1", @@ -76,8 +81,13 @@ "MN692074": [ { "sequence_name": "MN692074", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 2" + "error": "DeletionInOrf", + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 981" + }, + { + "sequence_name": "MN692074", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "MN692074", @@ -129,18 +139,8 @@ "MN090335": [ { "sequence_name": "MN090335", - "error": "MisplacedORF", - "message": "Expected an ORF, gag, at 789-2292 in the forward strand, got 1175-2291" - }, - { - "sequence_name": "MN090335", - "error": "DeletionInOrf", - "message": "ORF gag at 1175-2291 can have maximum deletions 30, got 91" - }, - { - "sequence_name": "MN090335", - "error": "FrameshiftInOrf", - "message": "ORF gag at 1175-2291 contains an out of frame indel, deletions 91 insertions 33." + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" }, { "sequence_name": "MN090335", @@ -171,8 +171,8 @@ "MN090376": [ { "sequence_name": "MN090376", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 2" + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" }, { "sequence_name": "MN090376", @@ -235,11 +235,6 @@ } ], "MK115030.1": [ - { - "sequence_name": "MK115030.1", - "error": "MisplacedORF", - "message": "Expected an ORF, gag, at 789-2292 in the forward strand, got 765-2282" - }, { "sequence_name": "MK115030.1", "error": "Scramble", @@ -255,13 +250,8 @@ "MK115211.1": [ { "sequence_name": "MK115211.1", - "error": "MisplacedORF", - "message": "Expected an ORF, gag, at 789-2292 in the forward strand, got 765-2282" - }, - { - "sequence_name": "MK115211.1", - "error": "MisplacedORF", - "message": "Expected an ORF, env, at 6223-8794 in the forward strand, got 6955-8793" + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "MK115211.1", @@ -285,16 +275,6 @@ } ], "MK115158.1": [ - { - "sequence_name": "MK115158.1", - "error": "MisplacedORF", - "message": "Expected an ORF, gag, at 789-2292 in the forward strand, got 709-2282" - }, - { - "sequence_name": "MK115158.1", - "error": "FrameshiftInOrf", - "message": "ORF gag at 709-2282 contains an out of frame indel, deletions 20 insertions 0." - }, { "sequence_name": "MK115158.1", "error": "PackagingSignalDeletion", @@ -321,8 +301,18 @@ "MK114856.1": [ { "sequence_name": "MK114856.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 0" + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "ORF pol at 2084-5096 contains an internal stop codon" + }, + { + "sequence_name": "MK114856.1", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "MK114856.1", @@ -368,8 +358,18 @@ "MK115009.1": [ { "sequence_name": "MK115009.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 0" + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" + }, + { + "sequence_name": "MK115009.1", + "error": "InternalStopInOrf", + "message": "ORF pol at 2084-5096 contains an internal stop codon" + }, + { + "sequence_name": "MK115009.1", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "MK115009.1", @@ -423,8 +423,8 @@ "MK116110.1": [ { "sequence_name": "MK116110.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 2" + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" }, { "sequence_name": "MK116110.1", @@ -446,22 +446,12 @@ "MK114997.1": [ { "sequence_name": "MK114997.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 4" + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" } ], "MK115518.1": [], "MK115065.1": [ - { - "sequence_name": "MK115065.1", - "error": "MisplacedORF", - "message": "Expected an ORF, gag, at 789-2292 in the forward strand, got 657-2282" - }, - { - "sequence_name": "MK115065.1", - "error": "DeletionInOrf", - "message": "ORF gag at 657-2282 can have maximum deletions 30, got 75" - }, { "sequence_name": "MK115065.1", "error": "PackagingSignalDeletion", @@ -486,8 +476,18 @@ "MK115464.1": [ { "sequence_name": "MK115464.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 0" + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" + }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "ORF pol at 2084-5096 contains an internal stop codon" + }, + { + "sequence_name": "MK115464.1", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "MK115464.1", @@ -529,8 +529,8 @@ "MK115520.1": [ { "sequence_name": "MK115520.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 4" + "error": "InternalStopInOrf", + "message": "ORF pol at 2084-5096 contains an internal stop codon" }, { "sequence_name": "MK115520.1", @@ -558,21 +558,6 @@ ], "MK115509.1": [], "MK115702.1": [ - { - "sequence_name": "MK115702.1", - "error": "MisplacedORF", - "message": "Expected an ORF, gag, at 789-2292 in the forward strand, got 765-2282" - }, - { - "sequence_name": "MK115702.1", - "error": "MisplacedORF", - "message": "Expected an ORF, env, at 6223-8794 in the forward strand, got 6199-8764" - }, - { - "sequence_name": "MK115702.1", - "error": "FrameshiftInOrf", - "message": "ORF env at 6199-8764 contains an out of frame indel, deletions 50 insertions 37." - }, { "sequence_name": "MK115702.1", "error": "PackagingSignalDeletion", @@ -597,8 +582,18 @@ "MK115095.1": [ { "sequence_name": "MK115095.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 0" + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" + }, + { + "sequence_name": "MK115095.1", + "error": "InternalStopInOrf", + "message": "ORF pol at 2084-5096 contains an internal stop codon" + }, + { + "sequence_name": "MK115095.1", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "MK115095.1", diff --git a/tests/expected-results-large/orfs.json b/tests/expected-results-large/orfs.json index e00c9f5..ccbc38f 100644 --- a/tests/expected-results-large/orfs.json +++ b/tests/expected-results-large/orfs.json @@ -2,619 +2,698 @@ "KX505501.1": [ { "orientation": "forward", - "start": 765, - "end": 9460 + "start": 336, + "end": 1824 + }, + { + "orientation": "forward", + "start": 1628, + "end": 1748 + }, + { + "orientation": "forward", + "start": 0, + "end": 1824 } ], "MN691959": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 639, + "end": 2142 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1934, + "end": 4946 }, { "orientation": "forward", - "start": 6202, - "end": 8793 + "start": 6070, + "end": 8656 } ], "MN692074": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 789, + "end": 2292 }, { "orientation": "forward", "start": 2084, - "end": 9514 + "end": 4115 + }, + { + "orientation": "forward", + "start": 2, + "end": 4115 } ], "MN692145": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 775, + "end": 2287 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2070, + "end": 5085 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6212, + "end": 8783 } ], "MN090335": [ { "orientation": "forward", - "start": 1175, - "end": 2291 + "start": 315, + "end": 1665 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1427, + "end": 4469 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5596, + "end": 8158 } ], "MN090376": [ { "orientation": "forward", - "start": 2072, - "end": 5095 + "start": 376, + "end": 1639 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 1382, + "end": 4394 + }, + { + "orientation": "forward", + "start": 5524, + "end": 8137 } ], "MK115581.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 680, + "end": 2186 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1972, + "end": 4984 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6111, + "end": 8652 } ], "MK115690.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 777, + "end": 2292 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2078, + "end": 5090 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6217, + "end": 8800 } ], "MK115571.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 579, + "end": 2079 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1871, + "end": 4883 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6010, + "end": 8551 } ], "MK115514.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 584, + "end": 2090 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1876, + "end": 4888 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6015, + "end": 8556 } ], "MK115488.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 707, + "end": 2213 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1999, + "end": 5011 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6138, + "end": 8679 } ], "MK115030.1": [ { "orientation": "forward", - "start": 765, - "end": 2282 + "start": 176, + "end": 1685 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1477, + "end": 4567 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5616, + "end": 8217 } ], "MK115498.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 663, + "end": 2169 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1955, + "end": 4967 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6094, + "end": 8635 } ], "MK115211.1": [ { "orientation": "forward", - "start": 765, - "end": 2282 + "start": 250, + "end": 1753 }, { "orientation": "forward", - "start": 2072, - "end": 5095 + "start": 1545, + "end": 4557 }, { "orientation": "forward", - "start": 6955, - "end": 8793 + "start": 5687, + "end": 8198 } ], "MK115158.1": [ { "orientation": "forward", - "start": 709, - "end": 2282 + "start": 316, + "end": 1819 }, { "orientation": "forward", - "start": 2072, - "end": 5095 + "start": 1611, + "end": 4623 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5753, + "end": 8315 } ], "MK114705.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 532, + "end": 2053 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1839, + "end": 4851 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5972, + "end": 8549 + } + ], + "MK114856.1": [ + { + "orientation": "forward", + "start": 120, + "end": 2022 + }, + { + "orientation": "forward", + "start": 1787, + "end": 4892 + }, + { + "orientation": "forward", + "start": 5953, + "end": 8521 + } + ], + "MK115009.1": [ + { + "orientation": "forward", + "start": 302, + "end": 1715 + }, + { + "orientation": "forward", + "start": 1613, + "end": 4703 + }, + { + "orientation": "forward", + "start": 5752, + "end": 8353 } ], - "MK114856.1": [], - "MK115009.1": [], "MK115387.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 292, + "end": 1795 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1587, + "end": 4599 }, { "orientation": "forward", - "start": 6202, - "end": 8793 + "start": 5726, + "end": 8288 } ], "MK115491.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 521, + "end": 2027 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1813, + "end": 4825 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5952, + "end": 8493 } ], "MK116110.1": [ { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 291, + "end": 1650 }, { "orientation": "forward", - "start": 6202, - "end": 8793 + "start": 1393, + "end": 4405 + }, + { + "orientation": "forward", + "start": 5532, + "end": 8073 } ], "MK115527.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 683, + "end": 2189 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1975, + "end": 4987 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6114, + "end": 8655 } ], "MK114997.1": [ { "orientation": "forward", - "start": 765, - "end": 2282 - }, - { - "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 210, + "end": 1725 }, { "orientation": "forward", - "start": 6199, - "end": 7283 + "start": 1511, + "end": 4601 }, { "orientation": "forward", - "start": 7249, - "end": 8793 + "start": 5762, + "end": 8216 } ], "MK115518.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 739, + "end": 2245 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2031, + "end": 5043 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6170, + "end": 8711 } ], "MK115065.1": [ { "orientation": "forward", - "start": 657, - "end": 2282 + "start": 221, + "end": 1736 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1522, + "end": 4534 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5661, + "end": 8262 } ], - "MK115464.1": [], - "MK115530.1": [ + "MK115464.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 527, + "end": 2297 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2089, + "end": 5101 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6228, + "end": 8826 } ], - "MK115520.1": [ + "MK115530.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 746, + "end": 2252 }, { "orientation": "forward", - "start": 2084, - "end": 3281 + "start": 2038, + "end": 5050 + }, + { + "orientation": "forward", + "start": 6177, + "end": 8718 + } + ], + "MK115520.1": [ + { + "orientation": "forward", + "start": 695, + "end": 2195 }, { "orientation": "forward", - "start": 3259, - "end": 5095 + "start": 1987, + "end": 4915 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6131, + "end": 8672 } ], "MK115503.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 817, + "end": 2323 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2109, + "end": 5121 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6248, + "end": 8789 } ], "MK115570.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 687, + "end": 2193 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1979, + "end": 4991 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6118, + "end": 8659 } ], "MK115509.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 555, + "end": 2061 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1847, + "end": 4859 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5986, + "end": 8527 } ], "MK115702.1": [ { "orientation": "forward", - "start": 765, - "end": 2282 + "start": 246, + "end": 1782 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1544, + "end": 4586 + }, + { + "orientation": "forward", + "start": 5716, + "end": 8257 + } + ], + "MK115095.1": [ + { + "orientation": "forward", + "start": 2, + "end": 1697 }, { "orientation": "forward", - "start": 6199, - "end": 8764 + "start": 1489, + "end": 4579 + }, + { + "orientation": "forward", + "start": 5628, + "end": 8229 } ], - "MK115095.1": [], "MK115490.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 549, + "end": 2055 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1841, + "end": 4853 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5980, + "end": 8521 } ], "MK115576.1": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 468, + "end": 1974 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1760, + "end": 4772 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5899, + "end": 8440 } ], "OQ092466": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 825, + "end": 2361 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2147, + "end": 5165 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6292, + "end": 8902 } ], "OQ092463": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 801, + "end": 2319 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2105, + "end": 5168 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6244, + "end": 8827 } ], "OQ092465": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 855, + "end": 2364 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2150, + "end": 5213 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6289, + "end": 8908 } ], "OQ092462": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 767, + "end": 2270 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2062, + "end": 5074 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6201, + "end": 8844 } ], "OQ092464": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 773, + "end": 2276 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2068, + "end": 5080 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6207, + "end": 8790 } ], "OQ092467": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 808, + "end": 2308 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2100, + "end": 5112 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6239, + "end": 8834 } ] } \ No newline at end of file diff --git a/tests/expected-results-single/orfs.json b/tests/expected-results-single/orfs.json index c40e144..6969d6e 100644 --- a/tests/expected-results-single/orfs.json +++ b/tests/expected-results-single/orfs.json @@ -2,18 +2,18 @@ "MN692145": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 775, + "end": 2287 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2070, + "end": 5085 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6212, + "end": 8783 } ] } \ No newline at end of file diff --git a/tests/expected-results-small/errors.json b/tests/expected-results-small/errors.json index 1fa0168..e65e799 100644 --- a/tests/expected-results-small/errors.json +++ b/tests/expected-results-small/errors.json @@ -2,8 +2,13 @@ "KX505501.1": [ { "sequence_name": "KX505501.1", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 1" + "error": "DeletionInOrf", + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 2892" + }, + { + "sequence_name": "KX505501.1", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "KX505501.1", @@ -76,8 +81,13 @@ "MN692074": [ { "sequence_name": "MN692074", - "error": "WrongORFNumber", - "message": "Expected 3 forward ORFs, got 2" + "error": "DeletionInOrf", + "message": "ORF pol at 2084-5096 can have maximum deletions 30, got 981" + }, + { + "sequence_name": "MN692074", + "error": "InternalStopInOrf", + "message": "ORF env at 6223-8794 contains an internal stop codon" }, { "sequence_name": "MN692074", @@ -129,18 +139,8 @@ "MN090335": [ { "sequence_name": "MN090335", - "error": "MisplacedORF", - "message": "Expected an ORF, gag, at 789-2292 in the forward strand, got 1175-2291" - }, - { - "sequence_name": "MN090335", - "error": "DeletionInOrf", - "message": "ORF gag at 1175-2291 can have maximum deletions 30, got 91" - }, - { - "sequence_name": "MN090335", - "error": "FrameshiftInOrf", - "message": "ORF gag at 1175-2291 contains an out of frame indel, deletions 91 insertions 33." + "error": "InternalStopInOrf", + "message": "ORF gag at 789-2292 contains an internal stop codon" }, { "sequence_name": "MN090335", diff --git a/tests/expected-results-small/orfs.json b/tests/expected-results-small/orfs.json index 766e0e3..4166571 100644 --- a/tests/expected-results-small/orfs.json +++ b/tests/expected-results-small/orfs.json @@ -2,71 +2,86 @@ "KX505501.1": [ { "orientation": "forward", - "start": 765, - "end": 9460 + "start": 336, + "end": 1824 + }, + { + "orientation": "forward", + "start": 1628, + "end": 1748 + }, + { + "orientation": "forward", + "start": 0, + "end": 1824 } ], "MN691959": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 639, + "end": 2142 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1934, + "end": 4946 }, { "orientation": "forward", - "start": 6202, - "end": 8793 + "start": 6070, + "end": 8656 } ], "MN692074": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 789, + "end": 2292 }, { "orientation": "forward", "start": 2084, - "end": 9514 + "end": 4115 + }, + { + "orientation": "forward", + "start": 2, + "end": 4115 } ], "MN692145": [ { "orientation": "forward", - "start": 765, - "end": 2291 + "start": 775, + "end": 2287 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 2070, + "end": 5085 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 6212, + "end": 8783 } ], "MN090335": [ { "orientation": "forward", - "start": 1175, - "end": 2291 + "start": 315, + "end": 1665 }, { "orientation": "forward", - "start": 2084, - "end": 5095 + "start": 1427, + "end": 4469 }, { "orientation": "forward", - "start": 6199, - "end": 8793 + "start": 5596, + "end": 8158 } ] } \ No newline at end of file