diff --git a/intact/intact.py b/intact/intact.py index bd515b5..62d4b1f 100644 --- a/intact/intact.py +++ b/intact/intact.py @@ -356,8 +356,9 @@ def has_packaging_signal(alignment, psi_locus, psi_tolerance): """ packaging_begin = [m.start() for m in re.finditer(r"[^-]", str(alignment[0].seq))][psi_locus[0]] - query_start = [m.start() for m in re.finditer(r"[^-]", - str(alignment[1].seq))][0] + query_options = [m and m.start() for m in re.finditer(r"[^-]", + str(alignment[1].seq))] + query_start = query_options[0] if query_options else '' packaging_end = [m.start() for m in re.finditer(r"[^-]", str(alignment[0].seq))][psi_locus[1]] # if query_start > packaging_begin: diff --git a/tests/data-edgy.fasta b/tests/data-edgy.fasta index 55491b7..158160b 100644 --- a/tests/data-edgy.fasta +++ b/tests/data-edgy.fasta @@ -4,6 +4,9 @@ TGGAAGGGCTAATTCACTCCCAACGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGAT ATCTCTCACTCCCAGAGTC >singleton-sequence A +>empty-sequence + +>empty-sequence2 >Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED TGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCACGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATCCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTATTGGCCTCTTCTATCTTATCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGACCCCTGGCCCTGGTGTGTAGTTCTGCCAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCTTTGGGAGTGAATTAGCCCTTCCAGTCCCCCCTTTTCTTTTAAAAAGTGGCTAAGATCTACAGCTGCCTTGTAAGTCATTGGTCTTAAAGGTACCTGAGGTGTGACTGGAAAACCCACCTCCTCCTCCTCTTGTGCTTCTAGCCAGGCACAAGCAGCATTGGTAGCTGCTGTATTGCTACTTGTGATTGCTCCATGTTTTTCCAGGTCTCGAGATGCTGCTCCCACCCTATCTGCTGCTGGCTCAGCTCGTCTCATTCTTTCCCTTACAGTAGGCCATCCAATCACACTACTTTTTGACCACTTGCCACCCATCTTATAGCAAAATCCTTTCCAAGCCCTGTCTTATTCTTCTAGGTATGTGGCGAATAGCTCTACAAGCTCCTTGTACTACTTCTATAACCCTATCTGTCCCCTCAGCTACTGCTATGGCTGTGGCATTGAGCAAGCTAACAGCACTATTCTTTAGTTCCTGACTCCAATACTGTAGGAGATTCCACCAATATTTGAGGGCTTCCCACCCCCTGCGTCCCAGAAGTTCCACAATCCTCGTTACAATCAAGAGTAAGTCTCTCAAGCGGTGGTAGCTGAAGAGGCACAGGCTCCGCAGATCGTCCCAGATAAGTGCCAAGGATCCGTTCACTAATCGAATGGATCTGTCTCTGTCTCTCTCTCCACCTTCTTCTTCGGTTCCTTCGGGCCTGTCGGGTCCCCTCGGGGTTGGGAGGTGGGTCTGAAACGATAATGGTGAATATCCCTGCCTAACTCTATTCACTATAGAAAGTACAGCAAAAACTATTCTTAAACCTACCAAGCCTCCTACTATCATTATGAATAATTTTATATACCACAGCCAATTTGTTATGTTAAACCAATTCCACAAACTTGCCCATTTATCTAATTCCAATAATTCTTGTTCATTCTTTTCTTGCTGGTTTTGCGATTCTTCAATTAAGGAGTGTATTAAGCTTGTGTAATTGTTAATTTCTCTGTCCCACTCCATCCAGGTCGTGTGATTCCAAATCTGTTCCAGAGATTTATTACTCCAACTAGCATTCCAAGGCACAGCAGTGGTGCAAATGAGTTTTCCAGAGCAACCCCAAATCCCCAGGAGCTGTTGATCCTTTAGGTATCTTTCCACAGCCAGGATTCTTGCCTGGAGCTGCTTGATGCCCCAGACTGTGAGTTGCAACAGATGCTGTTGCGCCTCAATAGCCCTCAGCAAATTGTTCTGCTGCTGCACTATACCAGACAATAATTGTCTGGCCTGTACCGTCAGCGTCATTGAGGCTGCGCCCATAGTGCTTCCTGCTGCTCCCAAGAACCCAAGGAACAAAGCTCCTATTCCCACTGCTCTTTTTTCTCTCTGCACCACTCTTCTCTTTGCCTTGGTGGGTGCTACTCCTAATGGTTCAATTTTTACTACTTTATATTTATATAATTCACTTCTCCAATTGTCCCTCATATCTCCTCCTCCAGGTCTGAAGATCTCGGACTCATTGTTGCTATTACCACCATCTCTTGTTAATAGCAGCCCTGTAATATTTGATGAACATCTAATTTGTCCACTGATGGGAGGGGCATACATTGCTTTTCCTACTTTCTGCCACATGTTTATAATTTGTTTTATTCTGCATGGGAGGGTGATTGTGTCACTTCCTTCAGTGTTATTTGACCCTTCAGTACTCCAAGTACTATTAAACCAAGTACTATTAAACAGTTGTGTTGAATTACAGTAGAAAAATTCCCCTCCACAATTAAAACTGTGCGTTACAATTTCTGGGTCCCCTCCTGAGGATTGCTTAAAGATTATTGTTTTATTATTTCCAAATTGTTCTCTTAATTTGCTAGCTATCTGTTTTAAAGTGTTATTCCATTTTGCTCTACTAATGTTACAATGTGCTTGTCTCATATTTCCTATTTTTCCTATTGTAACAAATGCTCTCCCTGGTCCTCTCTGGATACGGATTCTTTTTCTTGTATTGTTGTTGGGTCTTGTACAATTAATTTCTACAGATGTGTTCAGCTGTACTATTATGGTTTTAGCATTGTCCGTGAAATTGACAGATCTAATTACTACCTCTTCTTCTGCTAGACTGCCATTTAACAGCAGTTGAGTTGATACTACTGGCCTAATTCCATGTGTACATTGTACTGTGCTGACATTTGTACATGGTCCTGTTCCATTGAACGTCTTATTATTACATTTTAGAATCGCAAAACCAGCCGGGGCACAATAATGTATGGGAATTGGCTCAAAGGATACCTTTGGACAGGCCTGTGTAATGACTGAGGTGTTACAACTTGTCAACTTATAGCTGGTAGTATCATTATCTATTGGTATTATATCAAGTTTATAAAAAAATGCATATTCTTTCTGCACCTTACCTCTTATGCTTGTGCTGATATTGAAAGAGCAGTTTTTTATCTCTCCTTTCTCCATTATCATTCTCCCGCTACTACTATTGGTATTAGTATCATTCTTCAAATCAGTGCACTTTAAACTAACACAGAGTGGGGTTAATTTTACACATGGCTTTAGGCTTTGATCCCATAAACTGATTATATCCTCATGCATCTGTTCTACCATGTCATTTTTCCACATGTTAAAATTTTCTGTCACATTTACCAATACTACTTCTTGTGGGTTGGGGTCTGTGGGTACACAGGCATGTGTGGCCCAAACATTATGTACCTCTGTATCATATGCTTTAGCATCTGATGCACAAAATAGAGTGGTGGTTGCTTCCTTCCACACAGGTACCCCATAATAGACTGTGACCCACAATTTTTCTGTAGCACTACAGATCATCAACATCCCAAGGAGCATGGTGCCCCATCTCCACCCCCATCTCCACAAGTGCTGATATTTCTCCTTCACTCTCATTGCCACTGTCTTCTGCTCTTTCTATTAGTCTATCAATTAACCTGTCTATTTTTCTTTGTCTTAATATTTTCCTATATTCTATGATTACTATGGACCACACAACTATTGCTATTATTATTGCTACTACTAATGCTACTATTGCTACTATTGGTATAGGTTGCATTACATGTACTACTTACTGCTTTGATAGAGAAGCTTGATGAGTCTGACTGTTCTGATGAGCTCTTCGTCGCTGTCTCCGCTTCTTCCTGCCATAGGAGATGCCTAAGGCTTTTGTTATGAAACAAACTTGGCAATGAAAGCAACACTTTTTACAATAGCAATTGGTACAAGCAGTTTTAGGCTGACTTCCTGGATGCTTCCAGGGCTCTAGTCTAGGATCTACTGGCTCCATTTCTTGCTCTCCTCTGTCGAGTAACGCCTATTCTGCTATGTCGACACCCAATTCTGAAATGGATAAACAGCAGTTGTTGCAGAATTCTTATTATGGCTTCCACTCCTGCCCAAGTATCCCCATAAGTTTCATAGATATGTTGCCCTAAGCCATGGAGCCAAATCCTAGGAAAATGTCTAACAGCTTCATTCTTAAGCTCCTCTAAAAGCTCTAGTGTCCATTCATTGTGTGGCTCCCTCTGTGGCCCTTGGTCTTCTGGGGCTTGTTCCATCTATCCTCTGTCAGTTTCGTAACACTAGGCAAAGGTGGCTTTATCTTTTTTGGTGTTATTAATGCTGCTAGTGCCAAGTATTGTAGAGATCCTACCTTGTTATGTCCTGCTTGATATTCACACCTAGGGCTAACTATGTGTCCTAATAAGGCCTTTCTTATAGCAGAGTCTGAAAAACAGTCAAAGTAATACAGATGAATTAGTTGGTCTGCTAGTTCAGGGTCTACTTGTGTGCTATATCTCTTTTTCCTCCATTCTATGGAGACTCCCTGACCCAAATGCCAGTCTCTTTCTCCTGTATGCAGACCCCAATATGTTGTTATTACCAATCTAGCATCCCCTAGTGGGATGTGTACTTCTGAACTTATTCTTGGATGAGGGCTTTCATAGTGATGTCTATAAAACCATCCCCTAGCTTTCCCTGAAACATACATATGGTGTTTTACTAAACTTTTCCATGTTCTAATCCTCATCCTGTCTACTTGCCACACAATCATCACCTGCCATCTGTTTTCCATAATCCCTAATGATCTTTGCTTTTCTTCTTGGCACTACTTTTATGTCACTATTATCTTGTATTACTACTGCCCCTTCACCTTTCCAGAGGAGCTTTGCTGGTCCTTTCCAAAGTGGATTTCTGCTGTCCCTGTAATAAACCCGAAAATTTTGAATTTTTGTAATTTGTTTTTGTAATTCTTTAGTTTGTATGTCTGTTGCTATTATGTCTACTATTCTTTCCCCTGCACTGTACCCCCCAATCCCCCCTTTTCTTTTAAAATTGTGGATGAATACTGCCATTTGTACTGCTGTCTTAAGATGTTCAGCCTGATCTCTTACCTGTCCTATAATTTTCTTTAATTCTTTATTCATAGATTCTACTACTCCTTGACTTTGGGGATTGTAGGGAATTCCAAATTCCTGCTTGATTCCCGCCCACCAACAGGCGGCCCTAACCGTAGCACCGGTGAAATTGCTGCCATTGTCAGTATGTATTGTTTTTACTGGCCATCTTCCTGCTAATTTTAAAAGAAAATATGCTGTTTCCTGCCCTGTTTCTGCTGGAATAACTTCTGCTTCTATATATCCACTGGCTACATGAACTGCTACCAGGATAACTTTTCCTTCTAAATGTGTACAATCTAGTTGCCATATTCCTGGACTACAGTCTACTTGTCCATGCATGGCTTCTCCTTTTAGCTGACATTTATCACAGCTGGCTACTATTTCTTTTGCTACTACAGGTGGCAGGTTAAAATCACTAGCCATTGCTCTCCAATTACTGTGATATTTCTCATGTTCATCTTGGGCCTTATCTATTCCATCTAAAAATAGTACTTTCCTGATTCCAGCACTGACTAATTTATCTACTTGTTCATTTCCTCCAATTCCTTTGTGTGCTGGTACCCATGCCAGATAGACCTTTTCCTTTTTTATTAACTGCTCTATTATTTGATTGACTAACTCTGATTCACTTTGATCTGGTTGTGCTTGAATGATTCCTAATGCATATTGTGAGTCTGTTACTATGTTTACTTCTAATCCCGAATCCTGCAAAGCTAGATAAATTGCTTGTAACTCAGTCTTCTGATTTGTTGTGTCAGTTAGGGTGACAACTTTTTGTCTTCCTCTATTAGTAACATATCCTGCTTTTCCTAATTTAGTCTCCCTGTTAGCTGCCCCATCTACATAGAAGGTTTCTGCTCCTACTATGGGTTCTTTCTCTAACTGGTACCATAATTTCACTAAGGGAGGGGTATTAACAAACTCCCACTCAGGAATCCAGGTGGCTTGCCAATACTCTGTCCACCATGTTTCCCATGTTTCCTTTTGTATGGGCAGTTTAAATTTAGGAGTCTTTCCCCATATTACTATGCTTTCTGTGGTTATTTTTTGCACTGCCTCTGTTAATTGTTTTACATCATTAGTGTGGGCACCCCTCATTCTTGCATATTTTCCTGTTTTCAGATTTTTAAATGGCTCTTGATAAATTTGATATGTCCATTGGCCTTGCCCCTGCTTCTGTATTTCTGCTATTAAGTCTTTTGATGGGTCATAATACACTCCATGTACTGGTTCTTTTAGAATCTCTCTGTTTTCTGCCAGTTCTAGCTCTGCTTCTTCTGTTAGTGGTATTACTTCTGTTAGTGCTTTGGTTCCTCTAAGGAGTTTACATAATTGCCTTACTTTAATCCCTGGGTAAATCTGACTTGCCCAATTCAATTTCCCCACTAACTTCTGTATGTCATTGACAGTCCAGCTGTCTTTTTCTGGCAGCACTATAGGCTGTACTGTCCATTTATCAGGATGGAGTTCATAACCCATCCAAAGGAATGGAGGTTCTTTCTGATGTTTTTTGTCTGGTGTGGTAAGTCCCCACCTCAACAGATGTTGTCTCAGCTCCTCTATTTTTGTTCTATGCTGCCCTATTTCTAAGTCAGATCCTACATACAAATCATCCATGTATTGATAGATAACTATGTCTGGATTTTGTTTTCTAAAAGGCTCTAAGATTTTTGTCATGCTACTTTGGAATATTGCTGGTGATCCTTTCCATCCCTGTGGAAGCACATTGTACTGATATCTAATCCCTGGTGTCTCATTGTTTATACTAGGTATGGTAAATGCAGTATACTTCCTGAAGTCTTCATCTAAGGGAACTGAAAAATATGCATCACCCACATCCAGTACTGTTACTGATTTTTTCTTTTTTAACCCTGCGGGATGTGGTATTCCTAATTGAACTTCCCAGAAGTCTTGAGTTCTCTTATTAAGTTCTCTGAAATCTACTAATTTTCTCCATTTAGTACTGTCTTTTTTCTTTATGGCAAATACTGGAGTATTGTATGGATTTTCAGGCCCAATTTTTGAAATTTTCCCTTCCTTTTCCATCTCTGTACAAATTTCTACTAATGCTTTTATTTTTTCTTCTGTCAATGGCCATTGTTTAACTTTTGGGCCATCCATTCCTGGCTTTAATTTTACTGGTACAGTCTCAATAGGGCTAATGGGAAAATTTAAAGTGCAACCAATCTGAGTCAACAGATTTCTTCCAATTATGTTGACAGGTGTAGGTCCTACTAATACTGTACCTATAGCTTTATGTCCACAGATTTCTATGAGTATCTGATCATACTGTCTTACTTTGATAAAACCTCCAATTCCCCCTATCATTTTTGGTTTCCATCTTCCTGGCAAACTCATTTCTTCTAATACTGTATCATCTGCTCCTGTATCTAATAGAGCTTCCTTTAGTTGCCCCCCTATCTTTATTGTGACGAGGGGTCGTTGCCAAAGAGTGACCTGAGGGAAGTTAAAGGATACAGTTCCTTGTCTATCGGCTCCTGCTTCTGAGGGGGAGTTGTTGTCTCTACCCCAGACCTGAAGCTCTCTTCTGGTGGGGCTGTTGGCTCTGGTCTGCTCTGAAGAAAATTCCCTGGCCTTCCCTTGTAGGAAGGCCAGATCTTCCCTAAAAAATTAGCCTGTCTCTCAGTACAATCTTTCATTTGGTGTCCTTCCTTTCCACATTTCCAACAGCCCTTTTTCCTAGGGGCCCTGCAATTTCTGGCTGTGTGCCCTTCTTTGCCACAATTGAAACACTTAACAATCTTTCTTTGGTTCCTAAAATTGCCTCTCTGCATCATTATGGTAGCTGAATTTGTTACTTGGCTCATTGCTTCAGCCAAAACTCTTGCCTTATGGCCGGGTCCTCCTACTCCCTGACATGCTGTCATCATTTCTTCTAGTGTAGCCGCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTCGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACCTCCTGTGAAGCTTGCTCGGCTCTTAGAGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTCTTATGTCCAGAATGCTGGTAGGGCTATACATTCTTACTATTTTATTTAATCCCAGGATTATCCATCTTTTATAAATTTCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATGTCACTTCCCCTTGGTTCTCTCATCTGGCCTGGTGCAATAGGCCCTGCATGCACTGGATGCACTCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAACATTTGCATGGCTGCTTGATGTCCCCCCACTGTGTTTAGCATGGTGTTTAAATCTTGTGGGGTGGCTCCTTCTGATAATGCTGAAAACATGGGTATCACTTCTGGGCTGAAAGCCTTCTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGATATGGCCTGATGTACCATTTGCCCCTGGATGTTCTGCACTATAGGGTAATTTTGGCTGACCTGATTGCTGTGTCCTGTGTCAGCTGCTGCTTGCTGTGCTTTTTTCTTACTTTTGTTTTGCTCTTCCTCTATCTTGTCTAAAGCTTCCTTGGTGTCTTTTATCTCTATCCTTTGATGCACACAATAGAGGGTTGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTATATGTTTTAATTTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTTTCCCATCGATCTAATTCTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAATTTTTGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCGAGTCCTGCGTCGAGAGAGCTCCTCTGGTTTCCCTTTCGCTTTCAGGTCCCTGTTCGGGCGCCACTGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCATGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATTCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTGTTGGCTTCTTCTAACTTCTCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGATCCCTGGCCCTGGTGTGTAGTTCTGCTAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCGTTGGGAGTGAATTAGCCCTTCCA >Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS diff --git a/tests/expected-results-edgy/errors.json b/tests/expected-results-edgy/errors.json index e86e85c..04be38b 100644 --- a/tests/expected-results-edgy/errors.json +++ b/tests/expected-results-edgy/errors.json @@ -164,6 +164,160 @@ "message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence." } ], + "empty-sequence": [ + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "ORF gag at 1-1498 can have maximum deletions 30, got 1494" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "ORF pol at 1290-4302 can have maximum deletions 30, got 3009" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "ORF env at 5430-8007 can have maximum deletions 100, got 2574" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF vif at 4246-4822 can have maximum deletions 30, got 573" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF vpr at 4764-5052 can have maximum deletions 30, got 285" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon1 at 5032-5248 can have maximum deletions 30, got 216" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon1 at 5171-5249 can have maximum deletions 30, got 78" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF vpu at 5267-5513 can have maximum deletions 30, got 243" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon2 at 7567-7663 can have maximum deletions 30, got 90" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon2 at 7568-7865 can have maximum deletions 30, got 294" + }, + { + "sequence_name": "empty-sequence", + "error": "DeletionInOrf", + "message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 672" + }, + { + "sequence_name": "empty-sequence", + "error": "PackagingSignalDeletion", + "message": "Query Sequence exceeds maximum deletion tolerance in PSI. Contains 21 deletions with max tolerance of 10 deletions." + }, + { + "sequence_name": "empty-sequence", + "error": "RevResponseElementDeletion", + "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions." + }, + { + "sequence_name": "empty-sequence", + "error": "MajorSpliceDonorSiteMutated", + "message": "Query sequence has a missing splice donor site, -." + }, + { + "sequence_name": "empty-sequence", + "error": "LongDeletion", + "message": "Query sequence contains a long deletion." + } + ], + "empty-sequence2": [ + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "ORF gag at 1-1498 can have maximum deletions 30, got 1494" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "ORF pol at 1290-4302 can have maximum deletions 30, got 3009" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "ORF env at 5430-8007 can have maximum deletions 100, got 2574" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF vif at 4246-4822 can have maximum deletions 30, got 573" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF vpr at 4764-5052 can have maximum deletions 30, got 285" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon1 at 5032-5248 can have maximum deletions 30, got 216" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon1 at 5171-5249 can have maximum deletions 30, got 78" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF vpu at 5267-5513 can have maximum deletions 30, got 243" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF tat_exon2 at 7567-7663 can have maximum deletions 30, got 90" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF rev_exon2 at 7568-7865 can have maximum deletions 30, got 294" + }, + { + "sequence_name": "empty-sequence2", + "error": "DeletionInOrf", + "message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 672" + }, + { + "sequence_name": "empty-sequence2", + "error": "PackagingSignalDeletion", + "message": "Query Sequence exceeds maximum deletion tolerance in PSI. Contains 21 deletions with max tolerance of 10 deletions." + }, + { + "sequence_name": "empty-sequence2", + "error": "RevResponseElementDeletion", + "message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions." + }, + { + "sequence_name": "empty-sequence2", + "error": "MajorSpliceDonorSiteMutated", + "message": "Query sequence has a missing splice donor site, -." + }, + { + "sequence_name": "empty-sequence2", + "error": "LongDeletion", + "message": "Query sequence contains a long deletion." + } + ], "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": [ { "sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED [REVERSED]", diff --git a/tests/expected-results-edgy/holistic.json b/tests/expected-results-edgy/holistic.json index a221188..8b3aaf6 100644 --- a/tests/expected-results-edgy/holistic.json +++ b/tests/expected-results-edgy/holistic.json @@ -35,6 +35,30 @@ "orfs_end": 8007, "blast_n_conseqs": 0 }, + "empty-sequence": { + "qlen": 0, + "hypermutation_probablility": 0.0, + "inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441", + "blast_matched_qlen": 1, + "blast_sseq_coverage": 0.0, + "blast_qseq_coverage": 0.0, + "blast_sseq_orfs_coverage": 0.0, + "orfs_start": 1, + "orfs_end": 8007, + "blast_n_conseqs": 0 + }, + "empty-sequence2": { + "qlen": 0, + "hypermutation_probablility": 0.0, + "inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441", + "blast_matched_qlen": 1, + "blast_sseq_coverage": 0.0, + "blast_qseq_coverage": 0.0, + "blast_sseq_orfs_coverage": 0.0, + "orfs_start": 1, + "orfs_end": 8007, + "blast_n_conseqs": 0 + }, "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": { "qlen": 9718, "hypermutation_probablility": 0.13527282947774355, diff --git a/tests/expected-results-edgy/nonintact.fasta b/tests/expected-results-edgy/nonintact.fasta index d26baf6..4e9a8af 100644 --- a/tests/expected-results-edgy/nonintact.fasta +++ b/tests/expected-results-edgy/nonintact.fasta @@ -2,6 +2,8 @@ ATCTCTCACTCCCAGAGTC >singleton-sequence A +>empty-sequence +>empty-sequence2 >Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED TGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCAC ACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCA diff --git a/tests/expected-results-edgy/orfs.json b/tests/expected-results-edgy/orfs.json index 7472790..b5163ee 100644 --- a/tests/expected-results-edgy/orfs.json +++ b/tests/expected-results-edgy/orfs.json @@ -401,6 +401,274 @@ "nucleotides": "A" } ], + "empty-sequence": [ + { + "name": "gag", + "start": -1, + "end": 0, + "subtype_start": 1, + "subtype_end": 1498, + "orientation": "forward", + "distance": 0.7803451759315906, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "pol", + "start": -1, + "end": 0, + "subtype_start": 1290, + "subtype_end": 4302, + "orientation": "forward", + "distance": 0.78024284744996, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "env", + "start": -1, + "end": 0, + "subtype_start": 5430, + "subtype_end": 8007, + "orientation": "forward", + "distance": 0.7802599076361288, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "vif", + "start": -1, + "end": 0, + "subtype_start": 4246, + "subtype_end": 4822, + "orientation": "forward", + "distance": 0.7806712105497112, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "vpr", + "start": -1, + "end": 0, + "subtype_start": 4764, + "subtype_end": 5052, + "orientation": "forward", + "distance": 0.7812035661218425, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "tat_exon1", + "start": -1, + "end": 0, + "subtype_start": 5032, + "subtype_end": 5248, + "orientation": "forward", + "distance": 0.7815405696388372, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "rev_exon1", + "start": -1, + "end": 0, + "subtype_start": 5171, + "subtype_end": 5249, + "orientation": "forward", + "distance": 0.7839721254355401, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "vpu", + "start": -1, + "end": 0, + "subtype_start": 5267, + "subtype_end": 5513, + "orientation": "forward", + "distance": 0.7813860351732544, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "tat_exon2", + "start": -1, + "end": 0, + "subtype_start": 7567, + "subtype_end": 7663, + "orientation": "forward", + "distance": 0.7834691501746216, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "rev_exon2", + "start": -1, + "end": 0, + "subtype_start": 7568, + "subtype_end": 7865, + "orientation": "forward", + "distance": 0.7811712165958367, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "nef", + "start": -1, + "end": 0, + "subtype_start": 8008, + "subtype_end": 8683, + "orientation": "forward", + "distance": 0.7805933836772095, + "protein": "", + "aminoacids": "", + "nucleotides": "" + } + ], + "empty-sequence2": [ + { + "name": "gag", + "start": -1, + "end": 0, + "subtype_start": 1, + "subtype_end": 1498, + "orientation": "forward", + "distance": 0.7803451759315906, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "pol", + "start": -1, + "end": 0, + "subtype_start": 1290, + "subtype_end": 4302, + "orientation": "forward", + "distance": 0.78024284744996, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "env", + "start": -1, + "end": 0, + "subtype_start": 5430, + "subtype_end": 8007, + "orientation": "forward", + "distance": 0.7802599076361288, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "vif", + "start": -1, + "end": 0, + "subtype_start": 4246, + "subtype_end": 4822, + "orientation": "forward", + "distance": 0.7806712105497112, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "vpr", + "start": -1, + "end": 0, + "subtype_start": 4764, + "subtype_end": 5052, + "orientation": "forward", + "distance": 0.7812035661218425, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "tat_exon1", + "start": -1, + "end": 0, + "subtype_start": 5032, + "subtype_end": 5248, + "orientation": "forward", + "distance": 0.7815405696388372, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "rev_exon1", + "start": -1, + "end": 0, + "subtype_start": 5171, + "subtype_end": 5249, + "orientation": "forward", + "distance": 0.7839721254355401, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "vpu", + "start": -1, + "end": 0, + "subtype_start": 5267, + "subtype_end": 5513, + "orientation": "forward", + "distance": 0.7813860351732544, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "tat_exon2", + "start": -1, + "end": 0, + "subtype_start": 7567, + "subtype_end": 7663, + "orientation": "forward", + "distance": 0.7834691501746216, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "rev_exon2", + "start": -1, + "end": 0, + "subtype_start": 7568, + "subtype_end": 7865, + "orientation": "forward", + "distance": 0.7811712165958367, + "protein": "", + "aminoacids": "", + "nucleotides": "" + }, + { + "name": "nef", + "start": -1, + "end": 0, + "subtype_start": 8008, + "subtype_end": 8683, + "orientation": "forward", + "distance": 0.7805933836772095, + "protein": "", + "aminoacids": "", + "nucleotides": "" + } + ], "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": [ { "name": "gag",