Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
make sure to not crash on empty input sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 27, 2023
1 parent d045178 commit 8830259
Show file tree
Hide file tree
Showing 6 changed files with 454 additions and 2 deletions.
5 changes: 3 additions & 2 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,9 @@ def has_packaging_signal(alignment, psi_locus, psi_tolerance):
"""
packaging_begin = [m.start() for m in re.finditer(r"[^-]",
str(alignment[0].seq))][psi_locus[0]]
query_start = [m.start() for m in re.finditer(r"[^-]",
str(alignment[1].seq))][0]
query_options = [m and m.start() for m in re.finditer(r"[^-]",
str(alignment[1].seq))]
query_start = query_options[0] if query_options else ''
packaging_end = [m.start() for m in re.finditer(r"[^-]",
str(alignment[0].seq))][psi_locus[1]]
# if query_start > packaging_begin:
Expand Down
3 changes: 3 additions & 0 deletions tests/data-edgy.fasta
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ TGGAAGGGCTAATTCACTCCCAACGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGAT
ATCTCTCACTCCCAGAGTC
>singleton-sequence
A
>empty-sequence

>empty-sequence2
>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED
TGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCACGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATCCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTATTGGCCTCTTCTATCTTATCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGACCCCTGGCCCTGGTGTGTAGTTCTGCCAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCTTTGGGAGTGAATTAGCCCTTCCAGTCCCCCCTTTTCTTTTAAAAAGTGGCTAAGATCTACAGCTGCCTTGTAAGTCATTGGTCTTAAAGGTACCTGAGGTGTGACTGGAAAACCCACCTCCTCCTCCTCTTGTGCTTCTAGCCAGGCACAAGCAGCATTGGTAGCTGCTGTATTGCTACTTGTGATTGCTCCATGTTTTTCCAGGTCTCGAGATGCTGCTCCCACCCTATCTGCTGCTGGCTCAGCTCGTCTCATTCTTTCCCTTACAGTAGGCCATCCAATCACACTACTTTTTGACCACTTGCCACCCATCTTATAGCAAAATCCTTTCCAAGCCCTGTCTTATTCTTCTAGGTATGTGGCGAATAGCTCTACAAGCTCCTTGTACTACTTCTATAACCCTATCTGTCCCCTCAGCTACTGCTATGGCTGTGGCATTGAGCAAGCTAACAGCACTATTCTTTAGTTCCTGACTCCAATACTGTAGGAGATTCCACCAATATTTGAGGGCTTCCCACCCCCTGCGTCCCAGAAGTTCCACAATCCTCGTTACAATCAAGAGTAAGTCTCTCAAGCGGTGGTAGCTGAAGAGGCACAGGCTCCGCAGATCGTCCCAGATAAGTGCCAAGGATCCGTTCACTAATCGAATGGATCTGTCTCTGTCTCTCTCTCCACCTTCTTCTTCGGTTCCTTCGGGCCTGTCGGGTCCCCTCGGGGTTGGGAGGTGGGTCTGAAACGATAATGGTGAATATCCCTGCCTAACTCTATTCACTATAGAAAGTACAGCAAAAACTATTCTTAAACCTACCAAGCCTCCTACTATCATTATGAATAATTTTATATACCACAGCCAATTTGTTATGTTAAACCAATTCCACAAACTTGCCCATTTATCTAATTCCAATAATTCTTGTTCATTCTTTTCTTGCTGGTTTTGCGATTCTTCAATTAAGGAGTGTATTAAGCTTGTGTAATTGTTAATTTCTCTGTCCCACTCCATCCAGGTCGTGTGATTCCAAATCTGTTCCAGAGATTTATTACTCCAACTAGCATTCCAAGGCACAGCAGTGGTGCAAATGAGTTTTCCAGAGCAACCCCAAATCCCCAGGAGCTGTTGATCCTTTAGGTATCTTTCCACAGCCAGGATTCTTGCCTGGAGCTGCTTGATGCCCCAGACTGTGAGTTGCAACAGATGCTGTTGCGCCTCAATAGCCCTCAGCAAATTGTTCTGCTGCTGCACTATACCAGACAATAATTGTCTGGCCTGTACCGTCAGCGTCATTGAGGCTGCGCCCATAGTGCTTCCTGCTGCTCCCAAGAACCCAAGGAACAAAGCTCCTATTCCCACTGCTCTTTTTTCTCTCTGCACCACTCTTCTCTTTGCCTTGGTGGGTGCTACTCCTAATGGTTCAATTTTTACTACTTTATATTTATATAATTCACTTCTCCAATTGTCCCTCATATCTCCTCCTCCAGGTCTGAAGATCTCGGACTCATTGTTGCTATTACCACCATCTCTTGTTAATAGCAGCCCTGTAATATTTGATGAACATCTAATTTGTCCACTGATGGGAGGGGCATACATTGCTTTTCCTACTTTCTGCCACATGTTTATAATTTGTTTTATTCTGCATGGGAGGGTGATTGTGTCACTTCCTTCAGTGTTATTTGACCCTTCAGTACTCCAAGTACTATTAAACCAAGTACTATTAAACAGTTGTGTTGAATTACAGTAGAAAAATTCCCCTCCACAATTAAAACTGTGCGTTACAATTTCTGGGTCCCCTCCTGAGGATTGCTTAAAGATTATTGTTTTATTATTTCCAAATTGTTCTCTTAATTTGCTAGCTATCTGTTTTAAAGTGTTATTCCATTTTGCTCTACTAATGTTACAATGTGCTTGTCTCATATTTCCTATTTTTCCTATTGTAACAAATGCTCTCCCTGGTCCTCTCTGGATACGGATTCTTTTTCTTGTATTGTTGTTGGGTCTTGTACAATTAATTTCTACAGATGTGTTCAGCTGTACTATTATGGTTTTAGCATTGTCCGTGAAATTGACAGATCTAATTACTACCTCTTCTTCTGCTAGACTGCCATTTAACAGCAGTTGAGTTGATACTACTGGCCTAATTCCATGTGTACATTGTACTGTGCTGACATTTGTACATGGTCCTGTTCCATTGAACGTCTTATTATTACATTTTAGAATCGCAAAACCAGCCGGGGCACAATAATGTATGGGAATTGGCTCAAAGGATACCTTTGGACAGGCCTGTGTAATGACTGAGGTGTTACAACTTGTCAACTTATAGCTGGTAGTATCATTATCTATTGGTATTATATCAAGTTTATAAAAAAATGCATATTCTTTCTGCACCTTACCTCTTATGCTTGTGCTGATATTGAAAGAGCAGTTTTTTATCTCTCCTTTCTCCATTATCATTCTCCCGCTACTACTATTGGTATTAGTATCATTCTTCAAATCAGTGCACTTTAAACTAACACAGAGTGGGGTTAATTTTACACATGGCTTTAGGCTTTGATCCCATAAACTGATTATATCCTCATGCATCTGTTCTACCATGTCATTTTTCCACATGTTAAAATTTTCTGTCACATTTACCAATACTACTTCTTGTGGGTTGGGGTCTGTGGGTACACAGGCATGTGTGGCCCAAACATTATGTACCTCTGTATCATATGCTTTAGCATCTGATGCACAAAATAGAGTGGTGGTTGCTTCCTTCCACACAGGTACCCCATAATAGACTGTGACCCACAATTTTTCTGTAGCACTACAGATCATCAACATCCCAAGGAGCATGGTGCCCCATCTCCACCCCCATCTCCACAAGTGCTGATATTTCTCCTTCACTCTCATTGCCACTGTCTTCTGCTCTTTCTATTAGTCTATCAATTAACCTGTCTATTTTTCTTTGTCTTAATATTTTCCTATATTCTATGATTACTATGGACCACACAACTATTGCTATTATTATTGCTACTACTAATGCTACTATTGCTACTATTGGTATAGGTTGCATTACATGTACTACTTACTGCTTTGATAGAGAAGCTTGATGAGTCTGACTGTTCTGATGAGCTCTTCGTCGCTGTCTCCGCTTCTTCCTGCCATAGGAGATGCCTAAGGCTTTTGTTATGAAACAAACTTGGCAATGAAAGCAACACTTTTTACAATAGCAATTGGTACAAGCAGTTTTAGGCTGACTTCCTGGATGCTTCCAGGGCTCTAGTCTAGGATCTACTGGCTCCATTTCTTGCTCTCCTCTGTCGAGTAACGCCTATTCTGCTATGTCGACACCCAATTCTGAAATGGATAAACAGCAGTTGTTGCAGAATTCTTATTATGGCTTCCACTCCTGCCCAAGTATCCCCATAAGTTTCATAGATATGTTGCCCTAAGCCATGGAGCCAAATCCTAGGAAAATGTCTAACAGCTTCATTCTTAAGCTCCTCTAAAAGCTCTAGTGTCCATTCATTGTGTGGCTCCCTCTGTGGCCCTTGGTCTTCTGGGGCTTGTTCCATCTATCCTCTGTCAGTTTCGTAACACTAGGCAAAGGTGGCTTTATCTTTTTTGGTGTTATTAATGCTGCTAGTGCCAAGTATTGTAGAGATCCTACCTTGTTATGTCCTGCTTGATATTCACACCTAGGGCTAACTATGTGTCCTAATAAGGCCTTTCTTATAGCAGAGTCTGAAAAACAGTCAAAGTAATACAGATGAATTAGTTGGTCTGCTAGTTCAGGGTCTACTTGTGTGCTATATCTCTTTTTCCTCCATTCTATGGAGACTCCCTGACCCAAATGCCAGTCTCTTTCTCCTGTATGCAGACCCCAATATGTTGTTATTACCAATCTAGCATCCCCTAGTGGGATGTGTACTTCTGAACTTATTCTTGGATGAGGGCTTTCATAGTGATGTCTATAAAACCATCCCCTAGCTTTCCCTGAAACATACATATGGTGTTTTACTAAACTTTTCCATGTTCTAATCCTCATCCTGTCTACTTGCCACACAATCATCACCTGCCATCTGTTTTCCATAATCCCTAATGATCTTTGCTTTTCTTCTTGGCACTACTTTTATGTCACTATTATCTTGTATTACTACTGCCCCTTCACCTTTCCAGAGGAGCTTTGCTGGTCCTTTCCAAAGTGGATTTCTGCTGTCCCTGTAATAAACCCGAAAATTTTGAATTTTTGTAATTTGTTTTTGTAATTCTTTAGTTTGTATGTCTGTTGCTATTATGTCTACTATTCTTTCCCCTGCACTGTACCCCCCAATCCCCCCTTTTCTTTTAAAATTGTGGATGAATACTGCCATTTGTACTGCTGTCTTAAGATGTTCAGCCTGATCTCTTACCTGTCCTATAATTTTCTTTAATTCTTTATTCATAGATTCTACTACTCCTTGACTTTGGGGATTGTAGGGAATTCCAAATTCCTGCTTGATTCCCGCCCACCAACAGGCGGCCCTAACCGTAGCACCGGTGAAATTGCTGCCATTGTCAGTATGTATTGTTTTTACTGGCCATCTTCCTGCTAATTTTAAAAGAAAATATGCTGTTTCCTGCCCTGTTTCTGCTGGAATAACTTCTGCTTCTATATATCCACTGGCTACATGAACTGCTACCAGGATAACTTTTCCTTCTAAATGTGTACAATCTAGTTGCCATATTCCTGGACTACAGTCTACTTGTCCATGCATGGCTTCTCCTTTTAGCTGACATTTATCACAGCTGGCTACTATTTCTTTTGCTACTACAGGTGGCAGGTTAAAATCACTAGCCATTGCTCTCCAATTACTGTGATATTTCTCATGTTCATCTTGGGCCTTATCTATTCCATCTAAAAATAGTACTTTCCTGATTCCAGCACTGACTAATTTATCTACTTGTTCATTTCCTCCAATTCCTTTGTGTGCTGGTACCCATGCCAGATAGACCTTTTCCTTTTTTATTAACTGCTCTATTATTTGATTGACTAACTCTGATTCACTTTGATCTGGTTGTGCTTGAATGATTCCTAATGCATATTGTGAGTCTGTTACTATGTTTACTTCTAATCCCGAATCCTGCAAAGCTAGATAAATTGCTTGTAACTCAGTCTTCTGATTTGTTGTGTCAGTTAGGGTGACAACTTTTTGTCTTCCTCTATTAGTAACATATCCTGCTTTTCCTAATTTAGTCTCCCTGTTAGCTGCCCCATCTACATAGAAGGTTTCTGCTCCTACTATGGGTTCTTTCTCTAACTGGTACCATAATTTCACTAAGGGAGGGGTATTAACAAACTCCCACTCAGGAATCCAGGTGGCTTGCCAATACTCTGTCCACCATGTTTCCCATGTTTCCTTTTGTATGGGCAGTTTAAATTTAGGAGTCTTTCCCCATATTACTATGCTTTCTGTGGTTATTTTTTGCACTGCCTCTGTTAATTGTTTTACATCATTAGTGTGGGCACCCCTCATTCTTGCATATTTTCCTGTTTTCAGATTTTTAAATGGCTCTTGATAAATTTGATATGTCCATTGGCCTTGCCCCTGCTTCTGTATTTCTGCTATTAAGTCTTTTGATGGGTCATAATACACTCCATGTACTGGTTCTTTTAGAATCTCTCTGTTTTCTGCCAGTTCTAGCTCTGCTTCTTCTGTTAGTGGTATTACTTCTGTTAGTGCTTTGGTTCCTCTAAGGAGTTTACATAATTGCCTTACTTTAATCCCTGGGTAAATCTGACTTGCCCAATTCAATTTCCCCACTAACTTCTGTATGTCATTGACAGTCCAGCTGTCTTTTTCTGGCAGCACTATAGGCTGTACTGTCCATTTATCAGGATGGAGTTCATAACCCATCCAAAGGAATGGAGGTTCTTTCTGATGTTTTTTGTCTGGTGTGGTAAGTCCCCACCTCAACAGATGTTGTCTCAGCTCCTCTATTTTTGTTCTATGCTGCCCTATTTCTAAGTCAGATCCTACATACAAATCATCCATGTATTGATAGATAACTATGTCTGGATTTTGTTTTCTAAAAGGCTCTAAGATTTTTGTCATGCTACTTTGGAATATTGCTGGTGATCCTTTCCATCCCTGTGGAAGCACATTGTACTGATATCTAATCCCTGGTGTCTCATTGTTTATACTAGGTATGGTAAATGCAGTATACTTCCTGAAGTCTTCATCTAAGGGAACTGAAAAATATGCATCACCCACATCCAGTACTGTTACTGATTTTTTCTTTTTTAACCCTGCGGGATGTGGTATTCCTAATTGAACTTCCCAGAAGTCTTGAGTTCTCTTATTAAGTTCTCTGAAATCTACTAATTTTCTCCATTTAGTACTGTCTTTTTTCTTTATGGCAAATACTGGAGTATTGTATGGATTTTCAGGCCCAATTTTTGAAATTTTCCCTTCCTTTTCCATCTCTGTACAAATTTCTACTAATGCTTTTATTTTTTCTTCTGTCAATGGCCATTGTTTAACTTTTGGGCCATCCATTCCTGGCTTTAATTTTACTGGTACAGTCTCAATAGGGCTAATGGGAAAATTTAAAGTGCAACCAATCTGAGTCAACAGATTTCTTCCAATTATGTTGACAGGTGTAGGTCCTACTAATACTGTACCTATAGCTTTATGTCCACAGATTTCTATGAGTATCTGATCATACTGTCTTACTTTGATAAAACCTCCAATTCCCCCTATCATTTTTGGTTTCCATCTTCCTGGCAAACTCATTTCTTCTAATACTGTATCATCTGCTCCTGTATCTAATAGAGCTTCCTTTAGTTGCCCCCCTATCTTTATTGTGACGAGGGGTCGTTGCCAAAGAGTGACCTGAGGGAAGTTAAAGGATACAGTTCCTTGTCTATCGGCTCCTGCTTCTGAGGGGGAGTTGTTGTCTCTACCCCAGACCTGAAGCTCTCTTCTGGTGGGGCTGTTGGCTCTGGTCTGCTCTGAAGAAAATTCCCTGGCCTTCCCTTGTAGGAAGGCCAGATCTTCCCTAAAAAATTAGCCTGTCTCTCAGTACAATCTTTCATTTGGTGTCCTTCCTTTCCACATTTCCAACAGCCCTTTTTCCTAGGGGCCCTGCAATTTCTGGCTGTGTGCCCTTCTTTGCCACAATTGAAACACTTAACAATCTTTCTTTGGTTCCTAAAATTGCCTCTCTGCATCATTATGGTAGCTGAATTTGTTACTTGGCTCATTGCTTCAGCCAAAACTCTTGCCTTATGGCCGGGTCCTCCTACTCCCTGACATGCTGTCATCATTTCTTCTAGTGTAGCCGCTGGTCCCAATGCTTTTAAAATAGTCTTACAATCTGGGTTCGCATTTTGGACCAACAAGGTTTCTGTCATCCAATTTTTTACCTCCTGTGAAGCTTGCTCGGCTCTTAGAGTTTTATAGAACCGGTCTACATAGTCTCTAAAGGGTTCCTTTGGTCCTTGTCTTATGTCCAGAATGCTGGTAGGGCTATACATTCTTACTATTTTATTTAATCCCAGGATTATCCATCTTTTATAAATTTCTCCTACTGGGATAGGTGGATTATTTGTCATCCATCCTATTTGTTCCTGAAGGGTACTAGTAGTTCCTGCTATGTCACTTCCCCTTGGTTCTCTCATCTGGCCTGGTGCAATAGGCCCTGCATGCACTGGATGCACTCTATCCCATTCTGCAGCTTCCTCATTGATGGTCTCTTTTAACATTTGCATGGCTGCTTGATGTCCCCCCACTGTGTTTAGCATGGTGTTTAAATCTTGTGGGGTGGCTCCTTCTGATAATGCTGAAAACATGGGTATCACTTCTGGGCTGAAAGCCTTCTCTTCTACTACTTTTACCCATGCATTTAAAGTTCTAGGTGATATGGCCTGATGTACCATTTGCCCCTGGATGTTCTGCACTATAGGGTAATTTTGGCTGACCTGATTGCTGTGTCCTGTGTCAGCTGCTGCTTGCTGTGCTTTTTTCTTACTTTTGTTTTGCTCTTCCTCTATCTTGTCTAAAGCTTCCTTGGTGTCTTTTATCTCTATCCTTTGATGCACACAATAGAGGGTTGCTACTGTATTATATAATGATCTAAGTTCTTCTGATCCTGTCTGAAGGGATGGTTGTAGCTGTCCCAGTATTTGTCTACAGCCTTCTGATGTTTCTAACAGGCCAGGATTAACTGCGAATCGTTCTAGCTCCCTGCTTGCCCATACTATATGTTTTAATTTATATTTTTTCTTTCCCCCTGGCCTTAACCGAATTTTTTCCCATCGATCTAATTCTCCCCCGCTTAATACTGACGCTCTCGCACCCATCTCTCTCCTTCTAGCCTCCGCTAGTCAAAATTTTTGGCGTACTCACCAGTCGCCGCCCCTCGCCTCTTGCCGTGCGCGCTTCAGCAAGCCGAGTCCTGCGTCGAGAGAGCTCCTCTGGTTTCCCTTTCGCTTTCAGGTCCCTGTTCGGGCGCCACTGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCACACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCAGTGGGTTCCCTAGTTAGCCAGAGAGCTCCCAGGCTCAGATCTGGTCTAACCAGAGAGACCCAGTACAGGCAAAAAGCAGCTGCTTATATGCAGGATCTGAGGGCTCGCCACTCCCCAGTCCCGCCCAGGCCACGCCTCCCTGGAAAGTCCCCAGCGGAAAGTCCCTTGTAGCAAGCTCGATGTCAGCAGTTCTTGAAGTACTCCGGATGCAGCTCTCGGGCCATGTGATGAAATGCTAGGCGGCTGTCAAACCTCCACTCTAACACTTCTCTCTCCGGGTCATCCATTCCATGCAGGCTCACAGGGTGTAACAAGCTGGTGTTCTCTCCTTTGTTGGCTTCTTCTAACTTCTCTGGCTCAACTGGTACTAGCTTGTAGCACCATCCAAAGGTCAGTGGATATCTGATCCCTGGCCCTGGTGTGTAGTTCTGCTAATCAGGGAAGTAGCCTTGTGTGTGGTAGATCCACAGATCAAGGATATCTTGTCTTCGTTGGGAGTGAATTAGCCCTTCCA
>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS
Expand Down
154 changes: 154 additions & 0 deletions tests/expected-results-edgy/errors.json
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,160 @@
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
],
"empty-sequence": [
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "ORF gag at 1-1498 can have maximum deletions 30, got 1494"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "ORF pol at 1290-4302 can have maximum deletions 30, got 3009"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "ORF env at 5430-8007 can have maximum deletions 100, got 2574"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF vif at 4246-4822 can have maximum deletions 30, got 573"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF vpr at 4764-5052 can have maximum deletions 30, got 285"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon1 at 5032-5248 can have maximum deletions 30, got 216"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon1 at 5171-5249 can have maximum deletions 30, got 78"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF vpu at 5267-5513 can have maximum deletions 30, got 243"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon2 at 7567-7663 can have maximum deletions 30, got 90"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon2 at 7568-7865 can have maximum deletions 30, got 294"
},
{
"sequence_name": "empty-sequence",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 672"
},
{
"sequence_name": "empty-sequence",
"error": "PackagingSignalDeletion",
"message": "Query Sequence exceeds maximum deletion tolerance in PSI. Contains 21 deletions with max tolerance of 10 deletions."
},
{
"sequence_name": "empty-sequence",
"error": "RevResponseElementDeletion",
"message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions."
},
{
"sequence_name": "empty-sequence",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, -."
},
{
"sequence_name": "empty-sequence",
"error": "LongDeletion",
"message": "Query sequence contains a long deletion."
}
],
"empty-sequence2": [
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "ORF gag at 1-1498 can have maximum deletions 30, got 1494"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "ORF pol at 1290-4302 can have maximum deletions 30, got 3009"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "ORF env at 5430-8007 can have maximum deletions 100, got 2574"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF vif at 4246-4822 can have maximum deletions 30, got 573"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF vpr at 4764-5052 can have maximum deletions 30, got 285"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon1 at 5032-5248 can have maximum deletions 30, got 216"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon1 at 5171-5249 can have maximum deletions 30, got 78"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF vpu at 5267-5513 can have maximum deletions 30, got 243"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon2 at 7567-7663 can have maximum deletions 30, got 90"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon2 at 7568-7865 can have maximum deletions 30, got 294"
},
{
"sequence_name": "empty-sequence2",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 672"
},
{
"sequence_name": "empty-sequence2",
"error": "PackagingSignalDeletion",
"message": "Query Sequence exceeds maximum deletion tolerance in PSI. Contains 21 deletions with max tolerance of 10 deletions."
},
{
"sequence_name": "empty-sequence2",
"error": "RevResponseElementDeletion",
"message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions."
},
{
"sequence_name": "empty-sequence2",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, -."
},
{
"sequence_name": "empty-sequence2",
"error": "LongDeletion",
"message": "Query sequence contains a long deletion."
}
],
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": [
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED [REVERSED]",
Expand Down
24 changes: 24 additions & 0 deletions tests/expected-results-edgy/holistic.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,30 @@
"orfs_end": 8007,
"blast_n_conseqs": 0
},
"empty-sequence": {
"qlen": 0,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
"blast_matched_qlen": 1,
"blast_sseq_coverage": 0.0,
"blast_qseq_coverage": 0.0,
"blast_sseq_orfs_coverage": 0.0,
"orfs_start": 1,
"orfs_end": 8007,
"blast_n_conseqs": 0
},
"empty-sequence2": {
"qlen": 0,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
"blast_matched_qlen": 1,
"blast_sseq_coverage": 0.0,
"blast_qseq_coverage": 0.0,
"blast_sseq_orfs_coverage": 0.0,
"orfs_start": 1,
"orfs_end": 8007,
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": {
"qlen": 9718,
"hypermutation_probablility": 0.13527282947774355,
Expand Down
2 changes: 2 additions & 0 deletions tests/expected-results-edgy/nonintact.fasta
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
ATCTCTCACTCCCAGAGTC
>singleton-sequence
A
>empty-sequence
>empty-sequence2
>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED
TGCTAGAGATTTTCCACACTGACTAAAAGGGTCTGAGGGATCTCTAGTTACCAGAGTCAC
ACAACAGACGGGCACACACTACTTGAAGCACTCAAGGCAAGCTTTATTGAGGCTTAAGCA
Expand Down
Loading

0 comments on commit 8830259

Please sign in to comment.