Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
inline individual sequence analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 27, 2023
1 parent baf4bf3 commit 8136533
Show file tree
Hide file tree
Showing 6 changed files with 711 additions and 713 deletions.
16 changes: 7 additions & 9 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,8 +722,9 @@ def intact( working_dir,
for sequence in SeqIO.parse(in_handle, "fasta"):
subtype_choices[sequence.id] = sequence

def analyse_single_sequence(holistic, sequence, blast_rows):
def analyse_single_sequence(writer, sequence, blast_rows):
sequence_errors = []
holistic = HolisticInfo()

invalid_subsequences = find_invalid_subsequences(sequence)
if invalid_subsequences:
Expand Down Expand Up @@ -871,19 +872,16 @@ def analyse_single_sequence(holistic, sequence, blast_rows):
if not include_small_orfs:
sequence_errors.extend(small_orf_errors)

return is_intact, hxb2_found_orfs, sequence_errors
orfs = [x.__dict__ for x in hxb2_found_orfs]
errors = [x.__dict__ for x in sequence_errors]
holistic = holistic.__dict__
writer.write(sequence, is_intact, orfs, errors, holistic)

with OutputWriter(working_dir, "csv" if output_csv else "json") as writer:

blast_it = blast_iterate_inf(subtype, input_file, working_dir) if check_internal_inversion or check_nonhiv or check_scramble or 1 < len(subtype_choices) else iterate_empty_lists()
for (sequence, blast_rows) in with_blast_rows(blast_it, iterate_sequences(input_file)):
holistic = HolisticInfo()
is_intact, hxb2_found_orfs, sequence_errors = analyse_single_sequence(holistic, sequence, blast_rows)
orfs = [x.__dict__ for x in hxb2_found_orfs]
errors = [x.__dict__ for x in sequence_errors]
holistic = holistic.__dict__
writer.write(sequence, is_intact, orfs, errors, holistic=holistic)

analyse_single_sequence(writer, sequence, blast_rows)

#/end def intact
#/end intact.py
66 changes: 33 additions & 33 deletions tests/expected-results-edgy/errors.json
Original file line number Diff line number Diff line change
@@ -1,83 +1,83 @@
{
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455": [],
"small-sequence": [
"small-sequence[REVERSE_COMPLEMENT]": [
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "ORF gag at 1-1498 can have maximum deletions 30, got 1494"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "ORF pol at 1290-4302 can have maximum deletions 30, got 2988"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "ORF env at 5430-8007 can have maximum deletions 100, got 2574"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF vif at 4246-4822 can have maximum deletions 30, got 573"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF vpr at 4764-5052 can have maximum deletions 30, got 285"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon1 at 5032-5248 can have maximum deletions 30, got 216"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon1 at 5171-5249 can have maximum deletions 30, got 78"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF vpu at 5267-5513 can have maximum deletions 30, got 243"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF tat_exon2 at 7567-7663 can have maximum deletions 30, got 90"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF rev_exon2 at 7568-7865 can have maximum deletions 30, got 294"
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 672"
},
{
"sequence_name": "small-sequence",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "PackagingSignalDeletion",
"message": "Query Sequence exceeds maximum deletion tolerance in PSI. Contains 21 deletions with max tolerance of 10 deletions."
},
{
"sequence_name": "small-sequence",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "RevResponseElementDeletion",
"message": "Query Sequence exceeds maximum deletion tolerance in RRE. Contains 265 deletions with max tolerance of 20 deletions."
},
{
"sequence_name": "small-sequence",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, -."
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "LongDeletion",
"message": "Query sequence contains a long deletion."
},
{
"sequence_name": "small-sequence [REVERSED]",
"sequence_name": "small-sequence[REVERSE_COMPLEMENT]",
"error": "NonHIV",
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
Expand Down Expand Up @@ -318,41 +318,41 @@
"message": "Query sequence contains a long deletion."
}
],
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": [
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]": [
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 54"
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a mutated splice donor site, G."
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]",
"error": "NonHIV",
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
],
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS": [
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]": [
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS",
"error": "UnknownNucleotide",
"message": "Sequence contains invalid nucleotides: XXXXXXX (start: 45, end: 51)"
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 54"
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a mutated splice donor site, G."
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]",
"error": "NonHIV",
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
Expand Down Expand Up @@ -444,24 +444,24 @@
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
],
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS": [
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS[REVERSE_COMPLEMENT]": [
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS",
"error": "UnknownNucleotide",
"message": "Sequence contains invalid nucleotides: >1>#>@> (start: 45, end: 51)"
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 54"
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS[REVERSE_COMPLEMENT]",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a mutated splice donor site, G."
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS[REVERSE_COMPLEMENT]",
"error": "NonHIV",
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
Expand Down Expand Up @@ -553,24 +553,24 @@
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
],
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES": [
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES[REVERSE_COMPLEMENT]": [
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES",
"error": "UnknownNucleotide",
"message": "Sequence contains invalid nucleotides: ------- (start: 45, end: 51)"
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES[REVERSE_COMPLEMENT]",
"error": "DeletionInOrf",
"message": "Smaller ORF nef at 8008-8683 can have maximum deletions 30, got 54"
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES[REVERSE_COMPLEMENT]",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a mutated splice donor site, G."
},
{
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES [REVERSED]",
"sequence_name": "Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES[REVERSE_COMPLEMENT]",
"error": "NonHIV",
"message": "Sequence contains unrecognized parts. It is probably a Human/HIV Chimera sequence."
}
Expand Down
10 changes: 5 additions & 5 deletions tests/expected-results-edgy/holistic.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"orfs_end": 8794,
"blast_n_conseqs": 3
},
"small-sequence": {
"small-sequence[REVERSE_COMPLEMENT]": {
"qlen": 19,
"hypermutation_probablility": 0.0,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand Down Expand Up @@ -59,7 +59,7 @@
"orfs_end": 8007,
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED": {
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.REVERSED[REVERSE_COMPLEMENT]": {
"qlen": 9718,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -71,7 +71,7 @@
"orfs_end": 8007,
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS": {
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-XS[REVERSE_COMPLEMENT]": {
"qlen": 9711,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -95,7 +95,7 @@
"orfs_end": 8007,
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS": {
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-BAD-SYMBOLS[REVERSE_COMPLEMENT]": {
"qlen": 9711,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand All @@ -119,7 +119,7 @@
"orfs_end": 8007,
"blast_n_conseqs": 0
},
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES": {
"Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455.BUT.WITH-DASHES[REVERSE_COMPLEMENT]": {
"qlen": 9711,
"hypermutation_probablility": 0.13527282947774355,
"inferred_subtype": "Ref.01_AE.AF.07.569M.GQ477441",
Expand Down
Loading

0 comments on commit 8136533

Please sign in to comment.