Skip to content

Commit

Permalink
fill out .json files online
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Jul 4, 2023
1 parent 2020ec0 commit 7af6d4a
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 104 deletions.
25 changes: 9 additions & 16 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,8 +725,6 @@ def intact( working_dir,
Name of a file containing all consensus sequences.
"""

orfs = {}
errors = {}
pos_mapping = st.map_hxb2_positions_to_subtype(subtype)
pos_subtype_mapping = {
"forward": st.map_subtype_positions_to_hxb2("forward", subtype),
Expand All @@ -751,9 +749,13 @@ def intact( working_dir,

intact_file = os.path.join(working_dir, "intact.fasta")
non_intact_file = os.path.join(working_dir, "nonintact.fasta")
orf_file = os.path.join(working_dir, "orfs.json")
error_file = os.path.join(working_dir, "errors.json")

with open(intact_file, 'w') as intact_writer, \
open(non_intact_file, 'w') as nonintact_writer:
open(non_intact_file, 'w') as nonintact_writer, \
open(orf_file, 'w') as orfs_writer, \
open(error_file, 'w') as errors_writer:

for (sequence, blast_rows) in with_blast_rows(blast_it, iterate_sequences(input_file)):
sequence_errors = []
Expand Down Expand Up @@ -846,7 +848,6 @@ def intact( working_dir,
if error:
sequence_errors.append(error)

orfs[sequence.id] = hxb2_found_orfs
if len(sequence_errors) == 0:
SeqIO.write([sequence], intact_writer, "fasta")
else:
Expand All @@ -856,19 +857,11 @@ def intact( working_dir,
if not include_small_orfs:
sequence_errors.extend(small_orf_errors)

errors[sequence.id] = sequence_errors
orfs_writer.write(json.dumps({sequence.id: [x.__dict__ for x in hxb2_found_orfs]},
indent=4))

orf_file = os.path.join(working_dir, "orfs.json")
with open(orf_file, 'w') as f:
f.write(json.dumps({seq: [x.__dict__ for x in sorfs] \
for seq, sorfs in orfs.items()},
indent=4))

error_file = os.path.join(working_dir, "errors.json")
with open(error_file, 'w') as f:
f.write(json.dumps({seq: [x.__dict__ for x in serrors] \
for seq,serrors in errors.items()},
indent=4))
errors_writer.write(json.dumps({sequence.id: [x.__dict__ for x in sequence_errors]},
indent=4))

return intact_file, non_intact_file, orf_file, error_file
#/end def intact
Expand Down
120 changes: 80 additions & 40 deletions tests/expected-results-large/errors.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@
"error": "Scramble",
"message": "Sequence is plus-scrambled."
}
],
]
}{
"MN691959": [
{
"sequence_name": "MN691959",
Expand All @@ -77,7 +78,8 @@
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon"
}
],
]
}{
"MN692074": [
{
"sequence_name": "MN692074",
Expand Down Expand Up @@ -134,8 +136,10 @@
"error": "LongDeletion",
"message": "Query sequence contains a long deletion."
}
],
"MN692145": [],
]
}{
"MN692145": []
}{
"MN090335": [
{
"sequence_name": "MN090335",
Expand Down Expand Up @@ -167,7 +171,8 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
]
}{
"MN090376": [
{
"sequence_name": "MN090376",
Expand All @@ -194,8 +199,10 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
"MK115581.1": [],
]
}{
"MK115581.1": []
}{
"MK115690.1": [
{
"sequence_name": "MK115690.1",
Expand All @@ -207,7 +214,8 @@
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a mutated splice donor site, G-."
}
],
]
}{
"MK115571.1": [
{
"sequence_name": "MK115571.1",
Expand All @@ -219,21 +227,24 @@
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
]
}{
"MK115514.1": [
{
"sequence_name": "MK115514.1",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
]
}{
"MK115488.1": [
{
"sequence_name": "MK115488.1",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
]
}{
"MK115030.1": [
{
"sequence_name": "MK115030.1",
Expand All @@ -245,8 +256,10 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
"MK115498.1": [],
]
}{
"MK115498.1": []
}{
"MK115211.1": [
{
"sequence_name": "MK115211.1",
Expand All @@ -273,7 +286,8 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
]
}{
"MK115158.1": [
{
"sequence_name": "MK115158.1",
Expand All @@ -285,7 +299,8 @@
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
]
}{
"MK114705.1": [
{
"sequence_name": "MK114705.1",
Expand All @@ -297,7 +312,8 @@
"error": "Scramble",
"message": "Sequence is plus-scrambled."
}
],
]
}{
"MK114856.1": [
{
"sequence_name": "MK114856.1",
Expand Down Expand Up @@ -354,7 +370,8 @@
"error": "APOBECHypermutationDetected",
"message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 3.639064030015132e-65)."
}
],
]
}{
"MK115009.1": [
{
"sequence_name": "MK115009.1",
Expand Down Expand Up @@ -411,15 +428,18 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
]
}{
"MK115387.1": [
{
"sequence_name": "MK115387.1",
"error": "InternalStopInOrf",
"message": "Smaller ORF tat_exon2 at 8375-8468 contains an internal stop codon"
}
],
"MK115491.1": [],
]
}{
"MK115491.1": []
}{
"MK116110.1": [
{
"sequence_name": "MK116110.1",
Expand All @@ -441,16 +461,20 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
"MK115527.1": [],
]
}{
"MK115527.1": []
}{
"MK114997.1": [
{
"sequence_name": "MK114997.1",
"error": "InternalStopInOrf",
"message": "ORF env at 6223-8794 contains an internal stop codon"
}
],
"MK115518.1": [],
]
}{
"MK115518.1": []
}{
"MK115065.1": [
{
"sequence_name": "MK115065.1",
Expand All @@ -472,7 +496,8 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
]
}{
"MK115464.1": [
{
"sequence_name": "MK115464.1",
Expand Down Expand Up @@ -524,8 +549,10 @@
"error": "APOBECHypermutationDetected",
"message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 4.937891251407691e-23)."
}
],
"MK115530.1": [],
]
}{
"MK115530.1": []
}{
"MK115520.1": [
{
"sequence_name": "MK115520.1",
Expand All @@ -542,8 +569,10 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
"MK115503.1": [],
]
}{
"MK115503.1": []
}{
"MK115570.1": [
{
"sequence_name": "MK115570.1",
Expand All @@ -555,8 +584,10 @@
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
"MK115509.1": [],
]
}{
"MK115509.1": []
}{
"MK115702.1": [
{
"sequence_name": "MK115702.1",
Expand All @@ -578,7 +609,8 @@
"error": "InternalInversion",
"message": "Sequence contains an internal inversion."
}
],
]
}{
"MK115095.1": [
{
"sequence_name": "MK115095.1",
Expand Down Expand Up @@ -625,16 +657,20 @@
"error": "APOBECHypermutationDetected",
"message": "Query sequence shows evidence of APOBEC3F/G-mediated hypermutation (p = 8.33506116803153e-40)."
}
],
"MK115490.1": [],
"MK115576.1": [],
]
}{
"MK115490.1": []
}{
"MK115576.1": []
}{
"OQ092466": [
{
"sequence_name": "OQ092466",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a mutated splice donor site, GC."
}
],
]
}{
"OQ092463": [
{
"sequence_name": "OQ092463",
Expand All @@ -646,14 +682,16 @@
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
]
}{
"OQ092465": [
{
"sequence_name": "OQ092465",
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a mutated splice donor site, GA."
}
],
]
}{
"OQ092462": [
{
"sequence_name": "OQ092462",
Expand All @@ -665,7 +703,8 @@
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
]
}{
"OQ092464": [
{
"sequence_name": "OQ092464",
Expand All @@ -677,7 +716,8 @@
"error": "MajorSpliceDonorSiteMutated",
"message": "Query sequence has a missing splice donor site, --."
}
],
]
}{
"OQ092467": [
{
"sequence_name": "OQ092467",
Expand Down
Loading

0 comments on commit 7af6d4a

Please sign in to comment.