Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
generate blast.csv instead of blast.tsv
Browse files: browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 15, 2023
1 parent 20fa4b2 commit db531f7
Show file tree
Hide file tree
Showing 14 changed files with 354 additions and 331 deletions.
62 changes: 31 additions & 31 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,60 +102,60 @@ class FoundORF:
@dataclass
class BlastRow:
    """One typed record of the BLAST results table (``blast.csv``).

    Attribute names and their order mirror the column header of the CSV
    file, so a row read with ``csv.DictReader`` maps onto this dataclass
    one-to-one.
    """

    qseqid: str
    sseqid: str
    sgi: str
    qlen: int
    slen: int
    length: int
    qstart: int
    qend: int
    sstart: int
    send: int
    evalue: float
    bitscore: float
    pident: float
    # NOTE(review): parsed as float to keep the existing behaviour, although
    # the fixture values look integral -- confirm before narrowing to int.
    nident: float
    sstrand: str
    stitle: str
    btop: str

    @staticmethod
    def init(row) -> 'BlastRow':
        """Build a ``BlastRow`` from a mapping of column name to raw text.

        Args:
            row: Mapping (e.g. a ``csv.DictReader`` row) that contains every
                column named by the dataclass fields, with string values.

        Returns:
            A ``BlastRow`` whose numeric columns are converted with ``int``
            or ``float``; the text columns are stored unchanged.

        Raises:
            KeyError: If a required column is absent from ``row``.
            ValueError: If a numeric column cannot be converted.
        """
        # Columns are looked up by name, so the physical column order of the
        # input file does not matter.
        return BlastRow(
            qseqid=row['qseqid'],
            sseqid=row['sseqid'],
            sgi=row['sgi'],
            qlen=int(row['qlen']),
            slen=int(row['slen']),
            length=int(row['length']),
            qstart=int(row['qstart']),
            qend=int(row['qend']),
            sstart=int(row['sstart']),
            send=int(row['send']),
            evalue=float(row['evalue']),
            bitscore=float(row['bitscore']),
            pident=float(row['pident']),
            nident=float(row['nident']),
            sstrand=row['sstrand'],
            stitle=row['stitle'],
            btop=row['btop'],
        )


def iterate_values_from_csv(file_path):
    """Lazily yield each data row of a headed CSV file as a mapping.

    Args:
        file_path: Path of the CSV file; its first line is taken as the
            header that names the columns.

    Yields:
        One mapping per data row, keyed by column name, with string values
        exactly as produced by ``csv.DictReader``.
    """
    # newline='' is what the csv module asks for: it lets the reader do its
    # own line handling, so newlines embedded in quoted fields are preserved.
    with open(file_path, 'r', newline='') as csv_file:
        yield from csv.DictReader(csv_file)


def iterate_blast_rows_from_tsv(file_path):
def iterate_blast_rows_from_csv(file_path):
previous_key = None
values = []

for row in iterate_values_from_tsv(file_path):
key = row[0]
for row in iterate_values_from_csv(file_path):
key = row['qseqid']
typed = BlastRow.init(row)

if key != previous_key and previous_key is not None:
Expand All @@ -170,10 +170,10 @@ def iterate_blast_rows_from_tsv(file_path):


def blast_iterate(subtype, input_file, working_dir):
    """Run BLAST for ``input_file`` and stream the parsed results.

    Args:
        subtype: Passed to ``st.alignment_file`` to select the database file.
        input_file: Query file handed to ``wrappers.blast``.
        working_dir: Directory in which ``blast.csv`` is created.

    Yields:
        Every item produced by ``iterate_blast_rows_from_csv`` for the
        generated ``blast.csv`` (presumably rows grouped per query
        sequence -- confirm against that function).
    """
    # Opening in 'w' mode creates (or truncates) the results file. The handle
    # itself is never written to; only its name is passed on to the BLAST
    # wrapper and then to the reader.
    with open(os.path.join(working_dir, 'blast.csv'), 'w') as output_file:
        db_file = st.alignment_file(subtype)
        wrappers.blast(db_file, input_file, output_file.name)
        yield from iterate_blast_rows_from_csv(output_file.name)


Expand Down Expand Up @@ -776,8 +776,8 @@ def __exit__(self, *args):
log.info('Non-intact sequences written to ' + self.non_intact_path)
log.info('ORFs for all sequences written to ' + self.orf_path)
log.info('Intactness error information written to ' + self.error_path)
if os.path.exists(os.path.join(self.working_dir, 'blast.tsv')):
log.info('Blast output written to ' + os.path.join(self.working_dir, 'blast.tsv'))
if os.path.exists(os.path.join(self.working_dir, 'blast.csv')):
log.info('Blast output written to ' + os.path.join(self.working_dir, 'blast.csv'))


def write(self, sequence, is_intact, orfs, errors):
Expand Down
125 changes: 125 additions & 0 deletions tests/expected-results-large-csv/blast.csv

Large diffs are not rendered by default.

124 changes: 0 additions & 124 deletions tests/expected-results-large-csv/blast.tsv

This file was deleted.

125 changes: 125 additions & 0 deletions tests/expected-results-large/blast.csv

Large diffs are not rendered by default.

124 changes: 0 additions & 124 deletions tests/expected-results-large/blast.tsv

This file was deleted.

4 changes: 4 additions & 0 deletions tests/expected-results-single-csv/blast.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
qseqid,sseqid,sgi,qlen,slen,length,qstart,qend,sstart,send,evalue,bitscore,pident,nident,sstrand,stitle,btop
MN692145,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,0,9689,9840,9863,1,9689,1,9840,0.0,11812,92.203,9094,plus,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,23ACAG12TC8AG34GA25GA30TA20-C-A-G-A-G-A-A-G-T-T-A-G-A-A-G-A-A-G3CATCGA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT2GA5AT9GA34GA40AT23TG31G-21GA1AT22GC18-T2CT56GA153TC12AG13AC29-N-N-N-N-N-N8AG39A-T-TNTNTA63AG9AG6GA36CA27AG3AG11TC27AT7AC32TC25AG51TGGA19AT5TC23GA6GA8AC1AG21AC7CT3-N-N-N-N-N-N-N-N-N30CA1TC5CG11CT42AG28AG62TC41AG107CT65CT41GA8TC17TC17AG3TC10TC17CT14TC8CT48TG1AG20CT19GCAG38GA17AG8CT67GA6GA32GA5TC1TC81AG23AG12CT7GA8AG5CT11GA2-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N35TC5AT16TC14TC3CT2GA12T-C-C-7GA10AG106AG47TC24TCCT30GT56AG8TC11CT8TC8AG84AGCT15AG65TC17CT41AG11TC20GA11AC74GA2TC114TA127AGTC26TC109AC23AG11TG26TCGC7CT7AGAG14TC5GA3GA64GA6TC91GA48GA56GA4TA37GAGATC1AG11AG65CT65TC18TG4TC2AG33GA1CT11GA9TA44CT19AG66AC34AG11CA14CTTC58TC3AG7TC2AG38AT14CT47GA7GA84GA95AG14CT5CT41AT11TC8TCAG2AG4AG4AG21GA11GA5CA80CT17AG26TC147GA28TC26GA8CT34TC3AG119AG32AC27TCGA64TC40GA24CTAG1CA14GA20TC11AC13GATG3AT10TC8TC5TG41GA9TC24GA2GT6CA15GA1AG2GA45CA10TC1TCCA1GT62GT32CA18AG43CT2TC2-T6GA10AG16AG3TC3CA9AG15AG37CT8GA12TACA30TC5CT8GA57TCCA3AGGA18TC32TC1AC1CATC2TA3AC4TGCT13AT17CA19TC6TA40TC15AC3GT36GA1ANTNCNANGNGNANANGN2CA5AG2-C-T-T-G-T1CG1CGTA1AG-G-G-G-G48GA3GACA2TA38AG46GA66GAAGGT5AT21TC7GA4CGCAAC29TC35GA2GA14CT1CG6TG12GAGAAGCA1GT2GT-N-N-N-N-N-N-N-N-N-N-N-N-A-C-T1GA8AG2AG2TG1AGCGAGGA3GTTG3-A-T-G11AG2GA21TACG1TA6AT1GA2AG6GT3TG13CT16GA2GA9GA4AG3A-G-A-3A-T-G-G-T-A-A-T-T-A-T-T-1TC7GA5TC9TC29AG5AC22TA15GT1AT12GA6GA4AGCAACAG41GA50TC34CT43CAAC14CA13GAGATC10TG2GA1GA1GAGA1TC1AGCT2GC-C-A-G-A-G-A10AGGA6TGAT1GA2GAGT2CG2TA2CT1-N-N-N3GA4AG3AC12TA5AG2AG12CT1GATC7GA2TA1GA3TC1GACGAC8TA2GA6AG1GA-A-A-T13AT5CATG3CT28AG39AT5AC20-T-T-T3GNCNAN1CGATC-2AT1-G2TG2AC2GATAAGCGTG1GCGA3GA1TC5GA4AG1-G-A-C-A-C-A45GA34AT20CA12TC23ANANGNANCNTN2CTGAAGGC1GA3CTAGCACGAT10TC23AG40GA15GA66CT44AG1GC2GA3GA2TC40AG20AG17AG3TC23GA20GA8CG35TA36AC38GA3AG6GA1GTAC3TC18AC9GA1TC18TC2AT1TC6AGCA9GA
1TA3AG6CA8AC7GA8GA13AG16GAGA9CT11CT6AT4TC20AG59CT22GA1TC5GA31CA36GACT2CG9AC4TC3AG13TA6AG50ATAG42TC9CT1TG5GA17CGAT72AG11TG5AG3TC2TG9CT1TG3TC2CA4CG18TG6GACT16GT21TG2GA12AC19-N-N-N-N-N-N-N-N-N-N-N-N21GA12TC10TC6GA21TC9AG3AC8AC22AG11-N-N-N118AG12TC8AG34GA56TA20-C-A-G-A-T-A-A-G-A-T-A-G-A-A-G-A-G-G3CA1GA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT8AT9GA34GA12AG27AT13-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N10TG31G-21GA1AT22GC20CT-T56GA134
MN692145,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,0,9689,9840,635,9074,9689,1,634,0.0,752,91.811,583,plus,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,23ACAG12TC8AG34GA25GA30TA20-C-A-G-A-G-A-A-G-T-T-A-G-A-A-G-A-A-G3CATCGA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT2GA5AT9GA34GA40AT23TG31G-21GA1AT22GC18-T2CT56GA134
MN692145,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,0,9689,9840,661,1,616,9181,9840,0.0,720,88.654,586,plus,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,24AG12TC8AG34GA56TA20-C-A-G-A-T-A-A-G-A-T-A-G-A-A-G-A-G-G3CA1GA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT8AT9GA34GA12AG27AT13-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N10TG31G-21GA1AT22GC18-T2CT56GA134
3 changes: 0 additions & 3 deletions tests/expected-results-single-csv/blast.tsv

This file was deleted.

4 changes: 4 additions & 0 deletions tests/expected-results-single/blast.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
qseqid,sseqid,sgi,qlen,slen,length,qstart,qend,sstart,send,evalue,bitscore,pident,nident,sstrand,stitle,btop
MN692145,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,0,9689,9840,9863,1,9689,1,9840,0.0,11812,92.203,9094,plus,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,23ACAG12TC8AG34GA25GA30TA20-C-A-G-A-G-A-A-G-T-T-A-G-A-A-G-A-A-G3CATCGA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT2GA5AT9GA34GA40AT23TG31G-21GA1AT22GC18-T2CT56GA153TC12AG13AC29-N-N-N-N-N-N8AG39A-T-TNTNTA63AG9AG6GA36CA27AG3AG11TC27AT7AC32TC25AG51TGGA19AT5TC23GA6GA8AC1AG21AC7CT3-N-N-N-N-N-N-N-N-N30CA1TC5CG11CT42AG28AG62TC41AG107CT65CT41GA8TC17TC17AG3TC10TC17CT14TC8CT48TG1AG20CT19GCAG38GA17AG8CT67GA6GA32GA5TC1TC81AG23AG12CT7GA8AG5CT11GA2-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N35TC5AT16TC14TC3CT2GA12T-C-C-7GA10AG106AG47TC24TCCT30GT56AG8TC11CT8TC8AG84AGCT15AG65TC17CT41AG11TC20GA11AC74GA2TC114TA127AGTC26TC109AC23AG11TG26TCGC7CT7AGAG14TC5GA3GA64GA6TC91GA48GA56GA4TA37GAGATC1AG11AG65CT65TC18TG4TC2AG33GA1CT11GA9TA44CT19AG66AC34AG11CA14CTTC58TC3AG7TC2AG38AT14CT47GA7GA84GA95AG14CT5CT41AT11TC8TCAG2AG4AG4AG21GA11GA5CA80CT17AG26TC147GA28TC26GA8CT34TC3AG119AG32AC27TCGA64TC40GA24CTAG1CA14GA20TC11AC13GATG3AT10TC8TC5TG41GA9TC24GA2GT6CA15GA1AG2GA45CA10TC1TCCA1GT62GT32CA18AG43CT2TC2-T6GA10AG16AG3TC3CA9AG15AG37CT8GA12TACA30TC5CT8GA57TCCA3AGGA18TC32TC1AC1CATC2TA3AC4TGCT13AT17CA19TC6TA40TC15AC3GT36GA1ANTNCNANGNGNANANGN2CA5AG2-C-T-T-G-T1CG1CGTA1AG-G-G-G-G48GA3GACA2TA38AG46GA66GAAGGT5AT21TC7GA4CGCAAC29TC35GA2GA14CT1CG6TG12GAGAAGCA1GT2GT-N-N-N-N-N-N-N-N-N-N-N-N-A-C-T1GA8AG2AG2TG1AGCGAGGA3GTTG3-A-T-G11AG2GA21TACG1TA6AT1GA2AG6GT3TG13CT16GA2GA9GA4AG3A-G-A-3A-T-G-G-T-A-A-T-T-A-T-T-1TC7GA5TC9TC29AG5AC22TA15GT1AT12GA6GA4AGCAACAG41GA50TC34CT43CAAC14CA13GAGATC10TG2GA1GA1GAGA1TC1AGCT2GC-C-A-G-A-G-A10AGGA6TGAT1GA2GAGT2CG2TA2CT1-N-N-N3GA4AG3AC12TA5AG2AG12CT1GATC7GA2TA1GA3TC1GACGAC8TA2GA6AG1GA-A-A-T13AT5CATG3CT28AG39AT5AC20-T-T-T3GNCNAN1CGATC-2AT1-G2TG2AC2GATAAGCGTG1GCGA3GA1TC5GA4AG1-G-A-C-A-C-A45GA34AT20CA12TC23ANANGNANCNTN2CTGAAGGC1GA3CTAGCACGAT10TC23AG40GA15GA66CT44AG1GC2GA3GA2TC40AG20AG17AG3TC23GA20GA8CG35TA36AC38GA3AG6GA1GTAC3TC18AC9GA1TC18TC2AT1TC6AGCA9GA
1TA3AG6CA8AC7GA8GA13AG16GAGA9CT11CT6AT4TC20AG59CT22GA1TC5GA31CA36GACT2CG9AC4TC3AG13TA6AG50ATAG42TC9CT1TG5GA17CGAT72AG11TG5AG3TC2TG9CT1TG3TC2CA4CG18TG6GACT16GT21TG2GA12AC19-N-N-N-N-N-N-N-N-N-N-N-N21GA12TC10TC6GA21TC9AG3AC8AC22AG11-N-N-N118AG12TC8AG34GA56TA20-C-A-G-A-T-A-A-G-A-T-A-G-A-A-G-A-G-G3CA1GA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT8AT9GA34GA12AG27AT13-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N10TG31G-21GA1AT22GC20CT-T56GA134
MN692145,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,0,9689,9840,635,9074,9689,1,634,0.0,752,91.811,583,plus,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,23ACAG12TC8AG34GA25GA30TA20-C-A-G-A-G-A-A-G-T-T-A-G-A-A-G-A-A-G3CATCGA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT2GA5AT9GA34GA40AT23TG31G-21GA1AT22GC18-T2CT56GA134
MN692145,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,0,9689,9840,661,1,616,9181,9840,0.0,720,88.654,586,plus,Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455,24AG12TC8AG34GA56TA20-C-A-G-A-T-A-A-G-A-T-A-G-A-A-G-A-G-G3CA1GA1CAAG7TCGAAC3AC3CT1GA7CTAG8GT8AT9GA34GA12AG27AT13-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N-N10TG31G-21GA1AT22GC18-T2CT56GA134
3 changes: 0 additions & 3 deletions tests/expected-results-single/blast.tsv

This file was deleted.

Loading

0 comments on commit db531f7

Please sign in to comment.