Skip to content

Commit

Permalink
Improves unknown estimation by accounting all mapped reads
Browse files Browse the repository at this point in the history
  • Loading branch information
abmiguez committed Jun 14, 2021
1 parent 8b036c9 commit 6c67b0c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 12 deletions.
25 changes: 17 additions & 8 deletions metaphlan/metaphlan.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
__author__ = ('Francesco Beghini ([email protected]),'
'Nicola Segata ([email protected]), '
'Duy Tin Truong, '
'Francesco Asnicar ([email protected])')
__version__ = '3.0.9'
__date__ = '17 May 2021'
'Francesco Asnicar ([email protected]), '
'Aitor Blanco Miguez ([email protected])')
__version__ = '3.0.10'
__date__ = '14 Jun 2021'

import sys
try:
Expand Down Expand Up @@ -404,6 +405,7 @@ def run_bowtie2(fna_in, outfmt6_out, bowtie2_db, preset, nproc, min_mapq_val, fi
except IOError as e:
sys.stderr.write('IOError: "{}"\nUnable to open sam output file.\n'.format(e))
sys.exit(1)

for line in p.stdout:
if samout:
sam_file.write(line)
Expand Down Expand Up @@ -432,7 +434,7 @@ def run_bowtie2(fna_in, outfmt6_out, bowtie2_db, preset, nproc, min_mapq_val, fi
if not avg_read_length:
sys.stderr.write('Fatal error running MetaPhlAn. The average read length was not estimated.\nPlease check your input files.\n')
sys.exit(1)

outf.write(lmybytes('#nreads\t{}\n'.format(int(nreads))))
outf.write(lmybytes('#avg_read_length\t{}'.format(avg_read_length)))
outf.close()
Expand Down Expand Up @@ -1053,6 +1055,17 @@ def main():

if not CAMI_OUTPUT:
outf.write('#' + '\t'.join((pars["sample_id_key"], pars["sample_id"])) + '\n')

if ESTIMATE_UNK:
mapped_reads = 0
cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
for c, m in cl2pr.items():
markers_cov = [a / 1000 for _, a in m]
mapped_reads += np.mean(markers_cov) * tree.all_clades[c.split('|')[-1]].glen
# If the mapped reads are over-estimated, set the ratio at 1
fraction_mapped_reads = min(mapped_reads/float(n_metagenome_reads), 1.0)
else:
fraction_mapped_reads = 1.0

if pars['t'] == 'reads_map':
if not MPA2_OUTPUT:
Expand All @@ -1073,9 +1086,6 @@ def main():

cl2ab, _, tot_nreads = tree.relative_abundances(
pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )

# If the mapped reads are over-estimated, set the ratio at 1
fraction_mapped_reads = min(tot_nreads/float(n_metagenome_reads), 1.0) if ESTIMATE_UNK else 1.0

outpred = [(taxstr, taxid,round(relab*100.0,5)) for (taxstr, taxid), relab in cl2ab.items() if relab > 0.0]
has_repr = False
Expand Down Expand Up @@ -1129,7 +1139,6 @@ def main():
cl2ab, rr, tot_nreads = tree.relative_abundances(
pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )

fraction_mapped_reads = min(tot_nreads/float(n_metagenome_reads), 1.0) if ESTIMATE_UNK else 1.0
unmapped_reads = max(n_metagenome_reads - tot_nreads, 0)

outpred = [(taxstr, taxid,round(relab*100.0*fraction_mapped_reads,5)) for (taxstr, taxid),relab in cl2ab.items() if relab > 0.0]
Expand Down
6 changes: 3 additions & 3 deletions metaphlan/strainphlan.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
'Francesco Asnicar ([email protected]), '
'Moreno Zolfo ([email protected]), '
'Francesco Beghini ([email protected])')
__version__ = '3.0.8'
__date__ = '7 May 2021'
__version__ = '3.0.10'
__date__ = '14 Jun 2021'


import sys
Expand Down Expand Up @@ -398,7 +398,7 @@ def sample_markers_to_fasta(sample_path, filtered_samples, tmp_dir, filtered_cla
for r in sample:
if r['marker'] in filtered_clade_markers:
marker_name = parse_marker_name(r['marker'])
seq = SeqRecord(Seq(r['sequence'][trim_sequences:-trim_sequences].replace("*","N").replace("-","N")), id=marker_name, description=marker_name)
seq = SeqRecord(Seq(r['sequence'][trim_sequences:-trim_sequences].replace("*","N").replace("-","N").strip('N')), id=marker_name, description=marker_name)
SeqIO.write(seq, marker_fna, 'fasta')


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

setuptools.setup(
name='MetaPhlAn',
version='3.0.9',
version='3.0.10',
author='Francesco Beghini',
author_email='[email protected]',
url='http://github.com/biobakery/MetaPhlAn/',
Expand Down

0 comments on commit 6c67b0c

Please sign in to comment.