diff --git a/README.md b/README.md index 8ba20e9..9766c63 100755 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ * Better representation of, not only the human gut microbiome but also many other animal and ecological environments * Estimation of metagenome composed by microbes not included in the database with parameter `--unclassified_estimation` * Compatibility with MetaPhlAn 3 databases with parameter `--mpa3` + +Full [changeset](https://github.com/biobakery/MetaPhlAn/blob/master/changeset.txt) ------------- ## Description diff --git a/bioconda_recipe/meta.yaml b/bioconda_recipe/meta.yaml index 79a8cdd..4b15487 100644 --- a/bioconda_recipe/meta.yaml +++ b/bioconda_recipe/meta.yaml @@ -28,7 +28,7 @@ requirements: - dendropy - numpy - cmseq - - phylophlan + - phylophlan >=3.0.3 - biom-format - matplotlib-base - biopython diff --git a/changeset.txt b/changeset.txt index e5aa9e1..2f45248 100755 --- a/changeset.txt +++ b/changeset.txt @@ -1,19 +1,25 @@ -=== Version 4 -* Adoption of the species-level genome bins system (SGBs). -* New MetaPhlAn marker genes extracted identified from ~1M microbial genomes. -* Ability to profile 21,978 known (kSGBs) and 4,992 unknown (uSGBs) microbial species. -* Better representation of, not only the human gut microbiome but also many other animal and ecological environments. -* Estimation of metagenome composed by microbes not included in the database with parameter --unclassified_estimation. 
+=== Version 4.0.1 +* The new --offline parameter stops MetaPhlAn from automatically checking for updates +* Fixes "KeyError: 't'" error when running MetaPhlAn with the --CAMI_format_output parameter +* Improved StrainPhlAn's gaps management with the newest version of PhyloPhlAn (version 3.0.3) +* Improved set of colors for the plot_tree_graphlan.py script + +=== Version 4.0.0 +* Adoption of the species-level genome bins system (SGBs) +* New MetaPhlAn marker genes extracted and identified from ~1M microbial genomes +* Ability to profile 21,978 known (kSGBs) and 4,992 unknown (uSGBs) microbial species +* Better representation of not only the human gut microbiome but also many other animal and ecological environments +* Estimation of metagenome composed of microbes not included in the database with parameter --unclassified_estimation * Compatibility with MetaPhlAn 3 databases with parameter --mpa3 === Version 3.1 -* 433 low-quality species were removed from the MetaPhlAn 3.1 marker database and 2,680 species were added (for a new total of 15,766; a 17% increase). -* Marker genes for a subset of existing bioBakery 3 species were also revised. -* Most existing bioBakery 3 species pangenomes were updated with revised or expanded gene content. -* MetaPhlAn 3.1 software has been updated to work with revised marker database. 
+* 433 low-quality species were removed from the MetaPhlAn 3.1 marker database and 2,680 species were added (for a new total of 15,766; a 17% increase) +* Marker genes for a subset of existing bioBakery 3 species were also revised +* Most existing bioBakery 3 species pangenomes were updated with revised or expanded gene content +* MetaPhlAn 3.1 software has been updated to work with revised marker database === Version 3.0 -* New MetaPhlAn marker genes extracted with a newer version of ChocoPhlAn based on UniRef +* New MetaPhlAn marker genes extracted with a newer version of ChocoPhlAn based on UniRef * Estimation of metagenome composed by unknown microbes with parameter `--unknown_estimation` * Automatic retrieval and installation of the latest MetaPhlAn database with parameter `--index latest` * Virus profiling with `--add_viruses` diff --git a/metaphlan/__init__.py b/metaphlan/__init__.py index 38d61ae..2c8b631 100644 --- a/metaphlan/__init__.py +++ b/metaphlan/__init__.py @@ -238,14 +238,17 @@ def download_unpack_zip(url,download_file_name,folder,software_name): except EnvironmentError: print("WARNING: Unable to remove the temp download: " + download_file) -def resolve_latest_database(bowtie2_db,mpa_latest_url, force=False): - if os.path.exists(os.path.join(bowtie2_db,'mpa_latest')): +def resolve_latest_database(bowtie2_db,mpa_latest_url, force=False, offline=False): + if not offline and os.path.exists(os.path.join(bowtie2_db,'mpa_latest')): ctime_latest_db = int(os.path.getctime(os.path.join(bowtie2_db,'mpa_latest'))) if int(time.time()) - ctime_latest_db > 31536000: #1 year in epoch os.rename(os.path.join(bowtie2_db,'mpa_latest'),os.path.join(bowtie2_db,'mpa_previous')) download(mpa_latest_url, os.path.join(bowtie2_db,'mpa_latest'), force=True) if not os.path.exists(os.path.join(bowtie2_db,'mpa_latest') or force): + if offline: + print("Database cannot be downloaded with the --offline option activated") + sys.exit() download(mpa_latest_url, 
os.path.join(bowtie2_db,'mpa_latest')) with open(os.path.join(bowtie2_db,'mpa_latest')) as mpa_latest: @@ -253,7 +256,7 @@ def resolve_latest_database(bowtie2_db,mpa_latest_url, force=False): return ''.join(latest_db_version) -def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_redownload_latest): +def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_redownload_latest, offline): # Create the folder if it does not already exist if not os.path.isdir(bowtie2_db): try: @@ -266,7 +269,7 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re use_zenodo = False try: - if urllib.request.urlopen("http://cmprod1.cibio.unitn.it/biobakery4/metaphlan_databases/mpa_latest").getcode() != 200: + if not offline and urllib.request.urlopen("http://cmprod1.cibio.unitn.it/biobakery4/metaphlan_databases/mpa_latest").getcode() != 200: # use_zenodo = True pass except: @@ -284,10 +287,9 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re #try downloading from the segatalab website. 
If fails, use zenodo if index == 'latest': mpa_latest = 'http://cmprod1.cibio.unitn.it/biobakery4/metaphlan_databases/mpa_latest' - - index = resolve_latest_database(bowtie2_db, mpa_latest, force_redownload_latest) + index = resolve_latest_database(bowtie2_db, mpa_latest, force_redownload_latest, offline) - if os.path.exists(os.path.join(bowtie2_db,'mpa_previous')): + if not offline and os.path.exists(os.path.join(bowtie2_db,'mpa_previous')): with open(os.path.join(bowtie2_db,'mpa_previous')) as mpa_previous: previous_db_version = ''.join([line.strip() for line in mpa_previous if not line.startswith('#')]) @@ -302,7 +304,9 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re if len(glob(os.path.join(bowtie2_db, "*{}*".format(index)))) >= 7: return index - + if offline: + print("Database cannot be downloaded with the --offline option activated") + sys.exit() # download the tar archive and decompress sys.stderr.write("\nDownloading MetaPhlAn database\nPlease note due to " "the size this might take a few minutes\n") diff --git a/metaphlan/metaphlan.py b/metaphlan/metaphlan.py index 7ff4369..bdacf54 100755 --- a/metaphlan/metaphlan.py +++ b/metaphlan/metaphlan.py @@ -4,8 +4,8 @@ 'Nicola Segata (nicola.segata@unitn.it), ' 'Duy Tin Truong, ' 'Francesco Asnicar (f.asnicar@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import sys try: @@ -337,6 +337,8 @@ def read_params(args): help="The number of CPUs to use for parallelizing the mapping [default 4]") arg('--install', action='store_true', help="Only checks if the MetaPhlAn DB is installed and installs it if not. 
All other parameters are ignored.") + arg('--offline', action='store_true', + help="If used, MetaPhlAn will not check for new database updates.") arg('--force_download', action='store_true', help="Force the re-download of the latest MetaPhlAn database.") arg('--read_min_len', type=int, default=70, @@ -952,7 +954,7 @@ def main(): ESTIMATE_UNK = pars['unclassified_estimation'] # check if the database is installed, if not then install - pars['index'] = check_and_install_database(pars['index'], pars['bowtie2db'], pars['bowtie2_build'], pars['nproc'], pars['force_download']) + pars['index'] = check_and_install_database(pars['index'], pars['bowtie2db'], pars['bowtie2_build'], pars['nproc'], pars['force_download'], pars['offline']) if pars['install']: sys.stderr.write('The database is installed\n') @@ -1129,7 +1131,7 @@ def main(): if CAMI_OUTPUT: for clade, taxid, relab in sorted( outpred, reverse=True, key=lambda x:x[2]+(100.0*(8-(x[0].count("|"))))): - if taxid: + if taxid and clade.split('|')[-1][0] != 't': rank = ranks2code[clade.split('|')[-1][0]] leaf_taxid = taxid.split('|')[-1] taxpathsh = '|'.join([remove_prefix(name) if '_unclassified' not in name else '' for name in clade.split('|')]) diff --git a/metaphlan/strainphlan.py b/metaphlan/strainphlan.py index e3a7619..91821e1 100755 --- a/metaphlan/strainphlan.py +++ b/metaphlan/strainphlan.py @@ -4,8 +4,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import sys @@ -415,7 +415,7 @@ def sample_markers_to_fasta(sample_path, filtered_samples, tmp_dir, filtered_cla for r in sample: if r['marker'] in filtered_clade_markers: marker_name = parse_marker_name(r['marker']) - seq = SeqRecord(Seq(r['sequence'][trim_sequences:-trim_sequences].replace("*","-")), id=marker_name, description=marker_name) + seq = 
SeqRecord(Seq(r['sequence'][trim_sequences:-trim_sequences].replace("*","-").replace('-','N')), id=marker_name, description=marker_name) SeqIO.write(seq, marker_fna, 'fasta') diff --git a/metaphlan/utils/add_metadata_tree.py b/metaphlan/utils/add_metadata_tree.py index 2b02c18..51df209 100755 --- a/metaphlan/utils/add_metadata_tree.py +++ b/metaphlan/utils/add_metadata_tree.py @@ -1,8 +1,8 @@ #!/usr/bin/env python __author__ = ('Duy Tin Truong (duytin.truong@unitn.it), ' 'Aitor Blanco Miguez (aitor.blancomiguez@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import argparse as ap import pandas diff --git a/metaphlan/utils/external_exec.py b/metaphlan/utils/external_exec.py index 63c5730..f5b5ff1 100755 --- a/metaphlan/utils/external_exec.py +++ b/metaphlan/utils/external_exec.py @@ -3,8 +3,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import os, sys, re, shutil, tempfile import subprocess as sb @@ -147,7 +147,7 @@ def execute_phylophlan(samples_markers_dir, conf_file, min_entries, min_markers, " --databases_folder "+tmp_dir+" -t n -f "+conf_file+ " --diversity low"+accuracy+" --genome_extension fna"+ " --force_nucleotides --min_num_entries "+str(min_entries)+ - " --min_num_markers "+str(min_markers), + " --convert_N2gap --min_num_markers "+str(min_markers), "input" : "-i", "output_path" : "--output_folder", "output" : "-o", diff --git a/metaphlan/utils/extract_markers.py b/metaphlan/utils/extract_markers.py index 2d300f1..9f37ff6 100755 --- a/metaphlan/utils/extract_markers.py +++ b/metaphlan/utils/extract_markers.py @@ -4,8 +4,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' 
+__version__ = '4.0.1' +__date__ = '24 Aug 2022' import sys try: diff --git a/metaphlan/utils/parallelisation.py b/metaphlan/utils/parallelisation.py index e3eb9f2..fbc181e 100755 --- a/metaphlan/utils/parallelisation.py +++ b/metaphlan/utils/parallelisation.py @@ -3,8 +3,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' try: from .util_fun import error diff --git a/metaphlan/utils/plot_tree_graphlan.py b/metaphlan/utils/plot_tree_graphlan.py index 0911e48..514e3dd 100755 --- a/metaphlan/utils/plot_tree_graphlan.py +++ b/metaphlan/utils/plot_tree_graphlan.py @@ -1,17 +1,20 @@ #!/usr/bin/env python __author__ = ('Duy Tin Truong (duytin.truong@unitn.it), ' 'Aitor Blanco Miguez (aitor.blancomiguez@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import argparse as ap import dendropy from io import StringIO import re +import random from collections import defaultdict import matplotlib.colors as colors import subprocess +def for_shuffle(): + return 0.1 def read_params(): p = ap.ArgumentParser() @@ -106,7 +109,10 @@ def main(): count += 1 node.taxon = dendropy.Taxon(label='node_%d'%count) metadatas = sorted(list(metadatas)) - color_names = list(colors.cnames.keys()) + color_names = list(colors.TABLEAU_COLORS.keys()) + color_names_plus = list(colors.CSS4_COLORS.keys()) + random.shuffle(color_names_plus, for_shuffle) + color_names += color_names_plus metadata2color = {} for i, md in enumerate(metadatas): metadata2color[md] = color_names[i % len(color_names)] diff --git a/metaphlan/utils/sample2markers.py b/metaphlan/utils/sample2markers.py index 3ab8c84..db53d1c 100755 --- a/metaphlan/utils/sample2markers.py +++ b/metaphlan/utils/sample2markers.py @@ -4,8 +4,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo 
(moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import sys try: diff --git a/metaphlan/utils/sgb_to_gtdb_profile.py b/metaphlan/utils/sgb_to_gtdb_profile.py index 3512444..722091e 100755 --- a/metaphlan/utils/sgb_to_gtdb_profile.py +++ b/metaphlan/utils/sgb_to_gtdb_profile.py @@ -1,6 +1,6 @@ __author__ = 'Aitor Blanco (aitor.blancomiguez@unitn.it' -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import os, time, sys import argparse as ap diff --git a/metaphlan/utils/strain_transmission.py b/metaphlan/utils/strain_transmission.py index c6ccda1..ff06e73 100755 --- a/metaphlan/utils/strain_transmission.py +++ b/metaphlan/utils/strain_transmission.py @@ -1,7 +1,7 @@ __author__ = ('Aitor Blanco (aitor.blancomiguez@unitn.it), ' 'Mireia Valles-Colomer (mireia.vallescolomer@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import os, time, sys import argparse as ap diff --git a/metaphlan/utils/util_fun.py b/metaphlan/utils/util_fun.py index 7bbc4a8..5624f09 100755 --- a/metaphlan/utils/util_fun.py +++ b/metaphlan/utils/util_fun.py @@ -3,8 +3,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '4.0.0' -__date__ = '22 Aug 2022' +__version__ = '4.0.1' +__date__ = '24 Aug 2022' import os, sys, re, pickletools, pickle, time, bz2, gzip diff --git a/setup.py b/setup.py index bf393e9..e26ec57 100755 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setuptools.setup( name='MetaPhlAn', - version='4.0.0', + version='4.0.1', author='Aitor Blanco-Miguez', author_email='aitor.blancomiguez@unitn.it', url='http://github.com/biobakery/MetaPhlAn/',