From bf1cf82109e8d92e6db826864b9dd33f53f101b7 Mon Sep 17 00:00:00 2001 From: Francesco Beghini Date: Thu, 23 Jul 2020 13:42:24 +0200 Subject: [PATCH] Use Zenodo as backup download --- metaphlan/__init__.py | 63 +++++++++++++++++++++++--------------- metaphlan/metaphlan.py | 17 +++++----- metaphlan/utils/cmseq | 2 +- metaphlan/utils/hclust2 | 2 +- metaphlan/utils/phylophlan | 2 +- 5 files changed, 49 insertions(+), 37 deletions(-) diff --git a/metaphlan/__init__.py b/metaphlan/__init__.py index d0e7c55..b548d70 100644 --- a/metaphlan/__init__.py +++ b/metaphlan/__init__.py @@ -71,7 +71,8 @@ def report(self, blocknum, block_size, total_size): # set the location of the database download url DATABASE_DOWNLOAD = "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AADHWzATSQcI0CNFD0sk7MAga" -FILE_LIST= "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AAA4XDP85WHon_eHvztxkamTa/file_list.txt?dl=1" +ZENODO_DATABASE_DOWNLOAD = "https://zenodo.org/record/3957592" +DBX_FILE_LIST = "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AAA4XDP85WHon_eHvztxkamTa/file_list.txt?dl=1" def download(url, download_file, force=False): """ @@ -88,7 +89,7 @@ def download(url, download_file, force=False): else: sys.stderr.write("\nFile {} already present!\n".format(download_file)) -def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, nproc): +def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, nproc, use_zenodo): """ Download the url to the file and decompress into the folder """ @@ -105,21 +106,27 @@ def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, np sys.exit("ERROR: The directory is not writeable: " + folder + ". 
" "Please modify the permissions.") - #Download the list of all the files in the Dropbox folder - list_file_path = os.path.join(folder, "file_list.txt") - download(FILE_LIST, list_file_path) + #local path of the tarfile and md5file + tar_file = os.path.join(folder, download_file_name + ".tar") + md5_file = os.path.join(folder, download_file_name + ".md5") - if os.path.isfile(list_file_path): - with open(list_file_path) as f: - ls_f = dict( [row.strip().split() for row in f]) + #Download the list of all the files in the Dropbox folder + if not use_zenodo: + list_file_path = os.path.join(folder, "file_list.txt") + if not os.path.exists(list_file_path): + download(FILE_LIST, list_file_path) + + if os.path.isfile(list_file_path): + with open(list_file_path) as f: + ls_f = dict( [row.strip().split() for row in f]) + url_tar_file = ls_f[download_file_name + ".tar"] + url_md5_file = ls_f[download_file_name + ".md5"] + else: + url_tar_file = "https://zenodo.org/record/3957592/files/{}.tar?download=1".format(download_file_name) + url_md5_file = "https://zenodo.org/record/3957592/files/{}.md5?download=1".format(download_file_name) - tar_file = os.path.join(folder, download_file_name + ".tar") - url_tar_file = ls_f[download_file_name + ".tar"] + # download tar and MD5 checksum download(url_tar_file, tar_file) - - # download MD5 checksum - md5_file = os.path.join(folder, download_file_name + ".md5") - url_md5_file = ls_f[download_file_name + ".md5"] download(url_md5_file, md5_file) md5_md5 = None @@ -229,21 +236,24 @@ def download_unpack_zip(url,download_file_name,folder,software_name): except EnvironmentError: print("WARNING: Unable to remove the temp download: " + download_file) -def resolve_latest_database(bowtie2_db,mpa_latest_dbx_url, force=False): +def resolve_latest_database(bowtie2_db,mpa_latest_url, force=False): if os.path.exists(os.path.join(bowtie2_db,'mpa_latest')): ctime_latest_db = int(os.path.getctime(os.path.join(bowtie2_db,'mpa_latest'))) if int(time.time()) 
- ctime_latest_db > 31536000: #1 year in epoch os.rename(os.path.join(bowtie2_db,'mpa_latest'),os.path.join(bowtie2_db,'mpa_previous')) - download(mpa_latest_dbx_url, os.path.join(bowtie2_db,'mpa_latest'), force=True) + download(mpa_latest_url, os.path.join(bowtie2_db,'mpa_latest'), force=True) if not os.path.exists(os.path.join(bowtie2_db,'mpa_latest') or force): - download(mpa_latest_dbx_url, os.path.join(bowtie2_db,'mpa_latest')) + download(mpa_latest_url, os.path.join(bowtie2_db,'mpa_latest')) with open(os.path.join(bowtie2_db,'mpa_latest')) as mpa_latest: latest_db_version = [line.strip() for line in mpa_latest if not line.startswith('#')] return ''.join(latest_db_version) +def download_from_dropbox(bowtie2_db): + pass #TODO: placeholder, not implemented yet + def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_redownload_latest): # Create the folder if it does not already exist if not os.path.isdir(bowtie2_db): @@ -255,14 +265,19 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re if index != 'latest' and len(glob(os.path.join(bowtie2_db, "*{}*".format(index)))) >= 6: return index - #Download the list of all the files in the Dropbox folder list_file_path = os.path.join(bowtie2_db, "file_list.txt") - if not os.path.exists(list_file_path): - download(FILE_LIST, list_file_path) + #try downloading from Dropbox + try: + if not os.path.exists(list_file_path): + download(DBX_FILE_LIST, list_file_path) - if os.path.isfile(list_file_path): - with open(list_file_path) as f: - ls_f = dict( [row.strip().split() for row in f]) + if os.path.isfile(list_file_path): + with open(list_file_path) as f: + ls_f = dict( [row.strip().split() for row in f]) + use_zenodo = False + except Exception: #If fails, use zenodo + ls_f = {'mpa_latest' : 'https://zenodo.org/record/3957592/files/mpa_latest?download=1' } + use_zenodo = True """ Check if the database is installed, if not download and install """ if index == 'latest': @@ -287,6 +302,6 @@ def check_and_install_database(index, 
bowtie2_db, bowtie2_build, nproc, force_re # download the tar archive and decompress sys.stderr.write("\nDownloading MetaPhlAn database\nPlease note due to " "the size this might take a few minutes\n") - download_unpack_tar(FILE_LIST, index, bowtie2_db, bowtie2_build, nproc) + download_unpack_tar(DBX_FILE_LIST, index, bowtie2_db, bowtie2_build, nproc, use_zenodo) sys.stderr.write("\nDownload complete\n") return index \ No newline at end of file diff --git a/metaphlan/metaphlan.py b/metaphlan/metaphlan.py index 8c85a39..de4f50d 100755 --- a/metaphlan/metaphlan.py +++ b/metaphlan/metaphlan.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -__author__ = ('Nicola Segata (nicola.segata@unitn.it), ' +__author__ = ('Francesco Beghini (francesco.beghini@unitn.it),' + 'Nicola Segata (nicola.segata@unitn.it), ' 'Duy Tin Truong, ' - 'Francesco Asnicar (f.asnicar@unitn.it), ' - 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '3.0.1' -__date__ = '25 Jun 2020' + 'Francesco Asnicar (f.asnicar@unitn.it)') +__version__ = '3.0.2' +__date__ = '23 Jul 2020' import sys try: @@ -16,6 +16,7 @@ sys.stderr.write("MetaPhlAn requires Python 3, your current Python version is {}.{}.{}\n" .format(sys.version_info[0], sys.version_info[1], sys.version_info[2])) sys.exit(1) + import os import stat import re @@ -26,6 +27,7 @@ from subprocess import DEVNULL import argparse as ap import bz2 +import json import pickle import subprocess as subp import tempfile as tf @@ -54,11 +56,6 @@ except ImportError: sys.stderr.write("Warning! Biom python library not detected!" "\n Exporting to biom format will not work!\n") -try: - import json -except ImportError: - sys.stderr.write("Warning! json python library not detected!" 
- "\n Exporting to biom format will not work!\n") # get the directory that contains this script metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__)) diff --git a/metaphlan/utils/cmseq b/metaphlan/utils/cmseq index 4fd28ae..4b5dbca 160000 --- a/metaphlan/utils/cmseq +++ b/metaphlan/utils/cmseq @@ -1 +1 @@ -Subproject commit 4fd28ae821c5133feca73873c1271e582668495f +Subproject commit 4b5dbca13acaa9a1a9e1e590a2a178218ae2dc4f diff --git a/metaphlan/utils/hclust2 b/metaphlan/utils/hclust2 index 90a07c8..2dad3f2 160000 --- a/metaphlan/utils/hclust2 +++ b/metaphlan/utils/hclust2 @@ -1 +1 @@ -Subproject commit 90a07c84bd076e9cead6e06936020c38c7b044bd +Subproject commit 2dad3f2ea2959799338ad39e783965f6bb64233e diff --git a/metaphlan/utils/phylophlan b/metaphlan/utils/phylophlan index 24ca898..2378603 160000 --- a/metaphlan/utils/phylophlan +++ b/metaphlan/utils/phylophlan @@ -1 +1 @@ -Subproject commit 24ca898323926d48bc2c59ae4d12190bf7ccd334 +Subproject commit 2378603a52e0b6e86cd87c47a848b3362bab32a9