Skip to content

Commit

Permalink
Use Zenodo as backup download
Browse files Browse the repository at this point in the history
  • Loading branch information
Francesco Beghini committed Jul 23, 2020
1 parent 15afe9c commit bf1cf82
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 37 deletions.
63 changes: 39 additions & 24 deletions metaphlan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def report(self, blocknum, block_size, total_size):

# set the location of the database download url
DATABASE_DOWNLOAD = "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AADHWzATSQcI0CNFD0sk7MAga"
FILE_LIST= "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AAA4XDP85WHon_eHvztxkamTa/file_list.txt?dl=1"
ZENODO_DATABASE_DOWNLOAD = "https://zenodo.org/record/3957592"
DBX_FILE_LIST = "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AAA4XDP85WHon_eHvztxkamTa/file_list.txt?dl=1"

def download(url, download_file, force=False):
"""
Expand All @@ -88,7 +89,7 @@ def download(url, download_file, force=False):
else:
sys.stderr.write("\nFile {} already present!\n".format(download_file))

def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, nproc):
def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, nproc, use_zenodo):
"""
Download the url to the file and decompress into the folder
"""
Expand All @@ -105,21 +106,27 @@ def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, np
sys.exit("ERROR: The directory is not writeable: " + folder + ". "
"Please modify the permissions.")

#Download the list of all the files in the Dropbox folder
list_file_path = os.path.join(folder, "file_list.txt")
download(FILE_LIST, list_file_path)
#local path of the tarfile and md5file
tar_file = os.path.join(folder, download_file_name + ".tar")
md5_file = os.path.join(folder, download_file_name + ".md5")

if os.path.isfile(list_file_path):
with open(list_file_path) as f:
ls_f = dict( [row.strip().split() for row in f])
#Download the list of all the files in the Dropbox folder
if not use_zenodo:
list_file_path = os.path.join(folder, "file_list.txt")
if not os.path.exists(list_file_path):
download(FILE_LIST, list_file_path)

if os.path.isfile(list_file_path):
with open(list_file_path) as f:
ls_f = dict( [row.strip().split() for row in f])
url_tar_file = ls_f[download_file_name + ".tar"]
url_md5_file = ls_f[download_file_name + ".md5"]
else:
url_tar_file = "https://zenodo.org/record/3957592/files/{}.tar?download=1".format(download_file_name)
url_md5_file = "https://zenodo.org/record/3957592/files/{}.md5?download=1".format(download_file_name)

tar_file = os.path.join(folder, download_file_name + ".tar")
url_tar_file = ls_f[download_file_name + ".tar"]
# download tar and MD5 checksum
download(url_tar_file, tar_file)

# download MD5 checksum
md5_file = os.path.join(folder, download_file_name + ".md5")
url_md5_file = ls_f[download_file_name + ".md5"]
download(url_md5_file, md5_file)

md5_md5 = None
Expand Down Expand Up @@ -229,21 +236,24 @@ def download_unpack_zip(url,download_file_name,folder,software_name):
except EnvironmentError:
print("WARNING: Unable to remove the temp download: " + download_file)

def resolve_latest_database(bowtie2_db, mpa_latest_url, force=False):
    """
    Return the database version named in the local ``mpa_latest`` pointer file.

    The pointer file lives at ``<bowtie2_db>/mpa_latest``. It is fetched with
    ``download`` when it is missing, when ``force`` is true, or when
    ``os.path.getctime`` reports it as more than one year old; in the latter
    case the stale copy is first renamed to ``mpa_previous``.

    Parameters:
        bowtie2_db: folder holding the database files and the pointer file.
        mpa_latest_url: URL the ``mpa_latest`` pointer file is downloaded from.
        force: when true, re-download the pointer file even if a recent copy
            is already present.

    Returns:
        The non-comment lines of ``mpa_latest`` (lines not starting with
        ``#``), stripped and concatenated into a single string.

    Raises:
        Whatever ``open`` raises if the pointer file still does not exist
        after the download attempt.
    """
    latest_path = os.path.join(bowtie2_db, 'mpa_latest')
    refreshed = False

    if os.path.exists(latest_path):
        ctime_latest_db = int(os.path.getctime(latest_path))
        if int(time.time()) - ctime_latest_db > 31536000:  # 365 days, in seconds
            # Keep the stale pointer around as mpa_previous, then fetch a new one.
            os.rename(latest_path, os.path.join(bowtie2_db, 'mpa_previous'))
            download(mpa_latest_url, latest_path, force=True)
            refreshed = True

    # The original test was `os.path.exists(path or force)`: the closing
    # parenthesis was misplaced, so `force` was never evaluated. `force` is
    # also forwarded to download() so an existing file is really replaced.
    # `refreshed` avoids a second download when the stale branch just ran.
    if not os.path.exists(latest_path) or (force and not refreshed):
        download(mpa_latest_url, latest_path, force=force)

    with open(latest_path) as mpa_latest:
        latest_db_version = [line.strip() for line in mpa_latest if not line.startswith('#')]

    return ''.join(latest_db_version)

def download_from_dropbox(bowtie2_db):


def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_redownload_latest):
# Create the folder if it does not already exist
if not os.path.isdir(bowtie2_db):
Expand All @@ -255,14 +265,19 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re
if index != 'latest' and len(glob(os.path.join(bowtie2_db, "*{}*".format(index)))) >= 6:
return index

#Download the list of all the files in the Dropbox folder
list_file_path = os.path.join(bowtie2_db, "file_list.txt")
if not os.path.exists(list_file_path):
download(FILE_LIST, list_file_path)
#try downloading from Dropbox
try:
if not os.path.exists(list_file_path):
download(DBX_FILE_LIST, list_file_path)

if os.path.isfile(list_file_path):
with open(list_file_path) as f:
ls_f = dict( [row.strip().split() for row in f])
if os.path.isfile(list_file_path):
with open(list_file_path) as f:
ls_f = dict( [row.strip().split() for row in f])
use_zenodo = False
except: #If fails, use zenodo
ls_f = {'mpa_lates' : 'https://zenodo.org/record/3957592/files/mpa_latest?download=1' }
use_zenodo = True

""" Check if the database is installed, if not download and install """
if index == 'latest':
Expand All @@ -287,6 +302,6 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re
# download the tar archive and decompress
sys.stderr.write("\nDownloading MetaPhlAn database\nPlease note due to "
"the size this might take a few minutes\n")
download_unpack_tar(FILE_LIST, index, bowtie2_db, bowtie2_build, nproc)
download_unpack_tar(DBX_FILE_LIST, index, bowtie2_db, bowtie2_build, nproc, use_zenodo)
sys.stderr.write("\nDownload complete\n")
return index
17 changes: 7 additions & 10 deletions metaphlan/metaphlan.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/env python
__author__ = ('Nicola Segata ([email protected]), '
__author__ = ('Francesco Beghini ([email protected]),'
'Nicola Segata ([email protected]), '
'Duy Tin Truong, '
'Francesco Asnicar ([email protected]), '
'Francesco Beghini ([email protected])')
__version__ = '3.0.1'
__date__ = '25 Jun 2020'
'Francesco Asnicar ([email protected])')
__version__ = '3.0.2'
__date__ = '23 Jul 2020'

import sys
try:
Expand All @@ -16,6 +16,7 @@
sys.stderr.write("MetaPhlAn requires Python 3, your current Python version is {}.{}.{}\n"
.format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))
sys.exit(1)

import os
import stat
import re
Expand All @@ -26,6 +27,7 @@
from subprocess import DEVNULL
import argparse as ap
import bz2
import json
import pickle
import subprocess as subp
import tempfile as tf
Expand Down Expand Up @@ -54,11 +56,6 @@
except ImportError:
sys.stderr.write("Warning! Biom python library not detected!"
"\n Exporting to biom format will not work!\n")
try:
import json
except ImportError:
sys.stderr.write("Warning! json python library not detected!"
"\n Exporting to biom format will not work!\n")

# get the directory that contains this script
metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__))
Expand Down
2 changes: 1 addition & 1 deletion metaphlan/utils/cmseq
Submodule cmseq updated 3 files
+7 −6 README.md
+12 −8 cmseq/breadth_depth.py
+1 −1 setup.py
2 changes: 1 addition & 1 deletion metaphlan/utils/hclust2
Submodule hclust2 updated 4 files
+14 −0 README.md
+0 −0 __init__.py
+41 −35 hclust2.py
+20 −0 setup.py

0 comments on commit bf1cf82

Please sign in to comment.