Skip to content

Commit

Permalink
Download database from internal webserver
Browse files Browse the repository at this point in the history
  • Loading branch information
Francesco Beghini committed Nov 24, 2020
1 parent 57feb0f commit e58939a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 33 deletions.
47 changes: 17 additions & 30 deletions metaphlan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import time
import zipfile
from glob import glob
from urllib.request import urlretrieve
import urllib.request

def remove_prefix(text):
return re.sub(r'^[a-z]__', '', text)
Expand Down Expand Up @@ -70,9 +70,8 @@ def report(self, blocknum, block_size, total_size):
sys.stderr.write(status)

# set the location of the database download url
DATABASE_DOWNLOAD = "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AADHWzATSQcI0CNFD0sk7MAga"
DROPBOX_DATABASE_DOWNLOAD = "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AADHWzATSQcI0CNFD0sk7MAga"
ZENODO_DATABASE_DOWNLOAD = "https://zenodo.org/record/3957592"
DBX_FILE_LIST = "https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AAA4XDP85WHon_eHvztxkamTa/file_list.txt?dl=1"

def download(url, download_file, force=False):
"""
Expand All @@ -82,14 +81,14 @@ def download(url, download_file, force=False):
if not os.path.isfile(download_file) or force:
try:
sys.stderr.write("\nDownloading " + url + "\n")
file, headers = urlretrieve(url, download_file,
file, headers = urllib.request.urlretrieve(url, download_file,
reporthook=ReportHook().report)
except EnvironmentError:
sys.stderr.write("\nWarning: Unable to download " + url + "\n")
else:
sys.stderr.write("\nFile {} already present!\n".format(download_file))

def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, nproc, use_zenodo):
def download_unpack_tar(download_file_name, folder, bowtie2_build, nproc, use_zenodo):
"""
Download the url to the file and decompress into the folder
"""
Expand All @@ -112,15 +111,8 @@ def download_unpack_tar(FILE_LIST, download_file_name, folder, bowtie2_build, np

#Download the list of all the files in the Dropbox folder
if not use_zenodo:
list_file_path = os.path.join(folder, "file_list.txt")
if not os.path.exists(list_file_path):
download(FILE_LIST, list_file_path)

if os.path.isfile(list_file_path):
with open(list_file_path) as f:
ls_f = dict( [row.strip().split() for row in f])
url_tar_file = ls_f[download_file_name + ".tar"]
url_md5_file = ls_f[download_file_name + ".md5"]
url_tar_file = "http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/{}.tar".format(download_file_name)
url_md5_file = "http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/{}.md5".format(download_file_name)
else:
url_tar_file = "https://zenodo.org/record/3957592/files/{}.tar?download=1".format(download_file_name)
url_md5_file = "https://zenodo.org/record/3957592/files/{}.md5?download=1".format(download_file_name)
Expand Down Expand Up @@ -261,25 +253,20 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re

if index != 'latest' and len(glob(os.path.join(bowtie2_db, "*{}*".format(index)))) >= 6:
return index

list_file_path = os.path.join(bowtie2_db, "file_list.txt")
#try downloading from Dropbox
try:
if not os.path.exists(list_file_path):
download(DBX_FILE_LIST, list_file_path)

if os.path.isfile(list_file_path):
with open(list_file_path) as f:
ls_f = dict( [row.strip().split() for row in f])
use_zenodo = False
except: #If fails, use zenodo
ls_f = {'mpa_lates' : 'https://zenodo.org/record/3957592/files/mpa_latest?download=1' }

use_zenodo = False
if urllib.request.urlopen("http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/mpa_latest").getcode() != 200:
use_zenodo = True

""" Check if the database is installed, if not download and install """
#try downloading from the segatalab website. If fails, use zenodo
if index == 'latest':
index = resolve_latest_database(bowtie2_db, ls_f['mpa_latest'], force_redownload_latest)
if not use_zenodo:
mpa_latest = 'http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/mpa_latest'
else:
mpa_latest = 'https://zenodo.org/record/3957592/files/mpa_latest?download=1'

index = resolve_latest_database(bowtie2_db, mpa_latest, force_redownload_latest)

if os.path.exists(os.path.join(bowtie2_db,'mpa_previous')):
with open(os.path.join(bowtie2_db,'mpa_previous')) as mpa_previous:
previous_db_version = ''.join([line.strip() for line in mpa_previous if not line.startswith('#')])
Expand All @@ -299,6 +286,6 @@ def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc, force_re
# download the tar archive and decompress
sys.stderr.write("\nDownloading MetaPhlAn database\nPlease note due to "
"the size this might take a few minutes\n")
download_unpack_tar(DBX_FILE_LIST, index, bowtie2_db, bowtie2_build, nproc, use_zenodo)
download_unpack_tar(index, bowtie2_db, bowtie2_build, nproc, use_zenodo)
sys.stderr.write("\nDownload complete\n")
return index
6 changes: 3 additions & 3 deletions metaphlan/metaphlan.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
'Nicola Segata ([email protected]), '
'Duy Tin Truong, '
'Francesco Asnicar ([email protected])')
__version__ = '3.0.5'
__date__ = '10 Nov 2020'
__version__ = '3.0.6'
__date__ = '24s Nov 2020'

import sys
try:
Expand Down Expand Up @@ -201,7 +201,7 @@ def read_params(args):
arg('--bowtie2out', metavar="FILE_NAME", type=str, default=None,
help="The file for saving the output of BowTie2")
arg('--min_mapq_val', type=int, default=5,
help="Minimum mapping quality value (MAPQ)")
help="Minimum mapping quality value (MAPQ) [default 5]")
arg('--no_map', action='store_true',
help="Avoid storing the --bowtie2out map file")
arg('--tmp_dir', metavar="", default=None, type=str,
Expand Down

0 comments on commit e58939a

Please sign in to comment.