Skip to content

Commit

Permalink
Merge pull request #51 from mboudet/master
Browse files Browse the repository at this point in the history
Trying some stuff..
  • Loading branch information
mboudet authored Aug 26, 2024
2 parents 0cd068d + bf7f4f4 commit a362bc6
Showing 1 changed file with 60 additions and 1 deletion.
61 changes: 60 additions & 1 deletion biomaj_download/download/curl.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import requests
from datetime import datetime
import hashlib
import time
Expand Down Expand Up @@ -298,8 +299,61 @@ def _append_file_to_download(self, rfile):
rfile['url'] = self.url
if 'root' not in rfile or not rfile['root']:
rfile['root'] = self.rootdir

if rfile.get('modified_size'):
# Size parsed is inacurate. Try to get a more accurate size with a HEAD query
try:
self.logger.debug('Trying to get a more accurate size for ' + rfile['name'])
head_size = self._estimate_size(rfile)
if head_size:
rfile['size'] = head_size
except Exception as e:
self.logger.error('Exception while trying to get a more accurate size for ' + rfile['name'] + ' - ' + str(e))

super(CurlDownload, self)._append_file_to_download(rfile)

def _estimate_size(self, rfile):
# Cannot reuse _file_url, since we did not cleanup the name yet
# Mostly pasted for the same stuff in direct download
name = re.sub('//+', '/', rfile['name'])
url = self.url + '/' + rfile['root'] + name
url_elts = url.split('://')
if len(url_elts) == 2:
url_elts[1] = re.sub("/{2,}", "/", url_elts[1])
full_url = '://'.join(url_elts)
else:
full_url = re.sub("/{2,}", "/", url)

return self._head_size_call(full_url)

def _head_size_call(self, full_url):
# Now do a HEAD call on this url

auth = ()
proxies = {}

if self.credentials is not None:
auth = tuple(self.credentials.split(":"))

if self.proxy is not None:
proxy = self.proxy
if not self.proxy.startswith("http"):
proxy = 'http://' + self.proxy
if self.proxy_auth is not None:
# Don't really want to manage properly the various schemes
proxy.replace('http://', 'http://{}@'.format(self.proxy_auth))
proxy.replace('https://', 'https://{}@'.format(self.proxy_auth))
proxies['http'] = proxy
proxies['https'] = proxy

try:
size_response = requests.head(full_url, allow_redirects=True, auth=auth, proxies=proxies)
size = int(size_response.headers.get('content-length', 0))
return size

except Exception:
return 0

def _file_url(self, rfile):
# rfile['root'] is set to self.rootdir if needed but may be different.
# We don't use os.path.join because rfile['name'] may starts with /
Expand Down Expand Up @@ -519,7 +573,12 @@ def _http_parse_result(self, result):
rfile['group'] = ''
rfile['user'] = ''
if self.http_parse.file_size != -1:
rfile['size'] = humanfriendly.parse_size(foundfile[self.http_parse.file_size - 1])
size = humanfriendly.parse_size(foundfile[self.http_parse.file_size - 1])
if not str(size) == foundfile[self.http_parse.file_size - 1]:
# This is an approximation of the real size (conversion to byte)
# We will check later (in match()) if we can get a more accurate size
rfile['modified_size'] = True
rfile['size'] = size
else:
rfile['size'] = 0
if self.http_parse.file_date != -1:
Expand Down

0 comments on commit a362bc6

Please sign in to comment.