From eecf3021cb80dbaec21565fa56d22ae7434aebd6 Mon Sep 17 00:00:00 2001 From: David Farkas Date: Mon, 29 Jan 2018 16:29:47 +0100 Subject: [PATCH] Add range read functionality for single file download --- api/handlers/listhandler.py | 34 ++++- api/util.py | 42 +++++++ .../integration_tests/python/test_download.py | 119 ++++++++++++++++++ 3 files changed, 193 insertions(+), 2 deletions(-) diff --git a/api/handlers/listhandler.py b/api/handlers/listhandler.py index 8f601e532..bfca8c305 100644 --- a/api/handlers/listhandler.py +++ b/api/handlers/listhandler.py @@ -483,8 +483,38 @@ def get(self, cont_name, list_name, **kwargs): if signed_url: self.redirect(signed_url) else: - self.response.app_iter = file_system.open(file_path, 'rb') - self.response.headers['Content-Length'] = str(fileinfo['size']) # must be set after setting app_iter + range_header = self.request.headers.get('Range', None) + if range_header: + try: + first, last = util.parse_range_header(range_header) + except util.ParseError as e: + self.abort(400, str(e)) + with file_system.open(file_path, 'rb') as f: + if last > fileinfo['size']: + self.abort(400, 'Last byte is greater then the size of the file') + if first is not None and last is not None: + length = last - first + elif last: + first = fileinfo['size']-last + length = last + else: + last = fileinfo['size'] + length = fileinfo['size'] - first + + f.seek(first) + data = f.read(length) + + self.response.write(data) + self.response.headers['Content-Length'] = str(length) + self.response.headers['Content-Range'] = 'bytes %s-%s/%s' % (str(first), + str(last), + str(fileinfo['size'])) + if not (len(data) == fileinfo['size']): + self.response.status = 206 + else: + self.response.app_iter = file_system.open(file_path, 'rb') + self.response.headers['Content-Length'] = str(fileinfo['size']) # must be set after setting app_iter + if self.is_true('view'): self.response.headers['Content-Type'] = str(fileinfo.get('mimetype', 'application/octet-stream')) else: diff 
--git a/api/util.py b/api/util.py index a0239e821..bbffaeb2e 100644 --- a/api/util.py +++ b/api/util.py @@ -275,3 +275,45 @@ def path_from_hash(hash_): second_stanza = actual_hash[2:4] path = (hash_version, hash_alg, first_stanza, second_stanza, hash_) return os.path.join(*path) + + +class ParseError(ValueError): + """Exception class representing a string parsing error.""" + + +def parse_range_header(range_header_val, valid_units=('bytes',)): + first = last = None + + split_range_header_val = range_header_val.split('=') + if not len(split_range_header_val) == 2: + raise ParseError('Invalid range header syntax') + + unit, range_str = split_range_header_val + + if not unit in valid_units: + raise ParseError('Invalid unit specified') + + split_range_str = range_str.split('-') + + if not len(split_range_str) == 2: + raise ParseError('Invalid range syntax') + + if split_range_str[0]: + try: + first = int(split_range_str[0]) + except ValueError: + raise ParseError('Invalid start %s' % unit) + + if split_range_str[1]: + try: + last = int(split_range_str[1]) + except ValueError: + raise ParseError('Invalid last %s' % unit) + + if first is None and last is None: + raise ParseError('Invalid range, you need to specify at least first or last %s' % unit) + + if last is not None and first > last: + raise ParseError('Invalid range, first %s can\'t be greater than the last %s' % (unit, unit)) + + return first, last diff --git a/tests/integration_tests/python/test_download.py b/tests/integration_tests/python/test_download.py index 39053e067..54a04747e 100644 --- a/tests/integration_tests/python/test_download.py +++ b/tests/integration_tests/python/test_download.py @@ -244,6 +244,125 @@ def test_filelist_download(data_builder, file_form, as_admin, legacy_cas_file): assert r.content == file_content +def test_filelist_range_download(data_builder, as_admin, file_form): + session = data_builder.create_session() + session_files = '/sessions/' + session + '/files' + 
as_admin.post(session_files, files=file_form(('one.csv', '123456789'))) + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # download single file from byte 0 to end of file + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=0-'}) + assert r.ok + assert r.content == '123456789' + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # download single file's first byte + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=0-1'}) + assert r.ok + assert r.content == '1' + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # download single file's last 5 bytes + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=-5'}) + assert r.ok + assert r.content == '56789' + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid unit + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'kilobyte=-5'}) + assert r.status_code == 400 + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range where the last byte is greater than the size of the file + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=0-500'}) + assert r.status_code == 400 + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range, first and last byte is not specified + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + 
headers={'Range': 'bytes=-'}) + assert r.status_code == 400 + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range first byte is greater than the last one + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=10-5'}) + assert r.status_code == 400 + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range, can't parse first byte + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=r-0'}) + assert r.status_code == 400 + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range, can't parse last byte + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=0-bb'}) + assert r.status_code == 400 + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range syntax + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=1+5'}) + assert r.status_code == 400 + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range header syntax + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes-1+5'}) + assert r.status_code == 400 + + def test_analysis_download(data_builder, file_form, as_admin, default_payload): session = data_builder.create_session() acquisition = data_builder.create_acquisition()