From cf1f1c5700a1bf7fa4180e73b1ce3ca2ef869312 Mon Sep 17 00:00:00 2001 From: David Farkas Date: Wed, 7 Feb 2018 16:46:48 +0100 Subject: [PATCH] Follow google storage responses in case of invalid range, add test for multiple ranges --- api/handlers/listhandler.py | 98 +++++++++---------- api/util.py | 2 +- .../integration_tests/python/test_download.py | 80 ++++++++++++--- 3 files changed, 113 insertions(+), 67 deletions(-) diff --git a/api/handlers/listhandler.py b/api/handlers/listhandler.py index 6fc5f85c7..3a10830e6 100644 --- a/api/handlers/listhandler.py +++ b/api/handlers/listhandler.py @@ -1,3 +1,4 @@ +import os import bson import copy import datetime @@ -465,8 +466,8 @@ def get(self, cont_name, list_name, **kwargs): with zipfile.ZipFile(f) as zf: self.response.headers['Content-Type'] = util.guess_mimetype(zip_member) self.response.write(zf.open(zip_member).read()) - except zipfile.BadZipfile: - self.abort(400, 'not a zip file') + except zipfile.BadZipfile as e: + self.abort(400, str(e)) except KeyError: self.abort(400, 'zip file contains no such member') # log download if we haven't already for this ticket @@ -483,60 +484,57 @@ def get(self, cont_name, list_name, **kwargs): if signed_url: self.redirect(signed_url) else: - range_header = self.request.headers.get('Range', None) - if range_header: - try: - ranges = util.parse_range_header(range_header) - except util.ParseError as e: - self.abort(400, str(e)) - - with file_system.open(file_path, 'rb') as f: - for first, last in ranges: - if last > fileinfo['size']: - self.abort(400, 'Last byte is greater then the size of the file') - - if not last: + range_header = self.request.headers.get('Range', '') + try: + ranges = util.parse_range_header(range_header) + + if ranges: + with file_system.open(file_path, 'rb') as f: + for first, last in ranges: + if first > fileinfo['size']-1: + self.abort(416, 'Invalid range') + + if last > fileinfo['size'] - 1: + raise util.ParseError('Invalid range') + + mode = os.SEEK_SET if first < 0: - f.seek(first, 2) + mode = os.SEEK_END length = abs(first) - else: - f.seek(first) + elif last is None: length = fileinfo['size'] - first - else: - if first < 0 and last < 0: - f.seek(first, 2) - elif first >= 0 and last >= 0: - f.seek(first) else: - self.abort(400, 'Invalid range: %s-%s' % (first, last)) - - length = last - first - - data = f.read(length) + if last > fileinfo['size']: + length = fileinfo['size'] - first + else: + length = last - first + 1 + + f.seek(first, mode) + data = f.read(length) + + if len(ranges) > 1: + self.response.write('--THIS_STRING_SEPARATES\n') + self.response.write('Content-Type: %s\n' % str( + fileinfo.get('mimetype', 'application/octet-stream'))) + self.response.write('Content-Range: %s' % 'bytes %s-%s/%s\n' % (str(first), + str(last), + str(fileinfo['size']))) + self.response.write('\n') + self.response.write(data) + self.response.write('\n') + else: + self.response.headers['Content-Type'] = str( + fileinfo.get('mimetype', 'application/octet-stream')) + self.response.headers['Content-Range'] = 'bytes %s-%s/%s' % (str(first), + str(last), + str(fileinfo['size'])) + self.response.write(data) if len(ranges) > 1: - self.response.write('--THIS_STRING_SEPARATES\n') - self.response.write('Content-Type: %s\n' % str( - fileinfo.get('mimetype', 'application/octet-stream'))) - self.response.write('Content-Range: %s' % 'bytes %s-%s/%sn\n' % (str(first), - str(last), - str(fileinfo['size']))) - self.response.write('\n') - self.response.write(data) - self.response.write('\n') - else: - self.response.headers['Content-Type'] = str( - fileinfo.get('mimetype', 'application/octet-stream')) - self.response.headers['Content-Range'] = 'bytes %s-%s/%s' % (str(first), - str(last), - str(fileinfo['size'])) - self.response.write(data) - - if len(ranges) > 1: - self.response.headers['Content-Type'] = 'multipart/byteranges; boundary=THIS_STRING_SEPARATES' - - self.response.status = 206 - else: + self.response.headers['Content-Type'] = 'multipart/byteranges; boundary=THIS_STRING_SEPARATES' + + self.response.status = 206 + except util.ParseError: self.response.app_iter = file_system.open(file_path, 'rb') self.response.headers['Content-Length'] = str(fileinfo['size']) # must be set after setting app_iter diff --git a/api/util.py b/api/util.py index 27168c5b0..dbd818377 100644 --- a/api/util.py +++ b/api/util.py @@ -299,7 +299,7 @@ def parse_range_header(range_header_val, valid_units=('bytes',)): unit, ranges_str = split_range_header_val - if not unit in valid_units: + if unit not in valid_units: raise ParseError('Invalid unit specified') split_ranges_str = ranges_str.split(', ') diff --git a/tests/integration_tests/python/test_download.py b/tests/integration_tests/python/test_download.py index 54a04747e..3c72aebd2 100644 --- a/tests/integration_tests/python/test_download.py +++ b/tests/integration_tests/python/test_download.py @@ -267,7 +267,7 @@ def test_filelist_range_download(data_builder, as_admin, file_form): # download single file's first byte r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, - headers={'Range': 'bytes=0-1'}) + headers={'Range': 'bytes=0-0'}) assert r.ok assert r.content == '1' @@ -275,6 +275,17 @@ def test_filelist_range_download(data_builder, as_admin, file_form): assert r.ok ticket = r.json()['ticket'] + # download single file's first two byte + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=0-1'}) + assert r.ok + assert r.content == '12' + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + # download single file's last 5 bytes r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, @@ -289,58 +300,75 @@ def test_filelist_range_download(data_builder, as_admin, file_form): # try to download single file with invalid unit r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, - headers={'Range': 'kilobyte=-5'}) - assert r.status_code == 400 + headers={'Range': 'lol=-5'}) + assert r.status_code == 200 + assert r.content == '123456789' r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) assert r.ok ticket = r.json()['ticket'] - # try to download single file with with invalid range where the last byte is greater then the size of the file + # try to download single file with invalid range where the last byte is greater then the size of the file + # in this case the whole file is returned r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, headers={'Range': 'bytes=0-500'}) - assert r.status_code == 400 + assert r.status_code == 200 + assert r.content == '123456789' + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # try to download single file with invalid range where the first byte is greater then the size of the file + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=500-'}) + assert r.status_code == 416 r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) assert r.ok ticket = r.json()['ticket'] - # try to download single file with with invalid range, first and last byte is not specified + # try to download single file with invalid range, in this case the whole file is returned r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, headers={'Range': 'bytes=-'}) - assert r.status_code == 400 + assert r.status_code == 200 + assert r.content == '123456789' r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) assert r.ok ticket = r.json()['ticket'] - # try to download single file with with invalid range first byte is greater than the last one + # try to download single file with invalid range first byte is greater than the last one r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, headers={'Range': 'bytes=10-5'}) - assert r.status_code == 400 + assert r.status_code == 200 + assert r.content == '123456789' r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) assert r.ok ticket = r.json()['ticket'] - # try to download single file with with invalid range, can't parse first byte + # try to download single file with invalid range, can't parse first byte r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, headers={'Range': 'bytes=r-0'}) - assert r.status_code == 400 + assert r.status_code == 200 + assert r.content == '123456789' r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) assert r.ok ticket = r.json()['ticket'] - # try to download single file with with invalid range, can't parse last byte + # try to download single file with invalid range, can't parse last byte r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, headers={'Range': 'bytes=0-bb'}) - assert r.status_code == 400 + assert r.status_code == 200 + assert r.content == '123456789' r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) assert r.ok @@ -350,17 +378,37 @@ def test_filelist_range_download(data_builder, as_admin, file_form): r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, headers={'Range': 'bytes=1+5'}) - assert r.status_code == 400 + assert r.status_code == 200 + assert r.content == '123456789' r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) assert r.ok ticket = r.json()['ticket'] - # try to download single file with with invalid range header syntax + # try to download single file with invalid range header syntax r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket}, headers={'Range': 'bytes-1+5'}) - assert r.status_code == 400 + assert r.status_code == 200 + assert r.content == '123456789' + + r = as_admin.get(session_files + '/one.csv', params={'ticket': ''}) + assert r.ok + ticket = r.json()['ticket'] + + # download multiple ranges + r = as_admin.get(session_files + '/one.csv', + params={'ticket': ticket}, + headers={'Range': 'bytes=1-2, 3-4'}) + + assert r.content == '--THIS_STRING_SEPARATES\n' \ + 'Content-Type: text/csv\n' \ + 'Content-Range: bytes 1-2/9\n\n' \ + '23\n' \ + '--THIS_STRING_SEPARATES\n' \ + 'Content-Type: text/csv\n' \ + 'Content-Range: bytes 3-4/9\n\n' \ + '45\n' def test_analysis_download(data_builder, file_form, as_admin, default_payload):