Skip to content

Commit

Permalink
Follow google storage responses in case of invalid range, add test fo…
Browse files Browse the repository at this point in the history
…r multiple ranges
  • Loading branch information
davidfarkas93 committed Feb 7, 2018
1 parent f0a33fc commit cf1f1c5
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 67 deletions.
98 changes: 48 additions & 50 deletions api/handlers/listhandler.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import bson
import copy
import datetime
Expand Down Expand Up @@ -465,8 +466,8 @@ def get(self, cont_name, list_name, **kwargs):
with zipfile.ZipFile(f) as zf:
self.response.headers['Content-Type'] = util.guess_mimetype(zip_member)
self.response.write(zf.open(zip_member).read())
except zipfile.BadZipfile:
self.abort(400, 'not a zip file')
except zipfile.BadZipfile as e:
self.abort(400, str(e))
except KeyError:
self.abort(400, 'zip file contains no such member')
# log download if we haven't already for this ticket
Expand All @@ -483,60 +484,57 @@ def get(self, cont_name, list_name, **kwargs):
if signed_url:
self.redirect(signed_url)
else:
range_header = self.request.headers.get('Range', None)
if range_header:
try:
ranges = util.parse_range_header(range_header)
except util.ParseError as e:
self.abort(400, str(e))

with file_system.open(file_path, 'rb') as f:
for first, last in ranges:
if last > fileinfo['size']:
self.abort(400, 'Last byte is greater then the size of the file')

if not last:
range_header = self.request.headers.get('Range', '')
try:
ranges = util.parse_range_header(range_header)

if ranges:
with file_system.open(file_path, 'rb') as f:
for first, last in ranges:
if first > fileinfo['size']-1:
self.abort(416, 'Invalid range')

if last > fileinfo['size'] - 1:
raise util.ParseError('Invalid range')

mode = os.SEEK_SET
if first < 0:
f.seek(first, 2)
mode = os.SEEK_END
length = abs(first)
else:
f.seek(first)
elif last is None:
length = fileinfo['size'] - first
else:
if first < 0 and last < 0:
f.seek(first, 2)
elif first >= 0 and last >= 0:
f.seek(first)
else:
self.abort(400, 'Invalid range: %s-%s' % (first, last))

length = last - first

data = f.read(length)
if last > fileinfo['size']:
length = fileinfo['size'] - first
else:
length = last - first + 1

f.seek(first, mode)
data = f.read(length)

if len(ranges) > 1:
self.response.write('--THIS_STRING_SEPARATES\n')
self.response.write('Content-Type: %s\n' % str(
fileinfo.get('mimetype', 'application/octet-stream')))
self.response.write('Content-Range: %s' % 'bytes %s-%s/%s\n' % (str(first),
str(last),
str(fileinfo['size'])))
self.response.write('\n')
self.response.write(data)
self.response.write('\n')
else:
self.response.headers['Content-Type'] = str(
fileinfo.get('mimetype', 'application/octet-stream'))
self.response.headers['Content-Range'] = 'bytes %s-%s/%s' % (str(first),
str(last),
str(fileinfo['size']))
self.response.write(data)

if len(ranges) > 1:
self.response.write('--THIS_STRING_SEPARATES\n')
self.response.write('Content-Type: %s\n' % str(
fileinfo.get('mimetype', 'application/octet-stream')))
self.response.write('Content-Range: %s' % 'bytes %s-%s/%sn\n' % (str(first),
str(last),
str(fileinfo['size'])))
self.response.write('\n')
self.response.write(data)
self.response.write('\n')
else:
self.response.headers['Content-Type'] = str(
fileinfo.get('mimetype', 'application/octet-stream'))
self.response.headers['Content-Range'] = 'bytes %s-%s/%s' % (str(first),
str(last),
str(fileinfo['size']))
self.response.write(data)

if len(ranges) > 1:
self.response.headers['Content-Type'] = 'multipart/byteranges; boundary=THIS_STRING_SEPARATES'

self.response.status = 206
else:
self.response.headers['Content-Type'] = 'multipart/byteranges; boundary=THIS_STRING_SEPARATES'

self.response.status = 206
except util.ParseError:
self.response.app_iter = file_system.open(file_path, 'rb')
self.response.headers['Content-Length'] = str(fileinfo['size']) # must be set after setting app_iter

Expand Down
2 changes: 1 addition & 1 deletion api/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def parse_range_header(range_header_val, valid_units=('bytes',)):

unit, ranges_str = split_range_header_val

if not unit in valid_units:
if unit not in valid_units:
raise ParseError('Invalid unit specified')

split_ranges_str = ranges_str.split(', ')
Expand Down
80 changes: 64 additions & 16 deletions tests/integration_tests/python/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,25 @@ def test_filelist_range_download(data_builder, as_admin, file_form):
# download single file's first byte
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=0-1'})
headers={'Range': 'bytes=0-0'})
assert r.ok
assert r.content == '1'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# download single file's first two byte
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=0-1'})
assert r.ok
assert r.content == '12'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# download single file's last 5 bytes
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
Expand All @@ -289,58 +300,75 @@ def test_filelist_range_download(data_builder, as_admin, file_form):
# try to download single file with invalid unit
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'kilobyte=-5'})
assert r.status_code == 400
headers={'Range': 'lol=-5'})
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# try to download single file with with invalid range where the last byte is greater then the size of the file
# try to download single file with invalid range where the last byte is greater then the size of the file
# in this case the whole file is returned
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=0-500'})
assert r.status_code == 400
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# try to download single file with invalid range where the first byte is greater then the size of the file
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=500-'})
assert r.status_code == 416

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# try to download single file with with invalid range, first and last byte is not specified
# try to download single file with invalid range, in this case the whole file is returned
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=-'})
assert r.status_code == 400
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# try to download single file with with invalid range first byte is greater than the last one
# try to download single file with invalid range first byte is greater than the last one
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=10-5'})
assert r.status_code == 400
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# try to download single file with with invalid range, can't parse first byte
# try to download single file with invalid range, can't parse first byte
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=r-0'})
assert r.status_code == 400
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# try to download single file with with invalid range, can't parse last byte
# try to download single file with invalid range, can't parse last byte
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=0-bb'})
assert r.status_code == 400
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
Expand All @@ -350,17 +378,37 @@ def test_filelist_range_download(data_builder, as_admin, file_form):
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=1+5'})
assert r.status_code == 400
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# try to download single file with with invalid range header syntax
# try to download single file with invalid range header syntax
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes-1+5'})
assert r.status_code == 400
assert r.status_code == 200
assert r.content == '123456789'

r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
assert r.ok
ticket = r.json()['ticket']

# download multiple ranges
r = as_admin.get(session_files + '/one.csv',
params={'ticket': ticket},
headers={'Range': 'bytes=1-2, 3-4'})

assert r.content == '--THIS_STRING_SEPARATES\n' \
'Content-Type: text/csv\n' \
'Content-Range: bytes 1-2/9\n\n' \
'23\n' \
'--THIS_STRING_SEPARATES\n' \
'Content-Type: text/csv\n' \
'Content-Range: bytes 3-4/9\n\n' \
'45\n'


def test_analysis_download(data_builder, file_form, as_admin, default_payload):
Expand Down

0 comments on commit cf1f1c5

Please sign in to comment.