Skip to content

Commit

Permalink
Improve range header parsing and add unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
davidfarkas93 committed Feb 2, 2018
1 parent eecf302 commit f0a33fc
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 50 deletions.
79 changes: 52 additions & 27 deletions api/handlers/listhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,40 +486,65 @@ def get(self, cont_name, list_name, **kwargs):
range_header = self.request.headers.get('Range', None)
if range_header:
try:
first, last = util.parse_range_header(range_header)
ranges = util.parse_range_header(range_header)
except util.ParseError as e:
self.abort(400, str(e))

with file_system.open(file_path, 'rb') as f:
if last > fileinfo['size']:
self.abort(400, 'Last byte is greater then the size of the file')
if first is not None and last is not None:
length = last - first
elif last:
first = fileinfo['size']-last
length = last
else:
last = fileinfo['size']
length = fileinfo['size'] - first

f.seek(first)
data = f.read(length)

self.response.write(data)
self.response.headers['Content-Length'] = str(length)
self.response.headers['Content-Range'] = 'bytes %s-%s/%s' % (str(first),
str(last),
str(fileinfo['size']))
if not (len(data) == fileinfo['size']):
self.response.status = 206
for first, last in ranges:
if last > fileinfo['size']:
self.abort(400, 'Last byte is greater then the size of the file')

if not last:
if first < 0:
f.seek(first, 2)
length = abs(first)
else:
f.seek(first)
length = fileinfo['size'] - first
else:
if first < 0 and last < 0:
f.seek(first, 2)
elif first >= 0 and last >= 0:
f.seek(first)
else:
self.abort(400, 'Invalid range: %s-%s' % (first, last))

length = last - first

data = f.read(length)

if len(ranges) > 1:
self.response.write('--THIS_STRING_SEPARATES\n')
self.response.write('Content-Type: %s\n' % str(
fileinfo.get('mimetype', 'application/octet-stream')))
self.response.write('Content-Range: %s' % 'bytes %s-%s/%sn\n' % (str(first),
str(last),
str(fileinfo['size'])))
self.response.write('\n')
self.response.write(data)
self.response.write('\n')
else:
self.response.headers['Content-Type'] = str(
fileinfo.get('mimetype', 'application/octet-stream'))
self.response.headers['Content-Range'] = 'bytes %s-%s/%s' % (str(first),
str(last),
str(fileinfo['size']))
self.response.write(data)

if len(ranges) > 1:
self.response.headers['Content-Type'] = 'multipart/byteranges; boundary=THIS_STRING_SEPARATES'

self.response.status = 206
else:
self.response.app_iter = file_system.open(file_path, 'rb')
self.response.headers['Content-Length'] = str(fileinfo['size']) # must be set after setting app_iter

if self.is_true('view'):
self.response.headers['Content-Type'] = str(fileinfo.get('mimetype', 'application/octet-stream'))
else:
self.response.headers['Content-Type'] = 'application/octet-stream'
self.response.headers['Content-Disposition'] = 'attachment; filename="' + filename + '"'
if self.is_true('view'):
self.response.headers['Content-Type'] = str(fileinfo.get('mimetype', 'application/octet-stream'))
else:
self.response.headers['Content-Type'] = 'application/octet-stream'
self.response.headers['Content-Disposition'] = 'attachment; filename="' + filename + '"'

# log download if we haven't already for this ticket
if ticket:
Expand Down
66 changes: 43 additions & 23 deletions api/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from django.conf import settings
from django.template import Template, Context

# Matches a range given as "<first>-" or "<first>-<last>" (digits only),
# e.g. "5-" or "1-5". The 'last' group is None when it is omitted.
BYTE_RANGE_REGEX = r'^(?P<first>\d+)-(?P<last>\d+)?$'
# Matches a suffix range such as "-5". The leading minus sign is part of the
# captured 'first' group, so converting it with int() yields a negative number.
SUFFIX_BYTE_RANGE_REGEX = r'^(?P<first>-\d+)$'

# If this is not called before templating, django throws a hissy fit
settings.configure(
TEMPLATES=[{'BACKEND': 'django.template.backends.django.DjangoTemplates'}],
Expand Down Expand Up @@ -282,38 +285,55 @@ class ParseError(ValueError):


def parse_range_header(range_header_val, valid_units=('bytes',)):
    """
    Range header parser according to RFC7233
    https://tools.ietf.org/html/rfc7233

    Takes the raw value of a Range header (for example 'bytes=0-99, -5') and
    returns a list of (first, last) tuples, one per range, in header order:

    * 'a-b' -> (a, b)
    * 'a-'  -> (a, None)
    * '-n'  -> (-n, None)   (suffix range; `first` is negative)

    `valid_units` lists the accepted range units.

    Raises ParseError when the header does not have the form
    '<unit>=<ranges>', when the unit is not in `valid_units`, when any range
    is malformed, or when first is greater than last.
    """
    split_range_header_val = range_header_val.split('=')
    if len(split_range_header_val) != 2:
        raise ParseError('Invalid range header syntax')

    unit, ranges_str = split_range_header_val

    if unit not in valid_units:
        raise ParseError('Invalid unit specified')

    byte_range_re = re.compile(BYTE_RANGE_REGEX)
    suffix_byte_range_re = re.compile(SUFFIX_BYTE_RANGE_REGEX)

    ranges = []

    # RFC 7233 list syntax makes the whitespace around the comma optional, so
    # both 'bytes=0-4, 5-9' and 'bytes=0-4,5-9' have to be accepted.
    for range_str in re.split(r'[ \t]*,[ \t]*', ranges_str):
        re_match = byte_range_re.match(range_str) or suffix_byte_range_re.match(range_str)
        if not re_match:
            raise ParseError('Invalid range format')

        # The suffix pattern has no 'last' group, hence .get().
        groups = re_match.groupdict()
        raw_first = groups['first']
        raw_last = groups.get('last')

        try:
            first = int(raw_first)
            last = int(raw_last) if raw_last is not None else None
        except ValueError:
            # int() signals a non-numeric string with ValueError, not TypeError.
            raise ParseError('Invalid range, only numbers are allowed')

        if last is not None and first > last:
            raise ParseError('Invalid range, first %s can\'t be greater than the last %s' % (unit, unit))

        ranges.append((first, last))

    return ranges
34 changes: 34 additions & 0 deletions tests/unit_tests/python/test_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,39 @@
import pytest

from api import util

@pytest.fixture(scope='function', params=[
    # range header content          expected_output
    ('bytes=1-5', [(1, 5)]),
    ('bytes=-5', [(-5, None)]),
    ('bytes=5-', [(5, None)]),
    ('bytes=-', util.ParseError),
    ('bytes=3', util.ParseError),
    ('bytes=a-b', util.ParseError),
    ('by-', util.ParseError),
    ('bytes=5+5', util.ParseError),
    ('bytes=5=', util.ParseError),
    ('b=1-5', util.ParseError),
    ('bytes=1-5, 6-10, 10-15', [(1, 5), (6, 10), (10, 15)]),
    ('bytes=-5, 6-, 10-15', [(-5, None), (6, None), (10, 15)]),
    ('bytes=15, 6-10, 10-15', util.ParseError),
    ('bytes=15, -6--10, 10-15', util.ParseError),
    ('bytes=1-5; 6-10; 10-15', util.ParseError),
])
def parse_range_header_fixture(request):
    """Supply one (range header, expected result) pair per parametrized run.

    The expected result is either the list of (first, last) tuples the parser
    should return, or util.ParseError when the header must be rejected.
    """
    range_header, expected = request.param
    return range_header, expected


def test_parse_range_header(parse_range_header_fixture):
    """Check util.parse_range_header against each fixture case."""
    header_value, expected_output = parse_range_header_fixture

    # Valid headers: the parsed ranges must match exactly.
    if not (expected_output == util.ParseError):
        assert util.parse_range_header(header_value) == expected_output
        return

    # Invalid headers: the parser must raise ParseError.
    with pytest.raises(expected_output):
        util.parse_range_header(header_value)


def test_hrsize():
assert util.hrsize(999) == '999B'
Expand Down

0 comments on commit f0a33fc

Please sign in to comment.