Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix!: correct range-not-satisfiable errs + content length for partial res #89

Merged
merged 1 commit into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion chord_drs/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from flask import Flask
from flask_cors import CORS
from flask_migrate import Migrate
from werkzeug.exceptions import BadRequest, Forbidden, NotFound
from werkzeug.exceptions import BadRequest, Forbidden, NotFound, RequestedRangeNotSatisfiable

from .authz import authz_middleware
from .backend import close_backend
Expand Down Expand Up @@ -64,6 +64,13 @@
drs_compat=True,
authz=authz_middleware,
)(e))
application.register_error_handler(
RequestedRangeNotSatisfiable,
flask_errors.flask_error_wrap(
flask_errors.flask_range_not_satisfiable_error,
drs_compat=True,
authz=authz_middleware,
))

# Attach the database to the application and run migrations if needed
db.init_app(application)
Expand Down
31 changes: 23 additions & 8 deletions chord_drs/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from sqlalchemy import or_
from urllib.parse import urlparse
from werkzeug.exceptions import BadRequest, Forbidden, NotFound, InternalServerError
from werkzeug.exceptions import BadRequest, Forbidden, NotFound, InternalServerError, RequestedRangeNotSatisfiable

from . import __version__
from .authz import authz_middleware
Expand Down Expand Up @@ -110,6 +110,12 @@ def bad_request_log_mark(err: str) -> BadRequest:
return BadRequest(err)


def range_not_satisfiable_log_mark(description: str, length: int) -> RequestedRangeNotSatisfiable:
authz_middleware.mark_authz_done(request)
current_app.logger.error(f"Requested range not satisfiable: {description}; true length: {length}")
return RequestedRangeNotSatisfiable(description=description, length=length)


def get_drs_base_path() -> str:
parsed_service_url = urlparse(current_app.config["SERVICE_BASE_URL"])
return f"{parsed_service_url.netloc}{parsed_service_url.path}"
Expand Down Expand Up @@ -335,6 +341,8 @@ def object_download(object_id: str):
res.headers["Accept-Ranges"] = "bytes"
return res

drs_end_byte = drs_object.size - 1

logger.debug(f"Found Range header: {range_header}")
range_err = f"Malformatted range header: expected bytes=X-Y or bytes=X-, got {range_header}"

Expand All @@ -346,14 +354,20 @@ def object_download(object_id: str):
logger.debug(f"Retrieving byte range {byte_range}")

try:
start = int(byte_range[0])
end = int(byte_range[1]) if byte_range[1] else None
start: int = int(byte_range[0])
end: int = int(byte_range[1]) if byte_range[1] else drs_end_byte
except (IndexError, ValueError):
raise bad_request_log_mark(range_err)

if end is not None and end < start:
raise bad_request_log_mark(
f"Invalid range header: end cannot be less than start (start={start}, end={end})")
if end > drs_end_byte:
raise range_not_satisfiable_log_mark(
f"End cannot be past last byte ({end} > {drs_end_byte})",
drs_object.size)

if end < start:
raise range_not_satisfiable_log_mark(
f"Invalid range header: end cannot be less than start (start={start}, end={end})",
drs_object.size)

def generate_bytes():
with open(drs_object.location, "rb") as fh2:
Expand All @@ -372,12 +386,13 @@ def generate_bytes():
# If we've hit the end of the file and are reading empty byte strings, or we've reached the
# end of our range (inclusive), then escape the loop.
# This is guaranteed to terminate with a finite-sized file.
if len(data) == 0 or (end is not None and byte_offset > end):
if len(data) == 0 or byte_offset > end:
break

# Stream the bytes of the file or file segment from the generator function
r = current_app.response_class(generate_bytes(), status=206, mimetype=MIME_OCTET_STREAM)
r.headers["Content-Range"] = f"bytes {start}-{end or (drs_object.size - 1)}/{drs_object.size}"
r.headers["Content-Length"] = (end + 1 - start) # byte range is inclusive, so need to add one
r.headers["Content-Range"] = f"bytes {start}-{end}/{drs_object.size}"
r.headers["Content-Disposition"] = \
f"attachment; filename*=UTF-8'{urllib.parse.quote(drs_object.name, encoding='utf-8')}'"
return r
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "chord-drs"
version = "0.14.3"
version = "0.15.0"
description = "An implementation of a data repository system (as per GA4GH's specs) for the Bento platform."
authors = ["David Lougheed <[email protected]>"]
license = "LGPL-3.0"
Expand All @@ -14,7 +14,7 @@ include = [
[tool.poetry.dependencies]
python = "^3.10"
boto3 = ">=1.34.6,<1.35"
bento-lib = {version = "^11.1.1", extras = ["flask"]}
bento-lib = {version = "^11.2.0", extras = ["flask"]}
flask = ">=2.2.5,<2.3"
flask-sqlalchemy = ">=3.0.5,<3.1" # 3.1 drops SQLAlchemy 1 support
flask-migrate = ">=4.0.5,<4.1"
Expand Down
12 changes: 6 additions & 6 deletions tests/test_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,14 @@ def _test_object_and_download(client, obj, test_range=False):
body = res.get_data(as_text=False)
assert len(body) == 5

# - bytes 100-19999
# - bytes 100-1999
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-1999"),))
assert res.status_code == 206
body = res.get_data(as_text=False)
assert len(body) == 1900

# Size is 2455, so these'll run off the end and return the whole thing after 100

res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-19999"),))
assert res.status_code == 206
body = res.get_data(as_text=False)
assert len(body) == 2355
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-"),))
assert res.status_code == 206
body = res.get_data(as_text=False)
Expand All @@ -165,9 +161,13 @@ def _test_object_and_download(client, obj, test_range=False):
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bites=0-4"),))
assert res.status_code == 400

# - cannot request more than what is available
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-19999"),))
assert res.status_code == 416

# - reversed interval
res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=4-0"),))
assert res.status_code == 400
assert res.status_code == 416


@responses.activate
Expand Down