From 77918fb3fafbcd5a0944b40b2cdfa28efd2affa9 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 3 Jan 2024 15:30:15 -0500 Subject: [PATCH] fix!: correct range-not-satisfiable errs + content length for partial res --- chord_drs/app.py | 9 ++++++++- chord_drs/routes.py | 31 +++++++++++++++++++++++-------- poetry.lock | 8 ++++---- pyproject.toml | 4 ++-- tests/test_routes.py | 12 ++++++------ 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/chord_drs/app.py b/chord_drs/app.py index 72c52db..e41bad7 100644 --- a/chord_drs/app.py +++ b/chord_drs/app.py @@ -4,7 +4,7 @@ from flask import Flask from flask_cors import CORS from flask_migrate import Migrate -from werkzeug.exceptions import BadRequest, Forbidden, NotFound +from werkzeug.exceptions import BadRequest, Forbidden, NotFound, RequestedRangeNotSatisfiable from .authz import authz_middleware from .backend import close_backend @@ -64,6 +64,13 @@ drs_compat=True, authz=authz_middleware, )(e)) +application.register_error_handler( + RequestedRangeNotSatisfiable, + flask_errors.flask_error_wrap( + flask_errors.flask_range_not_satisfiable_error, + drs_compat=True, + authz=authz_middleware, + )) # Attach the database to the application and run migrations if needed db.init_app(application) diff --git a/chord_drs/routes.py b/chord_drs/routes.py index 8212d47..97599b6 100644 --- a/chord_drs/routes.py +++ b/chord_drs/routes.py @@ -20,7 +20,7 @@ ) from sqlalchemy import or_ from urllib.parse import urlparse -from werkzeug.exceptions import BadRequest, Forbidden, NotFound, InternalServerError +from werkzeug.exceptions import BadRequest, Forbidden, NotFound, InternalServerError, RequestedRangeNotSatisfiable from . import __version__ from .authz import authz_middleware @@ -110,6 +110,12 @@ def bad_request_log_mark(err: str) -> BadRequest: return BadRequest(err) +def range_not_satisfiable_log_mark(description: str, length: int) -> RequestedRangeNotSatisfiable: + authz_middleware.mark_authz_done(request) + current_app.logger.error(f"Requested range not satisfiable: {description}; true length: {length}") + return RequestedRangeNotSatisfiable(description=description, length=length) + + def get_drs_base_path() -> str: parsed_service_url = urlparse(current_app.config["SERVICE_BASE_URL"]) return f"{parsed_service_url.netloc}{parsed_service_url.path}" @@ -335,6 +341,8 @@ def object_download(object_id: str): res.headers["Accept-Ranges"] = "bytes" return res + drs_end_byte = drs_object.size - 1 + logger.debug(f"Found Range header: {range_header}") range_err = f"Malformatted range header: expected bytes=X-Y or bytes=X-, got {range_header}" @@ -346,14 +354,20 @@ def object_download(object_id: str): logger.debug(f"Retrieving byte range {byte_range}") try: - start = int(byte_range[0]) - end = int(byte_range[1]) if byte_range[1] else None + start: int = int(byte_range[0]) + end: int = int(byte_range[1]) if byte_range[1] else drs_end_byte except (IndexError, ValueError): raise bad_request_log_mark(range_err) - if end is not None and end < start: - raise bad_request_log_mark( - f"Invalid range header: end cannot be less than start (start={start}, end={end})") + if end > drs_end_byte: + raise range_not_satisfiable_log_mark( + f"End cannot be past last byte ({end} > {drs_end_byte})", + drs_object.size) + + if end < start: + raise range_not_satisfiable_log_mark( + f"Invalid range header: end cannot be less than start (start={start}, end={end})", + drs_object.size) def generate_bytes(): with open(drs_object.location, "rb") as fh2: @@ -372,12 +386,13 @@ def generate_bytes(): # If we've hit the end of the file and are reading empty byte strings, or we've reached the # end of our range (inclusive), then escape the loop. # This is guaranteed to terminate with a finite-sized file. - if len(data) == 0 or (end is not None and byte_offset > end): + if len(data) == 0 or byte_offset > end: break # Stream the bytes of the file or file segment from the generator function r = current_app.response_class(generate_bytes(), status=206, mimetype=MIME_OCTET_STREAM) - r.headers["Content-Range"] = f"bytes {start}-{end or (drs_object.size - 1)}/{drs_object.size}" + r.headers["Content-Length"] = (end + 1 - start) # byte range is inclusive, so need to add one + r.headers["Content-Range"] = f"bytes {start}-{end}/{drs_object.size}" r.headers["Content-Disposition"] = \ f"attachment; filename*=UTF-8'{urllib.parse.quote(drs_object.name, encoding='utf-8')}'" return r diff --git a/poetry.lock b/poetry.lock index 9f94044..bdb0b3e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -189,13 +189,13 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p [[package]] name = "bento-lib" -version = "11.1.2" +version = "11.2.0" description = "A set of common utilities and helpers for Bento platform services." optional = false python-versions = ">=3.10.0" files = [ - {file = "bento_lib-11.1.2-py3-none-any.whl", hash = "sha256:b588284f8ac72916a0882daa1240ae62bae6225b391f1451ce47a717cb506981"}, - {file = "bento_lib-11.1.2.tar.gz", hash = "sha256:ad231e0bd6f00b80c9fa7cd2857885afb936fd019f58a595facfb3cf8fdcb09a"}, + {file = "bento_lib-11.2.0-py3-none-any.whl", hash = "sha256:1a758e0595aa3a640f48165740aa3d8593a1368bf7a0ed9d2391019a635421f9"}, + {file = "bento_lib-11.2.0.tar.gz", hash = "sha256:72b2fa4575aa004d054d917a357ec064611a60e3167e4b255a0c9daa74023b0f"}, ] [package.dependencies] @@ -2234,4 +2234,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d5c2a1cfa35fa91a2baa9fead6466ee97e8ff39ba82a6b17c415a30dec1a3353" +content-hash = "c0d08e820457ce46699a1ba2fcd68da5efd4aa7723720af21772793071d9ad07" diff --git a/pyproject.toml b/pyproject.toml index 70d46e7..56740fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "chord-drs" -version = "0.14.3" +version = "0.15.0" description = "An implementation of a data repository system (as per GA4GH's specs) for the Bento platform." authors = ["David Lougheed "] license = "LGPL-3.0" @@ -14,7 +14,7 @@ include = [ [tool.poetry.dependencies] python = "^3.10" boto3 = ">=1.34.6,<1.35" -bento-lib = {version = "^11.1.1", extras = ["flask"]} +bento-lib = {version = "^11.2.0", extras = ["flask"]} flask = ">=2.2.5,<2.3" flask-sqlalchemy = ">=3.0.5,<3.1" # 3.1 drops SQLAlchemy 1 support flask-migrate = ">=4.0.5,<4.1" diff --git a/tests/test_routes.py b/tests/test_routes.py index 6768b1b..84d760a 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -129,7 +129,7 @@ def _test_object_and_download(client, obj, test_range=False): body = res.get_data(as_text=False) assert len(body) == 5 - # - bytes 100-19999 + # - bytes 100-1999 res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-1999"),)) assert res.status_code == 206 body = res.get_data(as_text=False) @@ -137,10 +137,6 @@ def _test_object_and_download(client, obj, test_range=False): # Size is 2455, so these'll run off the end and return the whole thing after 100 - res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-19999"),)) - assert res.status_code == 206 - body = res.get_data(as_text=False) - assert len(body) == 2355 res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-"),)) assert res.status_code == 206 body = res.get_data(as_text=False) @@ -165,9 +161,13 @@ def _test_object_and_download(client, obj, test_range=False): res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bites=0-4"),)) assert res.status_code == 400 + # - cannot request more than what is available + res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=100-19999"),)) + assert res.status_code == 416 + # - reversed interval res = client.get(data["access_methods"][0]["access_url"]["url"], headers=(("Range", "bytes=4-0"),)) - assert res.status_code == 400 + assert res.status_code == 416 @responses.activate