From dea0d523c69fb2d0d81ece394b4ec36bf06806a4 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 29 May 2024 10:09:26 +0200 Subject: [PATCH] Transparently open compressed files in DatasetDataProvider Fixes https://sentry.galaxyproject.org/share/issue/026b0ea1f8aa478daea1cdb0b18df78a/: ``` UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte File "starlette/applications.py", line 123, in __call__ await self.middleware_stack(scope, receive, send) File "starlette/middleware/errors.py", line 186, in __call__ raise exc File "starlette/middleware/errors.py", line 164, in __call__ await self.app(scope, receive, _send) File "starlette_context/middleware/raw_middleware.py", line 92, in __call__ await self.app(scope, receive, send_wrapper) File "starlette/middleware/base.py", line 189, in __call__ with collapse_excgroups(): File "contextlib.py", line 155, in __exit__ self.gen.throw(typ, value, traceback) File "starlette/_utils.py", line 93, in collapse_excgroups raise exc File "starlette/middleware/base.py", line 191, in __call__ response = await self.dispatch_func(request, call_next) File "galaxy/webapps/galaxy/fast_app.py", line 109, in add_x_frame_options response = await call_next(request) File "starlette/middleware/base.py", line 165, in call_next raise app_exc File "starlette/middleware/base.py", line 151, in coro await self.app(scope, receive_or_disconnect, send_no_error) File "starlette/middleware/exceptions.py", line 62, in __call__ await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) File "starlette/_exception_handler.py", line 64, in wrapped_app raise exc File "starlette/_exception_handler.py", line 53, in wrapped_app await app(scope, receive, sender) File "starlette/routing.py", line 758, in __call__ await self.middleware_stack(scope, receive, send) File "starlette/routing.py", line 778, in app await route.handle(scope, receive, send) File "starlette/routing.py", line 299, in handle await self.app(scope, 
receive, send) File "starlette/routing.py", line 79, in app await wrap_app_handling_exceptions(app, request)(scope, receive, send) File "starlette/_exception_handler.py", line 64, in wrapped_app raise exc File "starlette/_exception_handler.py", line 53, in wrapped_app await app(scope, receive, sender) File "starlette/routing.py", line 74, in app response = await func(request) File "fastapi/routing.py", line 278, in app raw_response = await run_endpoint_function( File "fastapi/routing.py", line 193, in run_endpoint_function return await run_in_threadpool(dependant.call, **values) File "starlette/concurrency.py", line 42, in run_in_threadpool return await anyio.to_thread.run_sync(func, *args) File "anyio/to_thread.py", line 56, in run_sync return await get_async_backend().run_sync_in_worker_thread( File "anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread return await future File "anyio/_backends/_asyncio.py", line 851, in run result = context.run(func, *args) File "galaxy/webapps/galaxy/api/datasets.py", line 446, in show return self.service.show(trans, dataset_id, hda_ldda, serialization_params, data_type, **extra_params) File "galaxy/webapps/galaxy/services/datasets.py", line 395, in show rval = self._raw_data(trans, dataset, **extra_params) File "galaxy/webapps/galaxy/services/datasets.py", line 1009, in _raw_data return DataResult(data=list(dataset.datatype.dataprovider(dataset, provider, **kwargs))) File "galaxy/datatypes/dataproviders/base.py", line 262, in __iter__ for datum in parent_gen: File "galaxy/datatypes/dataproviders/base.py", line 199, in __iter__ for datum in parent_gen: File "galaxy/datatypes/dataproviders/base.py", line 137, in __iter__ yield from self.source File "galaxy/datatypes/dataproviders/base.py", line 137, in __iter__ yield from self.source File "<frozen codecs>", line 322, in decode ``` --- lib/galaxy/datatypes/dataproviders/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/lib/galaxy/datatypes/dataproviders/dataset.py b/lib/galaxy/datatypes/dataproviders/dataset.py index dfce2edef527..60b5f6295f3e 100644 --- a/lib/galaxy/datatypes/dataproviders/dataset.py +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -15,6 +15,7 @@ ) from galaxy.util import sqlite +from galaxy.util.compression_utils import get_fileobj from . import ( base, column, @@ -54,7 +55,7 @@ def __init__(self, dataset, **kwargs): # this dataset file is obviously the source # TODO: this might be a good place to interface with the object_store... mode = "rb" if dataset.datatype.is_binary else "r" - super().__init__(open(dataset.get_file_name(), mode)) + super().__init__(get_fileobj(dataset.get_file_name(), mode)) # TODO: this is a bit of a mess @classmethod