Skip to content

Commit

Permalink
Fix get_content_as_text for compressed text datatypes
Browse files Browse the repository at this point in the history
Fixes:
```
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte
  File "starlette/applications.py", line 123, in __call__
    await self.middleware_stack(scope, receive, send)
  File "starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "starlette/middleware/errors.py", line 164, in __call__
    await self.app(scope, receive, _send)
  File "starlette_context/middleware/raw_middleware.py", line 92, in __call__
    await self.app(scope, receive, send_wrapper)
  File "starlette/middleware/base.py", line 189, in __call__
    with collapse_excgroups():
  File "contextlib.py", line 155, in __exit__
    self.gen.throw(typ, value, traceback)
  File "starlette/_utils.py", line 93, in collapse_excgroups
    raise exc
  File "starlette/middleware/base.py", line 191, in __call__
    response = await self.dispatch_func(request, call_next)
  File "galaxy/webapps/galaxy/fast_app.py", line 108, in add_x_frame_options
    response = await call_next(request)
  File "starlette/middleware/base.py", line 165, in call_next
    raise app_exc
  File "starlette/middleware/base.py", line 151, in coro
    await self.app(scope, receive_or_disconnect, send_no_error)
  File "starlette/middleware/exceptions.py", line 62, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File "starlette/_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "starlette/_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "starlette/routing.py", line 758, in __call__
    await self.middleware_stack(scope, receive, send)
  File "starlette/routing.py", line 778, in app
    await route.handle(scope, receive, send)
  File "starlette/routing.py", line 299, in handle
    await self.app(scope, receive, send)
  File "starlette/routing.py", line 79, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File "starlette/_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "starlette/_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "starlette/routing.py", line 74, in app
    response = await func(request)
  File "fastapi/routing.py", line 278, in app
    raw_response = await run_endpoint_function(
  File "fastapi/routing.py", line 193, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
  File "starlette/concurrency.py", line 42, in run_in_threadpool
    return await anyio.to_thread.run_sync(func, *args)
  File "anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
    return await future
  File "anyio/_backends/_asyncio.py", line 851, in run
    result = context.run(func, *args)
  File "galaxy/webapps/galaxy/api/datasets.py", line 192, in get_content_as_text
    return self.service.get_content_as_text(trans, dataset_id)
  File "galaxy/webapps/galaxy/services/datasets.py", line 643, in get_content_as_text
    truncated, dataset_data = self.hda_manager.text_data(hda, preview=True)
  File "galaxy/managers/hdas.py", line 310, in text_data
    hda_data = open(hda.get_file_name()).read(MAX_PEEK_SIZE)
  File "<frozen codecs>", line 322, in decode
```
from https://sentry.galaxyproject.org/share/issue/9eb8e5b692b94700ac9b304b6d1c2418/
  • Loading branch information
mvdbeek committed Apr 12, 2024
1 parent 32327ae commit 1ed2af1
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
9 changes: 6 additions & 3 deletions lib/galaxy/managers/hdas.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
MinimalManagerApp,
StructuredApp,
)
from galaxy.util.compression_utils import get_fileobj

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -303,11 +304,13 @@ def text_data(self, hda, preview=True):
# For now, cannot get data from non-text datasets.
if not isinstance(hda.datatype, datatypes.data.Text):
return truncated, hda_data
if not os.path.exists(hda.get_file_name()):
file_path = hda.get_file_name()
if not os.path.exists(file_path):
return truncated, hda_data

truncated = preview and os.stat(hda.get_file_name()).st_size > MAX_PEEK_SIZE
hda_data = open(hda.get_file_name()).read(MAX_PEEK_SIZE)
truncated = preview and os.stat(file_path).st_size > MAX_PEEK_SIZE
with get_fileobj(file_path) as fh:
hda_data = fh.read(MAX_PEEK_SIZE)
return truncated, hda_data

# .... annotatable
Expand Down
10 changes: 10 additions & 0 deletions lib/galaxy_test/api/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
one_hda_model_store_dict,
TEST_SOURCE_URI,
)
from galaxy.tool_util.verify.test_data import TestDataResolver
from galaxy.util.unittest_utils import skip_if_github_down
from galaxy_test.base.api_asserts import assert_has_keys
from galaxy_test.base.decorators import (
Expand Down Expand Up @@ -356,6 +357,15 @@ def test_get_content_as_text(self, history_id):
self._assert_has_key(get_content_as_text_response.json(), "item_data")
assert get_content_as_text_response.json().get("item_data") == contents

def test_get_content_as_text_with_compressed_text_data(self, history_id):
    """Upload the ``1.fasta.gz`` test file as ``fasta.gz`` and fetch it as text.

    The ``get_content_as_text`` endpoint must answer 200, include an
    ``item_data`` key, and that value must contain the ``>hg17`` marker.
    """
    gz_path = TestDataResolver().get_filename("1.fasta.gz")
    # Hand the raw binary handle to the populator; the upload waits for completion.
    with open(gz_path, mode="rb") as gz_handle:
        dataset = self.dataset_populator.new_dataset(history_id, content=gz_handle, ftype="fasta.gz", wait=True)
    response = self._get(f"datasets/{dataset['id']}/get_content_as_text")
    self._assert_status_code_is(response, 200)
    self._assert_has_key(response.json(), "item_data")
    item_data = response.json().get("item_data")
    assert ">hg17" in item_data

def test_anon_get_content_as_text(self, history_id):
contents = "accessible data"
hda1 = self.dataset_populator.new_dataset(history_id, content=contents, wait=True)
Expand Down

0 comments on commit 1ed2af1

Please sign in to comment.