From 95965de1bee8c8efcbf7a7ffd5069d0f29d3244b Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 17 Jul 2024 17:47:19 +0200
Subject: [PATCH] Also fail ensure_dataset_on_disk if dataset is in new state

I believe this should cover
https://sentry.galaxyproject.org/share/issue/0a9fcacfab584166910e14329e3a0e89/:

```
FileNotFoundError: [Errno 2] No such file or directory: ''
  File "galaxy/webapps/galaxy/services/datasets.py", line 637, in display
    rval, headers = dataset_instance.datatype.display_data(
  File "galaxy/datatypes/tabular.py", line 190, in display_data
    return self._serve_raw(dataset, to_ext, headers, **kwd)
  File "galaxy/datatypes/data.py", line 427, in _serve_raw
    headers["Content-Length"] = str(os.stat(dataset.get_file_name()).st_size)

InternalServerError: Could not get display data for dataset: [Errno 2] No such file or directory: ''
  File "starlette/_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "starlette/routing.py", line 72, in app
    response = await func(request)
  File "fastapi/routing.py", line 278, in app
    raw_response = await run_endpoint_function(
  File "fastapi/routing.py", line 193, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
  File "starlette/concurrency.py", line 42, in run_in_threadpool
    return await anyio.to_thread.run_sync(func, *args)
  File "anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
    return await future
  File "anyio/_backends/_asyncio.py", line 851, in run
    result = context.run(func, *args)
  File "galaxy/webapps/galaxy/api/datasets.py", line 300, in display_history_content
    return self._display(request, trans, history_content_id, preview, filename, to_ext, raw, offset, ck_size)
  File "galaxy/webapps/galaxy/api/datasets.py", line 341, in _display
    display_data, headers = self.service.display(
  File "galaxy/webapps/galaxy/services/datasets.py", line 643, in display
    raise galaxy_exceptions.InternalServerError(f"Could not get display data for dataset: {util.unicodify(e)}")
```

It's a little hard to tell if this was really the reason here, but the
job went straight from new to paused. If the job state is new I don't
think we've even set the object store id yet.
---
 lib/galaxy/managers/datasets.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py
index 304879b934ad..3a23a25e19a1 100644
--- a/lib/galaxy/managers/datasets.py
+++ b/lib/galaxy/managers/datasets.py
@@ -489,12 +489,16 @@ def serialize_dataset_association_roles(self, trans, dataset_assoc):
 
     def ensure_dataset_on_disk(self, trans, dataset):
         # Not a guarantee data is really present, but excludes a lot of expected cases
+        if not dataset.dataset:
+            raise exceptions.InternalServerError("Item has no associated dataset.")
         if dataset.purged or dataset.dataset.purged:
             raise exceptions.ItemDeletionException("The dataset you are attempting to view has been purged.")
         elif dataset.deleted and not (trans.user_is_admin or self.is_owner(dataset, trans.get_user())):
             raise exceptions.ItemDeletionException("The dataset you are attempting to view has been deleted.")
         elif dataset.state == Dataset.states.UPLOAD:
             raise exceptions.Conflict("Please wait until this dataset finishes uploading before attempting to view it.")
+        elif dataset.state == Dataset.states.NEW:
+            raise exceptions.Conflict("The dataset you are attempting to view is new and has no data.")
         elif dataset.state == Dataset.states.DISCARDED:
             raise exceptions.ItemDeletionException("The dataset you are attempting to view has been discarded.")
         elif dataset.state == Dataset.states.DEFERRED: