From 76f321ee24ad1c4aa714a25018a5da3f64c24eaa Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 28 May 2024 09:21:04 +0200 Subject: [PATCH 01/10] Only log error if deleting directory really failed If we can't look up the directory it's safe to assume it really is gone. Fixes `None delete error [Errno 39] Directory not empty: 'outputs_populated'`. --- lib/galaxy/objectstore/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 6b25071ccad3..f6bb0f4b07ae 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -851,7 +851,12 @@ def _delete(self, obj, entire_dir=False, **kwargs): except OSError as ex: # Likely a race condition in which we delete the job working directory # and another process writes files into that directory. - log.critical(f"{self.__get_filename(obj, **kwargs)} delete error {ex}", exc_info=True) + # If the path doesn't exist anymore, another rmtree call was successful. + path = self.__get_filename(obj, **kwargs) + if path is None: + return True + else: + log.critical(f"{path} delete error {ex}", exc_info=True) return False def _get_data(self, obj, start=0, count=-1, **kwargs): From 4da885f7d7eff713bd2b485bdae8530b9e7fffae Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 28 May 2024 09:39:26 +0200 Subject: [PATCH 02/10] Don't attempt to download purged datasets Fixes https://sentry.galaxyproject.org/share/issue/cde32ad8b3cc4d58a519743a90e0d9a3/: ``` FileNotFoundError: [Errno 2] No such file or directory: '' File "starlette/applications.py", line 123, in __call__ await self.middleware_stack(scope, receive, send) File "starlette/middleware/errors.py", line 186, in __call__ raise exc File "starlette/middleware/errors.py", line 164, in __call__ await self.app(scope, receive, _send) File "starlette_context/middleware/raw_middleware.py", line 92, in __call__ await self.app(scope, receive, send_wrapper) File "starlette/middleware/base.py", line 189, in __call__ with collapse_excgroups(): File "contextlib.py", line 155, in __exit__ self.gen.throw(typ, value, traceback) File "starlette/_utils.py", line 93, in collapse_excgroups raise exc File "starlette/middleware/base.py", line 191, in __call__ response = await self.dispatch_func(request, call_next) File "galaxy/webapps/galaxy/fast_app.py", line 109, in add_x_frame_options response = await call_next(request) File "starlette/middleware/base.py", line 165, in call_next raise app_exc File "starlette/middleware/base.py", line 151, in coro await self.app(scope, receive_or_disconnect, send_no_error) File "starlette/middleware/exceptions.py", line 62, in __call__ await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) File "starlette/_exception_handler.py", line 64, in wrapped_app raise exc File "starlette/_exception_handler.py", line 53, in wrapped_app await app(scope, receive, sender) File "starlette/routing.py", line 758, in __call__ await self.middleware_stack(scope, receive, send) File "starlette/routing.py", line 778, in app await route.handle(scope, receive, send) File "starlette/routing.py", line 299, in handle await self.app(scope, receive, send) File "starlette/routing.py", line 79, in app await wrap_app_handling_exceptions(app, request)(scope, receive, send) File "starlette/_exception_handler.py", line 64, in wrapped_app raise exc File "starlette/_exception_handler.py", line 53, in wrapped_app await app(scope, receive, sender) File "starlette/routing.py", line 74, in app response = await func(request) File "fastapi/routing.py", line 278, in app raw_response = await run_endpoint_function( File "fastapi/routing.py", line 193, in run_endpoint_function return await run_in_threadpool(dependant.call, **values) File "starlette/concurrency.py", line 42, in run_in_threadpool return await anyio.to_thread.run_sync(func, *args) File "anyio/to_thread.py", line 56, in run_sync return await get_async_backend().run_sync_in_worker_thread( File "anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread return await future File "anyio/_backends/_asyncio.py", line 851, in run result = context.run(func, *args) File "galaxy/webapps/galaxy/api/history_contents.py", line 646, in download_dataset_collection return self._download_collection(trans, id) File "galaxy/webapps/galaxy/api/history_contents.py", line 1036, in _download_collection archive = self.service.get_dataset_collection_archive_for_download(trans, id) File "galaxy/webapps/galaxy/services/history_contents.py", line 479, in get_dataset_collection_archive_for_download return self.__stream_dataset_collection(trans, dataset_collection_instance) File "galaxy/webapps/galaxy/services/history_contents.py", line 498, in __stream_dataset_collection archive = hdcas.stream_dataset_collection( File "galaxy/managers/hdcas.py", line 39, in stream_dataset_collection write_dataset_collection(dataset_collection_instance, archive) File "galaxy/managers/hdcas.py", line 51, in write_dataset_collection archive.write(file_path, relpath) File "galaxy/util/zipstream.py", line 82, in write self.add_path(path, archive_name or os.path.basename(path)) File "galaxy/util/zipstream.py", line 52, in add_path size = int(os.stat(path).st_size) ``` --- lib/galaxy/managers/hdcas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/managers/hdcas.py b/lib/galaxy/managers/hdcas.py index 42d448245157..76d5ef10cdab 100644 --- a/lib/galaxy/managers/hdcas.py +++ b/lib/galaxy/managers/hdcas.py @@ -45,7 +45,7 @@ def write_dataset_collection(dataset_collection_instance, archive): raise RequestParameterInvalidException("Attempt to write dataset collection that has not been populated yet") names, hdas = get_hda_and_element_identifiers(dataset_collection_instance) for name, hda in zip(names, hdas): - if hda.state != hda.states.OK: + if hda.state != hda.states.OK or hda.purged or hda.dataset.purged: continue for file_path, relpath in hda.datatype.to_archive(dataset=hda, name=name): archive.write(file_path, relpath) From dea0d523c69fb2d0d81ece394b4ec36bf06806a4 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 29 May 2024 10:09:26 +0200 Subject: [PATCH 03/10] Transparently open compressed files in DatasetDataProvider Fixes https://sentry.galaxyproject.org/share/issue/026b0ea1f8aa478daea1cdb0b18df78a/: ``` UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte File "starlette/applications.py", line 123, in __call__ await self.middleware_stack(scope, receive, send) File "starlette/middleware/errors.py", line 186, in __call__ raise exc File "starlette/middleware/errors.py", line 164, in __call__ await self.app(scope, receive, _send) File "starlette_context/middleware/raw_middleware.py", line 92, in __call__ await self.app(scope, receive, send_wrapper) File "starlette/middleware/base.py", line 189, in __call__ with collapse_excgroups(): File "contextlib.py", line 155, in __exit__ self.gen.throw(typ, value, traceback) File "starlette/_utils.py", line 93, in collapse_excgroups raise exc File "starlette/middleware/base.py", line 191, in __call__ response = await self.dispatch_func(request, call_next) File "galaxy/webapps/galaxy/fast_app.py", line 109, in add_x_frame_options response = await call_next(request) File "starlette/middleware/base.py", line 165, in call_next raise app_exc File "starlette/middleware/base.py", line 151, in coro await self.app(scope, receive_or_disconnect, send_no_error) File "starlette/middleware/exceptions.py", line 62, in __call__ await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) File "starlette/_exception_handler.py", line 64, in wrapped_app raise exc File "starlette/_exception_handler.py", line 53, in wrapped_app await app(scope, receive, sender) File "starlette/routing.py", line 758, in __call__ await self.middleware_stack(scope, receive, send) File "starlette/routing.py", line 778, in app await route.handle(scope, receive, send) File "starlette/routing.py", line 299, in handle await self.app(scope, receive, send) File "starlette/routing.py", line 79, in app await wrap_app_handling_exceptions(app, request)(scope, receive, send) File "starlette/_exception_handler.py", line 64, in wrapped_app raise exc File "starlette/_exception_handler.py", line 53, in wrapped_app await app(scope, receive, sender) File "starlette/routing.py", line 74, in app response = await func(request) File "fastapi/routing.py", line 278, in app raw_response = await run_endpoint_function( File "fastapi/routing.py", line 193, in run_endpoint_function return await run_in_threadpool(dependant.call, **values) File "starlette/concurrency.py", line 42, in run_in_threadpool return await anyio.to_thread.run_sync(func, *args) File "anyio/to_thread.py", line 56, in run_sync return await get_async_backend().run_sync_in_worker_thread( File "anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread return await future File "anyio/_backends/_asyncio.py", line 851, in run result = context.run(func, *args) File "galaxy/webapps/galaxy/api/datasets.py", line 446, in show return self.service.show(trans, dataset_id, hda_ldda, serialization_params, data_type, **extra_params) File "galaxy/webapps/galaxy/services/datasets.py", line 395, in show rval = self._raw_data(trans, dataset, **extra_params) File "galaxy/webapps/galaxy/services/datasets.py", line 1009, in _raw_data return DataResult(data=list(dataset.datatype.dataprovider(dataset, provider, **kwargs))) File "galaxy/datatypes/dataproviders/base.py", line 262, in __iter__ for datum in parent_gen: File "galaxy/datatypes/dataproviders/base.py", line 199, in __iter__ for datum in parent_gen: File "galaxy/datatypes/dataproviders/base.py", line 137, in __iter__ yield from self.source File "galaxy/datatypes/dataproviders/base.py", line 137, in __iter__ yield from self.source File "", line 322, in decode ``` --- lib/galaxy/datatypes/dataproviders/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/dataproviders/dataset.py b/lib/galaxy/datatypes/dataproviders/dataset.py index dfce2edef527..60b5f6295f3e 100644 --- a/lib/galaxy/datatypes/dataproviders/dataset.py +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -15,6 +15,7 @@ ) from galaxy.util import sqlite +from galaxy.util.compression_utils import get_fileobj from . import ( base, column, @@ -54,7 +55,7 @@ def __init__(self, dataset, **kwargs): # this dataset file is obviously the source # TODO: this might be a good place to interface with the object_store... mode = "rb" if dataset.datatype.is_binary else "r" - super().__init__(open(dataset.get_file_name(), mode)) + super().__init__(get_fileobj(dataset.get_file_name(), mode)) # TODO: this is a bit of a mess @classmethod From 455a851eef1b9301dc23817604cad73db9016bf0 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 29 May 2024 10:20:45 +0200 Subject: [PATCH 04/10] Raise exception when extracting dataset from collection without datasets Fixes https://github.com/galaxyproject/galaxy/issues/18240. --- lib/galaxy/model/__init__.py | 2 +- lib/galaxy/tools/__init__.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 172aab242b03..43d6bbe35586 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6507,7 +6507,7 @@ def dataset_elements_and_identifiers(self, identifiers=None): return elements @property - def first_dataset_element(self): + def first_dataset_element(self) -> Optional[HistoryDatasetAssociation]: for element in self.elements: if element.is_collection: first_element = element.child_collection.first_dataset_element diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 20d7e4f47608..d1fd2e199bee 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3355,18 +3355,20 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history how = incoming["which"]["which_dataset"] if how == "first": extracted_element = collection.first_dataset_element + if not extracted_element: + raise exceptions.RequestParameterInvalidException("Input collection has no dataset elements.") elif how == "by_identifier": try: extracted_element = collection[incoming["which"]["identifier"]] except KeyError as e: - raise exceptions.MessageException(e.args[0]) + raise exceptions.RequestParameterInvalidException(e.args[0]) elif how == "by_index": try: extracted_element = collection[int(incoming["which"]["index"])] except KeyError as e: - raise exceptions.MessageException(e.args[0]) + raise exceptions.RequestParameterInvalidException(e.args[0]) else: - raise exceptions.MessageException("Invalid tool parameters.") + raise exceptions.RequestParameterInvalidException("Invalid tool parameters.") extracted = extracted_element.element_object extracted_o = extracted.copy( copy_tags=extracted.tags, new_name=extracted_element.element_identifier, flush=False From 7cfba26fe1415a25946fdd3e78b7d7b6c01d7973 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 29 May 2024 16:27:00 +0200 Subject: [PATCH 05/10] Fix Invenio credentials handling Only ask for token when is really required --- lib/galaxy/files/sources/_rdm.py | 7 +------ lib/galaxy/files/sources/invenio.py | 31 +++++++++++++++++------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/galaxy/files/sources/_rdm.py b/lib/galaxy/files/sources/_rdm.py index 14f7e9e1daa0..25e33ed8b757 100644 --- a/lib/galaxy/files/sources/_rdm.py +++ b/lib/galaxy/files/sources/_rdm.py @@ -7,7 +7,6 @@ from typing_extensions import Unpack -from galaxy.exceptions import AuthenticationRequired from galaxy.files import ProvidesUserFileSourcesUserContext from galaxy.files.sources import ( BaseFilesSource, @@ -193,15 +192,11 @@ def _serialization_props(self, user_context: OptionalUserContext = None): effective_props[key] = self._evaluate_prop(val, user_context=user_context) return effective_props - def get_authorization_token(self, user_context: OptionalUserContext) -> str: + def get_authorization_token(self, user_context: OptionalUserContext) -> Optional[str]: token = None if user_context: effective_props = self._serialization_props(user_context) token = effective_props.get("token") - if not token: - raise AuthenticationRequired( - f"Please provide a personal access token in your user's preferences for '{self.label}'" - ) return token def get_public_name(self, user_context: OptionalUserContext) -> Optional[str]: diff --git a/lib/galaxy/files/sources/invenio.py b/lib/galaxy/files/sources/invenio.py index 921438a446bd..99ec58f1e161 100644 --- a/lib/galaxy/files/sources/invenio.py +++ b/lib/galaxy/files/sources/invenio.py @@ -217,12 +217,7 @@ def create_draft_record( }, } - headers = self._get_request_headers(user_context) - if "Authorization" not in headers: - raise Exception( - "Cannot create record without authentication token. Please set your personal access token in your Galaxy preferences." - ) - + headers = self._get_request_headers(user_context, auth_required=True) response = requests.post(self.records_url, json=create_record_request, headers=headers) self._ensure_response_has_expected_status_code(response, 201) record = response.json() @@ -238,7 +233,7 @@ def upload_file_to_draft_record( ): record = self._get_draft_record(record_id, user_context=user_context) upload_file_url = record["links"]["files"] - headers = self._get_request_headers(user_context) + headers = self._get_request_headers(user_context, auth_required=True) # Add file metadata entry response = requests.post(upload_file_url, json=[{"key": filename}], headers=headers) @@ -394,28 +389,38 @@ def _get_creator_from_public_name(self, public_name: Optional[str] = None) -> Cr } def _get_response( - self, user_context: OptionalUserContext, request_url: str, params: Optional[Dict[str, Any]] = None + self, + user_context: OptionalUserContext, + request_url: str, + params: Optional[Dict[str, Any]] = None, + auth_required: bool = False, ) -> dict: - headers = self._get_request_headers(user_context) + headers = self._get_request_headers(user_context, auth_required) response = requests.get(request_url, params=params, headers=headers) self._ensure_response_has_expected_status_code(response, 200) return response.json() - def _get_request_headers(self, user_context: OptionalUserContext): + def _get_request_headers(self, user_context: OptionalUserContext, auth_required: bool = False): token = self.plugin.get_authorization_token(user_context) headers = {"Authorization": f"Bearer {token}"} if token else {} + if auth_required and token is None: + self._raise_auth_required() return headers def _ensure_response_has_expected_status_code(self, response, expected_status_code: int): - if response.status_code == 403: - record_url = response.url.replace("/api", "").replace("/files", "") - raise AuthenticationRequired(f"Please make sure you have the necessary permissions to access: {record_url}") if response.status_code != expected_status_code: + if response.status_code == 403: + self._raise_auth_required() error_message = self._get_response_error_message(response) raise Exception( f"Request to {response.url} failed with status code {response.status_code}: {error_message}" ) + def _raise_auth_required(self): + raise AuthenticationRequired( + f"Please provide a personal access token in your user's preferences for '{self.plugin.label}'" + ) + def _get_response_error_message(self, response): response_json = response.json() error_message = response_json.get("message") if response.status_code == 400 else response.text From 0b6d925b8a65bf5764b21ed3dea535c349832be8 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 29 May 2024 18:45:23 +0200 Subject: [PATCH 06/10] Set page importable to false when serializing Fixes https://github.com/galaxyproject/galaxy/issues/18258 --- lib/galaxy/model/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 172aab242b03..cf8b819f49b4 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -10085,7 +10085,7 @@ class Page(Base, HasTags, Dictifiable, RepresentById, UsesCreateAndUpdateTime): ) title = Column(TEXT) deleted = Column(Boolean, index=True, default=False) - importable = Column(Boolean, index=True, default=False) + importable = Column(Boolean, index=True, default=False, key="_importable") slug = Column(TEXT) published = Column(Boolean, index=True, default=False) user = relationship("User") @@ -10135,6 +10135,10 @@ class Page(Base, HasTags, Dictifiable, RepresentById, UsesCreateAndUpdateTime): def to_dict(self, view="element"): rval = super().to_dict(view=view) + if "importable" in rval and rval["importable"] is None: + # pages created prior to 2011 might not have importable field + # probably not worth creating a migration to fix that + rval["importable"] = False rev = [] for a in self.revisions: rev.append(a.id) From 6d068ca5f92f5126a034ad5efe7ece3a28fc7ce0 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 30 May 2024 15:16:34 +0100 Subject: [PATCH 07/10] Add ``hgweb_repo_prefix`` attribute to ``TestToolShedConfig`` Fix the following error in unit tests: ``` AttributeError: 'TestToolShedConfig' object has no attribute 'hgweb_repo_prefix' ``` caused by merging commit b00beec3d26b7dff76b32d56078bf7c90eff915b forward. --- test/unit/tool_shed/_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/tool_shed/_util.py b/test/unit/tool_shed/_util.py index d59991bca0f1..cf4f82255d0c 100644 --- a/test/unit/tool_shed/_util.py +++ b/test/unit/tool_shed/_util.py @@ -46,6 +46,7 @@ class TestToolShedConfig: file_path: str id_secret: str = "thisistheshedunittestsecret" smtp_server: Optional[str] = None + hgweb_repo_prefix = "repos/" config_hg_for_dev = False def __init__(self, temp_directory): From 53bf8ce1de93d7b8b46f7daa770f5e4d1c68f4ec Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 30 May 2024 17:58:42 +0100 Subject: [PATCH 08/10] Format with black --- .../1b5bf427db25_add_non_nullable_column_deleted_to_api_.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/tool_shed/webapp/model/migrations/alembic/versions/1b5bf427db25_add_non_nullable_column_deleted_to_api_.py b/lib/tool_shed/webapp/model/migrations/alembic/versions/1b5bf427db25_add_non_nullable_column_deleted_to_api_.py index 390402f37b53..5f493a5aa8d1 100644 --- a/lib/tool_shed/webapp/model/migrations/alembic/versions/1b5bf427db25_add_non_nullable_column_deleted_to_api_.py +++ b/lib/tool_shed/webapp/model/migrations/alembic/versions/1b5bf427db25_add_non_nullable_column_deleted_to_api_.py @@ -5,6 +5,7 @@ Create Date: 2024-05-29 21:53:53.516506 """ + import sqlalchemy as sa from alembic import op from sqlalchemy import ( From 48231391a8d9ecde5b88a49a9f600588c0ada3c0 Mon Sep 17 00:00:00 2001 From: Marius van den Beek Date: Fri, 31 May 2024 10:18:23 +0200 Subject: [PATCH 09/10] Drop key argument Did not want to do that actually --- lib/galaxy/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index cf8b819f49b4..66631e136092 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -10085,7 +10085,7 @@ class Page(Base, HasTags, Dictifiable, RepresentById, UsesCreateAndUpdateTime): ) title = Column(TEXT) deleted = Column(Boolean, index=True, default=False) - importable = Column(Boolean, index=True, default=False, key="_importable") + importable = Column(Boolean, index=True, default=False) slug = Column(TEXT) published = Column(Boolean, index=True, default=False) user = relationship("User") From 99aeadec06e3e0dbe7b8c9561a18931441efc916 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Fri, 31 May 2024 15:09:31 +0200 Subject: [PATCH 10/10] Fix first_dataset_element type hint --- lib/galaxy/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 63318fdec3ee..97ff0f6b0fbf 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6507,7 +6507,7 @@ def dataset_elements_and_identifiers(self, identifiers=None): return elements @property - def first_dataset_element(self) -> Optional[HistoryDatasetAssociation]: + def first_dataset_element(self) -> Optional["DatasetCollectionElement"]: for element in self.elements: if element.is_collection: first_element = element.child_collection.first_dataset_element