From acec4f6de36f08158ad36e63cd220f880fe4e675 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 23 Apr 2024 11:54:47 +0200 Subject: [PATCH 1/2] Improve error message for __EXTRACT_DATASETS__ tool Provides a reasonable error message to users, instead of the internal error https://sentry.galaxyproject.org/share/issue/a197159192b64b9db065a762ee5dbbfc/: ``` KeyError: 'Dataset collection has no element_identifier with key 2.' File "galaxy/tools/__init__.py", line 1972, in handle_single_execution rval = self.execute( File "galaxy/tools/__init__.py", line 2069, in execute return self.tool_action.execute( File "galaxy/tools/actions/model_operations.py", line 88, in execute self._produce_outputs( File "galaxy/tools/actions/model_operations.py", line 119, in _produce_outputs tool.produce_outputs( File "galaxy/tools/__init__.py", line 3351, in produce_outputs extracted_element = collection[incoming["which"]["identifier"]] File "galaxy/model/__init__.py", line 6434, in __getitem__ raise KeyError(error_message) ``` Also makes the `identifier` parameter explicitly required (it is now inferred to be optional because text parameters are by default optional if no validator raises an exception upon validating an empty string). That prevents ``` KeyError: 'Dataset collection has no element_identifier with key None.' File "galaxy/tools/__init__.py", line 1972, in handle_single_execution rval = self.execute( File "galaxy/tools/__init__.py", line 2069, in execute return self.tool_action.execute( File "galaxy/tools/actions/model_operations.py", line 88, in execute self._produce_outputs( File "galaxy/tools/actions/model_operations.py", line 119, in _produce_outputs tool.produce_outputs( File "galaxy/tools/__init__.py", line 3351, in produce_outputs extracted_element = collection[incoming["which"]["identifier"]] File "galaxy/model/__init__.py", line 6434, in __getitem__ raise KeyError(error_message) ``` --- lib/galaxy/tools/__init__.py | 10 ++++++++-- lib/galaxy/tools/extract_dataset.xml | 6 +++--- lib/galaxy_test/api/test_tools.py | 23 +++++++++++++++++++++-- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 57aa93d8000e..20d7e4f47608 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3356,9 +3356,15 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history if how == "first": extracted_element = collection.first_dataset_element elif how == "by_identifier": - extracted_element = collection[incoming["which"]["identifier"]] + try: + extracted_element = collection[incoming["which"]["identifier"]] + except KeyError as e: + raise exceptions.MessageException(e.args[0]) elif how == "by_index": - extracted_element = collection[int(incoming["which"]["index"])] + try: + extracted_element = collection[int(incoming["which"]["index"])] + except KeyError as e: + raise exceptions.MessageException(e.args[0]) else: raise exceptions.MessageException("Invalid tool parameters.") extracted = extracted_element.element_object diff --git a/lib/galaxy/tools/extract_dataset.xml b/lib/galaxy/tools/extract_dataset.xml index 3e066ebebec5..37986eb722f9 100644 --- a/lib/galaxy/tools/extract_dataset.xml +++ b/lib/galaxy/tools/extract_dataset.xml @@ -19,7 +19,7 @@ - + @@ -52,9 +52,9 @@ Description The tool allow extracting datasets based on position (**The first dataset** and **Select by index** options) or name (**Select by element identifier** option). This tool effectively collapses the inner-most collection into a dataset. For nested collections (e.g a list of lists of lists: outer:middle:inner, extracting the inner dataset element) a new list is created where the selected element takes the position of the inner-most collection (so outer:middle, where middle is not a collection but the inner dataset element). -.. class:: warningmark +.. class:: warningmark -**Note**: Dataset index (numbering) begins with 0 (zero). +**Note**: Dataset index (numbering) begins with 0 (zero). .. class:: infomark diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 7befe7f221b1..3e99d0115ac1 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -701,7 +701,7 @@ def test_database_operation_tool_with_pending_inputs(self): hdca1_id = self.dataset_collection_populator.create_list_in_history( history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"], wait=True ).json()["outputs"][0]["id"] - self.dataset_populator.run_tool( + run_response = self.dataset_populator.run_tool( tool_id="cat_data_and_sleep", inputs={ "sleep_time": 15, @@ -709,15 +709,34 @@ def test_database_operation_tool_with_pending_inputs(self): }, history_id=history_id, ) + output_hdca_id = run_response["implicit_collections"][0]["id"] run_response = self.dataset_populator.run_tool( tool_id="__EXTRACT_DATASET__", inputs={ - "data_collection": {"src": "hdca", "id": hdca1_id}, + "data_collection": {"src": "hdca", "id": output_hdca_id}, }, history_id=history_id, ) assert run_response["outputs"][0]["state"] != "ok" + @skip_without_tool("__EXTRACT_DATASET__") + def test_extract_dataset_invalid_element_identifier(self): + with self.dataset_populator.test_history(require_new=False) as history_id: + hdca1_id = self.dataset_collection_populator.create_list_in_history( + history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"], wait=True + ).json()["outputs"][0]["id"] + run_response = self.dataset_populator.run_tool_raw( + tool_id="__EXTRACT_DATASET__", + inputs={ + "data_collection": {"src": "hdca", "id": hdca1_id}, + "which": {"which_dataset": "by_index", "index": 100}, + }, + history_id=history_id, + input_format="21.01", + ) + assert run_response.status_code == 400 + assert run_response.json()["err_msg"] == "Dataset collection has no element_index with key 100." + @skip_without_tool("__FILTER_FAILED_DATASETS__") def test_filter_failed_list(self): with self.dataset_populator.test_history(require_new=False) as history_id: From 1bea8a71f7f3aabb256ed4654b9f345fdcc4bc3d Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 2 May 2024 17:31:21 +0200 Subject: [PATCH 2/2] Cancel ProcessPoolFuture when aborting job Fixes the occasional timeout error in jobs that follow the `test_abort_fetch_job` test. --- lib/galaxy/celery/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/galaxy/celery/tasks.py b/lib/galaxy/celery/tasks.py index fc60f6921327..657ae3ef909b 100644 --- a/lib/galaxy/celery/tasks.py +++ b/lib/galaxy/celery/tasks.py @@ -288,6 +288,7 @@ def abort_when_job_stops(function: Callable, session: galaxy_scoped_session, job return future.result(timeout=1) except TimeoutError: if is_aborted(session, job_id): + future.cancel() return