From 03d6ca8dea17b81008259c9a4990a86a5c41dba9 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 15 Jan 2024 11:32:29 +0100 Subject: [PATCH 01/11] add __KEEP_SUCCESS_DATASETS__ --- .../ToolsView/testData/toolsList.json | 20 ++++ lib/galaxy/tools/__init__.py | 10 ++ lib/galaxy/tools/keep_success_collection.xml | 64 +++++++++++ lib/galaxy_test/api/test_workflows.py | 100 ++++++++++++++++++ 4 files changed, 194 insertions(+) create mode 100644 lib/galaxy/tools/keep_success_collection.xml diff --git a/client/src/components/ToolsView/testData/toolsList.json b/client/src/components/ToolsView/testData/toolsList.json index cceb264bee95..6277e8b94654 100644 --- a/client/src/components/ToolsView/testData/toolsList.json +++ b/client/src/components/ToolsView/testData/toolsList.json @@ -59,6 +59,26 @@ "id": "__FILTER_FAILED_DATASETS__", "name": "Filter failed" }, + { + "panel_section_name": "Collection Operations", + "xrefs": [], + "description": "datasets from a collection", + "is_workflow_compatible": true, + "labels": [], + "help": "

This tool takes a dataset collection and filters in datasets in the success state. This is useful for continuing a multi-sample analysis when one of more of the samples fails or is slow to run or is in paused state.<\/p>\n

This tool will create new history datasets from your collection but your quota usage will not increase.<\/p>\n", + "edam_operations": [], + "form_style": "regular", + "edam_topics": [], + "panel_section_id": "collection_operations", + "version": "1.0.0", + "link": "/tool_runner?tool_id=__KEEP_SUCCESS_DATASETS__", + "target": "galaxy_main", + "min_width": -1, + "model_class": "KeepSuccessDatasetsTool", + "hidden": "", + "id": "__KEEP_SUCCESS_DATASETS__", + "name": "Keep success" + }, { "panel_section_name": "Collection Operations", "xrefs": [], diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 5831359a8411..8e119947cb1a 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3487,6 +3487,16 @@ def element_is_valid(element: model.DatasetCollectionElement): return element.element_object.is_ok +class KeepSuccessDatasetsTool(FilterDatasetsTool): + tool_type = "keep_success_datasets_collection" + require_terminal_states = False + require_dataset_ok = False + + @staticmethod + def element_is_valid(element: model.DatasetCollectionElement): + return element.element_object.is_ok + + class FilterEmptyDatasetsTool(FilterDatasetsTool): tool_type = "filter_empty_datasets_collection" require_dataset_ok = False diff --git a/lib/galaxy/tools/keep_success_collection.xml b/lib/galaxy/tools/keep_success_collection.xml new file mode 100644 index 000000000000..3e72baf53b4f --- /dev/null +++ b/lib/galaxy/tools/keep_success_collection.xml @@ -0,0 +1,64 @@ + + + + + + operation_3695 + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index aa0d83b3c9e5..071d59396d86 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3255,6 +3255,106 @@ def filter_jobs_by_tool(tool_id): # Follow proves one job was filtered out of the result of cat1 assert len(filter_jobs_by_tool("cat1")) == 1, jobs + def 
test_keep_success_mapping_error(self): + with self.dataset_populator.test_history() as history_id: + summary = self._run_workflow( + """ +class: GalaxyWorkflow +inputs: + input_c: collection + +steps: + mixed_collection: + tool_id: exit_code_from_file + state: + input: + $link: input_c + + filtered_collection: + tool_id: "__KEEP_SUCCESS_DATASETS__" + state: + input: + $link: mixed_collection/out_file1 + + cat: + tool_id: cat1 + state: + input1: + $link: filtered_collection +""", + test_data=""" +input_c: + collection_type: list + elements: + - identifier: i1 + content: "0" + - identifier: i2 + content: "1" +""", + history_id=history_id, + wait=True, + assert_ok=False, + ) + jobs = summary.jobs + + def filter_jobs_by_tool(tool_id): + return [j for j in summary.jobs if j["tool_id"] == tool_id] + + assert len(filter_jobs_by_tool("exit_code_from_file")) == 2, jobs + assert len(filter_jobs_by_tool("__KEEP_SUCCESS_DATASETS__")) == 1, jobs + # Follow proves one job was filtered out of the result of cat1 + assert len(filter_jobs_by_tool("cat1")) == 1, jobs + + def test_keep_success_mapping_paused(self): + with self.dataset_populator.test_history() as history_id: + summary = self._run_workflow( + """ +class: GalaxyWorkflow +inputs: + input_c: collection + +steps: + mixed_collection: + tool_id: exit_code_from_file + state: + input: + $link: input_c + + cat: + tool_id: cat1 + state: + input1: + $link: mixed_collection/out_file1 + + filtered_collection: + tool_id: "__FILTER_FAILED_DATASETS__" + state: + input: + $link: cat/out_file1 + +""", + test_data=""" +input_c: + collection_type: list + elements: + - identifier: i1 + content: "0" + - identifier: i2 + content: "1" +""", + history_id=history_id, + wait=True, + assert_ok=False, + ) + jobs = summary.jobs + + def filter_jobs_by_tool(tool_id): + return [j for j in summary.jobs if j["tool_id"] == tool_id] + + assert len(filter_jobs_by_tool("exit_code_from_file")) == 2, jobs + assert len(filter_jobs_by_tool("cat1")) == 2, jobs + 
assert len(filter_jobs_by_tool("__KEEP_SUCCESS_DATASETS__")) == 1, jobs + def test_workflow_request(self): workflow = self.workflow_populator.load_workflow(name="test_for_queue") workflow_request, history_id, workflow_id = self._setup_workflow_run(workflow) From 15c923d57dd4a04f308fe632fa71602c2c5670bb Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 15 Jan 2024 12:54:12 +0100 Subject: [PATCH 02/11] fix typo in test --- lib/galaxy_test/api/test_workflows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 071d59396d86..1e0ac4669f9c 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3327,7 +3327,7 @@ def test_keep_success_mapping_paused(self): $link: mixed_collection/out_file1 filtered_collection: - tool_id: "__FILTER_FAILED_DATASETS__" + tool_id: "__KEEP_SUCCESS_DATASETS__" state: input: $link: cat/out_file1 From 5ba85f3f9faa438675e353d40789bc2267a03792 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 15 Jan 2024 14:56:37 +0100 Subject: [PATCH 03/11] add the tool to the sample_tool_conf.xml --- test/functional/tools/sample_tool_conf.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml index ae0f15a5a7bc..a88f44d68b3b 100644 --- a/test/functional/tools/sample_tool_conf.xml +++ b/test/functional/tools/sample_tool_conf.xml @@ -289,6 +289,7 @@ + From a7893ffb1440d6bab3201577afa62124b9228f4a Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 15 Jan 2024 14:51:53 +0100 Subject: [PATCH 04/11] remove from ToolsView Co-authored-by: Marius van den Beek --- .../ToolsView/testData/toolsList.json | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/client/src/components/ToolsView/testData/toolsList.json b/client/src/components/ToolsView/testData/toolsList.json index 6277e8b94654..cceb264bee95 100644 --- 
a/client/src/components/ToolsView/testData/toolsList.json +++ b/client/src/components/ToolsView/testData/toolsList.json @@ -59,26 +59,6 @@ "id": "__FILTER_FAILED_DATASETS__", "name": "Filter failed" }, - { - "panel_section_name": "Collection Operations", - "xrefs": [], - "description": "datasets from a collection", - "is_workflow_compatible": true, - "labels": [], - "help": "

This tool takes a dataset collection and filters in datasets in the success state. This is useful for continuing a multi-sample analysis when one of more of the samples fails or is slow to run or is in paused state.<\/p>\n

This tool will create new history datasets from your collection but your quota usage will not increase.<\/p>\n", - "edam_operations": [], - "form_style": "regular", - "edam_topics": [], - "panel_section_id": "collection_operations", - "version": "1.0.0", - "link": "/tool_runner?tool_id=__KEEP_SUCCESS_DATASETS__", - "target": "galaxy_main", - "min_width": -1, - "model_class": "KeepSuccessDatasetsTool", - "hidden": "", - "id": "__KEEP_SUCCESS_DATASETS__", - "name": "Keep success" - }, { "panel_section_name": "Collection Operations", "xrefs": [], From ca13bcd0553c8262115a842bb73faa3806ddd84c Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 15 Jan 2024 15:31:20 +0100 Subject: [PATCH 05/11] requires a semi-terminal state --- lib/galaxy/tools/__init__.py | 4 ++++ lib/galaxy/tools/keep_success_collection.xml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 8e119947cb1a..1a87fb4968ae 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3188,6 +3188,7 @@ class DatabaseOperationTool(Tool): require_terminal_states = True require_dataset_ok = True tool_type_local = True + require_terminal_or_paused_states = False @property def valid_input_states(self): @@ -3195,6 +3196,8 @@ def valid_input_states(self): return (model.Dataset.states.OK,) elif self.require_terminal_states: return model.Dataset.terminal_states + elif self.require_terminal_or_paused_states: + return (*model.Dataset.terminal_states, model.Dataset.states.PAUSED) else: return model.Dataset.valid_input_states @@ -3491,6 +3494,7 @@ class KeepSuccessDatasetsTool(FilterDatasetsTool): tool_type = "keep_success_datasets_collection" require_terminal_states = False require_dataset_ok = False + require_terminal_or_paused_states = True @staticmethod def element_is_valid(element: model.DatasetCollectionElement): diff --git a/lib/galaxy/tools/keep_success_collection.xml 
b/lib/galaxy/tools/keep_success_collection.xml index 3e72baf53b4f..69a0653df214 100644 --- a/lib/galaxy/tools/keep_success_collection.xml +++ b/lib/galaxy/tools/keep_success_collection.xml @@ -48,7 +48,7 @@ Keep datasets in success (green) from a collection. Description =========== -This tool takes a dataset collection and filters in (keep) datasets in the success (green) state. This is useful for continuing a multi-sample analysis when one of more of the samples fails or is slow to run or is in paused state. +This tool takes a dataset collection and keeps only the datasets in the success (green) state. This is useful for continuing a multi-sample analysis when one or more of the samples fail or are in a paused state. .. image:: ${static_path}/images/tools/collection_ops/keep_success.svg :width: 500 From e5ade6ea21e2738226b5a9315a3da29afb3be5c8 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 25 Jan 2024 16:55:30 +0100 Subject: [PATCH 06/11] WIP: fix KEEP_SUCCESS for non-terminal inputs --- lib/galaxy/tools/__init__.py | 5 +++++ lib/galaxy_test/api/test_workflows.py | 15 ++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 1a87fb4968ae..922baf8ca9ff 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3498,6 +3498,11 @@ class KeepSuccessDatasetsTool(FilterDatasetsTool): @staticmethod def element_is_valid(element: model.DatasetCollectionElement): + if ( + element.element_object.state != model.Dataset.states.PAUSED + and element.element_object.state in model.Dataset.non_ready_states + ): + raise ToolInputsNotReadyException("An input dataset is pending.") return element.element_object.is_ok diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 1e0ac4669f9c..341d47b5618f 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3266,21 +3266,18 @@ def 
test_keep_success_mapping_error(self): steps: mixed_collection: tool_id: exit_code_from_file - state: - input: - $link: input_c + in: + input: input_c filtered_collection: tool_id: "__KEEP_SUCCESS_DATASETS__" - state: - input: - $link: mixed_collection/out_file1 + in: + input: mixed_collection/out_file1 cat: tool_id: cat1 - state: - input1: - $link: filtered_collection + in: + input1: filtered_collection/output """, test_data=""" input_c: From b091599c1f80af2f21c54aacf07fb8bbe2aed4ae Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 26 Jan 2024 19:43:16 +0100 Subject: [PATCH 07/11] fix comment on test_keep_success_mapping_error --- lib/galaxy_test/api/test_workflows.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 341d47b5618f..a134d6b5d63d 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3299,7 +3299,8 @@ def filter_jobs_by_tool(tool_id): assert len(filter_jobs_by_tool("exit_code_from_file")) == 2, jobs assert len(filter_jobs_by_tool("__KEEP_SUCCESS_DATASETS__")) == 1, jobs - # Follow proves one job was filtered out of the result of cat1 + # Follow proves one job was filtered out of the exit_code_from_file + # And a single one has been sent to cat1 assert len(filter_jobs_by_tool("cat1")) == 1, jobs def test_keep_success_mapping_paused(self): From 576a4047ed505d963f63874bf12ed047ad3edf3a Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 26 Jan 2024 19:44:15 +0100 Subject: [PATCH 08/11] fix workflow syntax --- lib/galaxy_test/api/test_workflows.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index a134d6b5d63d..d353524ade3e 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3314,22 +3314,18 @@ def 
test_keep_success_mapping_paused(self): steps: mixed_collection: tool_id: exit_code_from_file - state: - input: - $link: input_c + in: + input: input_c cat: tool_id: cat1 - state: - input1: - $link: mixed_collection/out_file1 + in: + input1: mixed_collection/out_file1 filtered_collection: tool_id: "__KEEP_SUCCESS_DATASETS__" - state: - input: - $link: cat/out_file1 - + in: + input: cat/out_file1 """, test_data=""" input_c: From a88be0dd22dc0ac95605baf89d103f995a251da8 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 26 Jan 2024 20:27:12 +0100 Subject: [PATCH 09/11] improve test --- lib/galaxy_test/api/test_workflows.py | 28 ++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index d353524ade3e..edc14dd9aecd 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3335,6 +3335,8 @@ def test_keep_success_mapping_paused(self): content: "0" - identifier: i2 content: "1" + - identifier: i3 + content: "0" """, history_id=history_id, wait=True, @@ -3344,10 +3346,30 @@ def test_keep_success_mapping_paused(self): def filter_jobs_by_tool(tool_id): return [j for j in summary.jobs if j["tool_id"] == tool_id] - - assert len(filter_jobs_by_tool("exit_code_from_file")) == 2, jobs - assert len(filter_jobs_by_tool("cat1")) == 2, jobs + + # Get invocation to access output collections + invocation = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True) + # Check there are 3 exit_code_from_file + assert len(filter_jobs_by_tool("exit_code_from_file")) == 3, jobs + # Check output collection has 3 elements + output_mixed_collection_id = invocation['steps'][1]['output_collections']['out_file1']['id'] + mixed_collection = self.dataset_populator.get_history_collection_details(history_id, content_id=output_mixed_collection_id, assert_ok=False) + assert mixed_collection["element_count"] == 3, 
mixed_collection + # Check 3 jobs cat1 has been "scheduled": + assert len(filter_jobs_by_tool("cat1")) == 3, jobs + # Check 2 are 'ok' the other is 'paused' + output_cat_id = invocation['steps'][2]['output_collections']['out_file1']['id'] + cat_collection = self.dataset_populator.get_history_collection_details(history_id, content_id=output_cat_id, assert_ok=False) + assert cat_collection["element_count"] == 3, cat_collection + cat1_states = [e["object"]["state"] for e in cat_collection["elements"]] + assert 'paused' in cat1_states, jobs + assert len([s for s in cat1_states if s == 'ok']) == 2, cat_collection + # Check the KEEP_SUCCESS_DATASETS have been run assert len(filter_jobs_by_tool("__KEEP_SUCCESS_DATASETS__")) == 1, jobs + # Check the output has 2 elements + output_filtered_id = invocation['steps'][3]['output_collections']['output']['id'] + output_filtered = self.dataset_populator.get_history_collection_details(history_id, content_id=output_filtered_id, assert_ok=False) + assert output_filtered["element_count"] == 2, output_filtered def test_workflow_request(self): workflow = self.workflow_populator.load_workflow(name="test_for_queue") From 26091203c2f2dce512dc12a511ec24a2c19bf64c Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 26 Jan 2024 21:07:07 +0100 Subject: [PATCH 10/11] lint --- lib/galaxy_test/api/test_workflows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index edc14dd9aecd..cb01b89df199 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3346,7 +3346,7 @@ def test_keep_success_mapping_paused(self): def filter_jobs_by_tool(tool_id): return [j for j in summary.jobs if j["tool_id"] == tool_id] - + # Get invocation to access output collections invocation = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True) # Check there are 3 exit_code_from_file From 
d8dc597886f3361af93334e0ec1354146bd495a7 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 26 Jan 2024 21:17:43 +0100 Subject: [PATCH 11/11] run black --- lib/galaxy_test/api/test_workflows.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index cb01b89df199..4d6b820e79e3 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -3352,23 +3352,29 @@ def filter_jobs_by_tool(tool_id): # Check there are 3 exit_code_from_file assert len(filter_jobs_by_tool("exit_code_from_file")) == 3, jobs # Check output collection has 3 elements - output_mixed_collection_id = invocation['steps'][1]['output_collections']['out_file1']['id'] - mixed_collection = self.dataset_populator.get_history_collection_details(history_id, content_id=output_mixed_collection_id, assert_ok=False) + output_mixed_collection_id = invocation["steps"][1]["output_collections"]["out_file1"]["id"] + mixed_collection = self.dataset_populator.get_history_collection_details( + history_id, content_id=output_mixed_collection_id, assert_ok=False + ) assert mixed_collection["element_count"] == 3, mixed_collection # Check 3 jobs cat1 has been "scheduled": assert len(filter_jobs_by_tool("cat1")) == 3, jobs # Check 2 are 'ok' the other is 'paused' - output_cat_id = invocation['steps'][2]['output_collections']['out_file1']['id'] - cat_collection = self.dataset_populator.get_history_collection_details(history_id, content_id=output_cat_id, assert_ok=False) + output_cat_id = invocation["steps"][2]["output_collections"]["out_file1"]["id"] + cat_collection = self.dataset_populator.get_history_collection_details( + history_id, content_id=output_cat_id, assert_ok=False + ) assert cat_collection["element_count"] == 3, cat_collection cat1_states = [e["object"]["state"] for e in cat_collection["elements"]] - assert 'paused' in cat1_states, jobs - assert len([s for 
s in cat1_states if s == 'ok']) == 2, cat_collection + assert "paused" in cat1_states, jobs + assert len([s for s in cat1_states if s == "ok"]) == 2, cat_collection # Check the KEEP_SUCCESS_DATASETS have been run assert len(filter_jobs_by_tool("__KEEP_SUCCESS_DATASETS__")) == 1, jobs # Check the output has 2 elements - output_filtered_id = invocation['steps'][3]['output_collections']['output']['id'] - output_filtered = self.dataset_populator.get_history_collection_details(history_id, content_id=output_filtered_id, assert_ok=False) + output_filtered_id = invocation["steps"][3]["output_collections"]["output"]["id"] + output_filtered = self.dataset_populator.get_history_collection_details( + history_id, content_id=output_filtered_id, assert_ok=False + ) assert output_filtered["element_count"] == 2, output_filtered def test_workflow_request(self):