From 0f77df7951c5be59266e77312dcb45be9ecb7cf2 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 5 Aug 2024 16:40:36 +0200 Subject: [PATCH 1/2] Fix extract workflow from history with empty mapped over collections Fixes https://github.com/galaxyproject/galaxy/issues/18484. This is not ideal, but a workflow with gaps is better than an internal server error IMO. We do the same for datasets in some non-terminal states. --- lib/galaxy/webapps/galaxy/controllers/workflow.py | 1 + lib/galaxy/workflow/extract.py | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/webapps/galaxy/controllers/workflow.py b/lib/galaxy/webapps/galaxy/controllers/workflow.py index ca0cb88b4c3c..085834f67ec0 100644 --- a/lib/galaxy/webapps/galaxy/controllers/workflow.py +++ b/lib/galaxy/webapps/galaxy/controllers/workflow.py @@ -320,6 +320,7 @@ def build_from_current_history( # Optionally target a different history than the current one. history = self.history_manager.get_owned(self.decode_id(history_id), trans.user, current_history=history) if not user: + trans.response.status = 403 return trans.show_error_message("Must be logged in to create workflows") if (job_ids is None and dataset_ids is None) or workflow_name is None: jobs, warnings = summarize(trans, history) diff --git a/lib/galaxy/workflow/extract.py b/lib/galaxy/workflow/extract.py index 5d10aa8a8a19..6072f322d9cb 100644 --- a/lib/galaxy/workflow/extract.py +++ b/lib/galaxy/workflow/extract.py @@ -321,7 +321,15 @@ def __summarize_dataset_collection(self, dataset_collection): # tracking with creating_job_associations. Will delete at some point. elif dataset_collection.implicit_output_name: # TODO: Optimize db call - dataset_instance = dataset_collection.collection.dataset_instances[0] + element = dataset_collection.collection.first_dataset_element + if not element: + # Got no dataset instance to walk back up to creating job. 
+ # TODO track this via tool request model + job = DatasetCollectionCreationJob(dataset_collection) + self.jobs[job] = [(None, dataset_collection)] + return + else: + dataset_instance = element.hda if not self.__check_state(dataset_instance): # Just checking the state of one instance, don't need more but # makes me wonder if even need this check at all? From b30fed83eb03b1bf737060ee611e75df7dd329a2 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 7 Aug 2024 13:41:01 +0200 Subject: [PATCH 2/2] Add extraction test for empty implicit collection --- .../api/test_workflow_extraction.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/lib/galaxy_test/api/test_workflow_extraction.py b/lib/galaxy_test/api/test_workflow_extraction.py index 4d48887cc30d..ada641171af6 100644 --- a/lib/galaxy_test/api/test_workflow_extraction.py +++ b/lib/galaxy_test/api/test_workflow_extraction.py @@ -224,6 +224,46 @@ def test_extract_workflows_with_dataset_collections(self, history_id): collection_step_state = loads(collection_step["tool_state"]) assert collection_step_state["collection_type"] == "paired" + def test_empty_collection_map_over_extract_workflow(self): + with self.dataset_populator.test_history() as history_id: + self._run_workflow( + """class: GalaxyWorkflow +inputs: + input: collection + filter_file: data +steps: + filter_collection: + tool_id: __FILTER_FROM_FILE__ + in: + input: input + how|filter_source: filter_file + state: + how: + how_filter: remove_if_present + concat: + tool_id: cat1 + in: + input1: filter_collection/output_filtered +test_data: + input: + collection_type: list + elements: + - identifier: i1 + content: "0" + filter_file: i1""", + history_id, + wait=True, + ) + response = self._post( + "workflows", data={"from_history_id": history_id, "workflow_name": "extract with empty collection test"} + ) + assert response.status_code == 200 + workflow_id = response.json()["id"] + workflow = self.workflow_populator.download_workflow(workflow_id) 
+ assert workflow + # TODO: after adding request models we should be able to recover implicit collection job requests. + # assert len(workflow["steps"]) == 4 + @skip_without_tool("cat_collection") def test_subcollection_mapping(self, history_id): jobs_summary = self._run_workflow(