From 0f77df7951c5be59266e77312dcb45be9ecb7cf2 Mon Sep 17 00:00:00 2001
From: mvdbeek <m.vandenbeek@gmail.com>
Date: Mon, 5 Aug 2024 16:40:36 +0200
Subject: [PATCH 1/2] Fix extract workflow from history with empty mapped over
 collections

Fixes https://github.com/galaxyproject/galaxy/issues/18484.

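This is not ideal: the job that created an empty implicit collection
cannot be recovered, so the extracted workflow will have gaps. That is
still better than an internal server error IMO. We do the same for
datasets in some non-terminal states.

Also set a 403 response status when a user who is not logged in tries
to build a workflow from the current history.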
---
 lib/galaxy/webapps/galaxy/controllers/workflow.py |  1 +
 lib/galaxy/workflow/extract.py                    | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/webapps/galaxy/controllers/workflow.py b/lib/galaxy/webapps/galaxy/controllers/workflow.py
index ca0cb88b4c3c..085834f67ec0 100644
--- a/lib/galaxy/webapps/galaxy/controllers/workflow.py
+++ b/lib/galaxy/webapps/galaxy/controllers/workflow.py
@@ -320,6 +320,7 @@ def build_from_current_history(
             # Optionally target a different history than the current one.
             history = self.history_manager.get_owned(self.decode_id(history_id), trans.user, current_history=history)
         if not user:
+            trans.response.status = 403
             return trans.show_error_message("Must be logged in to create workflows")
         if (job_ids is None and dataset_ids is None) or workflow_name is None:
             jobs, warnings = summarize(trans, history)
diff --git a/lib/galaxy/workflow/extract.py b/lib/galaxy/workflow/extract.py
index 5d10aa8a8a19..6072f322d9cb 100644
--- a/lib/galaxy/workflow/extract.py
+++ b/lib/galaxy/workflow/extract.py
@@ -321,7 +321,15 @@ def __summarize_dataset_collection(self, dataset_collection):
         # tracking with creating_job_associations. Will delete at some point.
         elif dataset_collection.implicit_output_name:
             # TODO: Optimize db call
-            dataset_instance = dataset_collection.collection.dataset_instances[0]
+            element = dataset_collection.collection.first_dataset_element
+            if not element:
+                # Got no dataset instance to walk back up to creating job.
+                # TODO track this via tool request model
+                job = DatasetCollectionCreationJob(dataset_collection)
+                self.jobs[job] = [(None, dataset_collection)]
+                return
+            else:
+                dataset_instance = element.hda
             if not self.__check_state(dataset_instance):
                 # Just checking the state of one instance, don't need more but
                 # makes me wonder if even need this check at all?

From b30fed83eb03b1bf737060ee611e75df7dd329a2 Mon Sep 17 00:00:00 2001
From: mvdbeek <m.vandenbeek@gmail.com>
Date: Wed, 7 Aug 2024 13:41:01 +0200
Subject: [PATCH 2/2] Add extraction test for empty implicit collection

---
 .../api/test_workflow_extraction.py           | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/lib/galaxy_test/api/test_workflow_extraction.py b/lib/galaxy_test/api/test_workflow_extraction.py
index 4d48887cc30d..ada641171af6 100644
--- a/lib/galaxy_test/api/test_workflow_extraction.py
+++ b/lib/galaxy_test/api/test_workflow_extraction.py
@@ -224,6 +224,46 @@ def test_extract_workflows_with_dataset_collections(self, history_id):
         collection_step_state = loads(collection_step["tool_state"])
         assert collection_step_state["collection_type"] == "paired"
 
+    def test_empty_collection_map_over_extract_workflow(self):
+        with self.dataset_populator.test_history() as history_id:
+            self._run_workflow(
+                """class: GalaxyWorkflow
+inputs:
+  input: collection
+  filter_file: data
+steps:
+  filter_collection:
+    tool_id: __FILTER_FROM_FILE__
+    in:
+       input: input
+       how|filter_source: filter_file
+    state:
+       how:
+         how_filter: remove_if_present
+  concat:
+    tool_id: cat1
+    in:
+      input1: filter_collection/output_filtered
+test_data:
+  input:
+    collection_type: list
+    elements:
+      - identifier: i1
+        content: "0"
+  filter_file: i1""",
+                history_id,
+                wait=True,
+            )
+            response = self._post(
+                "workflows", data={"from_history_id": history_id, "workflow_name": "extract with empty collection test"}
+            )
+            assert response.status_code == 200
+            workflow_id = response.json()["id"]
+            workflow = self.workflow_populator.download_workflow(workflow_id)
+            assert workflow
+            # TODO: once tool request models exist, recover the implicit collection job so no steps are missing:
+            # assert len(workflow["steps"]) == 4
+
     @skip_without_tool("cat_collection")
     def test_subcollection_mapping(self, history_id):
         jobs_summary = self._run_workflow(