From 424695763b4e7a6d2c02810ea25528fc7ce09e20 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Tue, 24 Oct 2023 11:02:48 +0200
Subject: [PATCH 01/11] Provide error message instead of internal server error

---
 lib/galaxy/webapps/galaxy/controllers/history.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/galaxy/webapps/galaxy/controllers/history.py b/lib/galaxy/webapps/galaxy/controllers/history.py
index 3f16bc0ecc94..e2246c28c158 100644
--- a/lib/galaxy/webapps/galaxy/controllers/history.py
+++ b/lib/galaxy/webapps/galaxy/controllers/history.py
@@ -488,6 +488,10 @@ def view(self, trans, id=None, show_deleted=False, show_hidden=False, use_panels
             history_is_current = history_to_view == trans.history
         else:
             history_to_view = trans.history
+            if not history_to_view:
+                raise exceptions.RequestParameterMissingException(
+                    "No 'id' parameter provided for history, and user does not have a current history."
+                )
             user_is_owner = True
             history_is_current = True

From 81e9d52b4a85aff36c1a47b72e57025d2c4c8bae Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Tue, 24 Oct 2023 11:40:35 +0200
Subject: [PATCH 02/11] Fix unbound runner variable when there is an error in the job config

Fixes

```
UnboundLocalError: local variable 'runner' referenced before assignment
  File "galaxy/jobs/handler.py", line 541, in __handle_waiting_jobs
    self.dispatcher.put(self.job_wrappers.pop(job.id))
  File "galaxy/jobs/handler.py", line 1224, in put
    runner = self.get_job_runner(job_wrapper, get_task_runner=True)
  File "galaxy/jobs/handler.py", line 1221, in get_job_runner
    return runner
```

from https://sentry.galaxyproject.org/share/issue/a0a0049feb53487f9c84ede3784624b0/
which followed `(53095435) Invalid job runner: bridges`.
---
 lib/galaxy/jobs/handler.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/galaxy/jobs/handler.py b/lib/galaxy/jobs/handler.py
index e654423f2713..92beb0b4f1a0 100644
--- a/lib/galaxy/jobs/handler.py
+++ b/lib/galaxy/jobs/handler.py
@@ -1216,12 +1216,16 @@ def get_job_runner(self, job_wrapper, get_task_runner=False):
         except KeyError:
             log.error(f"({job_wrapper.job_id}) Invalid job runner: {runner_name}")
             job_wrapper.fail(DEFAULT_JOB_RUNNER_FAILURE_MESSAGE)
+            return None
         if get_task_runner and job_wrapper.can_split() and runner.runner_name != "PulsarJobRunner":
             return self.job_runners["tasks"]
         return runner

     def put(self, job_wrapper):
         runner = self.get_job_runner(job_wrapper, get_task_runner=True)
+        if runner is None:
+            # Something went wrong, we've already failed the job wrapper
+            return
         if isinstance(job_wrapper, TaskWrapper):
             # DBTODO Refactor
             log.debug(f"({job_wrapper.job_id}) Dispatching task {job_wrapper.task_id} to task runner")

From 856930eef6d2d63bdd71ea99f3c1a9fa7b438e03 Mon Sep 17 00:00:00 2001
From: Matthias Bernt
Date: Thu, 22 Dec 2022 12:19:32 +0100
Subject: [PATCH 03/11] Fix missing grep input in sort1 tool

---
 tools/filters/sorter.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/filters/sorter.py b/tools/filters/sorter.py
index e9ccb53d9a01..50e81ddb419f 100644
--- a/tools/filters/sorter.py
+++ b/tools/filters/sorter.py
@@ -41,7 +41,8 @@ def main():
     # grep comments
     grep_comments = ["grep", "^#"]
-    exit_code = subprocess.call(grep_comments, stdout=output_fh)
+    exit_code = subprocess.call(grep_comments, stdin=input_fh, stdout=output_fh)
+    input_fh.seek(0)
     if exit_code not in [0, 1]:
         stop_err("Searching for comment lines failed")
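
A note on the sorter.py fix above: without `stdin=`, the child process inherits the parent's stdin, so `grep "^#"` with no file argument was never reading the dataset at all. A minimal standalone sketch of the repaired pattern, with hypothetical file names:

```python
import subprocess

# grep reads the dataset from stdin; the handle is rewound afterwards so a
# later pass (the actual sort) can re-read the same file object.
with open("input.tabular") as input_fh, open("output.tabular", "w") as output_fh:
    exit_code = subprocess.call(["grep", "^#"], stdin=input_fh, stdout=output_fh)
    input_fh.seek(0)
    # grep exits 0 on match and 1 on no match; only >= 2 signals a real error
    if exit_code not in [0, 1]:
        raise SystemExit("Searching for comment lines failed")
```
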
From 285db62fd38a3d41bc9e0d243c0abfe4c2196612 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 10:39:41 +0200
Subject: [PATCH 04/11] Improve invocation error reporting when step requires datasets in ok state

---
 lib/galaxy/exceptions/__init__.py      | 10 ++++++++++
 lib/galaxy/exceptions/error_codes.json |  5 +++++
 lib/galaxy/tools/__init__.py           | 11 ++++++++---
 lib/galaxy/tools/execute.py            | 20 ++++++++++++--------
 lib/galaxy/workflow/modules.py         | 10 ++++++++++
 5 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/lib/galaxy/exceptions/__init__.py b/lib/galaxy/exceptions/__init__.py
index 2448fd817960..5806ed157c21 100644
--- a/lib/galaxy/exceptions/__init__.py
+++ b/lib/galaxy/exceptions/__init__.py
@@ -151,6 +151,16 @@ class ToolInputsNotReadyException(MessageException):
     error_code = error_codes_by_name["TOOL_INPUTS_NOT_READY"]


+class ToolInputsNotOKException(MessageException):
+    def __init__(self, err_msg=None, type="info", *, src: str, id: int, **extra_error_info):
+        super().__init__(err_msg, type, **extra_error_info)
+        self.src = src
+        self.id = id
+
+    status_code = 400
+    error_code = error_codes_by_name["TOOL_INPUTS_NOT_OK"]
+
+
 class RealUserRequiredException(MessageException):
     status_code = 400
     error_code = error_codes_by_name["REAL_USER_REQUIRED"]

diff --git a/lib/galaxy/exceptions/error_codes.json b/lib/galaxy/exceptions/error_codes.json
index 7d8f521b8218..99464306acb6 100644
--- a/lib/galaxy/exceptions/error_codes.json
+++ b/lib/galaxy/exceptions/error_codes.json
@@ -94,6 +94,11 @@
         "code": 400016,
         "message": "Only real users can make this request."
     },
+    {
+        "name": "TOOL_INPUTS_NOT_OK",
+        "code": 400017,
+        "message": "Tool inputs not in required OK state."
+    },
     {
         "name": "USER_AUTHENTICATION_FAILED",
         "code": 401001,

diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
index 5a92fd960e4b..62bf8b5e217b 100644
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -34,7 +34,10 @@
     exceptions,
     model,
 )
-from galaxy.exceptions import ToolInputsNotReadyException
+from galaxy.exceptions import (
+    ToolInputsNotOKException,
+    ToolInputsNotReadyException,
+)
 from galaxy.job_execution import output_collect
 from galaxy.metadata import get_metadata_compute_strategy
 from galaxy.model.base import transaction
@@ -3200,8 +3203,10 @@ def check_dataset_state(state):

         if self.require_dataset_ok:
             if state != model.Dataset.states.OK:
-                raise ValueError(
-                    f"Tool requires inputs to be in valid state, but dataset {input_dataset} is in state '{input_dataset.state}'"
+                raise ToolInputsNotOKException(
+                    f"Tool requires inputs to be in valid state, but dataset {input_dataset} is in state '{input_dataset.state}'",
+                    src="hda",
+                    id=input_dataset.id,
                 )

         for input_dataset in input_datasets.values():

diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py
index 412c36bcefdd..c935aff13ecc 100644
--- a/lib/galaxy/tools/execute.py
+++ b/lib/galaxy/tools/execute.py
@@ -19,6 +19,7 @@
 from boltons.iterutils import remap

 from galaxy import model
+from galaxy.exceptions import ToolInputsNotOKException
 from galaxy.model.base import transaction
 from galaxy.model.dataset_collections.matching import MatchingCollections
 from galaxy.model.dataset_collections.structure import (
@@ -143,14 +144,17 @@ def execute_single_job(execution_slice, completed_job, skip=False):
     if check_inputs_ready:
         for params in execution_tracker.param_combinations:
             # This will throw an exception if the tool is not ready.
-            check_inputs_ready(
-                tool,
-                trans,
-                params,
-                history,
-                execution_cache=execution_cache,
-                collection_info=collection_info,
-            )
+            try:
+                check_inputs_ready(
+                    tool,
+                    trans,
+                    params,
+                    history,
+                    execution_cache=execution_cache,
+                    collection_info=collection_info,
+                )
+            except ToolInputsNotOKException as e:
+                execution_tracker.record_error(e)

     execution_tracker.ensure_implicit_collections_populated(history, mapping_params.param_template)
     job_count = len(execution_tracker.param_combinations)

diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py
index 36bdd374b172..805a8ba3f2f6 100644
--- a/lib/galaxy/workflow/modules.py
+++ b/lib/galaxy/workflow/modules.py
@@ -2314,6 +2314,16 @@ def callback(input, prefixed_name, **kwargs):
         if execution_tracker.execution_errors:
             # TODO: formalize into InvocationFailure ?
             message = f"Failed to create {len(execution_tracker.execution_errors)} job(s) for workflow step {step.order_index + 1}: {str(execution_tracker.execution_errors[0])}"
+            for error in execution_tracker.execution_errors:
+                # try first to raise a structured invocation error message
+                if isinstance(error, exceptions.ToolInputsNotOKException) and error.src == "hda":
+                    raise FailWorkflowEvaluation(
+                        why=InvocationFailureDatasetFailed(
+                            reason=FailureReason.dataset_failed,
+                            hda_id=error.id,
+                            workflow_step_id=step.id,
+                        )
+                    )
             raise exceptions.MessageException(message)

         return complete
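
The heart of patch 04 is that the failure now travels as data rather than as a bare `ValueError` string: the exception carries the source type and database id of the offending input, and the workflow layer turns that into a structured `dataset_failed` invocation message. A simplified sketch of the idea (class and field names mirror the patch; everything else is pared down and hypothetical):

```python
class MessageException(Exception):
    # stand-in for Galaxy's base class, whose subclasses map to HTTP status codes
    status_code = 400

class ToolInputsNotOKException(MessageException):
    def __init__(self, err_msg=None, *, src: str, id: int):
        super().__init__(err_msg)
        self.src = src  # kind of input, e.g. "hda" for a history dataset association
        self.id = id    # database id of the input that is not in "ok" state

def check_dataset_state(state, dataset_id):
    if state != "ok":
        raise ToolInputsNotOKException(
            f"Tool requires inputs to be in valid state, but dataset {dataset_id} is in state '{state}'",
            src="hda",
            id=dataset_id,
        )

try:
    check_dataset_state("error", 42)
except ToolInputsNotOKException as e:
    # the caller can branch on typed fields instead of parsing message text
    failure = {"reason": "dataset_failed", "hda_id": e.id, "workflow_step_id": 1}
```
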
From c3513016029d5b622294199b2be3eb14d084335f Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 11:53:29 +0200
Subject: [PATCH 05/11] Include failing step for unexpected failures, turn more exceptions into MessageExceptions

---
 .../InvocationMessage.vue       | 15 ++++++++++---
 .../invocationMessageModel.ts   | 16 ++++++++++----
 lib/galaxy/schema/invocation.py |  6 ++++-
 lib/galaxy/tools/__init__.py    | 22 ++++++++++---------
 lib/galaxy/workflow/run.py      | 10 ++++++++-
 5 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/client/src/components/WorkflowInvocationState/InvocationMessage.vue b/client/src/components/WorkflowInvocationState/InvocationMessage.vue
index d31a4189b0bd..840ad7ed44f3 100644
--- a/client/src/components/WorkflowInvocationState/InvocationMessage.vue
+++ b/client/src/components/WorkflowInvocationState/InvocationMessage.vue
@@ -62,7 +62,12 @@ const workflow = computed(() => {
 });

 const workflowStep = computed(() => {
-    if ("workflow_step_id" in props.invocationMessage && workflow.value) {
+    if (
+        "workflow_step_id" in props.invocationMessage &&
+        props.invocationMessage.workflow_step_id !== undefined &&
+        props.invocationMessage.workflow_step_id !== null &&
+        workflow.value
+    ) {
         return workflow.value.steps[props.invocationMessage.workflow_step_id];
     }
     return undefined;
@@ -146,10 +151,14 @@ const infoString = computed(() => {
             invocationMessage.workflow_step_id + 1
         } is a conditional step and the result of the when expression is not a boolean type.`;
     } else if (reason === "unexpected_failure") {
+        let atStep = "";
+        if (invocationMessage.workflow_step_id !== null && invocationMessage.workflow_step_id !== undefined) {
+            atStep = ` at step ${invocationMessage.workflow_step_id + 1}`;
+        }
         if (invocationMessage.details) {
-            return `${failFragment} an unexpected failure occurred: '${invocationMessage.details}'`;
+            return `${failFragment} an unexpected failure occurred${atStep}: '${invocationMessage.details}'`;
         }
-        return `${failFragment} an unexpected failure occurred.`;
+        return `${failFragment} an unexpected failure occurred${atStep}.`;
     } else if (reason === "workflow_output_not_found") {
         return `Defined workflow output '${invocationMessage.output_name}' was not found in step ${
             invocationMessage.workflow_step_id + 1

diff --git a/client/src/components/WorkflowInvocationState/invocationMessageModel.ts b/client/src/components/WorkflowInvocationState/invocationMessageModel.ts
index 7c86d43ad17c..29ab6a989847 100644
--- a/client/src/components/WorkflowInvocationState/invocationMessageModel.ts
+++ b/client/src/components/WorkflowInvocationState/invocationMessageModel.ts
@@ -77,7 +77,7 @@ export interface GenericInvocationFailureCollectionFailedEncodedDatabaseIdField
     /**
     * HistoryDatasetCollectionAssociation ID that relates to failure.
     */
-    hdca_id?: string;
+    hdca_id: string;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -92,7 +92,7 @@ export interface GenericInvocationFailureCollectionFailedInt {
     /**
     * HistoryDatasetCollectionAssociation ID that relates to failure.
     */
-    hdca_id?: number;
+    hdca_id: number;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -159,7 +159,7 @@ export interface GenericInvocationFailureJobFailedEncodedDatabaseIdField {
     /**
     * Job ID that relates to failure.
     */
-    job_id?: string;
+    job_id: string;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -174,7 +174,7 @@ export interface GenericInvocationFailureJobFailedInt {
     /**
     * Job ID that relates to failure.
     */
-    job_id?: number;
+    job_id: number;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -232,6 +232,10 @@ export interface GenericInvocationUnexpectedFailureEncodedDatabaseIdField {
     /**
     * May contains details to help troubleshoot this problem.
     */
     details?: string;
+    /**
+    * Workflow step id of step that failed.
+    */
+    workflow_step_id?: number;
 }
 export interface GenericInvocationUnexpectedFailureInt {
     reason: "unexpected_failure";
@@ -239,4 +243,8 @@
     /**
     * May contains details to help troubleshoot this problem.
     */
     details?: string;
+    /**
+    * Workflow step id of step that failed.
+    */
+    workflow_step_id?: number;
 }
if key == "workflow_step_id": - return self._obj.workflow_step.order_index + if self._obj.workflow_step: + return self._obj.workflow_step.order_index + else: + return default elif key == "dependent_workflow_step_id": if self._obj.dependent_workflow_step_id: return self._obj.dependent_workflow_step.order_index @@ -132,6 +135,7 @@ class GenericInvocationFailureWhenNotBoolean(InvocationFailureMessageBase[Databa class GenericInvocationUnexpectedFailure(InvocationMessageBase, Generic[DatabaseIdT]): reason: Literal[FailureReason.unexpected_failure] details: Optional[str] = Field(None, description="May contains details to help troubleshoot this problem.") + workflow_step_id: Optional[int] = Field(None, description="Workflow step id of step that failed.") class GenericInvocationWarning(InvocationMessageBase, Generic[DatabaseIdT]): diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 62bf8b5e217b..3d5838d96490 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2861,7 +2861,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw copy_object = input_dataset break if copy_object is None: - raise Exception("Failed to find dataset output.") + raise exceptions.MessageException("Failed to find dataset output.") out_data[key].copy_from(copy_object) def parse_environment_variables(self, tool_source): @@ -3335,7 +3335,7 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history elif how == "by_index": extracted_element = collection[int(incoming["which"]["index"])] else: - raise Exception("Invalid tool parameters.") + raise exceptions.MessageException("Invalid tool parameters.") extracted = extracted_element.element_object extracted_o = extracted.copy( copy_tags=extracted.tags, new_name=extracted_element.element_identifier, flush=False @@ -3375,7 +3375,9 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history if element_identifier not in identifiers_map: identifiers_map[element_identifier] = [] elif dupl_actions == "fail": - raise Exception(f"Duplicate collection element identifiers found for [{element_identifier}]") + raise exceptions.MessageException( + f"Duplicate collection element identifiers found for [{element_identifier}]" + ) identifiers_map[element_identifier].append(input_num) for copy, input_list in enumerate(input_lists): @@ -3567,12 +3569,12 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history except KeyError: hdca_history_name = f"{hdca.hid}: {hdca.name}" message = f"List of element identifiers does not match element identifiers in collection '{hdca_history_name}'" - raise Exception(message) + raise exceptions.MessageException(message) else: message = f"Number of lines must match number of list elements ({len(elements)}), but file has {data_lines} lines" - raise Exception(message) + raise exceptions.MessageException(message) else: - raise Exception(f"Unknown sort_type '{sorttype}'") + raise exceptions.MessageException(f"Unknown sort_type '{sorttype}'") if presort_elements is not None: sorted_elements = [x[1] for x in sorted(presort_elements, key=lambda x: x[0])] @@ -3604,7 +3606,7 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history def add_copied_value_to_new_elements(new_label, dce_object): new_label = new_label.strip() if new_label in new_elements: - raise Exception( + raise exceptions.MessageException( f"New identifier [{new_label}] appears twice in resulting collection, these values must be 
unique." ) if getattr(dce_object, "history_content_type", None) == "dataset": @@ -3617,7 +3619,7 @@ def add_copied_value_to_new_elements(new_label, dce_object): with open(new_labels_path) as fh: new_labels = fh.readlines(1024 * 1000000) if strict and len(hdca.collection.elements) != len(new_labels): - raise Exception("Relabel mapping file contains incorrect number of identifiers") + raise exceptions.MessageException("Relabel mapping file contains incorrect number of identifiers") if how_type == "tabular": # We have a tabular file, where the first column is an existing element identifier, # and the second column is the new element identifier. @@ -3629,7 +3631,7 @@ def add_copied_value_to_new_elements(new_label, dce_object): default = None if strict else element_identifier new_label = new_labels_dict.get(element_identifier, default) if not new_label: - raise Exception(f"Failed to find new label for identifier [{element_identifier}]") + raise exceptions.MessageException(f"Failed to find new label for identifier [{element_identifier}]") add_copied_value_to_new_elements(new_label, dce_object) else: # If new_labels_dataset_assoc is not a two-column tabular dataset we label with the current line of the dataset @@ -3638,7 +3640,7 @@ def add_copied_value_to_new_elements(new_label, dce_object): add_copied_value_to_new_elements(new_labels[i], dce_object) for key in new_elements.keys(): if not re.match(r"^[\w\- \.,]+$", key): - raise Exception(f"Invalid new collection identifier [{key}]") + raise exceptions.MessageException(f"Invalid new collection identifier [{key}]") self._add_datasets_to_history(history, new_elements.values()) output_collections.create_collection( next(iter(self.outputs.values())), "output", elements=new_elements, propagate_hda_tags=False diff --git a/lib/galaxy/workflow/run.py b/lib/galaxy/workflow/run.py index 6ddc56427672..d69fe79b6aa2 100644 --- a/lib/galaxy/workflow/run.py +++ b/lib/galaxy/workflow/run.py @@ -240,12 +240,20 @@ def invoke(self) -> Dict[int, Any]: except modules.DelayedWorkflowEvaluation as de: step_delayed = delayed_steps = True self.progress.mark_step_outputs_delayed(step, why=de.why) - except Exception: + except Exception as e: log.exception( "Failed to schedule %s, problem occurred on %s.", self.workflow_invocation.workflow.log_str(), step.log_str(), ) + if isinstance(e, MessageException): + # This is the highest level at which we can inject the step id + # to provide some more context to the exception. 
From ce38206990ee83bd8b9b7327cb1eb387c0441fb7 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 13:26:31 +0200
Subject: [PATCH 06/11] Extend tests for new invocation failure messages

---
 lib/galaxy_test/api/test_workflows.py | 72 +++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py
index 393546d95ec6..5b7487404532 100644
--- a/lib/galaxy_test/api/test_workflows.py
+++ b/lib/galaxy_test/api/test_workflows.py
@@ -4060,6 +4060,78 @@ def test_workflow_warning_workflow_output_not_found(self, history_id):
         assert "workflow_step_id" in message
         assert message["output_name"] == "does_not_exist"

+    @skip_without_tool("__APPLY_RULES__")
+    @skip_without_tool("job_properties")
+    def test_workflow_failed_input_not_ok(self, history_id):
+        summary = self._run_workflow(
+            """
+class: GalaxyWorkflow
+steps:
+  job_props:
+    tool_id: job_properties
+    state:
+      thebool: true
+      failbool: true
+  apply:
+    tool_id: __APPLY_RULES__
+    in:
+      input: job_props/list_output
+    state:
+      rules:
+        rules:
+          - type: add_column_metadata
+            value: identifier0
+        mapping:
+          - type: list_identifiers
+            columns: [0]
+""",
+            history_id=history_id,
+            assert_ok=False,
+            wait=True,
+        )
+        invocation_details = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True)
+        assert invocation_details["state"] == "failed"
+        assert len(invocation_details["messages"]) == 1
+        message = invocation_details["messages"][0]
+        assert message["reason"] == "dataset_failed"
+        assert message["workflow_step_id"] == 1
+
+    @skip_without_tool("__RELABEL_FROM_FILE__")
+    def test_workflow_failed_with_message_exception(self, history_id):
+        summary = self._run_workflow(
+            """
+class: GalaxyWorkflow
+inputs:
+  input_collection:
+    collection_type: list
+    type: collection
+  relabel_file:
+    type: data
+steps:
+  relabel:
+    tool_id: __RELABEL_FROM_FILE__
+    in:
+      input: input_collection
+      how|labels: relabel_file
+test_data:
+  input_collection:
+    collection_type: "list:list"
+  relabel_file:
+    value: 1.bed
+    type: File
+""",
+            history_id=history_id,
+            assert_ok=False,
+            wait=True,
+        )
+        invocation_details = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True)
+        assert invocation_details["state"] == "failed"
+        assert len(invocation_details["messages"]) == 1
+        message = invocation_details["messages"][0]
+        assert message["reason"] == "unexpected_failure"
+        assert message["workflow_step_id"] == 2
+        assert "Invalid new collection identifier" in message["details"]
+
     @skip_without_tool("identifier_multiple")
     def test_invocation_map_over(self, history_id):
         summary = self._run_workflow(
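
Both tests follow the same recipe: run a workflow that is expected to fail, then assert on the structured `messages` list rather than scraping log text. Roughly, outside the Galaxy harness, with a literal dict standing in for the real API response:

```python
# Shape of the invocation details the assertions rely on (values illustrative):
invocation_details = {
    "state": "failed",
    "messages": [
        {
            "reason": "unexpected_failure",
            "workflow_step_id": 2,  # zero-based order_index; the UI renders "step 3"
            "details": "Invalid new collection identifier [bad id]",
        }
    ],
}

assert invocation_details["state"] == "failed"
(message,) = invocation_details["messages"]  # exactly one failure message expected
assert message["reason"] == "unexpected_failure"
assert "Invalid new collection identifier" in message["details"]
```
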
From d411861d31819f190bee4f9e2a2a01a9752bb792 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Wed, 25 Oct 2023 14:39:02 +0200
Subject: [PATCH 07/11] Revert always reset external filename

8ae0b9241ea5a4aa4298855605057340d90b9639
---
 lib/galaxy/metadata/set_metadata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/galaxy/metadata/set_metadata.py b/lib/galaxy/metadata/set_metadata.py
index b512f3f905b5..ce5dea696ae4 100644
--- a/lib/galaxy/metadata/set_metadata.py
+++ b/lib/galaxy/metadata/set_metadata.py
@@ -448,7 +448,7 @@ def set_meta(new_dataset_instance, file_dict):
                 object_store_update_actions.append(
                     partial(push_if_necessary, object_store, dataset, external_filename)
                 )
-            object_store_update_actions.append(partial(reset_external_filename, dataset))
+                object_store_update_actions.append(partial(reset_external_filename, dataset))
             object_store_update_actions.append(partial(dataset.set_total_size))
             object_store_update_actions.append(partial(export_store.add_dataset, dataset))
             if dataset_instance_id not in unnamed_id_to_path:

From f620b11a94460bb6162acda2cacbd4231a69f254 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 16:16:50 +0200
Subject: [PATCH 08/11] Test link data with extended metadata

---
 .../test_upload_configuration_options.py | 126 +++++++++++-------
 1 file changed, 81 insertions(+), 45 deletions(-)

diff --git a/test/integration/test_upload_configuration_options.py b/test/integration/test_upload_configuration_options.py
index 72bd2356721f..7b910f499bb9 100644
--- a/test/integration/test_upload_configuration_options.py
+++ b/test/integration/test_upload_configuration_options.py
@@ -30,6 +30,7 @@
 from requests import Response

+from galaxy.tool_util.verify.test_data import TestDataResolver
 from galaxy.util.unittest import TestCase
 from galaxy_test.base.api_util import TEST_USER
 from galaxy_test.base.constants import (
@@ -75,17 +76,19 @@ def fetch_target(
         response = self.dataset_populator.fetch(payload, assert_ok=assert_ok, wait=wait)
         return response

-    def _write_file(self, dir_path: str, content: str, filename: str = "test") -> str:
-        """Helper for writing ftp/server dir files."""
-        self._ensure_directory(dir_path)
-        path = os.path.join(dir_path, filename)
-        with open(path, "w") as f:
-            f.write(content)
-        return path
-
-    def _ensure_directory(self, path: str) -> None:
-        if not os.path.exists(path):
-            os.makedirs(path)
+
+def _write_file(dir_path: str, content: str, filename: str = "test") -> str:
+    """Helper for writing ftp/server dir files."""
+    _ensure_directory(dir_path)
+    path = os.path.join(dir_path, filename)
+    with open(path, "w") as f:
+        f.write(content)
+    return path
+
+
+def _ensure_directory(path: str) -> None:
+    if not os.path.exists(path):
+        os.makedirs(path)


 class BaseUploadContentConfigurationTestCase(BaseUploadContentConfigurationInstance, TestCase):
@@ -190,6 +193,42 @@ def test_disallowed_for_fetch_urls(self, history_id: str) -> None:
         assert os.path.exists(path)


+def fetch_link_data_only(
+    library_populator: LibraryPopulator, dataset_populator: DatasetPopulator, test_data_resolver: TestDataResolver
+):
+    history_id, library, destination = library_populator.setup_fetch_to_folder("fetch_and_link")
+    bed_test_data_path = test_data_resolver.get_filename("4.bed")
+    assert os.path.exists(bed_test_data_path)
+    items = [{"src": "path", "path": bed_test_data_path, "info": "my cool bed", "link_data_only": True}]
+    targets = [{"destination": destination, "items": items}]
+    payload = {
+        "history_id": history_id,  # TODO: Shouldn't be needed :(
+        "targets": targets,
+    }
+    dataset_populator.fetch(payload)
+    dataset = library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+    assert dataset["file_size"] == 61, dataset
+    assert dataset["file_name"] == bed_test_data_path, dataset
+    assert os.path.exists(bed_test_data_path)
+
+
+def link_data_only(server_dir: str, library_populator: LibraryPopulator) -> None:
+    content = "hello world\n"
+    dir_path = os.path.join(server_dir, "lib1")
+    file_path = _write_file(dir_path, content)
+    library = library_populator.new_private_library("serverdirupload")
+    # upload $GALAXY_ROOT/test-data/library
+    payload, files = library_populator.create_dataset_request(
+        library, upload_option="upload_directory", server_dir="lib1", link_data=True
+    )
+    response = library_populator.raw_library_contents_create(library["id"], payload, files=files)
+    assert response.status_code == 200, response.json()
+    dataset = response.json()[0]
+    ok_dataset = library_populator.wait_on_library_dataset(library["id"], dataset["id"])
+    assert ok_dataset["file_size"] == 12, ok_dataset
+    assert ok_dataset["file_name"] == file_path, ok_dataset
+
+
 class TestAdminsCanPasteFilePaths(BaseUploadContentConfigurationTestCase):
     require_admin_user = True

@@ -506,9 +545,9 @@ def test_ftp_uploads_not_purged(self, history_id: str) -> None:

 class TestAdvancedFtpUploadFetch(BaseFtpUploadConfigurationTestCase):
     def test_fetch_ftp_directory(self, history_id: str) -> None:
         dir_path = self._get_user_ftp_path()
-        self._write_file(os.path.join(dir_path, "subdir"), "content 1", filename="1")
-        self._write_file(os.path.join(dir_path, "subdir"), "content 22", filename="2")
-        self._write_file(os.path.join(dir_path, "subdir"), "content 333", filename="3")
+        _write_file(os.path.join(dir_path, "subdir"), "content 1", filename="1")
+        _write_file(os.path.join(dir_path, "subdir"), "content 22", filename="2")
+        _write_file(os.path.join(dir_path, "subdir"), "content 333", filename="3")
         target = {
             "destination": {"type": "hdca"},
             "elements_from": "directory",
@@ -525,9 +564,9 @@ def test_fetch_ftp_directory(self, history_id: str) -> None:

     def test_fetch_nested_elements_from(self, history_id: str) -> None:
         dir_path = self._get_user_ftp_path()
-        self._write_file(os.path.join(dir_path, "subdir1"), "content 1", filename="1")
-        self._write_file(os.path.join(dir_path, "subdir1"), "content 22", filename="2")
-        self._write_file(os.path.join(dir_path, "subdir2"), "content 333", filename="3")
+        _write_file(os.path.join(dir_path, "subdir1"), "content 1", filename="1")
+        _write_file(os.path.join(dir_path, "subdir1"), "content 22", filename="2")
+        _write_file(os.path.join(dir_path, "subdir2"), "content 333", filename="3")
         elements = [
             {
                 "name": "subdirel1",
@@ -667,7 +706,7 @@ def _copy_to_user_ftp_file(self, test_data_path: str) -> None:
         shutil.copyfile(input_path, os.path.join(target_dir, test_data_path))

     def _write_user_ftp_file(self, path: str, content: str) -> str:
-        return self._write_file(os.path.join(self.ftp_dir(), TEST_USER), content, filename=path)
+        return _write_file(os.path.join(self.ftp_dir(), TEST_USER), content, filename=path)


 class TestServerDirectoryOffByDefault(BaseUploadContentConfigurationTestCase):
@@ -721,20 +760,7 @@ def test_valid_server_dir_uploads_okay(self) -> None:
         assert library_dataset["file_size"] == 12, library_dataset

     def test_link_data_only(self) -> None:
-        content = "hello world\n"
-        dir_path = os.path.join(self.server_dir(), "lib1")
-        file_path = self._write_file(dir_path, content)
-        library = self.library_populator.new_private_library("serverdirupload")
-        # upload $GALAXY_ROOT/test-data/library
-        payload, files = self.library_populator.create_dataset_request(
-            library, upload_option="upload_directory", server_dir="lib1", link_data=True
-        )
-        response = self.library_populator.raw_library_contents_create(library["id"], payload, files=files)
-        assert response.status_code == 200, response.json()
-        dataset = response.json()[0]
-        ok_dataset = self.library_populator.wait_on_library_dataset(library["id"], dataset["id"])
-        assert ok_dataset["file_size"] == 12, ok_dataset
-        assert ok_dataset["file_name"] == file_path, ok_dataset
+        link_data_only(self.server_dir(), self.library_populator)

     @classmethod
     def server_dir(cls) -> str:
@@ -813,20 +839,7 @@ def test_fetch_path_to_folder(self) -> None:
         assert os.path.exists(bed_test_data_path)

     def test_fetch_link_data_only(self) -> None:
-        history_id, library, destination = self.library_populator.setup_fetch_to_folder("fetch_and_link")
-        bed_test_data_path = self.test_data_resolver.get_filename("4.bed")
-        assert os.path.exists(bed_test_data_path)
-        items = [{"src": "path", "path": bed_test_data_path, "info": "my cool bed", "link_data_only": True}]
-        targets = [{"destination": destination, "items": items}]
-        payload = {
-            "history_id": history_id,  # TODO: Shouldn't be needed :(
-            "targets": targets,
-        }
-        self.dataset_populator.fetch(payload)
-        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
-        assert dataset["file_size"] == 61, dataset
-        assert dataset["file_name"] == bed_test_data_path, dataset
-        assert os.path.exists(bed_test_data_path)
+        fetch_link_data_only(self.library_populator, self.dataset_populator, self.test_data_resolver)

     def test_fetch_recursive_archive(self) -> None:
         history_id, library, destination = self.library_populator.setup_fetch_to_folder("recursive_archive")
@@ -948,3 +961,26 @@ def test_tar_to_directory(self, history_id: str) -> None:
             history_id=history_id,
         )
         self.dataset_populator.wait_for_job(response["jobs"][0]["id"])
+
+
+class TestLinkDataUploadExtendedMetadata(BaseUploadContentConfigurationTestCase):
+    require_admin_user = True
+
+    @classmethod
+    def handle_galaxy_config_kwds(cls, config) -> None:
+        super().handle_galaxy_config_kwds(config)
+        config["allow_path_paste"] = True
+        config["metadata_strategy"] = "extended"
+        server_dir = cls.server_dir()
+        os.makedirs(server_dir)
+        config["library_import_dir"] = server_dir
+
+    @classmethod
+    def server_dir(cls) -> str:
+        return cls.temp_config_dir("server")
+
+    def test_fetch_link_data_only(self) -> None:
+        fetch_link_data_only(self.library_populator, self.dataset_populator, self.test_data_resolver)
+
+    def test_link_data_only(self) -> None:
+        link_data_only(self.server_dir(), self.library_populator)
From ca5132d7bf5165877dfbed873a89bbe7b1a5f5c5 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Thu, 26 Oct 2023 15:43:18 +0200
Subject: [PATCH 09/11] Skip change_datatype things if we're not actually changing the extension

Side-steps a problem with FileParameter in the most efficient way possible.
This likely became a problem for one of Wolfgang's workflows after we dropped
some earlier unnecessary flushes.
---
 lib/galaxy/datatypes/registry.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/galaxy/datatypes/registry.py b/lib/galaxy/datatypes/registry.py
index 8c14bb64d8d5..ce5f9ad890e5 100644
--- a/lib/galaxy/datatypes/registry.py
+++ b/lib/galaxy/datatypes/registry.py
@@ -590,13 +590,14 @@ def get_datatype_by_extension(self, ext):
         return self.datatypes_by_extension.get(ext, None)

     def change_datatype(self, data, ext):
-        data.extension = ext
-        # call init_meta and copy metadata from itself. The datatype
-        # being converted *to* will handle any metadata copying and
-        # initialization.
-        if data.has_data():
-            data.set_size()
-            data.init_meta(copy_from=data)
+        if data.extension != ext:
+            data.extension = ext
+            # call init_meta and copy metadata from itself. The datatype
+            # being converted *to* will handle any metadata copying and
+            # initialization.
+            if data.has_data():
+                data.set_size()
+                data.init_meta(copy_from=data)
         return data

     def load_datatype_converters(self, toolbox, use_cached=False):

From d7beda0f1acf2fc923f4389fad3cc9fb049b7afe Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Thu, 26 Oct 2023 16:44:48 +0200
Subject: [PATCH 10/11] Commit if we've got outstanding MetadataFile instances

---
 lib/galaxy/model/metadata.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/model/metadata.py b/lib/galaxy/model/metadata.py
index ed757314edca..a0b53a1527ae 100644
--- a/lib/galaxy/model/metadata.py
+++ b/lib/galaxy/model/metadata.py
@@ -604,7 +604,16 @@ def wrap(self, value, session):
         if isinstance(value, int):
             return session.query(galaxy.model.MetadataFile).get(value)
         else:
-            return session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one()
+            wrapped_value = session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one_or_none()
+            if wrapped_value:
+                return wrapped_value
+            else:
+                # If we've simultaneously copied the dataset and we've changed the datatype on the
+                # copy we may not have committed the MetadataFile yet, so we need to commit the session.
+                # TODO: It would be great if we can avoid the commit in the future.
+                with transaction(session):
+                    session.commit()
+                return session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one_or_none()

     def make_copy(self, value, target_context: MetadataCollection, source_context):
         session = target_context._object_session(target_context.parent)

From d92bbb144ffcda7e17368cf43dd25c8a9a3a7dd6 Mon Sep 17 00:00:00 2001
From: Martin Cech
Date: Thu, 26 Oct 2023 20:36:48 +0200
Subject: [PATCH 11/11] unify statement format

Co-authored-by: John Davis
---
 lib/galaxy/model/metadata.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/model/metadata.py b/lib/galaxy/model/metadata.py
index c2302e4a23e8..bcc68f429ac4 100644
--- a/lib/galaxy/model/metadata.py
+++ b/lib/galaxy/model/metadata.py
@@ -605,7 +605,9 @@ def wrap(self, value, session):
         if isinstance(value, int):
             return session.get(galaxy.model.MetadataFile, value)
         else:
-            wrapped_value = session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one_or_none()
+            wrapped_value = session.execute(
+                select(galaxy.model.MetadataFile).filter_by(uuid=value)
+            ).scalar_one_or_none()
             if wrapped_value:
                 return wrapped_value
             else:
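
Patch 11 only restyles the lookup that patch 10 introduced: `session.query(...).one_or_none()` and `session.execute(select(...)).scalar_one_or_none()` are equivalent here, the latter being SQLAlchemy's 2.0-style API. A self-contained sketch with a toy model (Galaxy's real `MetadataFile` is considerably more involved):

```python
from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class MetadataFile(Base):
    __tablename__ = "metadata_file"
    id = Column(Integer, primary_key=True)
    uuid = Column(String, unique=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(MetadataFile(uuid="abc"))
    session.commit()
    # legacy Query style, as in patch 10:
    legacy = session.query(MetadataFile).filter_by(uuid="abc").one_or_none()
    # 2.0 style, as in patch 11 -- same semantics, returns None when nothing matches:
    modern = session.execute(select(MetadataFile).filter_by(uuid="abc")).scalar_one_or_none()
    assert legacy is modern
```
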