Add an experimental non-cheetah way of building command lines
The primary benefit is that the command section does not have access to the
app or any dangling reference to the database connection or any other
secrets. There are two flavors here: one uses base_command and
arguments and allows building up an (escaped) argv list; the other,
shell_command, is a shortcut for writing shell scripts and feels maybe a bit
more like writing a very simple cheetah section.

base_command:

```yml
name: base_command tool
class: GalaxyTool
version: 1.0.0
base_command: cat
arguments:
- $(inputs.input.path)
- '>'
- output.fastq
inputs:
- type: data
  name: input
outputs:
  output:
    type: data
    from_work_dir: output.fastq
    name: output
```

shell_command style:

```yml
name: shell_command tool
class: GalaxyTool
version: 1.0.0
shell_command: cat '$(inputs.input.path)' > output.fastq
inputs:
- type: data
  name: input
outputs:
  output:
    type: data
    from_work_dir: output.fastq
    name: output
```
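
In the base_command flavor, evaluated `$(...)` arguments are shell-escaped while the literal arguments (like `>`) are passed through, so redirection and pipes keep working; in the shell_command flavor the string is evaluated as-is and quoting is up to the tool author. Either way, the examples above should end up running something like `cat '/path/to/input.fastq' > output.fastq` (path hypothetical).
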
mvdbeek committed Dec 3, 2024
1 parent a648915 commit f33a66f
Showing 8 changed files with 216 additions and 25 deletions.
8 changes: 7 additions & 1 deletion lib/galaxy/jobs/__init__.py
@@ -86,6 +86,7 @@
from galaxy.tools.evaluation import (
PartialToolEvaluator,
ToolEvaluator,
UserToolEvaluator,
)
from galaxy.util import (
parse_xml_string,
@@ -1373,7 +1374,12 @@ def _load_job(self):
return job

def _get_tool_evaluator(self, job):
klass = PartialToolEvaluator if self.remote_command_line else ToolEvaluator
if self.remote_command_line:
klass = PartialToolEvaluator
elif self.tool.base_command or self.tool.shell_command:
klass = UserToolEvaluator
else:
klass = ToolEvaluator
tool_evaluator = klass(
app=self.app,
job=job,
12 changes: 12 additions & 0 deletions lib/galaxy/tool_util/parser/interface.py
@@ -220,6 +220,18 @@ def parse_request_param_translation_elem(self):
def parse_command(self):
"""Return string contianing command to run."""

def parse_shell_command(self) -> Optional[str]:
"""Return string that after input binding can be executed."""
return None

def parse_base_command(self) -> Optional[str]:
"""Return string containing script entrypoint."""
return None

def parse_arguments(self) -> Optional[str]:
"""Return list of strings to append to base_command."""
return None

def parse_expression(self):
"""Return string contianing command to run."""
return None
10 changes: 10 additions & 0 deletions lib/galaxy/tool_util/parser/yaml.py
@@ -97,6 +97,16 @@ def parse_command(self):
def parse_expression(self):
return self.root_dict.get("expression")

def parse_shell_command(self):
return self.root_dict.get("shell_command")

def parse_base_command(self):
"""Return string containing script entrypoint."""
return self.root_dict.get("base_command")

def parse_arguments(self) -> str | None:
return self.root_dict.get("arguments")

def parse_environment_variables(self):
return []

16 changes: 16 additions & 0 deletions lib/galaxy/tools/__init__.py
@@ -847,6 +847,10 @@ def __init__(
self.tool_errors = None
# Parse XML element containing configuration
self.tool_source = tool_source
self.command = None
self.base_command: Optional[str] = None
self.arguments: List[str] = []
self.shell_command: Optional[str] = None
self._is_workflow_compatible = None
self.__help = None
self.__tests: Optional[str] = None
@@ -1089,6 +1093,9 @@ def parse(self, tool_source: ToolSource, guid: Optional[str] = None, dynamic: bo
self.input_translator = None

self.parse_command(tool_source)
self.parse_shell_command(tool_source)
self.parse_base_command(tool_source)
self.parse_arguments(tool_source)
self.environment_variables = self.parse_environment_variables(tool_source)
self.tmp_directory_vars = tool_source.parse_tmp_directory_vars()

@@ -1419,6 +1426,15 @@ def parse_command(self, tool_source):
self.command = ""
self.interpreter = None

def parse_shell_command(self, tool_source: ToolSource):
self.shell_command = tool_source.parse_shell_command()

def parse_base_command(self, tool_source: ToolSource):
self.base_command = tool_source.parse_base_command()

def parse_arguments(self, tool_source: ToolSource):
self.arguments = tool_source.parse_arguments()

def parse_environment_variables(self, tool_source):
return tool_source.parse_environment_variables()

94 changes: 80 additions & 14 deletions lib/galaxy/tools/evaluation.py
@@ -33,6 +33,7 @@
MinimalToolApp,
)
from galaxy.tool_util.data import TabularToolDataTable
from galaxy.tools.expressions import do_eval
from galaxy.tools.parameters import (
visit_input_values,
wrapped_json,
@@ -129,6 +130,7 @@ class ToolEvaluator:
app: MinimalToolApp
job: model.Job
materialize_datasets: bool = True
param_dict_style = "regular"

def __init__(self, app: MinimalToolApp, tool, job, local_working_directory):
self.app = app
@@ -171,22 +173,32 @@ def set_compute_environment(self, compute_environment: ComputeEnvironment, get_s
# replace materialized objects back into tool input parameters
self._replaced_deferred_objects(inp_data, incoming, materialized_objects)

if get_special:
special = get_special()
if special:
out_data["output_file"] = special
if self.param_dict_style == "regular":
if get_special:
special = get_special()
if special:
out_data["output_file"] = special

# These can be passed on the command line if wanted as $__user_*__
incoming.update(model.User.user_template_environment(self._user))
# These can be passed on the command line if wanted as $__user_*__

# Build params, done before hook so hook can use
self.param_dict = self.build_param_dict(
incoming,
inp_data,
out_data,
output_collections=out_collections,
)
self.execute_tool_hooks(inp_data=inp_data, out_data=out_data, incoming=incoming)
incoming.update(model.User.user_template_environment(self._user))

# Build params, done before hook so hook can use
self.param_dict = self.build_param_dict(
incoming,
inp_data,
out_data,
output_collections=out_collections,
)
self.execute_tool_hooks(inp_data=inp_data, out_data=out_data, incoming=incoming)

else:
self.param_dict = self.build_param_dict(
incoming,
inp_data,
out_data,
output_collections=out_collections,
)

def execute_tool_hooks(self, inp_data, out_data, incoming):
# Certain tools require tasks to be completed prior to job execution
@@ -883,6 +895,60 @@ def build(self):
)


class UserToolEvaluator(ToolEvaluator):

param_dict_style = "json"

def _build_config_files(self):
pass

def _build_param_file(self):
pass

def _build_version_command(self):
pass

def __sanitize_param_dict(self, param_dict):
pass

def build_param_dict(self, incoming, input_datasets, output_datasets, output_collections):
"""
Build the dictionary of parameters for substituting into the command
line. We're effecively building the CWL job object here.
"""
compute_environment = self.compute_environment
job_working_directory = compute_environment.working_directory()
from galaxy.workflow.modules import to_cwl

hda_references = []

cwl_style_inputs = to_cwl(incoming, hda_references=hda_references)
return {"inputs": cwl_style_inputs, "outdir": job_working_directory}

def _build_command_line(self):
if self.tool.base_command:
base_command = self.tool.base_command
arguments = self.tool.arguments
bound_arguments = [base_command]
for argument in arguments:
if (
bound_argument := do_eval(argument, self.param_dict["inputs"], outdir=self.param_dict["outdir"])
) != argument:
# variables will be shell-escaped, but you can of course still
# write invalid things into the literal portion of the arguments.
# The upside is that we can use `>`, `|`.
# Maybe we should wrap this in `sh -c` or something like that though.
bound_argument = shlex.quote(str(bound_argument))
if bound_argument is not None:
bound_arguments.append(bound_argument)
command_line = " ".join(bound_arguments)
elif self.tool.shell_command:
command_line = do_eval(self.tool.shell_command, self.param_dict["inputs"], outdir=self.param_dict["outdir"])
else:
raise Exception("Tool must define shell_command or base_command")
self.command_line = command_line


class RemoteToolEvaluator(ToolEvaluator):
"""ToolEvaluator that skips unnecessary steps already executed during job setup."""

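As a rough illustration of the new evaluator's data flow, below is a minimal sketch of the kind of param_dict that UserToolEvaluator.build_param_dict builds for the base_command example from the commit message; the exact keys come from to_cwl() and may differ, and every path is hypothetical.

```python
# Illustrative only: a hypothetical CWL-style job object for the base_command
# example above. The keys mirror what to_cwl() emits for an HDA (including the
# "path" property added in this commit); all paths are invented.
param_dict = {
    "inputs": {
        "input": {
            "class": "File",
            "location": "step_input://0",
            "path": "/galaxy/datasets/dataset_1.dat",
            "basename": "dataset_1.dat",
        }
    },
    "outdir": "/galaxy/jobs/000/1/working",
}

# UserToolEvaluator._build_command_line evaluates each argument with
# do_eval(argument, param_dict["inputs"], outdir=param_dict["outdir"]).
# "$(inputs.input.path)" evaluates to "/galaxy/datasets/dataset_1.dat" and,
# because the evaluated value differs from the raw argument, it is
# shlex.quote()d; the literal ">" and "output.fastq" are appended untouched,
# yielding roughly:
#
#   cat '/galaxy/datasets/dataset_1.dat' > output.fastq
```
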
8 changes: 7 additions & 1 deletion lib/galaxy/tools/expressions/evaluation.py
@@ -20,7 +20,13 @@
NODE_ENGINE = os.path.join(FILE_DIRECTORY, "cwlNodeEngine.js")


def do_eval(expression: str, jobinput: "CWLObjectType", context: Optional["CWLOutputType"] = None):
def do_eval(
expression: str,
jobinput: "CWLObjectType",
outdir: Optional[str] = None,
tmpdir: Optional[str] = None,
context: Optional["CWLOutputType"] = None,
):
return _do_eval(
expression,
jobinput,
20 changes: 11 additions & 9 deletions lib/galaxy/workflow/modules.py
@@ -141,7 +141,7 @@ class ConditionalStepWhen(BooleanToolParameter):
pass


def to_cwl(value, hda_references, step):
def to_cwl(value, hda_references, step: Optional[WorkflowStep] = None):
element_identifier = None
if isinstance(value, model.HistoryDatasetCollectionAssociation):
value = value.collection
@@ -151,15 +151,16 @@ def to_cwl(value, hda_references, step):
if isinstance(value, model.HistoryDatasetAssociation):
# I think the following two checks are needed but they may
# not be needed.
if not value.dataset.in_ready_state():
why = f"dataset [{value.id}] is needed for valueFrom expression and is non-ready"
raise DelayedWorkflowEvaluation(why=why)
if not value.is_ok:
raise FailWorkflowEvaluation(
why=InvocationFailureDatasetFailed(
reason=FailureReason.dataset_failed, hda_id=value.id, workflow_step_id=step.id
if step:
if not value.dataset.in_ready_state():
why = f"dataset [{value.id}] is needed for valueFrom expression and is non-ready"
raise DelayedWorkflowEvaluation(why=why)
if not value.is_ok:
raise FailWorkflowEvaluation(
why=InvocationFailureDatasetFailed(
reason=FailureReason.dataset_failed, hda_id=value.id, workflow_step_id=step.id
)
)
)
if value.ext == "expression.json":
with open(value.get_file_name()) as f:
# OUR safe_loads won't work, will not load numbers, etc...
Expand All @@ -169,6 +170,7 @@ def to_cwl(value, hda_references, step):
properties = {
"class": "File",
"location": f"step_input://{len(hda_references)}",
"path": value.get_file_name(),
}
set_basename_and_derived_properties(
properties, value.dataset.created_from_basename or element_identifier or value.name
73 changes: 73 additions & 0 deletions lib/galaxy_test/api/test_tools.py
@@ -57,6 +57,45 @@
output1=dict(format="txt"),
),
}
TOOL_WITH_BASE_COMMAND = {
"name": "Base command tool",
"class": "GalaxyTool",
"version": "1.0.0",
"base_command": "cat",
"arguments": ["$(inputs.input.path)", ">", "output.fastq"],
"inputs": [
{
"type": "data",
"name": "input",
}
],
"outputs": {
"output": {
"type": "data",
"from_work_dir": "output.fastq",
"name": "output",
}
},
}
TOOL_WITH_SHELL_COMMAND = {
"name": "Base command tool",
"class": "GalaxyTool",
"version": "1.0.0",
"shell_command": "cat '$(inputs.input.path)' > output.fastq",
"inputs": [
{
"type": "data",
"name": "input",
}
],
"outputs": {
"output": {
"type": "data",
"from_work_dir": "output.fastq",
"name": "output",
}
},
}


class TestsTools:
@@ -1381,6 +1420,40 @@ def test_dynamic_tool_no_id(self):
output_content = self.dataset_populator.get_history_dataset_content(history_id)
assert output_content == "Hello World 2\n"

def test_dynamic_tool_base_command(self):
tool_response = self.dataset_populator.create_tool(TOOL_WITH_BASE_COMMAND)
self._assert_has_keys(tool_response, "uuid")

# Run tool.
history_id = self.dataset_populator.new_history()
dataset = self.dataset_populator.new_dataset(history_id=history_id, content="abc")
self._run(
history_id=history_id,
tool_uuid=tool_response["uuid"],
inputs={"input": {"src": "hda", "id": dataset["id"]}},
)

self.dataset_populator.wait_for_history(history_id, assert_ok=True)
output_content = self.dataset_populator.get_history_dataset_content(history_id)
assert output_content == "abc\n"

def test_dynamic_tool_shell_command(self):
tool_response = self.dataset_populator.create_tool(TOOL_WITH_SHELL_COMMAND)
self._assert_has_keys(tool_response, "uuid")

# Run tool.
history_id = self.dataset_populator.new_history()
dataset = self.dataset_populator.new_dataset(history_id=history_id, content="abc")
self._run(
history_id=history_id,
tool_uuid=tool_response["uuid"],
inputs={"input": {"src": "hda", "id": dataset["id"]}},
)

self.dataset_populator.wait_for_history(history_id, assert_ok=True)
output_content = self.dataset_populator.get_history_dataset_content(history_id)
assert output_content == "abc\n"

def test_show_dynamic_tools(self):
# Create tool.
original_list = self.dataset_populator.list_dynamic_tools()
