Add an experimental non-cheetah way of building command lines
The primary benefit is that the command section does not have access to the
app or any dangling reference to the database connection or any other
secrets. There are two flavors here: one uses base_command and
arguments and allows building up an (escaped) argv list; the other,
shell_command, is a shortcut for writing shell scripts and feels maybe a bit
more like writing a very simple cheetah section.

base_command:

```yml
name: base_command tool
class: GalaxyTool
version: 1.0.0
base_command: cat
arguments:
- $(inputs.input.path)
- '>'
- output.fastq
inputs:
- type: data
  name: input
outputs:
  output:
    type: data
    from_work_dir: output.fastq
    name: output
```

shell_command style:

```yml
name: shell_command tool
class: GalaxyTool
version: 1.0.0
shell_command: cat '$(inputs.input.path)' > output.fastq
inputs:
- type: data
  name: input
outputs:
  output:
    type: data
    from_work_dir: output.fastq
    name: output
```
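
In the base_command flavor, evaluated `$(...)` arguments are shell-escaped while the literal arguments (like `>`) are passed through, so redirection and pipes keep working; in the shell_command flavor the string is evaluated as-is and quoting is up to the tool author. Either way, the examples above should end up running something like `cat '/path/to/input.fastq' > output.fastq` (path hypothetical).
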
mvdbeek committed Dec 3, 2024
1 parent a648915 commit f33a66f
Showing 8 changed files with 216 additions and 25 deletions.
8 changes: 7 additions & 1 deletion lib/galaxy/jobs/__init__.py
@@ -86,6 +86,7 @@
from galaxy.tools.evaluation import (
PartialToolEvaluator,
ToolEvaluator,
UserToolEvaluator,
)
from galaxy.util import (
parse_xml_string,
@@ -1373,7 +1374,12 @@ def _load_job(self):
return job

def _get_tool_evaluator(self, job):
klass = PartialToolEvaluator if self.remote_command_line else ToolEvaluator
if self.remote_command_line:
klass = PartialToolEvaluator
elif self.tool.base_command or self.tool.shell_command:
klass = UserToolEvaluator
else:
klass = ToolEvaluator
tool_evaluator = klass(
app=self.app,
job=job,
12 changes: 12 additions & 0 deletions lib/galaxy/tool_util/parser/interface.py
@@ -220,6 +220,18 @@ def parse_request_param_translation_elem(self):
def parse_command(self):
"""Return string contianing command to run."""

def parse_shell_command(self) -> Optional[str]:
"""Return string that after input binding can be executed."""
return None

def parse_base_command(self) -> Optional[str]:
"""Return string containing script entrypoint."""
return None

def parse_arguments(self) -> Optional[str]:
"""Return list of strings to append to base_command."""
return None

def parse_expression(self):
"""Return string contianing command to run."""
return None
10 changes: 10 additions & 0 deletions lib/galaxy/tool_util/parser/yaml.py
@@ -97,6 +97,16 @@ def parse_command(self):
def parse_expression(self):
return self.root_dict.get("expression")

def parse_shell_command(self):
return self.root_dict.get("shell_command")

def parse_base_command(self):
"""Return string containing script entrypoint."""
return self.root_dict.get("base_command")

def parse_arguments(self) -> str | None:
return self.root_dict.get("arguments")

def parse_environment_variables(self):
return []

16 changes: 16 additions & 0 deletions lib/galaxy/tools/__init__.py
@@ -847,6 +847,10 @@ def __init__(
self.tool_errors = None
# Parse XML element containing configuration
self.tool_source = tool_source
self.command = None
self.base_command: Optional[str] = None
self.arguments: List[str] = []
self.shell_command: Optional[str] = None
self._is_workflow_compatible = None
self.__help = None
self.__tests: Optional[str] = None
@@ -1089,6 +1093,9 @@ def parse(self, tool_source: ToolSource, guid: Optional[str] = None, dynamic: bo
self.input_translator = None

self.parse_command(tool_source)
self.parse_shell_command(tool_source)
self.parse_base_command(tool_source)
self.parse_arguments(tool_source)
self.environment_variables = self.parse_environment_variables(tool_source)
self.tmp_directory_vars = tool_source.parse_tmp_directory_vars()

@@ -1419,6 +1426,15 @@ def parse_command(self, tool_source):
self.command = ""
self.interpreter = None

def parse_shell_command(self, tool_source: ToolSource):
self.shell_command = tool_source.parse_shell_command()

def parse_base_command(self, tool_source: ToolSource):
self.base_command = tool_source.parse_base_command()

def parse_arguments(self, tool_source: ToolSource):
self.arguments = tool_source.parse_arguments()

def parse_environment_variables(self, tool_source):
return tool_source.parse_environment_variables()

94 changes: 80 additions & 14 deletions lib/galaxy/tools/evaluation.py
@@ -33,6 +33,7 @@
MinimalToolApp,
)
from galaxy.tool_util.data import TabularToolDataTable
from galaxy.tools.expressions import do_eval
from galaxy.tools.parameters import (
visit_input_values,
wrapped_json,
@@ -129,6 +130,7 @@ class ToolEvaluator:
app: MinimalToolApp
job: model.Job
materialize_datasets: bool = True
param_dict_style = "regular"

def __init__(self, app: MinimalToolApp, tool, job, local_working_directory):
self.app = app
@@ -171,22 +173,32 @@ def set_compute_environment(self, compute_environment: ComputeEnvironment, get_s
# replace materialized objects back into tool input parameters
self._replaced_deferred_objects(inp_data, incoming, materialized_objects)

if get_special:
special = get_special()
if special:
out_data["output_file"] = special
if self.param_dict_style == "regular":
if get_special:
special = get_special()
if special:
out_data["output_file"] = special

# These can be passed on the command line if wanted as $__user_*__
incoming.update(model.User.user_template_environment(self._user))
# These can be passed on the command line if wanted as $__user_*__

# Build params, done before hook so hook can use
self.param_dict = self.build_param_dict(
incoming,
inp_data,
out_data,
output_collections=out_collections,
)
self.execute_tool_hooks(inp_data=inp_data, out_data=out_data, incoming=incoming)
incoming.update(model.User.user_template_environment(self._user))

# Build params, done before hook so hook can use
self.param_dict = self.build_param_dict(
incoming,
inp_data,
out_data,
output_collections=out_collections,
)
self.execute_tool_hooks(inp_data=inp_data, out_data=out_data, incoming=incoming)

else:
self.param_dict = self.build_param_dict(
incoming,
inp_data,
out_data,
output_collections=out_collections,
)

def execute_tool_hooks(self, inp_data, out_data, incoming):
# Certain tools require tasks to be completed prior to job execution
@@ -883,6 +895,60 @@ def build(self):
)


class UserToolEvaluator(ToolEvaluator):

param_dict_style = "json"

def _build_config_files(self):
pass

def _build_param_file(self):
pass

def _build_version_command(self):
pass

def __sanitize_param_dict(self, param_dict):
pass

def build_param_dict(self, incoming, input_datasets, output_datasets, output_collections):
"""
Build the dictionary of parameters for substituting into the command
line. We're effecively building the CWL job object here.
"""
compute_environment = self.compute_environment
job_working_directory = compute_environment.working_directory()
from galaxy.workflow.modules import to_cwl

hda_references = []

cwl_style_inputs = to_cwl(incoming, hda_references=hda_references)
return {"inputs": cwl_style_inputs, "outdir": job_working_directory}

def _build_command_line(self):
if self.tool.base_command:
base_command = self.tool.base_command
arguments = self.tool.arguments
bound_arguments = [base_command]
for argument in arguments:
if (
bound_argument := do_eval(argument, self.param_dict["inputs"], outdir=self.param_dict["outdir"])
) != argument:
# variables will be shell-escaped, but you can of course still
# write invalid things into the literal portion of the arguments.
# The upside is that we can use `>`, `|`.
# Maybe we should wrap this in `sh -c` or something like that though.
bound_argument = shlex.quote(str(bound_argument))
if bound_argument is not None:
bound_arguments.append(bound_argument)
command_line = " ".join(bound_arguments)
elif self.tool.shell_command:
command_line = do_eval(self.tool.shell_command, self.param_dict["inputs"], outdir=self.param_dict["outdir"])
else:
raise Exception("Tool must define shell_command or base_command")
self.command_line = command_line


class RemoteToolEvaluator(ToolEvaluator):
"""ToolEvaluator that skips unnecessary steps already executed during job setup."""

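As a rough illustration of the new evaluator's data flow, below is a minimal sketch of the kind of param_dict that UserToolEvaluator.build_param_dict builds for the base_command example from the commit message; the exact keys come from to_cwl() and may differ, and every path is hypothetical.

```python
# Illustrative only: a hypothetical CWL-style job object for the base_command
# example above. The keys mirror what to_cwl() emits for an HDA (including the
# "path" property added in this commit); all paths are invented.
param_dict = {
    "inputs": {
        "input": {
            "class": "File",
            "location": "step_input://0",
            "path": "/galaxy/datasets/dataset_1.dat",
            "basename": "dataset_1.dat",
        }
    },
    "outdir": "/galaxy/jobs/000/1/working",
}

# UserToolEvaluator._build_command_line evaluates each argument with
# do_eval(argument, param_dict["inputs"], outdir=param_dict["outdir"]).
# "$(inputs.input.path)" evaluates to "/galaxy/datasets/dataset_1.dat" and,
# because the evaluated value differs from the raw argument, it is
# shlex.quote()d; the literal ">" and "output.fastq" are appended untouched,
# yielding roughly:
#
#   cat '/galaxy/datasets/dataset_1.dat' > output.fastq
```
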
8 changes: 7 additions & 1 deletion lib/galaxy/tools/expressions/evaluation.py
@@ -20,7 +20,13 @@
NODE_ENGINE = os.path.join(FILE_DIRECTORY, "cwlNodeEngine.js")


def do_eval(expression: str, jobinput: "CWLObjectType", context: Optional["CWLOutputType"] = None):
def do_eval(
expression: str,
jobinput: "CWLObjectType",
outdir: Optional[str] = None,
tmpdir: Optional[str] = None,
context: Optional["CWLOutputType"] = None,
):
return _do_eval(
expression,
jobinput,
20 changes: 11 additions & 9 deletions lib/galaxy/workflow/modules.py
@@ -141,7 +141,7 @@ class ConditionalStepWhen(BooleanToolParameter):
pass


def to_cwl(value, hda_references, step):
def to_cwl(value, hda_references, step: Optional[WorkflowStep] = None):
element_identifier = None
if isinstance(value, model.HistoryDatasetCollectionAssociation):
value = value.collection
@@ -151,15 +151,16 @@ def to_cwl(value, hda_references, step):
if isinstance(value, model.HistoryDatasetAssociation):
# I think the following two checks are needed but they may
# not be needed.
if not value.dataset.in_ready_state():
why = f"dataset [{value.id}] is needed for valueFrom expression and is non-ready"
raise DelayedWorkflowEvaluation(why=why)
if not value.is_ok:
raise FailWorkflowEvaluation(
why=InvocationFailureDatasetFailed(
reason=FailureReason.dataset_failed, hda_id=value.id, workflow_step_id=step.id
if step:
if not value.dataset.in_ready_state():
why = f"dataset [{value.id}] is needed for valueFrom expression and is non-ready"
raise DelayedWorkflowEvaluation(why=why)
if not value.is_ok:
raise FailWorkflowEvaluation(
why=InvocationFailureDatasetFailed(
reason=FailureReason.dataset_failed, hda_id=value.id, workflow_step_id=step.id
)
)
)
if value.ext == "expression.json":
with open(value.get_file_name()) as f:
# OUR safe_loads won't work, will not load numbers, etc...
Expand All @@ -169,6 +170,7 @@ def to_cwl(value, hda_references, step):
properties = {
"class": "File",
"location": f"step_input://{len(hda_references)}",
"path": value.get_file_name(),
}
set_basename_and_derived_properties(
properties, value.dataset.created_from_basename or element_identifier or value.name
73 changes: 73 additions & 0 deletions lib/galaxy_test/api/test_tools.py
@@ -57,6 +57,45 @@
output1=dict(format="txt"),
),
}
TOOL_WITH_BASE_COMMAND = {
"name": "Base command tool",
"class": "GalaxyTool",
"version": "1.0.0",
"base_command": "cat",
"arguments": ["$(inputs.input.path)", ">", "output.fastq"],
"inputs": [
{
"type": "data",
"name": "input",
}
],
"outputs": {
"output": {
"type": "data",
"from_work_dir": "output.fastq",
"name": "output",
}
},
}
TOOL_WITH_SHELL_COMMAND = {
"name": "Base command tool",
"class": "GalaxyTool",
"version": "1.0.0",
"shell_command": "cat '$(inputs.input.path)' > output.fastq",
"inputs": [
{
"type": "data",
"name": "input",
}
],
"outputs": {
"output": {
"type": "data",
"from_work_dir": "output.fastq",
"name": "output",
}
},
}


class TestsTools:
@@ -1381,6 +1420,40 @@ def test_dynamic_tool_no_id(self):
output_content = self.dataset_populator.get_history_dataset_content(history_id)
assert output_content == "Hello World 2\n"

def test_dynamic_tool_base_command(self):
tool_response = self.dataset_populator.create_tool(TOOL_WITH_BASE_COMMAND)
self._assert_has_keys(tool_response, "uuid")

# Run tool.
history_id = self.dataset_populator.new_history()
dataset = self.dataset_populator.new_dataset(history_id=history_id, content="abc")
self._run(
history_id=history_id,
tool_uuid=tool_response["uuid"],
inputs={"input": {"src": "hda", "id": dataset["id"]}},
)

self.dataset_populator.wait_for_history(history_id, assert_ok=True)
output_content = self.dataset_populator.get_history_dataset_content(history_id)
assert output_content == "abc\n"

def test_dynamic_tool_shell_command(self):
tool_response = self.dataset_populator.create_tool(TOOL_WITH_SHELL_COMMAND)
self._assert_has_keys(tool_response, "uuid")

# Run tool.
history_id = self.dataset_populator.new_history()
dataset = self.dataset_populator.new_dataset(history_id=history_id, content="abc")
self._run(
history_id=history_id,
tool_uuid=tool_response["uuid"],
inputs={"input": {"src": "hda", "id": dataset["id"]}},
)

self.dataset_populator.wait_for_history(history_id, assert_ok=True)
output_content = self.dataset_populator.get_history_dataset_content(history_id)
assert output_content == "abc\n"

def test_show_dynamic_tools(self):
# Create tool.
original_list = self.dataset_populator.list_dynamic_tools()
