Skip to content

Commit

Permalink
Workflow tool state validation plumbing.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Oct 17, 2024
1 parent f2a0e52 commit cf70af5
Show file tree
Hide file tree
Showing 21 changed files with 895 additions and 2 deletions.
44 changes: 44 additions & 0 deletions doc/source/dev/tool_state_state_classes.plantuml.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 22 additions & 0 deletions doc/source/dev/tool_state_state_classes.plantuml.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,38 @@ state_representation = "job_internal"
}
note bottom: Object references of the form \n{src: "hda", id: <decoded_id>}.\n Mapping constructs expanded out.\n (Defaults are inserted?)

class TestCaseToolState {
state_representation = "test_case"
+ _to_base_model(input_models: ToolParameterBundle): Type[BaseModel]
}
note bottom: Object references of the form file name and URIs.\n Mapping constructs not allowed.\n

class WorkflowStepToolState {
state_representation = "workflow_step"
+ _to_base_model(input_models: ToolParameterBundle): Type[BaseModel]
}
note bottom: Nearly everything optional except conditional discriminators.\n

class WorkflowStepLinkedToolState {
state_representation = "workflow_step_linked"
+ _to_base_model(input_models: ToolParameterBundle): Type[BaseModel]
}
note bottom: Expect pre-process ``in`` dictionaries and bring in representation\n of links and defaults and validate them in model.\n

ToolState <|-- RequestToolState
ToolState <|-- RequestInternalToolState
ToolState <|-- RequestInternalDereferencedToolState
ToolState <|-- JobInternalToolState
ToolState <|-- TestCaseToolState
ToolState <|-- WorkflowStepToolState
ToolState <|-- WorkflowStepLinkedToolState

RequestToolState - RequestInternalToolState : decode >

RequestInternalToolState - RequestInternalDereferencedToolState : dereference >

RequestInternalDereferencedToolState o-- JobInternalToolState : expand >

WorkflowStepToolState o-- WorkflowStepLinkedToolState : preprocess_links_and_defaults >
}
@enduml
2 changes: 2 additions & 0 deletions lib/galaxy/tool_util/parameters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
FloatParameterModel,
HiddenParameterModel,
IntegerParameterModel,
is_optional,
LabelValue,
RawStateDict,
RepeatParameterModel,
Expand Down Expand Up @@ -124,6 +125,7 @@
"RepeatParameterModel",
"RawStateDict",
"ValidationFunctionT",
"is_optional",
"validate_against_model",
"validate_internal_job",
"validate_internal_landing_request",
Expand Down
12 changes: 10 additions & 2 deletions lib/galaxy/tool_util/parameters/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
from ._types import (
cast_as_type,
expand_annotation,
is_optional,
is_optional as is_python_type_optional,
list_type,
optional,
optional_if_needed,
Expand Down Expand Up @@ -152,7 +152,7 @@ def dynamic_model_information_from_py_type(
if requires_value is None:
requires_value = param_model.request_requires_value
initialize = ... if requires_value else None
py_type_is_optional = is_optional(py_type)
py_type_is_optional = is_python_type_optional(py_type)
validators = validators or {}
if not py_type_is_optional and not requires_value:
validators["not_null"] = field_validator(name)(Validators.validate_not_none)
Expand Down Expand Up @@ -1458,6 +1458,14 @@ class ToolParameterModel(RootModel):
CwlUnionParameterModel.model_rebuild()


def is_optional(tool_parameter: ToolParameterT):
    """Report whether a tool parameter model is declared optional.

    Instances of ``BaseGalaxyToolParameterModelDefinition`` answer with their own
    ``optional`` attribute; every other kind of parameter model is currently
    reported as not optional.
    """
    if not isinstance(tool_parameter, BaseGalaxyToolParameterModelDefinition):
        # refine CWL logic in CWL branch...
        return False
    return tool_parameter.optional


class ToolParameterBundle(Protocol):
"""An object having a dictionary of input models (i.e. a 'Tool')"""

Expand Down
23 changes: 23 additions & 0 deletions lib/galaxy/tool_util/workflow_state/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Abstractions for reasoning about tool state within Galaxy workflows.
Like everything else in galaxy-tool-util, this package should be independent of
Galaxy's runtime. It is meant to provide utilities for reasoning about tool state
(largely building on the abstractions in galaxy.tool_util.parameters) within the
context of workflows.
"""

from ._types import GetToolInfo
from .convert import (
ConversionValidationFailure,
convert_state_to_format2,
Format2State,
)
from .validation import validate_workflow

__all__ = (
"ConversionValidationFailure",
"convert_state_to_format2",
"GetToolInfo",
"Format2State",
"validate_workflow",
)
28 changes: 28 additions & 0 deletions lib/galaxy/tool_util/workflow_state/_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import (
Any,
Dict,
Optional,
Union,
)

from typing_extensions import (
Literal,
Protocol,
)

from galaxy.tool_util.models import ParsedTool

# Loosely typed aliases: each one is a plain ``Dict[str, Any]`` at runtime, the
# distinct names only label which workflow format / level a dictionary is
# expected to hold.
NativeWorkflowDict = Dict[str, Any]
Format2WorkflowDict = Dict[str, Any]
AnyWorkflowDict = Union[NativeWorkflowDict, Format2WorkflowDict]
# The two workflow format identifiers used within this package.
WorkflowFormat = Literal["gxformat2", "native"]
NativeStepDict = Dict[str, Any]
Format2StepDict = Dict[str, Any]
NativeToolStateDict = Dict[str, Any]
Format2StateDict = Dict[str, Any]


class GetToolInfo(Protocol):
    """An interface for fetching tool information for steps in a workflow."""

    def get_tool_info(self, tool_id: str, tool_version: Optional[str]) -> ParsedTool:
        """Return the parsed tool for ``tool_id`` at ``tool_version`` (which may be ``None``)."""
131 changes: 131 additions & 0 deletions lib/galaxy/tool_util/workflow_state/convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from typing import (
Dict,
List,
Optional,
)

from pydantic import (
BaseModel,
Field,
)

from galaxy.tool_util.models import ParsedTool
from galaxy.tool_util.parameters import ToolParameterT
from ._types import (
Format2StateDict,
GetToolInfo,
NativeStepDict,
)
from .validation_format2 import validate_step_against
from .validation_native import (
get_parsed_tool_for_native_step,
native_tool_state,
validate_native_step_against,
)

# String-to-string mapping used for the gxformat2 ``in`` section of a step; the
# converter in this module keys it by state path.
Format2InputsDictT = Dict[str, str]


class Format2State(BaseModel):
    """Result of converting a native tool step's state to gxformat2.

    ``state`` carries the converted tool state dictionary. ``inputs`` carries the
    step's connection entries and is populated through the alias ``in``, since
    ``in`` is a reserved word in Python and cannot be used as a field name.
    """

    state: Format2StateDict
    inputs: Format2InputsDictT = Field(alias="in")


class ConversionValidationFailure(Exception):
    """Signals that a native tool state could not be safely converted to gxformat2."""


def convert_state_to_format2(native_step_dict: NativeStepDict, get_tool_info: GetToolInfo) -> Format2State:
    """Resolve the tool behind a native step and convert that step's state to gxformat2.

    The parsed tool is looked up with ``get_parsed_tool_for_native_step`` and the
    conversion itself is delegated to ``convert_state_to_format2_using``.
    """
    return convert_state_to_format2_using(
        native_step_dict,
        get_parsed_tool_for_native_step(native_step_dict, get_tool_info),
    )


def convert_state_to_format2_using(native_step_dict: NativeStepDict, parsed_tool: Optional[ParsedTool]) -> Format2State:
    """Create a "clean" gxformat2 workflow tool state from a native workflow step.

    gxformat2 does not know about tool specifications so it cannot reason about the native
    tool state attribute and just copies it as is. This native state can be pretty ugly. The purpose
    of this function is to build a cleaned up state to replace the gxformat2 copied native tool_state
    with that is more readable and has stronger typing by using the tool's inputs to guide
    the conversion (the parsed_tool parameter).

    This method validates both the native tool state and the resulting gxformat2 tool state
    so that we can be more confident the conversion doesn't corrupt the workflow. If no meta
    model to validate against is supplied or if either validation fails this method throws
    ConversionValidationFailure to signal the caller to just use the native tool state as is
    instead of trying to convert it to a cleaner gxformat2 tool state - under the assumption
    it is better to have an "ugly" workflow than a corrupted one during conversion.

    :param native_step_dict: the native workflow step to convert.
    :param parsed_tool: the tool description guiding the conversion, or ``None``
        when the tool could not be resolved.
    :returns: the converted state together with its ``in`` connections.
    :raises ConversionValidationFailure: if ``parsed_tool`` is ``None`` or if either
        validation pass fails; the underlying validation error is chained as
        ``__cause__``.
    """
    if parsed_tool is None:
        raise ConversionValidationFailure("Could not resolve tool inputs")
    try:
        validate_native_step_against(native_step_dict, parsed_tool)
    except Exception as e:
        # Chain the original error so callers can still see why validation failed.
        raise ConversionValidationFailure(
            "Failed to validate native step - not going to convert a tool state that isn't understood"
        ) from e
    result = _convert_valid_state_to_format2(native_step_dict, parsed_tool)
    try:
        # NOTE(review): the dump uses field names, so the connections appear under
        # "inputs" rather than the "in" alias - confirm that is what
        # validate_step_against expects.
        validate_step_against(result.model_dump(), parsed_tool)
    except Exception as e:
        raise ConversionValidationFailure(
            "Failed to validate resulting cleaned step - not going to convert to an unvalidated tool state"
        ) from e
    return result


def _convert_valid_state_to_format2(native_step_dict: NativeStepDict, parsed_tool: ParsedTool) -> Format2State:
    """Convert a native step's tool state into a ``Format2State``.

    Two empty dictionaries (converted state and ``in`` connections) are filled in
    place by ``_convert_state_level`` while it walks the tool's inputs, and are
    then wrapped in the returned model.
    """
    native_state = native_tool_state(native_step_dict)
    converted_state: Format2StateDict = {}
    connections: Format2InputsDictT = {}
    _convert_state_level(
        native_step_dict,
        parsed_tool.inputs,
        native_state,
        converted_state,
        connections,
    )
    # "in" is a reserved word, so the aliased field has to be supplied via ** unpacking.
    model_kwargs = {"state": converted_state, "in": connections}
    return Format2State(**model_kwargs)


def _convert_state_level(
    step: NativeStepDict,
    tool_inputs: List[ToolParameterT],
    native_state: dict,
    format2_state_at_level: dict,
    format2_in: Format2InputsDictT,
    prefix: Optional[str] = None,
) -> None:
    """Convert every tool input found at one nesting level of the tool state.

    Each input is handed to ``_convert_state_at_level`` together with the shared
    output dictionaries, which are mutated in place. A missing or empty
    ``prefix`` is passed along as the empty string.
    """
    level_prefix = prefix if prefix else ""
    for parameter in tool_inputs:
        _convert_state_at_level(step, parameter, native_state, format2_state_at_level, format2_in, level_prefix)


def _convert_state_at_level(
    step: NativeStepDict,
    tool_input: ToolParameterT,
    native_state_at_level: dict,
    format2_state_at_level: dict,
    format2_in: Format2InputsDictT,
    prefix: str,
) -> None:
    """Convert a single tool input from native state into gxformat2 state.

    Integer parameters are written to ``format2_state_at_level`` as ``int``; data
    parameters that have an entry in the step's ``input_connections`` are recorded
    in ``format2_in``. All other parameter types are currently left unconverted.
    Both output dictionaries are mutated in place.

    :param step: the native step, consulted for ``input_connections``.
    :param tool_input: the parameter model (``name`` and ``parameter_type`` are read).
    :param native_state_at_level: native state dictionary for the current level.
    :param format2_state_at_level: gxformat2 state dictionary to populate.
    :param format2_in: gxformat2 ``in`` dictionary to populate, keyed by state path.
    :param prefix: state path of the enclosing level; empty for the root level.
    """
    parameter_type = tool_input.parameter_type
    parameter_name = tool_input.name
    value = native_state_at_level.get(parameter_name, None)
    # The caller normalizes a missing prefix to "", so test for emptiness rather
    # than None - otherwise root level paths would be rendered as "|name" and
    # never match a key in input_connections.
    state_path = f"{prefix}|{parameter_name}" if prefix else parameter_name
    if parameter_type == "gx_integer":
        # TODO: check for runtime input - a missing or non-numeric value makes
        # int() raise here.
        format2_state_at_level[parameter_name] = int(value)
    elif parameter_type == "gx_data":
        input_connections = step.get("input_connections", {})
        if state_path in input_connections:
            format2_in[state_path] = "placeholder"
    # TODO: other parameter types are not converted yet and are silently skipped.
21 changes: 21 additions & 0 deletions lib/galaxy/tool_util/workflow_state/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from ._types import (
AnyWorkflowDict,
GetToolInfo,
WorkflowFormat,
)
from .validation_format2 import validate_workflow_format2
from .validation_native import validate_workflow_native


def validate_workflow(workflow_dict: AnyWorkflowDict, get_tool_info: GetToolInfo):
    """Validate a workflow dictionary with the validator matching its format.

    The format is detected with ``_format``; gxformat2 workflows are passed to
    ``validate_workflow_format2`` and everything else to ``validate_workflow_native``.
    """
    validator = validate_workflow_format2 if _format(workflow_dict) == "gxformat2" else validate_workflow_native
    validator(workflow_dict, get_tool_info)


def _format(workflow_dict: AnyWorkflowDict) -> WorkflowFormat:
    """Classify a workflow dictionary as ``"native"`` or ``"gxformat2"``.

    A workflow counts as native only when its ``a_galaxy_workflow`` entry equals
    the string ``"true"``; anything else is treated as gxformat2.
    """
    is_native = workflow_dict.get("a_galaxy_workflow") == "true"
    return "native" if is_native else "gxformat2"
Loading

0 comments on commit cf70af5

Please sign in to comment.