diff --git a/doc/source/dev/tool_state_state_classes.plantuml.svg b/doc/source/dev/tool_state_state_classes.plantuml.svg
index de86dbcd3787..28c7da1c9092 100644
--- a/doc/source/dev/tool_state_state_classes.plantuml.svg
+++ b/doc/source/dev/tool_state_state_classes.plantuml.svg
@@ -1,21 +1,17 @@
-
\ No newline at end of file
diff --git a/doc/source/dev/tool_state_task.plantuml.txt b/doc/source/dev/tool_state_task.plantuml.txt
new file mode 100644
index 000000000000..64286f2f5ed5
--- /dev/null
+++ b/doc/source/dev/tool_state_task.plantuml.txt
@@ -0,0 +1,25 @@
+@startuml
+'!include plantuml_options.txt
+queue TaskQueue as queue
+participant "queue_jobs Task" as task
+participant "JobSubmitter.queue_jobs" as queue_jobs
+participant "JobSubmitter.dereference" as dereference
+participant "materialize Task" as materialize_task
+participant "Tool.handle_input_async" as handle_input
+participant "expand_meta_parameters_async" as expand
+participant "ToolAction.execute" as tool_action
+
+queue -> task :
+task -> queue_jobs : QueueJobs pydantic model
+queue_jobs -> dereference : RequestInternalToolState
+dereference -> queue_jobs : RequestInternalDereferencedToolState
+queue_jobs -> materialize_task : HDA (with state deferred)
+materialize_task -> queue_jobs : return when state is okay
+queue_jobs -> handle_input : RequestInternalDereferencedToolState
+handle_input -> expand : RequestInternalDereferencedToolState
+expand -> handle_input : JobInternalToolState[]
+loop over expanded job tool states
+ handle_input -> tool_action :
+ tool_action -> handle_input : A Galaxy Job
+end
+@enduml
diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py
index 08ffca33fbb6..9f8a6a34ec2f 100644
--- a/lib/galaxy/managers/hdas.py
+++ b/lib/galaxy/managers/hdas.py
@@ -173,7 +173,7 @@ def create(
session.commit()
return hda
- def materialize(self, request: MaterializeDatasetInstanceTaskRequest) -> None:
+ def materialize(self, request: MaterializeDatasetInstanceTaskRequest, in_place: bool = False) -> None:
request_user: RequestUser = request.user
materializer = materializer_factory(
True, # attached...
@@ -187,8 +187,9 @@ def materialize(self, request: MaterializeDatasetInstanceTaskRequest) -> None:
else:
dataset_instance = self.ldda_manager.get_accessible(request.content, user)
history = self.app.history_manager.by_id(request.history_id)
- new_hda = materializer.ensure_materialized(dataset_instance, target_history=history)
- history.add_dataset(new_hda, set_hid=True)
+ new_hda = materializer.ensure_materialized(dataset_instance, target_history=history, in_place=in_place)
+ if not in_place:
+ history.add_dataset(new_hda, set_hid=True)
session = self.session()
with transaction(session):
session.commit()
diff --git a/lib/galaxy/managers/jobs.py b/lib/galaxy/managers/jobs.py
index 893a3bcaf1ac..78213d4f3526 100644
--- a/lib/galaxy/managers/jobs.py
+++ b/lib/galaxy/managers/jobs.py
@@ -1,5 +1,6 @@
import json
import logging
+from dataclasses import dataclass
from datetime import (
date,
datetime,
@@ -10,6 +11,7 @@
Dict,
List,
Optional,
+ Tuple,
Union,
)
@@ -65,6 +67,7 @@
YIELD_PER_ROWS,
)
from galaxy.model.base import transaction
+from galaxy.model.dereference import dereference_to_model
from galaxy.model.index_filter_util import (
raw_text_column_filter,
text_column_filter,
@@ -74,12 +77,22 @@
JobIndexQueryPayload,
JobIndexSortByEnum,
)
-from galaxy.schema.tasks import QueueJobs
+from galaxy.schema.tasks import (
+ MaterializeDatasetInstanceTaskRequest,
+ QueueJobs,
+)
from galaxy.security.idencoding import IdEncodingHelper
from galaxy.structured_app import (
MinimalManagerApp,
StructuredApp,
)
+from galaxy.tool_util.parameters import (
+ DataRequestInternalHda,
+ DataRequestUri,
+ dereference,
+ RequestInternalDereferencedToolState,
+ RequestInternalToolState,
+)
from galaxy.tools import Tool
from galaxy.tools._types import (
ToolStateDumpedToJsonInternalT,
@@ -1170,35 +1183,75 @@ def get_job(session, *where_clauses):
return session.scalars(stmt).first()
+@dataclass
+class DereferencedDatasetPair:
+ hda: model.HistoryDatasetAssociation
+ request: DataRequestUri
+
+
class JobSubmitter:
def __init__(
self,
history_manager: HistoryManager,
user_manager: UserManager,
+ hda_manager: HDAManager,
app: MinimalManagerApp,
):
self.history_manager = history_manager
self.user_manager = user_manager
+ self.hda_manager = hda_manager
self.app = app
+ def materialize_request_for(
+ self, trans: WorkRequestContext, hda: model.HistoryDatasetAssociation
+ ) -> MaterializeDatasetInstanceTaskRequest:
+ return MaterializeDatasetInstanceTaskRequest(
+ user=trans.async_request_user,
+ history_id=trans.history.id,
+ source="hda",
+ content=hda.id,
+ )
+
+ def dereference(
+ self, trans: WorkRequestContext, tool: Tool, request: QueueJobs, tool_request: ToolRequest
+ ) -> Tuple[RequestInternalDereferencedToolState, List[DereferencedDatasetPair]]:
+ new_hdas: List[DereferencedDatasetPair] = []
+
+ def dereference_callback(data_request: DataRequestUri) -> DataRequestInternalHda:
+ # a deferred dataset corresponding to request
+ hda = dereference_to_model(trans.sa_session, trans.user, trans.history, data_request)
+ new_hdas.append(DereferencedDatasetPair(hda, data_request))
+ permissions = trans.app.security_agent.history_get_default_permissions(trans.history)
+ trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions, new=True, flush=False)
+ with transaction(trans.sa_session):
+ trans.sa_session.commit()
+ return DataRequestInternalHda(id=hda.id)
+
+ tool_state = RequestInternalToolState(tool_request.request)
+ return dereference(tool_state, tool, dereference_callback), new_hdas
+
def queue_jobs(self, tool: Tool, request: QueueJobs) -> None:
- user = self.user_manager.by_id(request.user.user_id)
+ tool_request: ToolRequest = self._tool_request(request.tool_request_id)
sa_session = self.app.model.context
- tool_request: ToolRequest = cast(ToolRequest, sa_session.query(ToolRequest).get(request.tool_request_id))
- if tool_request is None:
- raise Exception(f"Problem fetching request with ID {request.tool_request_id}")
try:
- target_history = tool_request.history
+ request_context = self._context(tool_request, request)
+ target_history = request_context.history
use_cached_jobs = request.use_cached_jobs
rerun_remap_job_id = request.rerun_remap_job_id
- trans = WorkRequestContext(
- self.app,
- user,
- history=target_history,
- )
+ tool_state: RequestInternalDereferencedToolState
+ new_hdas: List[DereferencedDatasetPair]
+ tool_state, new_hdas = self.dereference(request_context, tool, request, tool_request)
+ to_materialize_list: List[DereferencedDatasetPair] = [p for p in new_hdas if not p.request.deferred]
+ for to_materialize in to_materialize_list:
+ materialize_request = self.materialize_request_for(request_context, to_materialize.hda)
+ # API dataset materialization is immutable and produces new datasets
+ # here we just created the datasets - lets just materialize them in place
+ # and avoid extra and confusing input copies
+ self.hda_manager.materialize(materialize_request, in_place=True)
tool.handle_input_async(
- trans,
+ request_context,
tool_request,
+ tool_state,
history=target_history,
use_cached_job=use_cached_jobs,
rerun_remap_job_id=rerun_remap_job_id,
@@ -1208,8 +1261,26 @@ def queue_jobs(self, tool: Tool, request: QueueJobs) -> None:
with transaction(sa_session):
sa_session.commit()
except Exception as e:
+ log.exception("Problem here....")
tool_request.state = ToolRequest.states.FAILED
tool_request.state_message = str(e)
sa_session.add(tool_request)
with transaction(sa_session):
sa_session.commit()
+
+ def _context(self, tool_request: ToolRequest, request: QueueJobs) -> WorkRequestContext:
+ user = self.user_manager.by_id(request.user.user_id)
+ target_history = tool_request.history
+ trans = WorkRequestContext(
+ self.app,
+ user,
+ history=target_history,
+ )
+ return trans
+
+ def _tool_request(self, tool_request_id: int) -> ToolRequest:
+ sa_session = self.app.model.context
+ tool_request: ToolRequest = cast(ToolRequest, sa_session.query(ToolRequest).get(tool_request_id))
+ if tool_request is None:
+ raise Exception(f"Problem fetching request with ID {tool_request_id}")
+ return tool_request
diff --git a/lib/galaxy/model/deferred.py b/lib/galaxy/model/deferred.py
index a241d1418c1c..042b6879cd2a 100644
--- a/lib/galaxy/model/deferred.py
+++ b/lib/galaxy/model/deferred.py
@@ -95,6 +95,7 @@ def ensure_materialized(
self,
dataset_instance: Union[HistoryDatasetAssociation, LibraryDatasetDatasetAssociation],
target_history: Optional[History] = None,
+ in_place: bool = False,
) -> HistoryDatasetAssociation:
"""Create a new detached dataset instance from the supplied instance.
@@ -148,10 +149,16 @@ def ensure_materialized(
history = dataset_instance.history
except DetachedInstanceError:
history = None
- materialized_dataset_instance = HistoryDatasetAssociation(
- create_dataset=False, # is the default but lets make this really clear...
- history=history,
- )
+
+ materialized_dataset_instance: HistoryDatasetAssociation
+ if not in_place:
+ materialized_dataset_instance = HistoryDatasetAssociation(
+ create_dataset=False, # is the default but lets make this really clear...
+ history=history,
+ )
+ else:
+ assert isinstance(dataset_instance, HistoryDatasetAssociation)
+ materialized_dataset_instance = cast(HistoryDatasetAssociation, dataset_instance)
if attached:
sa_session = self._sa_session
if sa_session is None:
diff --git a/lib/galaxy/model/dereference.py b/lib/galaxy/model/dereference.py
new file mode 100644
index 000000000000..dd960e183c2b
--- /dev/null
+++ b/lib/galaxy/model/dereference.py
@@ -0,0 +1,30 @@
+import os.path
+
+from galaxy.model import (
+ DatasetSource,
+ HistoryDatasetAssociation,
+)
+from galaxy.tool_util.parameters import DataRequestUri
+
+
+def dereference_to_model(sa_session, user, history, data_request_uri: DataRequestUri) -> HistoryDatasetAssociation:
+ # based on code from upload_common
+ name = os.path.basename(data_request_uri.url)
+ dbkey = "?"
+ hda = HistoryDatasetAssociation(
+ name=name,
+ extension=data_request_uri.ext,
+ dbkey=dbkey, # TODO
+ history=history,
+ create_dataset=True,
+ sa_session=sa_session,
+ )
+ hda.state = hda.states.DEFERRED
+ dataset_source = DatasetSource()
+ dataset_source.source_uri = data_request_uri.url
+ hda.dataset.sources = [dataset_source]
+
+ sa_session.add(hda)
+ sa_session.add(dataset_source)
+ history.add_dataset(hda, genome_build=dbkey, quota=False)
+ return hda
diff --git a/lib/galaxy/schema/tasks.py b/lib/galaxy/schema/tasks.py
index ad81ff1b7324..31255d968268 100644
--- a/lib/galaxy/schema/tasks.py
+++ b/lib/galaxy/schema/tasks.py
@@ -104,8 +104,8 @@ class MaterializeDatasetInstanceTaskRequest(Model):
title="Content",
description=(
"Depending on the `source` it can be:\n"
- "- The encoded id of the source library dataset\n"
- "- The encoded id of the HDA\n"
+ "- The decoded id of the source library dataset\n"
+ "- The decoded id of the HDA\n"
),
)
diff --git a/lib/galaxy/tool_util/parameters/__init__.py b/lib/galaxy/tool_util/parameters/__init__.py
index ac0caf72e3a5..d96ab07375cd 100644
--- a/lib/galaxy/tool_util/parameters/__init__.py
+++ b/lib/galaxy/tool_util/parameters/__init__.py
@@ -1,6 +1,7 @@
from .case import test_case_state
from .convert import (
decode,
+ dereference,
encode,
encode_test,
)
@@ -30,6 +31,9 @@
DataCollectionRequest,
DataParameterModel,
DataRequest,
+ DataRequestHda,
+ DataRequestInternalHda,
+ DataRequestUri,
FloatParameterModel,
HiddenParameterModel,
IntegerParameterModel,
@@ -45,6 +49,7 @@
validate_against_model,
validate_internal_job,
validate_internal_request,
+ validate_internal_request_dereferenced,
validate_request,
validate_test_case,
validate_workflow_step,
@@ -52,6 +57,7 @@
)
from .state import (
JobInternalToolState,
+ RequestInternalDereferencedToolState,
RequestInternalToolState,
RequestToolState,
TestCaseToolState,
@@ -79,6 +85,9 @@
"ToolParameterBundle",
"ToolParameterBundleModel",
"DataRequest",
+ "DataRequestInternalHda",
+ "DataRequestHda",
+ "DataRequestUri",
"DataCollectionRequest",
"ToolParameterModel",
"IntegerParameterModel",
@@ -106,6 +115,7 @@
"validate_against_model",
"validate_internal_job",
"validate_internal_request",
+ "validate_internal_request_dereferenced",
"validate_request",
"validate_test_case",
"validate_workflow_step",
@@ -118,6 +128,7 @@
"test_case_state",
"RequestToolState",
"RequestInternalToolState",
+ "RequestInternalDereferencedToolState",
"flat_state_path",
"keys_starting_with",
"visit_input_values",
@@ -126,6 +137,7 @@
"decode",
"encode",
"encode_test",
+ "dereference",
"WorkflowStepToolState",
"WorkflowStepLinkedToolState",
)
diff --git a/lib/galaxy/tool_util/parameters/convert.py b/lib/galaxy/tool_util/parameters/convert.py
index 77423034d82e..0a20fd0e5f0b 100644
--- a/lib/galaxy/tool_util/parameters/convert.py
+++ b/lib/galaxy/tool_util/parameters/convert.py
@@ -16,12 +16,15 @@
from .models import (
DataCollectionRequest,
DataParameterModel,
- DataRequest,
+ DataRequestHda,
+ DataRequestInternalHda,
+ DataRequestUri,
SelectParameterModel,
ToolParameterBundle,
ToolParameterT,
)
from .state import (
+ RequestInternalDereferencedToolState,
RequestInternalToolState,
RequestToolState,
TestCaseToolState,
@@ -42,10 +45,12 @@ def decode(
external_state.validate(input_models)
def decode_src_dict(src_dict: dict):
- assert "id" in src_dict
- decoded_dict = src_dict.copy()
- decoded_dict["id"] = decode_id(src_dict["id"])
- return decoded_dict
+ if "id" in src_dict:
+ decoded_dict = src_dict.copy()
+ decoded_dict["id"] = decode_id(src_dict["id"])
+ return decoded_dict
+ else:
+ return src_dict
def decode_callback(parameter: ToolParameterT, value: Any):
if parameter.parameter_type == "gx_data":
@@ -79,10 +84,12 @@ def encode(
"""Prepare an external representation of tool state (request) for storing in the database (request_internal)."""
def encode_src_dict(src_dict: dict):
- assert "id" in src_dict
- encoded_dict = src_dict.copy()
- encoded_dict["id"] = encode_id(src_dict["id"])
- return encoded_dict
+ if "id" in src_dict:
+ encoded_dict = src_dict.copy()
+ encoded_dict["id"] = encode_id(src_dict["id"])
+ return encoded_dict
+ else:
+ return src_dict
def encode_callback(parameter: ToolParameterT, value: Any):
if parameter.parameter_type == "gx_data":
@@ -109,9 +116,47 @@ def encode_callback(parameter: ToolParameterT, value: Any):
return request_state
+DereferenceCallable = Callable[[DataRequestUri], DataRequestInternalHda]
+
+
+def dereference(
+ internal_state: RequestInternalToolState, input_models: ToolParameterBundle, dereference: DereferenceCallable
+) -> RequestInternalDereferencedToolState:
+
+ def derefrence_dict(src_dict: dict):
+ src = src_dict.get("src")
+ if src == "url":
+ data_request_uri: DataRequestUri = DataRequestUri.model_validate(src_dict)
+ data_request_hda: DataRequestInternalHda = dereference(data_request_uri)
+ return data_request_hda.model_dump()
+ else:
+ return src_dict
+
+ def dereference_callback(parameter: ToolParameterT, value: Any):
+ if parameter.parameter_type == "gx_data":
+ data_parameter = cast(DataParameterModel, parameter)
+ if data_parameter.multiple:
+ assert isinstance(value, list), str(value)
+ return list(map(derefrence_dict, value))
+ else:
+ assert isinstance(value, dict), str(value)
+ return derefrence_dict(value)
+ else:
+ return VISITOR_NO_REPLACEMENT
+
+ request_state_dict = visit_input_values(
+ input_models,
+ internal_state,
+ dereference_callback,
+ )
+ request_state = RequestInternalDereferencedToolState(request_state_dict)
+ request_state.validate(input_models)
+ return request_state
+
+
# interfaces for adapting test data dictionaries to tool request dictionaries
# e.g. {class: File, path: foo.bed} => {src: hda, id: ab1235cdfea3}
-AdaptDatasets = Callable[[JsonTestDatasetDefDict], DataRequest]
+AdaptDatasets = Callable[[JsonTestDatasetDefDict], DataRequestHda]
AdaptCollections = Callable[[JsonTestCollectionDefDict], DataCollectionRequest]
diff --git a/lib/galaxy/tool_util/parameters/models.py b/lib/galaxy/tool_util/parameters/models.py
index d8f4e3816cd7..7b8b4b9ffcef 100644
--- a/lib/galaxy/tool_util/parameters/models.py
+++ b/lib/galaxy/tool_util/parameters/models.py
@@ -61,7 +61,13 @@
# + request_internal: This is a pydantic model to validate what Galaxy expects to find in the database,
# in particular dataset and collection references should be decoded integers.
StateRepresentationT = Literal[
- "request", "request_internal", "job_internal", "test_case_xml", "workflow_step", "workflow_step_linked"
+ "request",
+ "request_internal",
+ "request_internal_dereferenced",
+ "job_internal",
+ "test_case_xml",
+ "workflow_step",
+ "workflow_step_linked",
]
@@ -239,40 +245,96 @@ def request_requires_value(self) -> bool:
TestCaseDataSrcT = Literal["File"]
-class DataRequest(StrictModel):
- src: DataSrcT
+class DataRequestHda(StrictModel):
+ src: Literal["hda"] = "hda"
id: StrictStr
-class BatchDataInstance(StrictModel):
- src: MultiDataSrcT
+class DataRequestLdda(StrictModel):
+ src: Literal["ldda"] = "ldda"
+ id: StrictStr
+
+
+class DataRequestHdca(StrictModel):
+ src: Literal["hdca"] = "hdca"
id: StrictStr
-class MultiDataInstance(StrictModel):
+class DataRequestUri(StrictModel):
+ # calling it url instead of uri to match data fetch schema...
+ src: Literal["url"] = "url"
+ url: StrictStr
+ ext: StrictStr
+ dbkey: StrictStr = "?"
+ deferred: StrictBool = False
+
+
+DataRequest: Type = cast(
+ Type, Annotated[union_type([DataRequestHda, DataRequestLdda, DataRequestUri]), Field(discriminator="src")]
+)
+
+
+class BatchDataInstance(StrictModel):
src: MultiDataSrcT
id: StrictStr
-MultiDataRequest: Type = union_type([MultiDataInstance, List[MultiDataInstance]])
+MultiDataInstance: Type = cast(
+ Type,
+ Annotated[
+ union_type([DataRequestHda, DataRequestLdda, DataRequestHdca, DataRequestUri]), Field(discriminator="src")
+ ],
+)
+MultiDataRequest: Type = cast(Type, union_type([MultiDataInstance, list_type(MultiDataInstance)]))
-class DataRequestInternal(StrictModel):
- src: DataSrcT
+class DataRequestInternalHda(StrictModel):
+ src: Literal["hda"] = "hda"
id: StrictInt
-class BatchDataInstanceInternal(StrictModel):
- src: MultiDataSrcT
+class DataRequestInternalLdda(StrictModel):
+ src: Literal["ldda"] = "ldda"
+ id: StrictInt
+
+
+class DataRequestInternalHdca(StrictModel):
+ src: Literal["hdca"] = "hdca"
id: StrictInt
-class MultiDataInstanceInternal(StrictModel):
+DataRequestInternal: Type = cast(
+ Type, Annotated[Union[DataRequestInternalHda, DataRequestInternalLdda, DataRequestUri], Field(discriminator="src")]
+)
+DataRequestInternalDereferenced: Type = cast(
+ Type, Annotated[Union[DataRequestInternalHda, DataRequestInternalLdda], Field(discriminator="src")]
+)
+DataJobInternal = DataRequestInternalDereferenced
+
+
+class BatchDataInstanceInternal(StrictModel):
src: MultiDataSrcT
id: StrictInt
-MultiDataRequestInternal: Type = union_type([MultiDataInstanceInternal, List[MultiDataInstanceInternal]])
+MultiDataInstanceInternal: Type = cast(
+ Type,
+ Annotated[
+ Union[DataRequestInternalHda, DataRequestInternalLdda, DataRequestInternalHdca, DataRequestUri],
+ Field(discriminator="src"),
+ ],
+)
+MultiDataInstanceInternalDereferenced: Type = cast(
+ Type,
+ Annotated[
+ Union[DataRequestInternalHda, DataRequestInternalLdda, DataRequestInternalHdca], Field(discriminator="src")
+ ],
+)
+
+MultiDataRequestInternal: Type = union_type([MultiDataInstanceInternal, list_type(MultiDataInstanceInternal)])
+MultiDataRequestInternalDereferenced: Type = union_type(
+ [MultiDataInstanceInternalDereferenced, list_type(MultiDataInstanceInternalDereferenced)]
+)
class DataParameterModel(BaseGalaxyToolParameterModelDefinition):
@@ -300,6 +362,15 @@ def py_type_internal(self) -> Type:
base_model = DataRequestInternal
return optional_if_needed(base_model, self.optional)
+ @property
+ def py_type_internal_dereferenced(self) -> Type:
+ base_model: Type
+ if self.multiple:
+ base_model = MultiDataRequestInternalDereferenced
+ else:
+ base_model = DataRequestInternalDereferenced
+ return optional_if_needed(base_model, self.optional)
+
@property
def py_type_test_case(self) -> Type:
base_model: Type
@@ -316,8 +387,13 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam
return allow_batching(
dynamic_model_information_from_py_type(self, self.py_type_internal), BatchDataInstanceInternal
)
+ elif state_representation == "request_internal_dereferenced":
+ return allow_batching(
+ dynamic_model_information_from_py_type(self, self.py_type_internal_dereferenced),
+ BatchDataInstanceInternal,
+ )
elif state_representation == "job_internal":
- return dynamic_model_information_from_py_type(self, self.py_type_internal)
+ return dynamic_model_information_from_py_type(self, self.py_type_internal_dereferenced)
elif state_representation == "test_case_xml":
return dynamic_model_information_from_py_type(self, self.py_type_test_case)
elif state_representation == "workflow_step":
@@ -357,7 +433,7 @@ def py_type_internal(self) -> Type:
def pydantic_template(self, state_representation: StateRepresentationT) -> DynamicModelInformation:
if state_representation == "request":
return allow_batching(dynamic_model_information_from_py_type(self, self.py_type))
- elif state_representation == "request_internal":
+ elif state_representation in ["request_internal", "request_internal_dereferenced"]:
return allow_batching(dynamic_model_information_from_py_type(self, self.py_type_internal))
elif state_representation == "job_internal":
return dynamic_model_information_from_py_type(self, self.py_type_internal)
@@ -1177,9 +1253,7 @@ def to_simple_model(input_parameter: Union[ToolParameterModel, ToolParameterT])
return cast(ToolParameterT, input_parameter)
-def simple_input_models(
- parameters: Union[List[ToolParameterModel], List[ToolParameterT]]
-) -> Iterable[ToolParameterT]:
+def simple_input_models(parameters: Union[List[ToolParameterModel], List[ToolParameterT]]) -> Iterable[ToolParameterT]:
return [to_simple_model(m) for m in parameters]
@@ -1198,6 +1272,12 @@ def create_request_internal_model(tool: ToolParameterBundle, name: str = "Dynami
return create_field_model(tool.parameters, name, "request_internal")
+def create_request_internal_dereferenced_model(
+ tool: ToolParameterBundle, name: str = "DynamicModelForTool"
+) -> Type[BaseModel]:
+ return create_field_model(tool.parameters, name, "request_internal_dereferenced")
+
+
def create_job_internal_model(tool: ToolParameterBundle, name: str = "DynamicModelForTool") -> Type[BaseModel]:
return create_field_model(tool.parameters, name, "job_internal")
@@ -1258,6 +1338,11 @@ def validate_internal_request(tool: ToolParameterBundle, request: Dict[str, Any]
validate_against_model(pydantic_model, request)
+def validate_internal_request_dereferenced(tool: ToolParameterBundle, request: Dict[str, Any]) -> None:
+ pydantic_model = create_request_internal_dereferenced_model(tool)
+ validate_against_model(pydantic_model, request)
+
+
def validate_internal_job(tool: ToolParameterBundle, request: Dict[str, Any]) -> None:
pydantic_model = create_job_internal_model(tool)
validate_against_model(pydantic_model, request)
diff --git a/lib/galaxy/tool_util/parameters/state.py b/lib/galaxy/tool_util/parameters/state.py
index 94eb48b5de52..af15cf23ac7b 100644
--- a/lib/galaxy/tool_util/parameters/state.py
+++ b/lib/galaxy/tool_util/parameters/state.py
@@ -15,6 +15,7 @@
from .models import (
create_job_internal_model,
+ create_request_internal_dereferenced_model,
create_request_internal_model,
create_request_model,
create_test_case_model,
@@ -83,6 +84,14 @@ def _parameter_model_for(cls, parameters: ToolParameterBundle) -> Type[BaseModel
return create_request_internal_model(parameters)
+class RequestInternalDereferencedToolState(ToolState):
+ state_representation: Literal["request_internal_dereferenced"] = "request_internal_dereferenced"
+
+ @classmethod
+ def _parameter_model_for(cls, parameters: ToolParameterBundle) -> Type[BaseModel]:
+ return create_request_internal_dereferenced_model(parameters)
+
+
class JobInternalToolState(ToolState):
state_representation: Literal["job_internal"] = "job_internal"
diff --git a/lib/galaxy/tool_util/verify/interactor.py b/lib/galaxy/tool_util/verify/interactor.py
index 970a6e03ae94..7f84681a130a 100644
--- a/lib/galaxy/tool_util/verify/interactor.py
+++ b/lib/galaxy/tool_util/verify/interactor.py
@@ -37,7 +37,7 @@
from galaxy import util
from galaxy.tool_util.parameters import (
DataCollectionRequest,
- DataRequest,
+ DataRequestHda,
encode_test,
input_models_from_json,
TestCaseToolState,
@@ -669,8 +669,8 @@ def run_tool(
assert request_schema is not None, "Request schema not set"
parameters = request_schema["parameters"]
- def adapt_datasets(test_input: JsonTestDatasetDefDict) -> DataRequest:
- return DataRequest(**self.uploads[test_input["path"]])
+ def adapt_datasets(test_input: JsonTestDatasetDefDict) -> DataRequestHda:
+ return DataRequestHda(**self.uploads[test_input["path"]])
def adapt_collections(test_input: JsonTestCollectionDefDict) -> DataCollectionRequest:
test_collection_def = TestCollectionDef.from_dict(test_input)
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
index 7c9238d6b737..995d15693918 100644
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -75,7 +75,7 @@
from galaxy.tool_util.parameters import (
input_models_for_pages,
JobInternalToolState,
- RequestInternalToolState,
+ RequestInternalDereferencedToolState,
ToolParameterBundle,
)
from galaxy.tool_util.parser import (
@@ -1835,7 +1835,7 @@ def visit_inputs(self, values, callback):
def expand_incoming_async(
self,
request_context: WorkRequestContext,
- tool_request_internal_state: RequestInternalToolState,
+ tool_request_internal_state: RequestInternalDereferencedToolState,
rerun_remap_job_id: Optional[int],
) -> Tuple[
List[ToolStateJobInstancePopulatedT],
@@ -1998,8 +1998,9 @@ def completed_jobs(
def handle_input_async(
self,
- trans,
+ request_context: WorkRequestContext,
tool_request: ToolRequest,
+ tool_state: RequestInternalDereferencedToolState,
history: Optional[model.History] = None,
use_cached_job: bool = DEFAULT_USE_CACHED_JOB,
preferred_object_store_id: Optional[str] = DEFAULT_PREFERRED_OBJECT_STORE_ID,
@@ -2007,15 +2008,15 @@ def handle_input_async(
input_format: str = "legacy",
):
"""The tool request API+tasks version of handle_input."""
- request_context = proxy_work_context_for_history(trans, history=history)
- tool_request_state = RequestInternalToolState(tool_request.request)
all_params, all_errors, collection_info, job_tool_states = self.expand_incoming_async(
- request_context, tool_request_state, rerun_remap_job_id
+ request_context, tool_state, rerun_remap_job_id
)
self.handle_incoming_errors(all_errors)
- mapping_params = MappingParameters(tool_request.request, all_params, tool_request_state, job_tool_states)
- completed_jobs: Dict[int, Optional[model.Job]] = self.completed_jobs(trans, use_cached_job, all_params)
+ mapping_params = MappingParameters(tool_request.request, all_params, tool_state, job_tool_states)
+ completed_jobs: Dict[int, Optional[model.Job]] = self.completed_jobs(
+ request_context, use_cached_job, all_params
+ )
execute_async(
request_context,
self,
diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py
index 6b5c7e4a18a1..f3bc20fa4503 100644
--- a/lib/galaxy/tools/execute.py
+++ b/lib/galaxy/tools/execute.py
@@ -33,7 +33,7 @@
)
from galaxy.tool_util.parameters.state import (
JobInternalToolState,
- RequestInternalToolState,
+ RequestInternalDereferencedToolState,
)
from galaxy.tool_util.parser import ToolOutputCollectionPart
from galaxy.tools.execution_helpers import (
@@ -80,7 +80,7 @@ class MappingParameters(NamedTuple):
param_combinations: List[ToolStateJobInstancePopulatedT]
# schema driven parameters
# model validated tool request - might correspond to multiple jobs
- validated_param_template: Optional[RequestInternalToolState] = None
+ validated_param_template: Optional[RequestInternalDereferencedToolState] = None
# validated job parameters for individual jobs
validated_param_combinations: Optional[List[JobInternalToolState]] = None
diff --git a/lib/galaxy/tools/parameters/meta.py b/lib/galaxy/tools/parameters/meta.py
index 5fe669e6cc73..1e40c1d89f3f 100644
--- a/lib/galaxy/tools/parameters/meta.py
+++ b/lib/galaxy/tools/parameters/meta.py
@@ -21,7 +21,7 @@
)
from galaxy.tool_util.parameters import (
JobInternalToolState,
- RequestInternalToolState,
+ RequestInternalDereferencedToolState,
ToolParameterBundle,
)
from galaxy.util import permutations
@@ -237,7 +237,9 @@ def classifier(input_key):
Expanded2T = Tuple[List[JobInternalToolState], Optional[matching.MatchingCollections]]
-def expand_meta_parameters_async(app, tool: ToolParameterBundle, incoming: RequestInternalToolState) -> Expanded2T:
+def expand_meta_parameters_async(
+ app, tool: ToolParameterBundle, incoming: RequestInternalDereferencedToolState
+) -> Expanded2T:
# TODO: Tool State 2.0 Follow Up: rework this to only test permutation at actual input value roots.
def classifier(input_key):
diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py
index 33515c9ec071..8ccacd422a30 100644
--- a/lib/galaxy_test/base/populators.py
+++ b/lib/galaxy_test/base/populators.py
@@ -1451,7 +1451,7 @@ def is_ready():
return state() == "submitted"
def get_tool_request(self, tool_request_id: str) -> Dict[str, Any]:
- response = self._get(f"tool_requests/{tool_request_id}/state")
+ response = self._get(f"tool_requests/{tool_request_id}")
api_asserts.assert_status_code_is_ok(response)
return response.json()
diff --git a/test/unit/data/test_dereference.py b/test/unit/data/test_dereference.py
new file mode 100644
index 000000000000..e7df3084cb73
--- /dev/null
+++ b/test/unit/data/test_dereference.py
@@ -0,0 +1,13 @@
+from galaxy.model.dereference import dereference_to_model
+from galaxy.tool_util.parameters import DataRequestUri
+from .model.test_model_store import setup_fixture_context_with_history
+
+TEST_URI = "gxfiles://test/1.bed"
+
+
+def test_dereference():
+ app, sa_session, user, history = setup_fixture_context_with_history()
+ uri_request = DataRequestUri(url=TEST_URI, ext="bed")
+ hda = dereference_to_model(sa_session, user, history, uri_request)
+ assert hda.name == "1.bed"
+ assert hda.dataset.sources[0].source_uri == TEST_URI
diff --git a/test/unit/tool_util/parameter_specification.yml b/test/unit/tool_util/parameter_specification.yml
index d4d3e07c0218..ed6b1abbb319 100644
--- a/test/unit/tool_util/parameter_specification.yml
+++ b/test/unit/tool_util/parameter_specification.yml
@@ -118,17 +118,25 @@ gx_int_required_via_empty_string:
<<: *gx_int_required
gx_text:
- request_valid:
+ request_valid: &gx_text_request_valid
- parameter: moocow
- parameter: 'some spaces'
- parameter: ''
- {}
- request_invalid:
+ request_invalid: &gx_text_request_invalid
- parameter: 5
- parameter: null
- parameter: {}
- parameter: { "moo": "cow" }
- parameter: {__class__: 'ConnectedValue'}
+ request_internal_valid:
+ *gx_text_request_valid
+ request_internal_invalid:
+ *gx_text_request_invalid
+ request_internal_dereferenced_valid:
+ *gx_text_request_valid
+ request_internal_dereferenced_invalid:
+ *gx_text_request_invalid
workflow_step_valid:
- parameter: moocow
- parameter: 'some spaces'
@@ -199,11 +207,15 @@ gx_select:
- parameter: null
- parameter: {}
- parameter: 5
- request_internal_valid:
+ request_internal_valid: &gx_select_request_valid
- parameter: "--ex1"
- parameter: "ex2"
- request_internal_invalid:
+ request_internal_invalid: &gx_select_request_invalid
- parameter: {}
+ request_internal_dereferenced_valid:
+ *gx_select_request_valid
+ request_internal_dereferenced_invalid:
+ *gx_select_request_invalid
test_case_xml_valid:
- parameter: 'ex2'
- parameter: '--ex1'
@@ -501,6 +513,7 @@ gx_color:
gx_data:
request_valid:
- parameter: {src: hda, id: abcdabcd}
+ - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", "ext": "txt"}
- parameter: {__class__: "Batch", values: [{src: hdca, id: abcdabcd}]}
request_invalid:
- parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]}
@@ -518,6 +531,7 @@ gx_data:
- parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]}
- parameter: {src: hda, id: 5}
- parameter: {src: hda, id: 0}
+ - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}
request_internal_invalid:
- parameter: {__class__: "Batch", values: [{src: hdca, id: abcdabcd}]}
- parameter: {src: hda, id: abcdabcd}
@@ -526,6 +540,14 @@ gx_data:
- parameter: true
- parameter: 5
- parameter: "5"
+ request_internal_dereferenced_valid:
+ - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]}
+ - parameter: {src: hda, id: 5}
+ - parameter: {src: hda, id: 0}
+ request_internal_dereferenced_invalid:
+ # the difference between request internal and request internal dereferenced is that these have been converted
+ # to datasets.
+ - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}
job_internal_valid:
- parameter: {src: hda, id: 7}
job_internal_invalid:
@@ -533,6 +555,8 @@ gx_data:
# expanded out.
- parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]}
- parameter: {src: hda, id: abcdabcd}
+ # url parameters should be dereferrenced into datasets by this point...
+ - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", "ext": "txt"}
test_case_xml_valid:
- parameter: {class: File, path: foo.bed}
- parameter: {class: File, location: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt"}
@@ -627,6 +651,8 @@ gx_data_multiple:
- parameter: [{src: hda, id: 5}]
- parameter: [{src: hdca, id: 5}]
- parameter: [{src: hdca, id: 5}, {src: hda, id: 5}]
+ - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}]
+ - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]}
request_internal_invalid:
- parameter: {src: hda, id: abcdabcd}
- parameter: [{src: hdca, id: abcdabcd}, {src: hda, id: abcdabcd}]
@@ -636,6 +662,14 @@ gx_data_multiple:
- parameter: true
- parameter: 5
- parameter: "5"
+ request_internal_dereferenced_valid:
+ - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]}
+ - parameter: [{src: hda, id: 5}]
+ - parameter: [{src: hda, id: 0}]
+ request_internal_dereferenced_invalid:
+ # the difference between request internal and request internal dereferenced is that these have been converted
+ # to datasets.
+ - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}]
gx_data_multiple_optional:
request_valid:
@@ -646,6 +680,8 @@ gx_data_multiple_optional:
- parameter: [{src: hdca, id: abcdabcd}, {src: hda, id: abcdabcd}]
- parameter: null
- {}
+ - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}
+ - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}]
request_invalid:
- parameter: {src: hda, id: 5}
- parameter: {}
@@ -660,12 +696,19 @@ gx_data_multiple_optional:
- parameter: [{src: hdca, id: 5}, {src: hda, id: 5}]
- parameter: null
- {}
+ - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}]
request_internal_invalid:
- parameter: {src: hda, id: abcdabcd}
- parameter: {}
- parameter: true
- parameter: 5
- parameter: "5"
+ request_internal_dereferenced_valid:
+ - parameter: {src: hda, id: 5}
+ request_internal_dereferenced_invalid:
+ - parameter: {src: hda, id: abcdabcd}
+ - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}]
+ - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}
gx_data_collection:
request_valid:
@@ -692,6 +735,10 @@ gx_data_collection:
- parameter: true
- parameter: 5
- parameter: "5"
+ request_internal_dereferenced_valid:
+ - parameter: {src: hdca, id: 5}
+ request_internal_dereferenced_invalid:
+ - parameter: {src: hdca, id: abcdabcd}
workflow_step_valid:
- {}
workflow_step_invalid:
diff --git a/test/unit/tool_util/test_parameter_covert.py b/test/unit/tool_util/test_parameter_covert.py
index 434032c1c1bc..1b2891fb97df 100644
--- a/test/unit/tool_util/test_parameter_covert.py
+++ b/test/unit/tool_util/test_parameter_covert.py
@@ -1,9 +1,17 @@
-from typing import Dict
+from typing import (
+ Dict,
+ Optional,
+)
from galaxy.tool_util.parameters import (
+ DataRequestInternalHda,
+ DataRequestUri,
decode,
+ dereference,
encode,
input_models_for_tool_source,
+ RequestInternalDereferencedToolState,
+ RequestInternalToolState,
RequestToolState,
)
from .test_parameter_test_cases import tool_source_for
@@ -91,6 +99,31 @@ def test_multi_data():
assert encoded_state.input_state["parameter"][1]["id"] == EXAMPLE_ID_2_ENCODED
+def test_dereference():
+ tool_source = tool_source_for("parameters/gx_data")
+ bundle = input_models_for_tool_source(tool_source)
+ raw_request_state = {"parameter": {"src": "url", "url": "gxfiles://mystorage/1.bed", "ext": "bed"}}
+ request_state = RequestInternalToolState(raw_request_state)
+ request_state.validate(bundle)
+
+ exception: Optional[Exception] = None
+ try:
+ # quickly verify this request needs to be dereferenced
+ bad_state = RequestInternalDereferencedToolState(raw_request_state)
+ bad_state.validate(bundle)
+ except Exception as e:
+ exception = e
+ assert exception is not None
+
+ dereferenced_state = dereference(request_state, bundle, _fake_dereference)
+ assert isinstance(dereferenced_state, RequestInternalDereferencedToolState)
+ dereferenced_state.validate(bundle)
+
+
+def _fake_dereference(input: DataRequestUri) -> DataRequestInternalHda:
+ return DataRequestInternalHda(id=EXAMPLE_ID_1)
+
+
def _fake_decode(input: str) -> int:
return next(key for key, value in ID_MAP.items() if value == input)
diff --git a/test/unit/tool_util/test_parameter_specification.py b/test/unit/tool_util/test_parameter_specification.py
index 6a54d78cfabf..01afc9b37a40 100644
--- a/test/unit/tool_util/test_parameter_specification.py
+++ b/test/unit/tool_util/test_parameter_specification.py
@@ -18,6 +18,7 @@
ToolParameterBundleModel,
validate_internal_job,
validate_internal_request,
+ validate_internal_request_dereferenced,
validate_request,
validate_test_case,
validate_workflow_step,
@@ -91,6 +92,8 @@ def _test_file(file: str, specification=None, parameter_bundle: Optional[ToolPar
"request_invalid": _assert_requests_invalid,
"request_internal_valid": _assert_internal_requests_validate,
"request_internal_invalid": _assert_internal_requests_invalid,
+ "request_internal_dereferenced_valid": _assert_internal_requests_dereferenced_validate,
+ "request_internal_dereferenced_invalid": _assert_internal_requests_dereferenced_invalid,
"job_internal_valid": _assert_internal_jobs_validate,
"job_internal_invalid": _assert_internal_jobs_invalid,
"test_case_xml_valid": _assert_test_cases_validate,
@@ -153,6 +156,26 @@ def _assert_internal_request_invalid(parameters: ToolParameterBundleModel, reque
), f"Parameters {parameters} didn't result in validation error on internal request {request} as expected."
+def _assert_internal_request_dereferenced_validates(
+ parameters: ToolParameterBundleModel, request: RawStateDict
+) -> None:
+ try:
+ validate_internal_request_dereferenced(parameters, request)
+ except RequestParameterInvalidException as e:
+ raise AssertionError(f"Parameters {parameters} failed to validate dereferenced internal request {request}. {e}")
+
+
+def _assert_internal_request_dereferenced_invalid(parameters: ToolParameterBundleModel, request: RawStateDict) -> None:
+ exc = None
+ try:
+ validate_internal_request_dereferenced(parameters, request)
+ except RequestParameterInvalidException as e:
+ exc = e
+ assert (
+ exc is not None
+ ), f"Parameters {parameters} didn't result in validation error on dereferenced internal request {request} as expected."
+
+
def _assert_internal_job_validates(parameters: ToolParameterBundleModel, request: RawStateDict) -> None:
try:
validate_internal_job(parameters, request)
@@ -235,6 +258,8 @@ def _assert_workflow_step_linked_invalid(
_assert_requests_invalid = partial(_for_each, _assert_request_invalid)
_assert_internal_requests_validate = partial(_for_each, _assert_internal_request_validates)
_assert_internal_requests_invalid = partial(_for_each, _assert_internal_request_invalid)
+_assert_internal_requests_dereferenced_validate = partial(_for_each, _assert_internal_request_dereferenced_validates)
+_assert_internal_requests_dereferenced_invalid = partial(_for_each, _assert_internal_request_dereferenced_invalid)
_assert_internal_jobs_validate = partial(_for_each, _assert_internal_job_validates)
_assert_internal_jobs_invalid = partial(_for_each, _assert_internal_job_invalid)
_assert_test_cases_validate = partial(_for_each, _assert_test_case_validates)