Skip to content

Commit

Permalink
Implement default locations for data and collection parameters.
Browse files Browse the repository at this point in the history
Works for both files and collections. Workflow defaults override tool defaults.

TODO:

- Unit test case to ensure this only works for non-optional, non-multiple data parameters.
- Implement XSD once syntax is finalized.
  • Loading branch information
jmchilton committed Nov 1, 2023
1 parent af386d2 commit dce5376
Show file tree
Hide file tree
Showing 18 changed files with 513 additions and 40 deletions.
7 changes: 6 additions & 1 deletion lib/galaxy/managers/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,7 +969,7 @@ def _workflow_to_dict_run(self, trans, stored, workflow, history=None):
for pja in step.post_job_actions
]
else:
inputs = step.module.get_runtime_inputs(connections=step.output_connections)
inputs = step.module.get_runtime_inputs(step, connections=step.output_connections)
step_model = {"inputs": [input.to_dict(trans) for input in inputs.values()]}
step_model["when"] = step.when_expression
step_model["replacement_parameters"] = step.module.get_informal_replacement_parameters(step)
Expand Down Expand Up @@ -1770,6 +1770,11 @@ def __module_from_dict(

if "in" in step_dict:
for input_name, input_dict in step_dict["in"].items():
# This is just a bug in gxformat? I think the input
# defaults should be called input to match the input module's
# input parameter name.
if input_name == "default":
input_name = "input"
step_input = step.get_or_add_input(input_name)
NO_DEFAULT_DEFINED = object()
default = input_dict.get("default", NO_DEFAULT_DEFINED)
Expand Down
18 changes: 14 additions & 4 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7625,10 +7625,20 @@ def input_type(self):

@property
def input_default_value(self):
    """Return the default value configured for this step, or ``None``.

    Delegates to ``get_input_default_value`` using ``None`` as the fallback
    returned when no default has been set.
    """
    # The delegated result must be returned explicitly; a bare call would make
    # this property evaluate to ``None`` regardless of the configured default.
    return self.get_input_default_value(None)

def get_input_default_value(self, default_default):
    """Look up this step's default value, falling back to ``default_default``.

    Steps of type ``"parameter_input"`` read the ``"default"`` key of
    ``self.tool_inputs``. Every other step type scans ``self.inputs`` and uses
    the ``default_value`` of the first step input that is named ``"input"`` and
    has ``default_value_set`` truthy.

    :param default_default: value returned when no default is found.
    :returns: the stored default, or ``default_default``.
    """
    # parameter_input and the data parameters store their defaults in
    # different places, so the two cases are resolved separately.
    if self.type != "parameter_input":
        explicit_defaults = (
            candidate.default_value
            for candidate in self.inputs
            if candidate.name == "input" and candidate.default_value_set
        )
        # Only the first matching step input is consulted.
        return next(explicit_defaults, default_default)
    return self.tool_inputs.get("default", default_default)

@property
Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/tool_util/parser/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,9 @@ def parse_test_input_source(self) -> "InputSource":
def parse_when_input_sources(self):
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)

def parse_default(self) -> Optional[Dict[str, Any]]:
    """Return the default value definition for this input source.

    This implementation always reports that no default is defined by
    returning ``None``.
    """
    return None


class PageSource(metaclass=ABCMeta):
def parse_display(self):
Expand Down
52 changes: 52 additions & 0 deletions lib/galaxy/tool_util/parser/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import re
import uuid
from typing import (
Any,
cast,
Dict,
Iterable,
List,
Optional,
Expand Down Expand Up @@ -1274,6 +1276,56 @@ def parse_when_input_sources(self):
sources.append((value, case_page_source))
return sources

def parse_default(self) -> Optional[Dict[str, Any]]:
    """Parse the ``<default>`` child of this input element into a dictionary.

    For inputs whose ``type`` attribute is ``"data"`` the result describes a
    single file: ``{"class": "File", "location": ...}``. For every other input
    type the ``<default>`` element is read as a collection, producing a
    ``{"class": "Collection", "name", "collection_type", "elements"}``
    dictionary whose ``elements`` are built recursively from nested
    ``<element>`` / ``<collection>`` tags.

    :returns: the default definition, or ``None`` when the input has no
        ``<default>`` child.
    """

    def file_default_from_elem(elem):
        # TODO: hashes, created_from_basename, etc...
        return {"class": "File", "location": elem.get("location")}

    def read_elements(collection_elem):
        element_dicts = []
        for element in collection_elem.findall("element"):
            identifier = element.get("name")
            subcollection_elem = element.find("collection")
            # Compare against None explicitly: truth-testing an XML element is
            # deprecated and is False for an element without children, which
            # would misclassify an empty nested <collection> as a file.
            if subcollection_elem is not None:
                element_dicts.append(
                    {
                        "class": "Collection",
                        "identifier": identifier,
                        "collection_type": subcollection_elem.get("collection_type"),
                        "elements": read_elements(subcollection_elem),
                    }
                )
            else:
                element_dict = file_default_from_elem(element)
                element_dict["identifier"] = identifier
                element_dicts.append(element_dict)
        return element_dicts

    elem = self.input_elem
    default_elem = elem.find("default")
    if default_elem is None:
        return None
    if elem.get("type") == "data":
        return file_default_from_elem(default_elem)
    return {
        "class": "Collection",
        # The collection name falls back to the input's own name.
        "name": default_elem.get("name", elem.get("name")),
        "collection_type": default_elem.get("collection_type"),
        "elements": read_elements(default_elem),
    }


class ParallelismInfo:
"""
Expand Down
7 changes: 7 additions & 0 deletions lib/galaxy/tool_util/parser/yaml.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
from typing import (
Any,
Dict,
List,
Optional,
)

import packaging.version
Expand Down Expand Up @@ -358,6 +360,11 @@ def parse_static_options(self):
static_options.append((label, value, selected))
return static_options

def parse_default(self) -> Optional[Dict[str, Any]]:
    """Return the ``"default"`` entry of this input's dictionary.

    :returns: the value stored under ``"default"`` in ``self.input_dict``, or
        ``None`` when that key is absent.
    """
    return self.input_dict.get("default")


def _ensure_has(dict, defaults):
for key, value in defaults.items():
Expand Down
107 changes: 105 additions & 2 deletions lib/galaxy/tools/parameters/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,16 @@
from galaxy.model import (
cached_id,
Dataset,
DatasetCollection,
DatasetCollectionElement,
DatasetHash,
DatasetInstance,
DatasetSource,
HistoryDatasetAssociation,
HistoryDatasetCollectionAssociation,
LibraryDatasetDatasetAssociation,
)
from galaxy.model.dataset_collections import builder
from galaxy.schema.fetch_data import FilesPayload
from galaxy.tool_util.parser import get_input_source as ensure_input_source
from galaxy.util import (
Expand All @@ -43,6 +47,7 @@
)
from galaxy.util.dictifiable import Dictifiable
from galaxy.util.expressions import ExpressionContext
from galaxy.util.hash_util import HASH_NAMES
from galaxy.util.rules_dsl import RuleSet
from . import (
dynamic_options,
Expand Down Expand Up @@ -2094,6 +2099,11 @@ def __init__(self, tool, input_source, trans=None):
self._parse_options(input_source)
# Load conversions required for the dataset input
self.conversions = []
self.default_object = input_source.parse_default()
if self.optional and self.default_object is not None:
raise ParameterValueError(
"Cannot specify a Galaxy tool data parameter to be both optional and have a default value.", self.name
)
for name, conv_extension in input_source.parse_conversion_tuples():
assert None not in [
name,
Expand All @@ -2114,9 +2124,11 @@ def from_json(self, value, trans, other_values=None):
other_values = other_values or {}
if trans.workflow_building_mode is workflow_building_modes.ENABLED or is_runtime_value(value):
return None
if not value and not self.optional:
if not value and not self.optional and not self.default_object:
raise ParameterValueError("specify a dataset of the required format / build for parameter", self.name)
if value in [None, "None", ""]:
if self.default_object:
return raw_to_galaxy(trans, self.default_object)
return None
if isinstance(value, dict) and "values" in value:
value = self.to_python(value, trans.app)
Expand Down Expand Up @@ -2411,6 +2423,11 @@ def __init__(self, tool, input_source, trans=None):
self.multiple = False # Accessed on DataToolParameter a lot, may want in future
self.is_dynamic = True
self._parse_options(input_source) # TODO: Review and test.
self.default_object = input_source.parse_default()
if self.optional and self.default_object is not None:
raise ParameterValueError(
"Cannot specify a Galaxy tool data parameter to be both optional and have a default value.", self.name
)

@property
def collection_types(self):
Expand Down Expand Up @@ -2447,9 +2464,11 @@ def from_json(self, value, trans, other_values=None):
rval: Optional[Union[DatasetCollectionElement, HistoryDatasetCollectionAssociation]] = None
if trans.workflow_building_mode is workflow_building_modes.ENABLED:
return None
if not value and not self.optional:
if not value and not self.optional and not self.default_object:
raise ParameterValueError("specify a dataset collection of the correct type", self.name)
if value in [None, "None"]:
if self.default_object:
return raw_to_galaxy(trans, self.default_object)
return None
if isinstance(value, dict) and "values" in value:
value = self.to_python(value, trans.app)
Expand Down Expand Up @@ -2664,6 +2683,90 @@ def to_text(self, value):
return ""


# Code from CWL branch to massage in order to be shared across tools and workflows,
# and for CWL artifacts as well as Galaxy ones.
def raw_to_galaxy(trans, as_dict_value):
    """Turn a raw default-value dictionary into a Galaxy history object.

    A dictionary whose ``"class"`` is ``"File"`` becomes a
    ``HistoryDatasetAssociation`` in the ``DEFERRED`` state that is staged into
    ``trans.history``. Any other class is treated as a collection and becomes a
    ``HistoryDatasetCollectionAssociation`` whose elements are created by
    recursing into the ``"elements"`` list.

    :param trans: transaction providing ``app``, ``history`` and ``sa_session``.
    :param as_dict_value: dictionary with at least a ``"class"`` key.
    :returns: the newly created and flushed HDA or HDCA.
    """
    app = trans.app
    history = trans.history

    if as_dict_value["class"] != "File":
        hdca_name = as_dict_value.get("name")
        collection = DatasetCollection(
            collection_type=as_dict_value.get("collection_type"),
        )
        hdca = HistoryDatasetCollectionAssociation(
            name=hdca_name,
            collection=collection,
        )

        def write_elements_to_collection(has_elements, collection_builder):
            # Files are added directly; anything else opens a nested builder
            # level and is filled recursively.
            for child in has_elements.get("elements"):
                child_class = child["class"]
                child_identifier = child["identifier"]
                if child_class != "File":
                    nested_builder = collection_builder.get_level(child_identifier)
                    write_elements_to_collection(child, nested_builder)
                    continue
                collection_builder.add_dataset(child_identifier, raw_to_galaxy(trans, child))

        collection_builder = builder.BoundCollectionBuilder(collection)
        write_elements_to_collection(as_dict_value, collection_builder)
        collection_builder.populate()
        trans.sa_session.add(hdca)
        trans.sa_session.flush()
        return hdca

    from galaxy.tool_util.cwl.util import abs_path

    # TODO: the base path that locations are resolved against is hard-coded to "/".
    path = abs_path(as_dict_value.get("location"), "/")
    hda_name = os.path.basename(path)
    # A missing or empty "format" falls back to the generic "data" extension.
    extension = as_dict_value.get("format") or "data"

    dataset = Dataset()
    source = DatasetSource()
    source.source_uri = path
    # TODO: validate this...
    source.transform = as_dict_value.get("transform")
    dataset.sources.append(source)

    # TODO: Convert md5 -> MD5 during tool parsing.
    for hash_name in HASH_NAMES:
        if hash_name not in as_dict_value:
            continue
        dataset_hash = DatasetHash()
        dataset_hash.hash_function = hash_name
        dataset_hash.hash_value = as_dict_value[hash_name]
        dataset.hashes.append(dataset_hash)

    if "created_from_basename" in as_dict_value:
        dataset.created_from_basename = as_dict_value["created_from_basename"]

    # Both the dataset and its history association are marked deferred.
    dataset.state = Dataset.states.DEFERRED
    primary_data = HistoryDatasetAssociation(
        name=hda_name,
        extension=extension,
        metadata_deferred=True,
        designation=None,
        visible=True,
        dbkey="?",
        dataset=dataset,
        flush=False,
        sa_session=trans.sa_session,
    )
    primary_data.state = Dataset.states.DEFERRED

    default_permissions = app.security_agent.history_get_default_permissions(history)
    app.security_agent.set_all_dataset_permissions(
        primary_data.dataset, default_permissions, new=True, flush=False
    )
    trans.sa_session.add(primary_data)
    history.stage_addition(primary_data)
    history.add_pending_items()
    trans.sa_session.flush()
    return primary_data


parameter_types = dict(
text=TextToolParameter,
integer=IntegerToolParameter,
Expand Down
Loading

0 comments on commit dce5376

Please sign in to comment.