Merge pull request #17435 from mvdbeek/bundle_as_input
Allow using tool data bundles as inputs to reference data select parameters
mvdbeek authored Feb 19, 2024
2 parents 15115f3 + 9d7a012 commit 0d9abfe
Showing 26 changed files with 333 additions and 81 deletions.
25 changes: 24 additions & 1 deletion client/src/api/schema/schema.ts
@@ -4559,6 +4559,23 @@ export interface components {
*/
user_email?: string | null;
};
/** EncodedJobParameterHistoryItem */
EncodedJobParameterHistoryItem: {
/** Hid */
hid?: number | null;
/**
* Id
* @example 0123456789ABCDEF
*/
id: string;
/** Name */
name: string;
/**
* Source
* @description The source of this dataset, either `hda`, `ldda`, `hdca`, `dce` or `dc` depending of its origin.
*/
src: components["schemas"]["DataItemSourceType"];
};
/** ExportHistoryArchivePayload */
ExportHistoryArchivePayload: {
/**
@@ -7739,7 +7756,13 @@ export interface components {
* Value
* @description The values of the job parameter
*/
value?: Record<string, never> | null;
value?:
| components["schemas"]["EncodedJobParameterHistoryItem"][]
| number
| number
| boolean
| string
| null;
};
/**
* JobSourceType
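For orientation, here is a hypothetical job-parameter value matching the new union type; the repeated number arm is how the generator renders the Python float and int arms, and the id, hid, and name below are invented:

# Hypothetical "value" for a dataset-backed parameter, shaped like
# EncodedJobParameterHistoryItem[]; ids and names are illustrative only.
value = [{"id": "0123456789ABCDEF", "hid": 3, "name": "bundle.data_manager_json", "src": "hda"}]
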
17 changes: 17 additions & 0 deletions client/src/components/Tool/ToolForm.vue
@@ -75,6 +75,12 @@
title="Attempt to re-use jobs with identical parameters?"
help="This may skip executing jobs that you have already run."
type="boolean" />
<FormSelect
v-if="formConfig.model_class === 'DataManagerTool'"
id="data_manager_mode"
v-model="dataManagerMode"
:options="bundleOptions"
title="Create dataset bundle instead of adding data table to loc file ?"></FormSelect>
</div>
</template>
<template v-slot:header-buttons>
@@ -120,13 +126,16 @@ import { getToolFormData, submitJob, updateToolFormData } from "./services";
import ToolCard from "./ToolCard";
import { allowCachedJobs } from "./utilities";
import FormSelect from "@/components/Form/Elements/FormSelect.vue";
export default {
components: {
ButtonSpinner,
LoadingSpan,
FormDisplay,
ToolCard,
FormElement,
FormSelect,
ToolEntryPoints,
ToolRecommendation,
Heading,
@@ -175,13 +184,18 @@ export default {
useCachedJobs: false,
useEmail: false,
useJobRemapping: false,
dataManagerMode: "populate",
entryPoints: [],
jobDef: {},
jobResponse: {},
validationInternal: null,
validationScrollTo: null,
currentVersion: this.version,
preferredObjectStoreId: null,
bundleOptions: [
{ label: "populate", value: "populate" },
{ label: "bundle", value: "bundle" },
],
};
},
computed: {
@@ -326,6 +340,9 @@ export default {
if (this.preferredObjectStoreId) {
jobDef.preferred_object_store_id = this.preferredObjectStoreId;
}
if (this.dataManagerMode === "bundle") {
jobDef.data_manager_mode = this.dataManagerMode;
}
console.debug("toolForm::onExecute()", jobDef);
const prevRoute = this.$route.fullPath;
submitJob(jobDef).then(
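A sketch of the request this form change produces: per onExecute above, data_manager_mode is added at the top level of the job definition only when "bundle" is selected. The endpoint and header follow the standard Galaxy tools API; the tool id, history id, and inputs are invented for illustration:

import requests

# Hypothetical job submission mirroring ToolForm's submitJob payload.
payload = {
    "tool_id": "data_manager_fetch_genome_dbkeys_all_fasta",  # hypothetical data manager
    "history_id": "f2db41e1fa331b3e",  # hypothetical encoded history id
    "inputs": {"dbkey": "hg38"},  # hypothetical tool inputs
    "data_manager_mode": "bundle",  # omitted when "populate" (the default) is selected
}
requests.post("https://galaxy.example.org/api/tools", json=payload, headers={"x-api-key": "<your key>"})
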
2 changes: 1 addition & 1 deletion lib/galaxy/config/sample/datatypes_conf.xml.sample
@@ -121,7 +121,7 @@
<datatype extension="d3_hierarchy" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="true"/>
<datatype extension="imgt.json" type="galaxy.datatypes.text:ImgtJson" mimetype="application/json" display_in_upload="True"/>
<datatype extension="geojson" type="galaxy.datatypes.text:GeoJson" mimetype="application/json" display_in_upload="True"/>
<datatype extension="data_manager_json" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="false"/>
<datatype extension="data_manager_json" type="galaxy.datatypes.text:DataManagerJson" mimetype="application/json" subclass="true" display_in_upload="false"/>
<datatype extension="dbn" type="galaxy.datatypes.sequence:DotBracket" display_in_upload="true" description="Dot-Bracket format is a text-based format for storing both an RNA sequence and its corresponding 2D structure." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Dbn"/>
<datatype extension="fai" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" subclass="true" description="A Fasta Index File is a text file consisting of lines each with five TAB-delimited columns : Name, Length, offset, linebases, Linewidth" description_url="http://www.htslib.org/doc/faidx.html"/>
<datatype extension="fasta" auto_compressed_types="gz" type="galaxy.datatypes.sequence:Fasta" display_in_upload="true" description="A sequence in FASTA format consists of a single-line description, followed by lines of sequence data. The first character of the description line is a greater-than ('&gt;') symbol in the first column. All lines should be shorter than 80 characters." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Fasta">
13 changes: 13 additions & 0 deletions lib/galaxy/datatypes/text.py
@@ -133,6 +133,19 @@ def display_peek(self, dataset: DatasetProtocol) -> str:
return f"JSON file ({nice_size(dataset.get_size())})"


class DataManagerJson(Json):
file_ext = "data_manager_json"
MetadataElement(
name="data_tables", default=None, desc="Data tables represented by this dataset", readonly=True, visible=True
)

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd):
super().set_meta(dataset=dataset, overwrite=overwrite, **kwd)
with open(dataset.get_file_name()) as fh:
data_tables = json.load(fh)["data_tables"]
dataset.metadata.data_tables = data_tables


class ExpressionJson(Json):
"""Represents the non-data input or output to a tool or workflow."""

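To make set_meta concrete: it parses the bundle file and copies the top-level data_tables value into the dataset's metadata. A minimal sketch of such a file's contents as a Python literal; only the data_tables key is required by the code above, and the table name and row columns are invented:

# Hypothetical data_manager_json bundle contents.
bundle = {
    "data_tables": {
        "all_fasta": [
            {"value": "hg38", "dbkey": "hg38", "name": "Human (hg38)", "path": "/data/hg38.fa"},
        ],
    },
}
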
23 changes: 14 additions & 9 deletions lib/galaxy/managers/jobs.py
@@ -901,23 +901,24 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)

for input in input_params.values():
if input.name in param_values:
input_value = param_values[input.name]
if input.type == "repeat":
for i in range(len(param_values[input.name])):
rval.extend(inputs_recursive(input.inputs, param_values[input.name][i], depth=depth + 1))
for i in range(len(input_value)):
rval.extend(inputs_recursive(input.inputs, input_value[i], depth=depth + 1))
elif input.type == "section":
# Get the value of the current Section parameter
rval.append(dict(text=input.name, depth=depth))
rval.extend(
inputs_recursive(
input.inputs,
param_values[input.name],
input_value,
depth=depth + 1,
upgrade_messages=upgrade_messages.get(input.name),
)
)
elif input.type == "conditional":
try:
current_case = param_values[input.name]["__current_case__"]
current_case = input_value["__current_case__"]
is_valid = True
except Exception:
current_case = None
@@ -929,7 +930,7 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
rval.extend(
inputs_recursive(
input.cases[current_case].inputs,
param_values[input.name],
input_value,
depth=depth + 1,
upgrade_messages=upgrade_messages.get(input.name),
)
@@ -948,12 +949,16 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
dict(
text=input.group_title(param_values),
depth=depth,
value=f"{len(param_values[input.name])} uploaded datasets",
value=f"{len(input_value)} uploaded datasets",
)
)
elif input.type == "data" or input.type == "data_collection":
elif (
input.type == "data"
or input.type == "data_collection"
or isinstance(input_value, model.HistoryDatasetAssociation)
):
value = []
for element in listify(param_values[input.name]):
for element in listify(input_value):
element_id = element.id
if isinstance(element, model.HistoryDatasetAssociation):
hda = element
@@ -977,7 +982,7 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
dict(
text=label,
depth=depth,
value=input.value_to_display_text(param_values[input.name]),
value=input.value_to_display_text(input_value),
notes=upgrade_messages.get(input.name, ""),
)
)
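The behavioral change in this hunk is the widened elif: a select parameter whose stored value is a HistoryDatasetAssociation (a data bundle) now goes through the dataset display path instead of value_to_display_text(). A sketch of the entry that path would emit, with invented ids:

# Hypothetical inputs_recursive() output row for a bundle-backed select.
entry = {
    "text": "reference",  # hypothetical parameter name
    "depth": 1,
    "value": [{"src": "hda", "id": "0123456789ABCDEF", "hid": 3, "name": "bundle.data_manager_json"}],
}
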
2 changes: 1 addition & 1 deletion lib/galaxy/managers/workflows.py
@@ -92,8 +92,8 @@
DataCollectionToolParameter,
DataToolParameter,
RuntimeValue,
workflow_building_modes,
)
from galaxy.tools.parameters.workflow_building_modes import workflow_building_modes
from galaxy.util.hash_util import md5_hash_str
from galaxy.util.json import (
safe_dumps,
21 changes: 21 additions & 0 deletions lib/galaxy/model/__init__.py
@@ -793,6 +793,27 @@ def __init__(self, email=None, password=None, username=None):
self.active = False
self.username = username

def get_user_data_tables(self, data_table: str):
session = object_session(self)
assert session
metadata_select = (
select(HistoryDatasetAssociation)
.join(Dataset)
.join(History)
.where(
HistoryDatasetAssociation.deleted == false(),
HistoryDatasetAssociation.extension == "data_manager_json",
History.user_id == self.id,
Dataset.state == "ok",
# excludes data manager runs that actually populated tables.
# maybe track this formally by creating a different datatype for bundles ?
Dataset.total_size != Dataset.file_size,
HistoryDatasetAssociation._metadata.contains(data_table),
)
.order_by(HistoryDatasetAssociation.id)
)
return session.execute(metadata_select).scalars().all()

@property
def extra_preferences(self):
data = defaultdict(lambda: None)
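A usage sketch for the new query: given a table name, it returns the user's non-deleted data_manager_json datasets whose serialized metadata mentions that table, oldest first. The trans context is assumed for illustration:

# Hypothetical caller: list a user's bundles carrying "all_fasta" entries.
bundles = trans.user.get_user_data_tables("all_fasta")
for hda in bundles:
    print(hda.hid, hda.name, hda.metadata.data_tables)
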
11 changes: 6 additions & 5 deletions lib/galaxy/model/custom_types.py
@@ -343,11 +343,12 @@ class MetadataType(JSONType):
def process_bind_param(self, value, dialect):
if value is not None:
if MAX_METADATA_VALUE_SIZE is not None:
for k, v in list(value.items()):
sz = total_size(v)
if sz > MAX_METADATA_VALUE_SIZE:
del value[k]
log.warning(f"Refusing to bind metadata key {k} due to size ({sz})")
if hasattr(value, "items"):
for k, v in list(value.items()):
sz = total_size(v)
if sz > MAX_METADATA_VALUE_SIZE:
del value[k]
log.warning(f"Refusing to bind metadata key {k} due to size ({sz})")
value = json_encoder.encode(value).encode()
return value

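A minimal standalone sketch of what the new hasattr guard changes: with a size cap configured, only mapping-like values get the per-key pruning, while anything else (such as a bare list) is JSON-encoded directly rather than raising AttributeError on value.items(). Here json.dumps length stands in for Galaxy's total_size:

import json

def bind_metadata(value, max_value_size=None):
    # Mirrors the guarded loop: prune oversized keys only on mappings.
    if value is not None:
        if max_value_size is not None and hasattr(value, "items"):
            value = {k: v for k, v in value.items() if len(json.dumps(v)) <= max_value_size}
        value = json.dumps(value).encode()
    return value

bind_metadata({"big": "x" * 100, "small": "y"}, max_value_size=10)  # drops "big", keeps "small"
bind_metadata(["plain", "list"], max_value_size=10)  # non-mapping: encoded as-is
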
6 changes: 5 additions & 1 deletion lib/galaxy/schema/jobs.py
@@ -4,6 +4,7 @@
Dict,
List,
Optional,
Union,
)

from pydantic import (
@@ -21,6 +22,7 @@
from galaxy.schema.schema import (
DataItemSourceType,
EncodedDataItemSourceId,
EncodedJobParameterHistoryItem,
JobMetricCollection,
JobState,
JobSummary,
@@ -221,7 +223,9 @@ class JobParameter(Model):
title="Depth",
description="The depth of the job parameter.",
)
value: Optional[Any] = Field(default=None, title="Value", description="The values of the job parameter")
value: Optional[Union[List[EncodedJobParameterHistoryItem], float, int, bool, str]] = Field(
default=None, title="Value", description="The values of the job parameter", union_mode="left_to_right"
)
notes: Optional[str] = Field(default=None, title="Notes", description="Notes associated with the job parameter.")


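With union_mode="left_to_right", pydantic tries the arms in declaration order, so a list of encoded dataset references validates as EncodedJobParameterHistoryItem objects rather than being matched by a later scalar arm. A validation sketch; the field values are invented, and the text field is assumed from how the jobs manager builds these entries:

from galaxy.schema.jobs import JobParameter  # import path as in this diff

param = JobParameter(
    text="reference",  # assumed field, per dict(text=...) in lib/galaxy/managers/jobs.py
    depth=1,
    value=[{"id": "0123456789ABCDEF", "hid": 3, "name": "bundle", "src": "hda"}],
)
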
5 changes: 5 additions & 0 deletions lib/galaxy/schema/schema.py
@@ -1965,6 +1965,11 @@ class EncodedDataItemSourceId(Model):
)


class EncodedJobParameterHistoryItem(EncodedDataItemSourceId):
hid: Optional[int] = None
name: str


class DatasetJobInfo(DatasetSourceId):
uuid: UuidField

2 changes: 1 addition & 1 deletion lib/galaxy/tool_util/data/__init__.py
@@ -489,7 +489,7 @@ def handle_found_index_file(self, filename):

# This method is used in tools, so need to keep its API stable
def get_fields(self) -> List[List[str]]:
return self.data
return self.data.copy()

def get_field(self, value):
rval = None
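Returning a shallow copy lets callers extend or reorder the field list, for instance when entries from a selected bundle are merged in, without mutating the shared in-memory table; note that the row lists themselves are still shared. An illustration with a hypothetical table instance:

fields = table.get_fields()  # hypothetical ToolDataTable instance
fields.append(["hg38_alt", "hg38", "Human (alt)", "/data/hg38_alt.fa"])  # mutates only the copy
assert fields is not table.get_fields()
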
2 changes: 1 addition & 1 deletion lib/galaxy/tools/__init__.py
@@ -127,7 +127,6 @@
SelectTagParameter,
SelectToolParameter,
ToolParameter,
workflow_building_modes,
)
from galaxy.tools.parameters.dataset_matcher import (
set_dataset_matcher_factory,
@@ -143,6 +142,7 @@
)
from galaxy.tools.parameters.input_translation import ToolInputTranslator
from galaxy.tools.parameters.meta import expand_meta_parameters
from galaxy.tools.parameters.workflow_building_modes import workflow_building_modes
from galaxy.tools.parameters.wrapped_json import json_wrap
from galaxy.tools.test import parse_tests
from galaxy.util import (
3 changes: 3 additions & 0 deletions lib/galaxy/tools/actions/__init__.py
@@ -32,6 +32,7 @@
DataCollectionToolParameter,
DataToolParameter,
RuntimeValue,
SelectToolParameter,
)
from galaxy.tools.parameters.wrapped import (
LegacyUnprefixedDict,
@@ -283,6 +284,8 @@ def process_dataset(data, formats=None):
value.child_collection = new_collection
else:
value.collection = new_collection
elif isinstance(input, SelectToolParameter) and isinstance(value, HistoryDatasetAssociation):
input_datasets[prefixed_name] = value

tool.visit_inputs(param_values, visitor)
return input_datasets, all_permissions
2 changes: 1 addition & 1 deletion lib/galaxy/tools/evaluation.py
@@ -407,7 +407,7 @@ def __populate_input_dataset_wrappers(self, param_dict, input_datasets):
if wrapper:
param_dict[name] = wrapper
continue
if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)):
if not isinstance(param_dict_value, ToolParameterValueWrapper):
wrapper_kwds = dict(
datatypes_registry=self.app.datatypes_registry,
tool=self.tool,
(The remaining 12 changed files are not rendered in this view.)
