Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow using tool data bundles as inputs to reference data select parameters #17435

Merged
merged 14 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion client/src/api/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4555,6 +4555,23 @@ export interface components {
*/
user_email?: string | null;
};
/** EncodedJobParameterHistoryItem */
EncodedJobParameterHistoryItem: {
/** Hid */
hid?: number | null;
/**
* Id
* @example 0123456789ABCDEF
*/
id: string;
/** Name */
name: string;
/**
* Source
* @description The source of this dataset, either `hda`, `ldda`, `hdca`, `dce` or `dc` depending on its origin.
*/
src: components["schemas"]["DataItemSourceType"];
};
/** ExportHistoryArchivePayload */
ExportHistoryArchivePayload: {
/**
Expand Down Expand Up @@ -7735,7 +7752,13 @@ export interface components {
* Value
* @description The values of the job parameter
*/
value?: Record<string, never> | null;
value?:
| components["schemas"]["EncodedJobParameterHistoryItem"][]
| number
| number
| boolean
| string
| null;
};
/**
* JobSourceType
Expand Down
17 changes: 17 additions & 0 deletions client/src/components/Tool/ToolForm.vue
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@
title="Attempt to re-use jobs with identical parameters?"
help="This may skip executing jobs that you have already run."
type="boolean" />
<FormSelect
v-if="formConfig.model_class === 'DataManagerTool'"
id="data_manager_mode"
v-model="dataManagerMode"
:options="bundleOptions"
title="Create dataset bundle instead of adding data table to loc file ?"></FormSelect>
</div>
</template>
<template v-slot:header-buttons>
Expand Down Expand Up @@ -120,13 +126,16 @@ import { getToolFormData, submitJob, updateToolFormData } from "./services";
import ToolCard from "./ToolCard";
import { allowCachedJobs } from "./utilities";
import FormSelect from "@/components/Form/Elements/FormSelect.vue";
export default {
components: {
ButtonSpinner,
LoadingSpan,
FormDisplay,
ToolCard,
FormElement,
FormSelect,
ToolEntryPoints,
ToolRecommendation,
Heading,
Expand Down Expand Up @@ -175,13 +184,18 @@ export default {
useCachedJobs: false,
useEmail: false,
useJobRemapping: false,
dataManagerMode: "populate",
entryPoints: [],
jobDef: {},
jobResponse: {},
validationInternal: null,
validationScrollTo: null,
currentVersion: this.version,
preferredObjectStoreId: null,
bundleOptions: [
{ label: "populate", value: "populate" },
{ label: "bundle", value: "bundle" },
],
};
},
computed: {
Expand Down Expand Up @@ -326,6 +340,9 @@ export default {
if (this.preferredObjectStoreId) {
jobDef.preferred_object_store_id = this.preferredObjectStoreId;
}
if (this.dataManagerMode === "bundle") {
jobDef.data_manager_mode = this.dataManagerMode;
}
console.debug("toolForm::onExecute()", jobDef);
const prevRoute = this.$route.fullPath;
submitJob(jobDef).then(
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
<datatype extension="d3_hierarchy" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="true"/>
<datatype extension="imgt.json" type="galaxy.datatypes.text:ImgtJson" mimetype="application/json" display_in_upload="True"/>
<datatype extension="geojson" type="galaxy.datatypes.text:GeoJson" mimetype="application/json" display_in_upload="True"/>
<datatype extension="data_manager_json" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="false"/>
<datatype extension="data_manager_json" type="galaxy.datatypes.text:DataManagerJson" mimetype="application/json" subclass="true" display_in_upload="false"/>
<datatype extension="dbn" type="galaxy.datatypes.sequence:DotBracket" display_in_upload="true" description="Dot-Bracket format is a text-based format for storing both an RNA sequence and its corresponding 2D structure." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Dbn"/>
<datatype extension="fai" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" subclass="true" description="A Fasta Index File is a text file consisting of lines each with five TAB-delimited columns : Name, Length, offset, linebases, Linewidth" description_url="http://www.htslib.org/doc/faidx.html"/>
<datatype extension="fasta" auto_compressed_types="gz" type="galaxy.datatypes.sequence:Fasta" display_in_upload="true" description="A sequence in FASTA format consists of a single-line description, followed by lines of sequence data. The first character of the description line is a greater-than ('&gt;') symbol in the first column. All lines should be shorter than 80 characters." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Fasta">
Expand Down
13 changes: 13 additions & 0 deletions lib/galaxy/datatypes/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,19 @@ def display_peek(self, dataset: DatasetProtocol) -> str:
return f"JSON file ({nice_size(dataset.get_size())})"


class DataManagerJson(Json):
    """JSON datatype for data manager "bundle" outputs.

    Records which data tables a bundle dataset represents in the
    ``data_tables`` metadata element, so bundles can later be matched
    against reference-data select parameters.
    """

    file_ext = "data_manager_json"
    # Registered via Galaxy's metadata machinery at class-definition time;
    # holds the parsed "data_tables" mapping from the dataset's JSON payload.
    MetadataElement(
        name="data_tables", default=None, desc="Data tables represented by this dataset", readonly=True, visible=True
    )

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd):
        """Populate metadata: run base JSON metadata setting, then copy the
        top-level ``data_tables`` key out of the dataset file.

        NOTE(review): raises KeyError/JSONDecodeError if the file is not valid
        JSON with a ``data_tables`` key — assumes upstream guarantees this.
        """
        super().set_meta(dataset=dataset, overwrite=overwrite, **kwd)
        with open(dataset.get_file_name()) as fh:
            data_tables = json.load(fh)["data_tables"]
        dataset.metadata.data_tables = data_tables


class ExpressionJson(Json):
"""Represents the non-data input or output to a tool or workflow."""

Expand Down
23 changes: 14 additions & 9 deletions lib/galaxy/managers/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,23 +901,24 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)

for input in input_params.values():
if input.name in param_values:
input_value = param_values[input.name]
if input.type == "repeat":
for i in range(len(param_values[input.name])):
rval.extend(inputs_recursive(input.inputs, param_values[input.name][i], depth=depth + 1))
for i in range(len(input_value)):
rval.extend(inputs_recursive(input.inputs, input_value[i], depth=depth + 1))
elif input.type == "section":
# Get the value of the current Section parameter
rval.append(dict(text=input.name, depth=depth))
rval.extend(
inputs_recursive(
input.inputs,
param_values[input.name],
input_value,
depth=depth + 1,
upgrade_messages=upgrade_messages.get(input.name),
)
)
elif input.type == "conditional":
try:
current_case = param_values[input.name]["__current_case__"]
current_case = input_value["__current_case__"]
is_valid = True
except Exception:
current_case = None
Expand All @@ -929,7 +930,7 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
rval.extend(
inputs_recursive(
input.cases[current_case].inputs,
param_values[input.name],
input_value,
depth=depth + 1,
upgrade_messages=upgrade_messages.get(input.name),
)
Expand All @@ -948,12 +949,16 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
dict(
text=input.group_title(param_values),
depth=depth,
value=f"{len(param_values[input.name])} uploaded datasets",
value=f"{len(input_value)} uploaded datasets",
)
)
elif input.type == "data" or input.type == "data_collection":
elif (
input.type == "data"
or input.type == "data_collection"
or isinstance(input_value, model.HistoryDatasetAssociation)
):
value = []
for element in listify(param_values[input.name]):
for element in listify(input_value):
element_id = element.id
if isinstance(element, model.HistoryDatasetAssociation):
hda = element
Expand All @@ -977,7 +982,7 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
dict(
text=label,
depth=depth,
value=input.value_to_display_text(param_values[input.name]),
value=input.value_to_display_text(input_value),
notes=upgrade_messages.get(input.name, ""),
)
)
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/managers/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@
DataCollectionToolParameter,
DataToolParameter,
RuntimeValue,
workflow_building_modes,
)
from galaxy.tools.parameters.workflow_building_modes import workflow_building_modes
from galaxy.util.hash_util import md5_hash_str
from galaxy.util.json import (
safe_dumps,
Expand Down
21 changes: 21 additions & 0 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,27 @@ def __init__(self, email=None, password=None, username=None):
self.active = False
self.username = username

def get_user_data_tables(self, data_table: str):
    """Return this user's data manager bundle datasets referencing ``data_table``.

    Selects non-deleted ``data_manager_json`` HDAs in an ``ok`` state owned by
    this user whose metadata mentions ``data_table``, ordered by HDA id.

    :param data_table: name of the data table to look for in the HDA metadata.
    :returns: list of matching HistoryDatasetAssociation objects.
    """
    session = object_session(self)
    assert session
    metadata_select = (
        select(HistoryDatasetAssociation)
        .join(Dataset)
        .join(History)
        .where(
            HistoryDatasetAssociation.deleted == false(),
            HistoryDatasetAssociation.extension == "data_manager_json",
            History.user_id == self.id,
            Dataset.state == "ok",
            # Excludes data manager runs that actually populated tables.
            # TODO: maybe track this formally by creating a different datatype for bundles?
            Dataset.total_size != Dataset.file_size,
            # Cheap containment check on the serialized metadata JSON;
            # may over-match if the table name appears in unrelated metadata.
            HistoryDatasetAssociation._metadata.contains(data_table),
        )
        .order_by(HistoryDatasetAssociation.id)
    )
    return session.execute(metadata_select).scalars().all()

@property
def extra_preferences(self):
data = defaultdict(lambda: None)
Expand Down
11 changes: 6 additions & 5 deletions lib/galaxy/model/custom_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,11 +343,12 @@ class MetadataType(JSONType):
def process_bind_param(self, value, dialect):
    """Serialize a metadata value for storage in the database.

    If a size cap is configured, oversized values are dropped (with a
    warning) before encoding; the size filter only applies to dict-like
    values, since non-mapping metadata payloads have no keys to prune.

    :param value: metadata value (typically a dict, may be any JSON-able object).
    :param dialect: SQLAlchemy dialect (unused, required by the TypeDecorator API).
    :returns: JSON-encoded bytes, or None if ``value`` is None.
    """
    if value is not None:
        if MAX_METADATA_VALUE_SIZE is not None:
            # Only mappings can have individual keys pruned; other payloads
            # (e.g. lists from bundle metadata) are encoded as-is.
            if hasattr(value, "items"):
                # list() snapshot: we delete keys while iterating.
                for k, v in list(value.items()):
                    sz = total_size(v)
                    if sz > MAX_METADATA_VALUE_SIZE:
                        del value[k]
                        log.warning(f"Refusing to bind metadata key {k} due to size ({sz})")
        value = json_encoder.encode(value).encode()
    return value

Expand Down
6 changes: 5 additions & 1 deletion lib/galaxy/schema/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Dict,
List,
Optional,
Union,
)

from pydantic import (
Expand All @@ -21,6 +22,7 @@
from galaxy.schema.schema import (
DataItemSourceType,
EncodedDataItemSourceId,
EncodedJobParameterHistoryItem,
JobMetricCollection,
JobState,
JobSummary,
Expand Down Expand Up @@ -221,7 +223,9 @@ class JobParameter(Model):
title="Depth",
description="The depth of the job parameter.",
)
value: Optional[Any] = Field(default=None, title="Value", description="The values of the job parameter")
value: Optional[Union[List[EncodedJobParameterHistoryItem], float, int, bool, str]] = Field(
default=None, title="Value", description="The values of the job parameter", union_mode="left_to_right"
)
notes: Optional[str] = Field(default=None, title="Notes", description="Notes associated with the job parameter.")


Expand Down
5 changes: 5 additions & 0 deletions lib/galaxy/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1965,6 +1965,11 @@ class EncodedDataItemSourceId(Model):
)


class EncodedJobParameterHistoryItem(EncodedDataItemSourceId):
    """A history item (dataset/collection) referenced by a job parameter value."""

    # History item id (hid); None when the item has no position in a history.
    hid: Optional[int] = None
    name: str


class DatasetJobInfo(DatasetSourceId):
uuid: UuidField

Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tool_util/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ def handle_found_index_file(self, filename):

# This method is used in tools, so need to keep its API stable
def get_fields(self) -> List[List[str]]:
return self.data
return self.data.copy()

def get_field(self, value):
rval = None
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@
SelectTagParameter,
SelectToolParameter,
ToolParameter,
workflow_building_modes,
)
from galaxy.tools.parameters.dataset_matcher import (
set_dataset_matcher_factory,
Expand All @@ -143,6 +142,7 @@
)
from galaxy.tools.parameters.input_translation import ToolInputTranslator
from galaxy.tools.parameters.meta import expand_meta_parameters
from galaxy.tools.parameters.workflow_building_modes import workflow_building_modes
from galaxy.tools.parameters.wrapped_json import json_wrap
from galaxy.tools.test import parse_tests
from galaxy.util import (
Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/tools/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
DataCollectionToolParameter,
DataToolParameter,
RuntimeValue,
SelectToolParameter,
)
from galaxy.tools.parameters.wrapped import (
LegacyUnprefixedDict,
Expand Down Expand Up @@ -283,6 +284,8 @@ def process_dataset(data, formats=None):
value.child_collection = new_collection
else:
value.collection = new_collection
elif isinstance(input, SelectToolParameter) and isinstance(value, HistoryDatasetAssociation):
input_datasets[prefixed_name] = value

tool.visit_inputs(param_values, visitor)
return input_datasets, all_permissions
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tools/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def __populate_input_dataset_wrappers(self, param_dict, input_datasets):
if wrapper:
param_dict[name] = wrapper
continue
if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)):
if not isinstance(param_dict_value, ToolParameterValueWrapper):
wrapper_kwds = dict(
datatypes_registry=self.app.datatypes_registry,
tool=self.tool,
Expand Down
Loading
Loading