Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow using tool data bundles as inputs to reference data select parameters #17435

Merged
merged 14 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion client/src/api/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4555,6 +4555,23 @@ export interface components {
*/
user_email?: string | null;
};
/** EncodedJobParameterHistoryItem */
EncodedJobParameterHistoryItem: {
/** Hid */
hid?: number | null;
/**
* Id
* @example 0123456789ABCDEF
*/
id: string;
/** Name */
name: string;
/**
* Source
* @description The source of this dataset, either `hda`, `ldda`, `hdca`, `dce` or `dc` depending on its origin.
*/
src: components["schemas"]["DataItemSourceType"];
};
/** ExportHistoryArchivePayload */
ExportHistoryArchivePayload: {
/**
Expand Down Expand Up @@ -7735,7 +7752,13 @@ export interface components {
* Value
* @description The values of the job parameter
*/
value?: Record<string, never> | null;
value?:
| components["schemas"]["EncodedJobParameterHistoryItem"][]
| number
| number
| boolean
| string
| null;
};
/**
* JobSourceType
Expand Down
17 changes: 17 additions & 0 deletions client/src/components/Tool/ToolForm.vue
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@
title="Attempt to re-use jobs with identical parameters?"
help="This may skip executing jobs that you have already run."
type="boolean" />
<FormSelect
v-if="formConfig.model_class === 'DataManagerTool'"
id="data_manager_mode"
v-model="dataManagerMode"
:options="bundleOptions"
title="Create dataset bundle instead of adding data table to loc file ?"></FormSelect>
</div>
</template>
<template v-slot:header-buttons>
Expand Down Expand Up @@ -120,13 +126,16 @@ import { getToolFormData, submitJob, updateToolFormData } from "./services";
import ToolCard from "./ToolCard";
import { allowCachedJobs } from "./utilities";
import FormSelect from "@/components/Form/Elements/FormSelect.vue";
export default {
components: {
ButtonSpinner,
LoadingSpan,
FormDisplay,
ToolCard,
FormElement,
FormSelect,
ToolEntryPoints,
ToolRecommendation,
Heading,
Expand Down Expand Up @@ -175,13 +184,18 @@ export default {
useCachedJobs: false,
useEmail: false,
useJobRemapping: false,
dataManagerMode: "populate",
entryPoints: [],
jobDef: {},
jobResponse: {},
validationInternal: null,
validationScrollTo: null,
currentVersion: this.version,
preferredObjectStoreId: null,
bundleOptions: [
{ label: "populate", value: "populate" },
{ label: "bundle", value: "bundle" },
],
};
},
computed: {
Expand Down Expand Up @@ -326,6 +340,9 @@ export default {
if (this.preferredObjectStoreId) {
jobDef.preferred_object_store_id = this.preferredObjectStoreId;
}
if (this.dataManagerMode === "bundle") {
jobDef.data_manager_mode = this.dataManagerMode;
}
console.debug("toolForm::onExecute()", jobDef);
const prevRoute = this.$route.fullPath;
submitJob(jobDef).then(
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
<datatype extension="d3_hierarchy" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="true"/>
<datatype extension="imgt.json" type="galaxy.datatypes.text:ImgtJson" mimetype="application/json" display_in_upload="True"/>
<datatype extension="geojson" type="galaxy.datatypes.text:GeoJson" mimetype="application/json" display_in_upload="True"/>
<datatype extension="data_manager_json" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="false"/>
<datatype extension="data_manager_json" type="galaxy.datatypes.text:DataManagerJson" mimetype="application/json" subclass="true" display_in_upload="false"/>
<datatype extension="dbn" type="galaxy.datatypes.sequence:DotBracket" display_in_upload="true" description="Dot-Bracket format is a text-based format for storing both an RNA sequence and its corresponding 2D structure." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Dbn"/>
<datatype extension="fai" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" subclass="true" description="A Fasta Index File is a text file consisting of lines each with five TAB-delimited columns : Name, Length, offset, linebases, Linewidth" description_url="http://www.htslib.org/doc/faidx.html"/>
<datatype extension="fasta" auto_compressed_types="gz" type="galaxy.datatypes.sequence:Fasta" display_in_upload="true" description="A sequence in FASTA format consists of a single-line description, followed by lines of sequence data. The first character of the description line is a greater-than ('&gt;') symbol in the first column. All lines should be shorter than 80 characters." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Fasta">
Expand Down
13 changes: 13 additions & 0 deletions lib/galaxy/datatypes/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,19 @@ def display_peek(self, dataset: DatasetProtocol) -> str:
return f"JSON file ({nice_size(dataset.get_size())})"


class DataManagerJson(Json):
    """JSON datatype for data manager "bundle" outputs.

    Records which data tables a bundle dataset represents in the
    ``data_tables`` metadata element, so bundles can later be matched
    against reference-data select parameters.
    """

    file_ext = "data_manager_json"
    # Registered via Galaxy's metadata machinery at class-definition time;
    # holds the parsed "data_tables" mapping from the dataset's JSON payload.
    MetadataElement(
        name="data_tables", default=None, desc="Data tables represented by this dataset", readonly=True, visible=True
    )

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd):
        """Populate metadata: run base JSON metadata setting, then copy the
        top-level ``data_tables`` key out of the dataset file.

        NOTE(review): raises KeyError/JSONDecodeError if the file is not valid
        JSON with a ``data_tables`` key — assumes upstream guarantees this.
        """
        super().set_meta(dataset=dataset, overwrite=overwrite, **kwd)
        with open(dataset.get_file_name()) as fh:
            data_tables = json.load(fh)["data_tables"]
        dataset.metadata.data_tables = data_tables


class ExpressionJson(Json):
"""Represents the non-data input or output to a tool or workflow."""

Expand Down
23 changes: 14 additions & 9 deletions lib/galaxy/managers/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,23 +901,24 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)

for input in input_params.values():
if input.name in param_values:
input_value = param_values[input.name]
if input.type == "repeat":
for i in range(len(param_values[input.name])):
rval.extend(inputs_recursive(input.inputs, param_values[input.name][i], depth=depth + 1))
for i in range(len(input_value)):
rval.extend(inputs_recursive(input.inputs, input_value[i], depth=depth + 1))
elif input.type == "section":
# Get the value of the current Section parameter
rval.append(dict(text=input.name, depth=depth))
rval.extend(
inputs_recursive(
input.inputs,
param_values[input.name],
input_value,
depth=depth + 1,
upgrade_messages=upgrade_messages.get(input.name),
)
)
elif input.type == "conditional":
try:
current_case = param_values[input.name]["__current_case__"]
current_case = input_value["__current_case__"]
is_valid = True
except Exception:
current_case = None
Expand All @@ -929,7 +930,7 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
rval.extend(
inputs_recursive(
input.cases[current_case].inputs,
param_values[input.name],
input_value,
depth=depth + 1,
upgrade_messages=upgrade_messages.get(input.name),
)
Expand All @@ -948,12 +949,16 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
dict(
text=input.group_title(param_values),
depth=depth,
value=f"{len(param_values[input.name])} uploaded datasets",
value=f"{len(input_value)} uploaded datasets",
)
)
elif input.type == "data" or input.type == "data_collection":
elif (
input.type == "data"
or input.type == "data_collection"
or isinstance(input_value, model.HistoryDatasetAssociation)
):
value = []
for element in listify(param_values[input.name]):
for element in listify(input_value):
element_id = element.id
if isinstance(element, model.HistoryDatasetAssociation):
hda = element
Expand All @@ -977,7 +982,7 @@ def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None)
dict(
text=label,
depth=depth,
value=input.value_to_display_text(param_values[input.name]),
value=input.value_to_display_text(input_value),
notes=upgrade_messages.get(input.name, ""),
)
)
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/managers/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@
DataCollectionToolParameter,
DataToolParameter,
RuntimeValue,
workflow_building_modes,
)
from galaxy.tools.parameters.workflow_building_modes import workflow_building_modes
from galaxy.util.hash_util import md5_hash_str
from galaxy.util.json import (
safe_dumps,
Expand Down
21 changes: 21 additions & 0 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,27 @@ def __init__(self, email=None, password=None, username=None):
self.active = False
self.username = username

def get_user_data_tables(self, data_table: str):
    """Return this user's data manager bundle datasets referencing ``data_table``.

    Selects non-deleted ``data_manager_json`` HDAs in an ``ok`` state owned by
    this user whose metadata mentions ``data_table``, ordered by HDA id.

    :param data_table: name of the data table to look for in the HDA metadata.
    :returns: list of matching HistoryDatasetAssociation objects.
    """
    session = object_session(self)
    assert session
    metadata_select = (
        select(HistoryDatasetAssociation)
        .join(Dataset)
        .join(History)
        .where(
            HistoryDatasetAssociation.deleted == false(),
            HistoryDatasetAssociation.extension == "data_manager_json",
            History.user_id == self.id,
            Dataset.state == "ok",
            # Excludes data manager runs that actually populated tables.
            # TODO: maybe track this formally by creating a different datatype for bundles?
            Dataset.total_size != Dataset.file_size,
            # Cheap containment check on the serialized metadata JSON;
            # may over-match if the table name appears in unrelated metadata.
            HistoryDatasetAssociation._metadata.contains(data_table),
        )
        .order_by(HistoryDatasetAssociation.id)
    )
    return session.execute(metadata_select).scalars().all()

@property
def extra_preferences(self):
data = defaultdict(lambda: None)
Expand Down
11 changes: 6 additions & 5 deletions lib/galaxy/model/custom_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,11 +343,12 @@ class MetadataType(JSONType):
def process_bind_param(self, value, dialect):
    """Serialize a metadata value for storage in the database.

    If a size cap is configured, oversized values are dropped (with a
    warning) before encoding; the size filter only applies to dict-like
    values, since non-mapping metadata payloads have no keys to prune.

    :param value: metadata value (typically a dict, may be any JSON-able object).
    :param dialect: SQLAlchemy dialect (unused, required by the TypeDecorator API).
    :returns: JSON-encoded bytes, or None if ``value`` is None.
    """
    if value is not None:
        if MAX_METADATA_VALUE_SIZE is not None:
            # Only mappings can have individual keys pruned; other payloads
            # (e.g. lists from bundle metadata) are encoded as-is.
            if hasattr(value, "items"):
                # list() snapshot: we delete keys while iterating.
                for k, v in list(value.items()):
                    sz = total_size(v)
                    if sz > MAX_METADATA_VALUE_SIZE:
                        del value[k]
                        log.warning(f"Refusing to bind metadata key {k} due to size ({sz})")
        value = json_encoder.encode(value).encode()
    return value

Expand Down
6 changes: 5 additions & 1 deletion lib/galaxy/schema/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Dict,
List,
Optional,
Union,
)

from pydantic import (
Expand All @@ -21,6 +22,7 @@
from galaxy.schema.schema import (
DataItemSourceType,
EncodedDataItemSourceId,
EncodedJobParameterHistoryItem,
JobMetricCollection,
JobState,
JobSummary,
Expand Down Expand Up @@ -221,7 +223,9 @@ class JobParameter(Model):
title="Depth",
description="The depth of the job parameter.",
)
value: Optional[Any] = Field(default=None, title="Value", description="The values of the job parameter")
value: Optional[Union[List[EncodedJobParameterHistoryItem], float, int, bool, str]] = Field(
default=None, title="Value", description="The values of the job parameter", union_mode="left_to_right"
)
notes: Optional[str] = Field(default=None, title="Notes", description="Notes associated with the job parameter.")


Expand Down
5 changes: 5 additions & 0 deletions lib/galaxy/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1965,6 +1965,11 @@ class EncodedDataItemSourceId(Model):
)


class EncodedJobParameterHistoryItem(EncodedDataItemSourceId):
    """A history item (dataset/collection) referenced by a job parameter value."""

    # History item id (hid); None when the item has no position in a history.
    hid: Optional[int] = None
    name: str


class DatasetJobInfo(DatasetSourceId):
uuid: UuidField

Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tool_util/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ def handle_found_index_file(self, filename):

# This method is used in tools, so need to keep its API stable
def get_fields(self) -> List[List[str]]:
return self.data
return self.data.copy()

def get_field(self, value):
rval = None
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@
SelectTagParameter,
SelectToolParameter,
ToolParameter,
workflow_building_modes,
)
from galaxy.tools.parameters.dataset_matcher import (
set_dataset_matcher_factory,
Expand All @@ -143,6 +142,7 @@
)
from galaxy.tools.parameters.input_translation import ToolInputTranslator
from galaxy.tools.parameters.meta import expand_meta_parameters
from galaxy.tools.parameters.workflow_building_modes import workflow_building_modes
from galaxy.tools.parameters.wrapped_json import json_wrap
from galaxy.tools.test import parse_tests
from galaxy.util import (
Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/tools/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
DataCollectionToolParameter,
DataToolParameter,
RuntimeValue,
SelectToolParameter,
)
from galaxy.tools.parameters.wrapped import (
LegacyUnprefixedDict,
Expand Down Expand Up @@ -283,6 +284,8 @@ def process_dataset(data, formats=None):
value.child_collection = new_collection
else:
value.collection = new_collection
elif isinstance(input, SelectToolParameter) and isinstance(value, HistoryDatasetAssociation):
input_datasets[prefixed_name] = value

tool.visit_inputs(param_values, visitor)
return input_datasets, all_permissions
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tools/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def __populate_input_dataset_wrappers(self, param_dict, input_datasets):
if wrapper:
param_dict[name] = wrapper
continue
if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)):
if not isinstance(param_dict_value, ToolParameterValueWrapper):
wrapper_kwds = dict(
datatypes_registry=self.app.datatypes_registry,
tool=self.tool,
Expand Down
Loading
Loading