From dd171fe34df25a7fdc206b9738a008c1dc2defca Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 13 Jun 2024 11:50:37 +0200 Subject: [PATCH 1/3] Add test tool for default ext handling in discovered collections --- .../functional/tools/discover_default_ext.xml | 52 +++++++++++++++++++ test/functional/tools/sample_tool_conf.xml | 1 + 2 files changed, 53 insertions(+) create mode 100644 test/functional/tools/discover_default_ext.xml diff --git a/test/functional/tools/discover_default_ext.xml b/test/functional/tools/discover_default_ext.xml new file mode 100644 index 000000000000..7f7c7d83cdd8 --- /dev/null +++ b/test/functional/tools/discover_default_ext.xml @@ -0,0 +1,52 @@ + + 1.txt; +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml index 2c074381db5e..a9277f4fab78 100644 --- a/test/functional/tools/sample_tool_conf.xml +++ b/test/functional/tools/sample_tool_conf.xml @@ -206,6 +206,7 @@ + From e5fca086712051d04568fb028be15c2901e86184 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 13 Jun 2024 12:15:00 +0200 Subject: [PATCH 2/3] Assign default `data` extension on discovered collection output They used to be persisted as null in the database prior to this change. --- lib/galaxy/model/store/discover.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index 679bd2aa38f7..17389c7b83eb 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -38,6 +38,10 @@ from galaxy.util.hash_util import HASH_NAME_MAP if TYPE_CHECKING: + from galaxy.job_execution.output_collect import ( + DatasetCollector, + ToolMetadataDatasetCollector, + ) from galaxy.model.store import ModelExportStore log = logging.getLogger(__name__) @@ -50,7 +54,7 @@ class MaxDiscoveredFilesExceededError(ValueError): pass -CollectorT = Any # TODO: setup an interface for these file collectors data classes. +CollectorT = Union["DatasetCollector", "ToolMetadataDatasetCollector"] class ModelPersistenceContext(metaclass=abc.ABCMeta): @@ -1056,19 +1060,21 @@ def name(self): return self.as_dict.get("name") @property - def dbkey(self): - return self.as_dict.get("dbkey", getattr(self.collector, "default_dbkey", "?")) + def dbkey(self) -> str: + return self.as_dict.get("dbkey", self.collector and self.collector.default_dbkey or "?") @property - def ext(self): - return self.as_dict.get("ext", getattr(self.collector, "default_ext", "data")) + def ext(self) -> str: + return self.as_dict.get("ext", self.collector and self.collector.default_ext or "data") @property - def visible(self): + def visible(self) -> bool: try: return self.as_dict["visible"].lower() == "visible" except KeyError: - return getattr(self.collector, "default_visible", True) + if self.collector and self.collector.default_visible is not None: + return self.collector.default_visible + return True @property def link_data(self): From 6551f31671b51e664ca4bd2b0c1dd3e716922d17 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 13 Jun 2024 12:52:48 +0200 Subject: [PATCH 3/3] Implement default format on collection level --- .../tool_util/parser/output_collection_def.py | 8 +++++++- lib/galaxy/tool_util/xsd/galaxy.xsd | 10 ++++++---- .../app/tools/test_collect_primary_datasets.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/tool_util/parser/output_collection_def.py b/lib/galaxy/tool_util/parser/output_collection_def.py index 4d16cb5d3539..b7929ebe1665 100644 --- a/lib/galaxy/tool_util/parser/output_collection_def.py +++ b/lib/galaxy/tool_util/parser/output_collection_def.py @@ -34,7 +34,13 @@ def dataset_collector_descriptions_from_elem(elem, legacy=True): if num_discover_dataset_blocks == 0 and legacy: collectors = [DEFAULT_DATASET_COLLECTOR_DESCRIPTION] else: - collectors = [dataset_collection_description(**e.attrib) for e in primary_dataset_elems] + default_format = elem.attrib.get("format") + collectors = [] + for e in primary_dataset_elems: + description_attributes = e.attrib + if default_format and "format" not in description_attributes and "ext" not in description_attributes: + description_attributes["format"] = default_format + collectors.append(dataset_collection_description(**description_attributes)) return _validate_collectors(collectors) diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index bc1f18780a60..daebc29070ac 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -5360,11 +5360,13 @@ The default is ``galaxy.json``. - The short name for the output datatype. -The valid values for format can be found in + +(e.g. ``format="pdf"`` or ``format="fastqsanger"``). For collections this is the default +format for all included elements. Note that the format specified here is ignored for +discovered data sets on Galaxy versions prior to 24.0 and should be specified using the ```` tag set. + ]]> diff --git a/test/unit/app/tools/test_collect_primary_datasets.py b/test/unit/app/tools/test_collect_primary_datasets.py index 16f3c6aae13e..fb612853b4db 100644 --- a/test/unit/app/tools/test_collect_primary_datasets.py +++ b/test/unit/app/tools/test_collect_primary_datasets.py @@ -128,6 +128,20 @@ def test_collect_multiple_recurse_dict(self): created_hda_3 = datasets[DEFAULT_TOOL_OUTPUT]["test3"] assert_created_with_path(self.app.object_store, created_hda_3.dataset, path3) + def test_collect_collection_default_format(self): + self._replace_output_collectors( + """ + + """ + ) + self._setup_extra_file(subdir="subdir_for_name_discovery", filename="test1") + self._setup_extra_file(subdir="subdir_for_name_discovery", filename="test2") + + datasets = self._collect() + assert DEFAULT_TOOL_OUTPUT in datasets + for dataset in datasets[DEFAULT_TOOL_OUTPUT].values(): + assert dataset.ext == "abcdef" + def test_collect_sorted_reverse(self): self._replace_output_collectors( """