Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[24.0] Assign default data extension on discovered collection output #18389

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions lib/galaxy/model/store/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
from galaxy.util.hash_util import HASH_NAME_MAP

if TYPE_CHECKING:
from galaxy.job_execution.output_collect import (
DatasetCollector,
ToolMetadataDatasetCollector,
)
from galaxy.model.store import ModelExportStore

log = logging.getLogger(__name__)
Expand All @@ -50,7 +54,7 @@ class MaxDiscoveredFilesExceededError(ValueError):
pass


CollectorT = Any # TODO: setup an interface for these file collectors data classes.
CollectorT = Union["DatasetCollector", "ToolMetadataDatasetCollector"]


class ModelPersistenceContext(metaclass=abc.ABCMeta):
Expand Down Expand Up @@ -1056,19 +1060,21 @@ def name(self):
return self.as_dict.get("name")

@property
def dbkey(self):
return self.as_dict.get("dbkey", getattr(self.collector, "default_dbkey", "?"))
def dbkey(self) -> str:
return self.as_dict.get("dbkey", self.collector and self.collector.default_dbkey or "?")

@property
def ext(self):
return self.as_dict.get("ext", getattr(self.collector, "default_ext", "data"))
def ext(self) -> str:
return self.as_dict.get("ext", self.collector and self.collector.default_ext or "data")

@property
def visible(self):
def visible(self) -> bool:
try:
return self.as_dict["visible"].lower() == "visible"
except KeyError:
return getattr(self.collector, "default_visible", True)
if self.collector and self.collector.default_visible is not None:
return self.collector.default_visible
return True

@property
def link_data(self):
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/tool_util/parser/output_collection_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ def dataset_collector_descriptions_from_elem(elem, legacy=True):
if num_discover_dataset_blocks == 0 and legacy:
collectors = [DEFAULT_DATASET_COLLECTOR_DESCRIPTION]
else:
collectors = [dataset_collection_description(**e.attrib) for e in primary_dataset_elems]
default_format = elem.attrib.get("format")
collectors = []
for e in primary_dataset_elems:
description_attributes = e.attrib
if default_format and "format" not in description_attributes and "ext" not in description_attributes:
description_attributes["format"] = default_format
collectors.append(dataset_collection_description(**description_attributes))

return _validate_collectors(collectors)

Expand Down
10 changes: 6 additions & 4 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -5360,11 +5360,13 @@ The default is ``galaxy.json``.
<xs:attributeGroup name="OutputCommon">
<xs:attribute name="format" type="xs:string">
<xs:annotation>
<xs:documentation xml:lang="en">The short name for the output datatype.
The valid values for format can be found in
<xs:documentation xml:lang="en"><![CDATA[
The short name for the output datatype. The valid values for format can be found in
[/config/datatypes_conf.xml.sample](https://github.com/galaxyproject/galaxy/blob/dev/config/datatypes_conf.xml.sample)
(e.g. ``format="pdf"`` or ``format="fastqsanger"``). For collections this is the default format for all included
elements. Note that the format specified here is ignored for discovered data sets.</xs:documentation>
(e.g. ``format="pdf"`` or ``format="fastqsanger"``). For collections this is the default
format for all included elements. Note that on Galaxy versions prior to 24.0 the format specified here
is ignored for discovered datasets and should instead be specified using the ``<discover_datasets>`` tag.
]]></xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="format_source" type="xs:string">
Expand Down
52 changes: 52 additions & 0 deletions test/functional/tools/discover_default_ext.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<tool id="discover_default_ext" name="discover_default_ext" version="0.1.0">
    <!--
        Test tool for the datatype assigned to discovered collection elements.
        The command writes a single file, 1.txt; every output collection below
        discovers that same file with a different combination of a collection
        level format and a discover_datasets level format.
    -->
    <command><![CDATA[
        echo 1 > 1.txt;
    ]]></command>
    <inputs />
    <outputs>
        <!-- Pattern captures the extension from the file name; the test expects "txt" rather than the collection's "fasta". -->
        <collection name="collection_with_default_ext" type="list" label="with default format" format="fasta">
            <discover_datasets pattern="__name_and_ext__" />
        </collection>
        <!-- discover_datasets declares its own format; the test expects that explicit "txt" rather than "fasta". -->
        <collection name="collection_default_ext_and_explicit_format" type="list" label="with default format and static element format" format="fasta">
            <discover_datasets pattern="__name__" format="txt" />
        </collection>
        <!-- Nothing overrides the collection format; the test expects the element to be "fasta". -->
        <collection name="collection_default_ext_used" type="list" label="with default format and no override" format="fasta">
            <discover_datasets pattern="__name__" />
        </collection>
        <!-- No format declared anywhere; the test expects the generic "data" datatype. -->
        <collection name="collection_without_default_ext" type="list" label="without default ext, should be data">
            <discover_datasets pattern="__name__" />
        </collection>
    </outputs>
    <tests>
        <test expect_num_outputs="4">
            <!-- __name_and_ext__ splits "1.txt" into element name "1" and extension "txt". -->
            <output_collection name="collection_with_default_ext" type="list" count="1">
                <element name="1" ftype="txt">
                    <assert_contents>
                        <has_text text="1" />
                    </assert_contents>
                </element>
            </output_collection>
            <!-- __name__ keeps the full file name "1.txt" as the element name in the remaining cases. -->
            <output_collection name="collection_default_ext_and_explicit_format" type="list" count="1">
                <element name="1.txt" ftype="txt">
                    <assert_contents>
                        <has_text text="1" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="collection_default_ext_used" type="list" count="1">
                <element name="1.txt" ftype="fasta">
                    <assert_contents>
                        <has_text text="1" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="collection_without_default_ext" type="list" count="1">
                <element name="1.txt" ftype="data">
                    <assert_contents>
                        <has_text text="1" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
</tool>
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@
<tool file="collection_cat_group_tag.xml" />
<tool file="collection_cat_group_tag_multiple.xml" />
<tool file="discover_sort_by.xml" />
<tool file="discover_default_ext.xml" />
<tool file="expression_forty_two.xml" />
<tool file="expression_pick_larger_file.xml" />
<tool file="expression_parse_int.xml" />
Expand Down
14 changes: 14 additions & 0 deletions test/unit/app/tools/test_collect_primary_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,20 @@ def test_collect_multiple_recurse_dict(self):
created_hda_3 = datasets[DEFAULT_TOOL_OUTPUT]["test3"]
assert_created_with_path(self.app.object_store, created_hda_3.dataset, path3)

def test_collect_collection_default_format(self):
    """Check that discovered datasets take the ``format`` set on the parent collection.

    The collector definition puts ``format="abcdef"`` on the enclosing
    ``dataset_collection`` element only; the nested ``discover_datasets``
    element declares no format of its own.
    """
    collector_xml = (
        '<dataset_collection name="parent" format="abcdef">\n'
        '<discover_datasets pattern="__name__" directory="subdir_for_name_discovery" sort_by="reverse_filename" />\n'
        "</dataset_collection>"
    )
    self._replace_output_collectors(collector_xml)

    # Two files in the directory the collector is configured to scan.
    for filename in ("test1", "test2"):
        self._setup_extra_file(subdir="subdir_for_name_discovery", filename=filename)

    collected = self._collect()
    assert DEFAULT_TOOL_OUTPUT in collected
    discovered = collected[DEFAULT_TOOL_OUTPUT]
    for hda in discovered.values():
        assert hda.ext == "abcdef"

def test_collect_sorted_reverse(self):
self._replace_output_collectors(
"""<output>
Expand Down
Loading