Skip to content

Commit

Permalink
Merge pull request galaxyproject#18389 from mvdbeek/ensure_data_exten…
Browse files Browse the repository at this point in the history
…sion_if_no_extension_specified

[24.0] Assign default ``data`` extension on discovered collection output
  • Loading branch information
martenson authored Jun 18, 2024
2 parents f5610e7 + 6551f31 commit 2891489
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 12 deletions.
20 changes: 13 additions & 7 deletions lib/galaxy/model/store/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
from galaxy.util.hash_util import HASH_NAME_MAP

if TYPE_CHECKING:
from galaxy.job_execution.output_collect import (
DatasetCollector,
ToolMetadataDatasetCollector,
)
from galaxy.model.store import ModelExportStore

log = logging.getLogger(__name__)
Expand All @@ -50,7 +54,7 @@ class MaxDiscoveredFilesExceededError(ValueError):
pass


CollectorT = Any # TODO: setup an interface for these file collectors data classes.
CollectorT = Union["DatasetCollector", "ToolMetadataDatasetCollector"]


class ModelPersistenceContext(metaclass=abc.ABCMeta):
Expand Down Expand Up @@ -1056,19 +1060,21 @@ def name(self):
return self.as_dict.get("name")

@property
def dbkey(self):
return self.as_dict.get("dbkey", getattr(self.collector, "default_dbkey", "?"))
def dbkey(self) -> str:
return self.as_dict.get("dbkey", self.collector and self.collector.default_dbkey or "?")

@property
def ext(self):
return self.as_dict.get("ext", getattr(self.collector, "default_ext", "data"))
def ext(self) -> str:
return self.as_dict.get("ext", self.collector and self.collector.default_ext or "data")

@property
def visible(self):
def visible(self) -> bool:
try:
return self.as_dict["visible"].lower() == "visible"
except KeyError:
return getattr(self.collector, "default_visible", True)
if self.collector and self.collector.default_visible is not None:
return self.collector.default_visible
return True

@property
def link_data(self):
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/tool_util/parser/output_collection_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ def dataset_collector_descriptions_from_elem(elem, legacy=True):
if num_discover_dataset_blocks == 0 and legacy:
collectors = [DEFAULT_DATASET_COLLECTOR_DESCRIPTION]
else:
collectors = [dataset_collection_description(**e.attrib) for e in primary_dataset_elems]
default_format = elem.attrib.get("format")
collectors = []
for e in primary_dataset_elems:
description_attributes = e.attrib
if default_format and "format" not in description_attributes and "ext" not in description_attributes:
description_attributes["format"] = default_format
collectors.append(dataset_collection_description(**description_attributes))

return _validate_collectors(collectors)

Expand Down
10 changes: 6 additions & 4 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -5360,11 +5360,13 @@ The default is ``galaxy.json``.
<xs:attributeGroup name="OutputCommon">
<xs:attribute name="format" type="xs:string">
<xs:annotation>
<xs:documentation xml:lang="en">The short name for the output datatype.
The valid values for format can be found in
<xs:documentation xml:lang="en"><![CDATA[
The short name for the output datatype. The valid values for format can be found in
[/config/datatypes_conf.xml.sample](https://github.com/galaxyproject/galaxy/blob/dev/config/datatypes_conf.xml.sample)
(e.g. ``format="pdf"`` or ``format="fastqsanger"``). For collections this is the default format for all included
elements. Note that the format specified here is ignored for discovered data sets.</xs:documentation>
(e.g. ``format="pdf"`` or ``format="fastqsanger"``). For collections this is the default
format for all included elements. Note that the format specified here is ignored for
discovered data sets on Galaxy versions prior to 24.0 and should be specified using the ``<discover_datasets>`` tag set.
]]></xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="format_source" type="xs:string">
Expand Down
52 changes: 52 additions & 0 deletions test/functional/tools/discover_default_ext.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<tool id="discover_default_ext" name="discover_default_ext" version="0.1.0">
<command><![CDATA[
echo 1 > 1.txt;
]]></command>
<inputs />
<outputs>
<collection name="collection_with_default_ext" type="list" label="with default format" format="fasta">
<discover_datasets pattern="__name_and_ext__" />
</collection>
<collection name="collection_default_ext_and_explicit_format" type="list" label="with default format and static element format" format="fasta">
<discover_datasets pattern="__name__" format="txt" />
</collection>
<collection name="collection_default_ext_used" type="list" label="with default format and no override" format="fasta">
<discover_datasets pattern="__name__" />
</collection>
<collection name="collection_without_default_ext" type="list" label="without default ext, should be data">
<discover_datasets pattern="__name__" />
</collection>
</outputs>
<tests>
<test expect_num_outputs="4">
<output_collection name="collection_with_default_ext" type="list" count="1">
<element name="1" ftype="txt">
<assert_contents>
<has_text text="1" />
</assert_contents>
</element>
</output_collection>
<output_collection name="collection_default_ext_and_explicit_format" type="list" count="1">
<element name="1.txt" ftype="txt">
<assert_contents>
<has_text text="1" />
</assert_contents>
</element>
</output_collection>
<output_collection name="collection_default_ext_used" type="list" count="1">
<element name="1.txt" ftype="fasta">
<assert_contents>
<has_text text="1" />
</assert_contents>
</element>
</output_collection>
<output_collection name="collection_without_default_ext" type="list" count="1">
<element name="1.txt" ftype="data">
<assert_contents>
<has_text text="1" />
</assert_contents>
</element>
</output_collection>
</test>
</tests>
</tool>
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@
<tool file="collection_cat_group_tag.xml" />
<tool file="collection_cat_group_tag_multiple.xml" />
<tool file="discover_sort_by.xml" />
<tool file="discover_default_ext.xml" />
<tool file="expression_forty_two.xml" />
<tool file="expression_pick_larger_file.xml" />
<tool file="expression_parse_int.xml" />
Expand Down
14 changes: 14 additions & 0 deletions test/unit/app/tools/test_collect_primary_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,20 @@ def test_collect_multiple_recurse_dict(self):
created_hda_3 = datasets[DEFAULT_TOOL_OUTPUT]["test3"]
assert_created_with_path(self.app.object_store, created_hda_3.dataset, path3)

def test_collect_collection_default_format(self):
    """Check that discovered elements pick up the parent collection's ``format``.

    The ``discover_datasets`` tag used here declares no ``format``/``ext`` of
    its own, so every collected dataset is expected to report ``abcdef``, the
    value set on the enclosing ``dataset_collection``.
    """
    discovery_dir = "subdir_for_name_discovery"
    self._replace_output_collectors(
        """<dataset_collection name="parent" format="abcdef">
<discover_datasets pattern="__name__" directory="subdir_for_name_discovery" sort_by="reverse_filename" />
</dataset_collection>"""
    )
    # Two files so the assertion below covers more than a single element.
    for filename in ("test1", "test2"):
        self._setup_extra_file(subdir=discovery_dir, filename=filename)

    collected = self._collect()
    assert DEFAULT_TOOL_OUTPUT in collected
    for hda in collected[DEFAULT_TOOL_OUTPUT].values():
        assert hda.ext == "abcdef"

def test_collect_sorted_reverse(self):
self._replace_output_collectors(
"""<output>
Expand Down

0 comments on commit 2891489

Please sign in to comment.