diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index 0e026b5a7b0a..6c34faa0f8c9 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -975,6 +975,7 @@ + @@ -1033,6 +1034,7 @@ + diff --git a/lib/galaxy/datatypes/constructive_solid_geometry.py b/lib/galaxy/datatypes/constructive_solid_geometry.py index 73973abb65a5..9ef0e84ae9ea 100644 --- a/lib/galaxy/datatypes/constructive_solid_geometry.py +++ b/lib/galaxy/datatypes/constructive_solid_geometry.py @@ -5,6 +5,7 @@ """ import abc +import re from typing import ( List, Optional, @@ -29,6 +30,7 @@ FilePrefix, ) from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes.xml import GenericXml if TYPE_CHECKING: from io import TextIOBase @@ -189,9 +191,6 @@ class Vtk: Binary data must be placed into the file immediately after the newline ('\\n') character from the previous ASCII keyword and parameter sequence. - - TODO: only legacy formats are currently supported and support for XML formats - should be added. """ subtype = "" @@ -813,3 +812,59 @@ def get_next_line(fh): # Discard the rest of the line fh.readline() return line.strip() + + +class VtkXml(GenericXml): + """Format for defining VTK (XML based) and its sub-datatypes. https://docs.vtk.org/en/latest/design_documents/VTKFileFormats.html""" + + edam_format = "edam:format_2332" + file_ext = "vtkxml" + + # The same MetadataElements are also available for legacy VTK datatypes. + MetadataElement(name="vtk_version", default=None, desc="Vtk version", readonly=True, optional=True, visible=True) + MetadataElement(name="file_format", default=None, desc="File format", readonly=True, optional=True, visible=True) + MetadataElement(name="dataset_type", default=None, desc="Dataset type", readonly=True, optional=True, visible=True) + + def extract_version(self, line: str) -> str: + match = re.search(r'version="([^"]+)"', line) + if match: + return match.group(1) + return "?" + + def extract_type(self, line: str) -> str: + match = re.search(r'type="([^"]+)"', line) + if match: + return match.group(1) + return "?" + + def set_meta(self, dataset: DatasetProtocol, **kwd) -> None: + dataset.metadata.file_format = "XML" + with open(dataset.get_file_name(), errors="ignore") as file: + # first line might be the xml header, so we take two + first_line = file.readline() + if first_line.startswith(" None: + """Set the peek and blurb text for VTK dataset files.""" + if not dataset.dataset.purged: + dataset.peek = "VTK Dataset file" + dataset.blurb = f"type {dataset.metadata.dataset_type} version {dataset.metadata.vtk_version}" + else: + dataset.peek = "File does not exist" + dataset.blurb = "File purged from disk" + + def sniff_prefix(self, file_prefix: FilePrefix) -> bool: + """Check for the key string 'VTKFile' to determine if this is a VTK dataset file. + + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('data.vtu') + >>> VtkXml().sniff(fname) + True + >>> fname = get_test_fname('1.phyloxml') + >>> VtkXml().sniff(fname) + False + """ + return self._has_root_element_in_prefix(file_prefix, "VTKFile") diff --git a/lib/galaxy/datatypes/test/data.vtu b/lib/galaxy/datatypes/test/data.vtu new file mode 100644 index 000000000000..f9483ba6c0f5 --- /dev/null +++ b/lib/galaxy/datatypes/test/data.vtu @@ -0,0 +1,41 @@ + + + + + + + AQAAAACAAACoAAAAFgAAAA==eJxjYCAKHCBAE1JPqvno4gwABYYGwQ== + + + AQAAAACAAACoAAAAOwAAAA==eJwtxRECgDAAAMBJkDQomQySBk2SYNKg/v+kQXdyIfySJ2fP3r34cHTx6tObqy/fbn7c/frzAL7YAwQ= + + + + + + + AQAAAACAAAD4AQAAYwAAAA==eJxjYAABOwcw1RAIoeEAXVzlAIo4gx929XBxqHq4fjT1GOLo6j1xqPdEVQ+31wWHe2Di6OajqccQRzffDlU9hjiaerh5Jmjm+KGKY4QDurgnqjiGv9DVu+BQbwcXBwAoLCke + + + 42.426406871 + + + 94.339811321 + + + + + + + AQAAAACAAACoAAAAMQAAAA==eJwtxUECQCAAALAQRSLx/7d2sF0Wwm/y7MXRqzcnZ+8+XHy6+vLt5sfdrz8PMMgA0w== + + + AQAAAACAAACoAAAAMgAAAA==eJwtxbcBgCAAADArFsAC8v+rDiRLxqGbPHvx6uDNuw+fjk7Ovnz78evi6s/NPzgAAOg= + + + AQAAAACAAAAVAAAACwAAAA==eJxjZMQCAAD8ABY= + + + + + diff --git a/test/unit/data/datatypes/test_constructive_solid_geometry.py b/test/unit/data/datatypes/test_constructive_solid_geometry.py new file mode 100644 index 000000000000..bfaa95ed804d --- /dev/null +++ b/test/unit/data/datatypes/test_constructive_solid_geometry.py @@ -0,0 +1,16 @@ +from galaxy.datatypes.constructive_solid_geometry import VtkXml +from .util import ( + get_dataset, + MockDatasetDataset, +) + + +def test_vtkXml_set_meta(): + vtkXml = VtkXml() + with get_dataset("data.vtu") as dataset: + dataset.dataset = MockDatasetDataset(dataset.get_file_name()) + vtkXml.set_meta(dataset) + + assert dataset.metadata.vtk_version == "0.1" + assert dataset.metadata.file_format == "XML" + assert dataset.metadata.dataset_type == "UnstructuredGrid"