From 6d4a56493670ffe18d45055fc8dda81f0ff535fe Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 5 Dec 2023 12:12:05 +0100 Subject: [PATCH 1/4] improve stream checking in Bruker --- .../brukertiffimagingextractor.py | 63 ++++++++++++++----- tests/test_brukertiffimagingextactor.py | 6 +- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py index e307d826..328869b8 100644 --- a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py +++ b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py @@ -65,9 +65,16 @@ def _determine_imaging_is_volumetric(folder_path: PathType) -> bool: is_volumetric: bool True if the imaging is volumetric (multiplane), False otherwise (single plane). """ - xml_root = _parse_xml(folder_path=folder_path) - z_device_element = xml_root.find(".//PVStateValue[@key='zDevice']") - is_volumetric = bool(int(z_device_element.attrib["value"])) + folder_path = Path(folder_path) + xml_file_path = folder_path / f"{folder_path.name}.xml" + assert xml_file_path.is_file(), f"The XML configuration file is not found at '{xml_file_path}'." 
+ + is_volumetric = False + with open(xml_file_path, "r") as xml_file: + for event, elem in ElementTree.iterparse(xml_file, events=("start",)): + if elem.tag == "PVStateValue" and elem.attrib.get("key") == "zDevice": + is_volumetric = bool(int(elem.attrib["value"])) + break # Stop parsing as we've found the required element return is_volumetric @@ -107,25 +114,53 @@ def get_streams(cls, folder_path: PathType) -> dict: """ natsort = get_package(package_name="natsort", installation_instructions="pip install natsort") - xml_root = _parse_xml(folder_path=folder_path) - channel_names = [file.attrib["channelName"] for file in xml_root.findall(".//File")] - unique_channel_names = natsort.natsorted(set(channel_names)) + folder_path = Path(folder_path) + xml_file_path = folder_path / f"{folder_path.name}.xml" + assert xml_file_path.is_file(), f"The XML configuration file is not found at '{folder_path}'." + + channel_names = set() + channel_ids = set() + file_names = [] + + # Parse the XML file iteratively to find the first Sequence element + first_sequence_element = None + with open(xml_file_path, "r") as xml_file: + for _, elem in ElementTree.iterparse(xml_file, events=("end",)): + if elem.tag == "Sequence": + first_sequence_element = elem + break + + if first_sequence_element is None: + raise ValueError("No Sequence element found in the XML configuration file. 
Can't get streams") + + # Then in the first Sequence we find all the Frame elements + if first_sequence_element is not None: + # Iterate over all Frame elements within the first Sequence + frame_elements = first_sequence_element.findall(".//Frame") + for frame_elemenet in frame_elements: + # Iterate over all File elements within each Frame + for file_elem in frame_elemenet.findall("File"): + channel_names.add(file_elem.attrib["channelName"]) + channel_ids.add(file_elem.attrib["channel"]) + file_names.append(file_elem.attrib["filename"]) + + unique_channel_names = natsort.natsorted(channel_names) + unique_channel_ids = natsort.natsorted(channel_ids) + streams = dict(channel_streams=unique_channel_names) streams["plane_streams"] = dict() + if not _determine_imaging_is_volumetric(folder_path=folder_path): return streams - # The "channelName" can be any name that the experimenter sets (e.g. 'Ch1', 'Ch2', 'Green', 'Red') - # Use the identifier of a channel "channel" (e.g. 1, 2) to match it to the file name - channel_ids = [file.attrib["channel"] for file in xml_root.findall(".//File")] - unique_channel_ids = natsort.natsorted(set(channel_ids)) + for channel_id, channel_name in zip(unique_channel_ids, unique_channel_names): plane_naming_pattern = rf"(?P<stream_name>Ch{channel_id}_\d+)" - plane_stream_names = [ - re.search(plane_naming_pattern, file.attrib["filename"])["stream_name"] - for file in xml_root.findall(f".//File") - ] + regular_expression_matches = [re.search(plane_naming_pattern, filename) for filename in file_names] + plane_stream_names = [matches["stream_name"] for matches in regular_expression_matches if matches] + unique_plane_stream_names = natsort.natsorted(set(plane_stream_names)) streams["plane_streams"][channel_name] = unique_plane_stream_names + return streams def __init__( diff --git a/tests/test_brukertiffimagingextactor.py b/tests/test_brukertiffimagingextactor.py index a1e5361b..05b28aa1 100644 --- a/tests/test_brukertiffimagingextactor.py +++ 
b/tests/test_brukertiffimagingextactor.py @@ -133,9 +133,9 @@ def setUpClass(cls): cls.test_video[..., 1] = second_plane_video def test_stream_names(self): - self.assertEqual( - BrukerTiffMultiPlaneImagingExtractor.get_streams(folder_path=self.folder_path), self.available_streams - ) + found_streams = BrukerTiffMultiPlaneImagingExtractor.get_streams(folder_path=self.folder_path) + expected_streams = self.available_streams + self.assertEqual(found_streams, expected_streams) def test_brukertiffextractor_image_size(self): self.assertEqual(self.extractor.get_image_size(), (512, 512, 2)) From a732796d693f10536d389d7b2a4ad6d9647005c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Dec 2023 11:18:42 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_brukertiffimagingextactor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_brukertiffimagingextactor.py b/tests/test_brukertiffimagingextactor.py index 05b28aa1..a975a3c1 100644 --- a/tests/test_brukertiffimagingextactor.py +++ b/tests/test_brukertiffimagingextactor.py @@ -134,8 +134,8 @@ def setUpClass(cls): def test_stream_names(self): found_streams = BrukerTiffMultiPlaneImagingExtractor.get_streams(folder_path=self.folder_path) - expected_streams = self.available_streams - self.assertEqual(found_streams, expected_streams) + expected_streams = self.available_streams + self.assertEqual(found_streams, expected_streams) def test_brukertiffextractor_image_size(self): self.assertEqual(self.extractor.get_image_size(), (512, 512, 2)) From 02e03cfebe49258d8be1255d9795eb274ef4124f Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 5 Dec 2023 12:27:22 +0100 Subject: [PATCH 3/4] windows no context manager --- .../brukertiffimagingextractor.py | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git 
a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py index 328869b8..91197214 100644 --- a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py +++ b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py @@ -70,11 +70,10 @@ def _determine_imaging_is_volumetric(folder_path: PathType) -> bool: assert xml_file_path.is_file(), f"The XML configuration file is not found at '{xml_file_path}'." is_volumetric = False - with open(xml_file_path, "r") as xml_file: - for event, elem in ElementTree.iterparse(xml_file, events=("start",)): - if elem.tag == "PVStateValue" and elem.attrib.get("key") == "zDevice": - is_volumetric = bool(int(elem.attrib["value"])) - break # Stop parsing as we've found the required element + for event, elem in ElementTree.iterparse(xml_file_path, events=("start",)): + if elem.tag == "PVStateValue" and elem.attrib.get("key") == "zDevice": + is_volumetric = bool(int(elem.attrib["value"])) + break # Stop parsing as we've found the required element return is_volumetric @@ -124,25 +123,24 @@ def get_streams(cls, folder_path: PathType) -> dict: # Parse the XML file iteratively to find the first Sequence element first_sequence_element = None - with open(xml_file_path, "r") as xml_file: - for _, elem in ElementTree.iterparse(xml_file, events=("end",)): - if elem.tag == "Sequence": - first_sequence_element = elem - break - - if first_sequence_element is None: - raise ValueError("No Sequence element found in the XML configuration file. 
Can't get streams") - - # Then in the first Sequence we find all the Frame elements - if first_sequence_element is not None: - # Iterate over all Frame elements within the first Sequence - frame_elements = first_sequence_element.findall(".//Frame") - for frame_elemenet in frame_elements: - # Iterate over all File elements within each Frame - for file_elem in frame_elemenet.findall("File"): - channel_names.add(file_elem.attrib["channelName"]) - channel_ids.add(file_elem.attrib["channel"]) - file_names.append(file_elem.attrib["filename"]) + for _, elem in ElementTree.iterparse(xml_file_path, events=("end",)): + if elem.tag == "Sequence": + first_sequence_element = elem + break + + if first_sequence_element is None: + raise ValueError("No Sequence element found in the XML configuration file. Can't get streams") + + # Then in the first Sequence we find all the Frame elements + if first_sequence_element is not None: + # Iterate over all Frame elements within the first Sequence + frame_elements = first_sequence_element.findall(".//Frame") + for frame_elemenet in frame_elements: + # Iterate over all File elements within each Frame + for file_elem in frame_elemenet.findall("File"): + channel_names.add(file_elem.attrib["channelName"]) + channel_ids.add(file_elem.attrib["channel"]) + file_names.append(file_elem.attrib["filename"]) unique_channel_names = natsort.natsorted(channel_names) unique_channel_ids = natsort.natsorted(channel_ids) From 292b815bfcf7e2993000d0c9e0ab3e571c496351 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 5 Dec 2023 12:28:54 +0100 Subject: [PATCH 4/4] changelog add --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed349ec2..c894813b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Upcoming +### Improvements +* Improved xml parsing with Bruker [PR #267](https://github.com/catalystneuro/roiextractors/pull/267) + # v0.5.5