From 6d4a56493670ffe18d45055fc8dda81f0ff535fe Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 5 Dec 2023 12:12:05 +0100
Subject: [PATCH 1/4] improve stream checking in Bruker

---
 .../brukertiffimagingextractor.py             | 63 ++++++++++++++-----
 tests/test_brukertiffimagingextactor.py       |  6 +-
 2 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py
index e307d826..328869b8 100644
--- a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py
+++ b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py
@@ -65,9 +65,16 @@ def _determine_imaging_is_volumetric(folder_path: PathType) -> bool:
     is_volumetric: bool
         True if the imaging is volumetric (multiplane), False otherwise (single plane).
     """
-    xml_root = _parse_xml(folder_path=folder_path)
-    z_device_element = xml_root.find(".//PVStateValue[@key='zDevice']")
-    is_volumetric = bool(int(z_device_element.attrib["value"]))
+    folder_path = Path(folder_path)
+    xml_file_path = folder_path / f"{folder_path.name}.xml"
+    assert xml_file_path.is_file(), f"The XML configuration file is not found at '{xml_file_path}'."
+
+    is_volumetric = False
+    with open(xml_file_path, "r") as xml_file:
+        for event, elem in ElementTree.iterparse(xml_file, events=("start",)):
+            if elem.tag == "PVStateValue" and elem.attrib.get("key") == "zDevice":
+                is_volumetric = bool(int(elem.attrib["value"]))
+                break  # Stop parsing as we've found the required element
 
     return is_volumetric
 
@@ -107,25 +114,53 @@ def get_streams(cls, folder_path: PathType) -> dict:
         """
         natsort = get_package(package_name="natsort", installation_instructions="pip install natsort")
 
-        xml_root = _parse_xml(folder_path=folder_path)
-        channel_names = [file.attrib["channelName"] for file in xml_root.findall(".//File")]
-        unique_channel_names = natsort.natsorted(set(channel_names))
+        folder_path = Path(folder_path)
+        xml_file_path = folder_path / f"{folder_path.name}.xml"
+        assert xml_file_path.is_file(), f"The XML configuration file is not found at '{folder_path}'."
+
+        channel_names = set()
+        channel_ids = set()
+        file_names = []
+
+        # Parse the XML file iteratively to find the first Sequence element
+        first_sequence_element = None
+        with open(xml_file_path, "r") as xml_file:
+            for _, elem in ElementTree.iterparse(xml_file, events=("end",)):
+                if elem.tag == "Sequence":
+                    first_sequence_element = elem
+                    break
+
+            if first_sequence_element is None:
+                raise ValueError("No Sequence element found in the XML configuration file. Can't get streams")
+
+            # Then in the first Sequence we find all the Frame elements
+            if first_sequence_element is not None:
+                # Iterate over all Frame elements within the first Sequence
+                frame_elements = first_sequence_element.findall(".//Frame")
+                for frame_elemenet in frame_elements:
+                    # Iterate over all File elements within each Frame
+                    for file_elem in frame_elemenet.findall("File"):
+                        channel_names.add(file_elem.attrib["channelName"])
+                        channel_ids.add(file_elem.attrib["channel"])
+                        file_names.append(file_elem.attrib["filename"])
+
+        unique_channel_names = natsort.natsorted(channel_names)
+        unique_channel_ids = natsort.natsorted(channel_ids)
+
         streams = dict(channel_streams=unique_channel_names)
         streams["plane_streams"] = dict()
+
         if not _determine_imaging_is_volumetric(folder_path=folder_path):
             return streams
-        # The "channelName" can be any name that the experimenter sets (e.g. 'Ch1', 'Ch2', 'Green', 'Red')
-        # Use the identifier of a channel "channel" (e.g. 1, 2) to match it to the file name
-        channel_ids = [file.attrib["channel"] for file in xml_root.findall(".//File")]
-        unique_channel_ids = natsort.natsorted(set(channel_ids))
+
         for channel_id, channel_name in zip(unique_channel_ids, unique_channel_names):
             plane_naming_pattern = rf"(?P<stream_name>Ch{channel_id}_\d+)"
-            plane_stream_names = [
-                re.search(plane_naming_pattern, file.attrib["filename"])["stream_name"]
-                for file in xml_root.findall(f".//File")
-            ]
+            regular_expression_matches = [re.search(plane_naming_pattern, filename) for filename in file_names]
+            plane_stream_names = [matches["stream_name"] for matches in regular_expression_matches if matches]
+
             unique_plane_stream_names = natsort.natsorted(set(plane_stream_names))
             streams["plane_streams"][channel_name] = unique_plane_stream_names
+
         return streams
 
     def __init__(
diff --git a/tests/test_brukertiffimagingextactor.py b/tests/test_brukertiffimagingextactor.py
index a1e5361b..05b28aa1 100644
--- a/tests/test_brukertiffimagingextactor.py
+++ b/tests/test_brukertiffimagingextactor.py
@@ -133,9 +133,9 @@ def setUpClass(cls):
         cls.test_video[..., 1] = second_plane_video
 
     def test_stream_names(self):
-        self.assertEqual(
-            BrukerTiffMultiPlaneImagingExtractor.get_streams(folder_path=self.folder_path), self.available_streams
-        )
+        found_streams = BrukerTiffMultiPlaneImagingExtractor.get_streams(folder_path=self.folder_path)
+        expected_streams =  self.available_streams
+        self.assertEqual(found_streams, expected_streams)        
 
     def test_brukertiffextractor_image_size(self):
         self.assertEqual(self.extractor.get_image_size(), (512, 512, 2))

From a732796d693f10536d389d7b2a4ad6d9647005c4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 5 Dec 2023 11:18:42 +0000
Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_brukertiffimagingextactor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_brukertiffimagingextactor.py b/tests/test_brukertiffimagingextactor.py
index 05b28aa1..a975a3c1 100644
--- a/tests/test_brukertiffimagingextactor.py
+++ b/tests/test_brukertiffimagingextactor.py
@@ -134,8 +134,8 @@ def setUpClass(cls):
 
     def test_stream_names(self):
         found_streams = BrukerTiffMultiPlaneImagingExtractor.get_streams(folder_path=self.folder_path)
-        expected_streams =  self.available_streams
-        self.assertEqual(found_streams, expected_streams)        
+        expected_streams = self.available_streams
+        self.assertEqual(found_streams, expected_streams)
 
     def test_brukertiffextractor_image_size(self):
         self.assertEqual(self.extractor.get_image_size(), (512, 512, 2))

From 02e03cfebe49258d8be1255d9795eb274ef4124f Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 5 Dec 2023 12:27:22 +0100
Subject: [PATCH 3/4] Pass the XML path to iterparse instead of an open() context manager (Windows)

---
 .../brukertiffimagingextractor.py             | 46 +++++++++----------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py
index 328869b8..91197214 100644
--- a/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py
+++ b/src/roiextractors/extractors/tiffimagingextractors/brukertiffimagingextractor.py
@@ -70,11 +70,10 @@ def _determine_imaging_is_volumetric(folder_path: PathType) -> bool:
     assert xml_file_path.is_file(), f"The XML configuration file is not found at '{xml_file_path}'."
 
     is_volumetric = False
-    with open(xml_file_path, "r") as xml_file:
-        for event, elem in ElementTree.iterparse(xml_file, events=("start",)):
-            if elem.tag == "PVStateValue" and elem.attrib.get("key") == "zDevice":
-                is_volumetric = bool(int(elem.attrib["value"]))
-                break  # Stop parsing as we've found the required element
+    for event, elem in ElementTree.iterparse(xml_file_path, events=("start",)):
+        if elem.tag == "PVStateValue" and elem.attrib.get("key") == "zDevice":
+            is_volumetric = bool(int(elem.attrib["value"]))
+            break  # Stop parsing as we've found the required element
 
     return is_volumetric
 
@@ -124,25 +123,24 @@ def get_streams(cls, folder_path: PathType) -> dict:
 
         # Parse the XML file iteratively to find the first Sequence element
         first_sequence_element = None
-        with open(xml_file_path, "r") as xml_file:
-            for _, elem in ElementTree.iterparse(xml_file, events=("end",)):
-                if elem.tag == "Sequence":
-                    first_sequence_element = elem
-                    break
-
-            if first_sequence_element is None:
-                raise ValueError("No Sequence element found in the XML configuration file. Can't get streams")
-
-            # Then in the first Sequence we find all the Frame elements
-            if first_sequence_element is not None:
-                # Iterate over all Frame elements within the first Sequence
-                frame_elements = first_sequence_element.findall(".//Frame")
-                for frame_elemenet in frame_elements:
-                    # Iterate over all File elements within each Frame
-                    for file_elem in frame_elemenet.findall("File"):
-                        channel_names.add(file_elem.attrib["channelName"])
-                        channel_ids.add(file_elem.attrib["channel"])
-                        file_names.append(file_elem.attrib["filename"])
+        for _, elem in ElementTree.iterparse(xml_file_path, events=("end",)):
+            if elem.tag == "Sequence":
+                first_sequence_element = elem
+                break
+
+        if first_sequence_element is None:
+            raise ValueError("No Sequence element found in the XML configuration file. Can't get streams")
+
+        # Then in the first Sequence we find all the Frame elements
+        if first_sequence_element is not None:
+            # Iterate over all Frame elements within the first Sequence
+            frame_elements = first_sequence_element.findall(".//Frame")
+            for frame_elemenet in frame_elements:
+                # Iterate over all File elements within each Frame
+                for file_elem in frame_elemenet.findall("File"):
+                    channel_names.add(file_elem.attrib["channelName"])
+                    channel_ids.add(file_elem.attrib["channel"])
+                    file_names.append(file_elem.attrib["filename"])
 
         unique_channel_names = natsort.natsorted(channel_names)
         unique_channel_ids = natsort.natsorted(channel_ids)

From 292b815bfcf7e2993000d0c9e0ab3e571c496351 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 5 Dec 2023 12:28:54 +0100
Subject: [PATCH 4/4] Add changelog entry

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed349ec2..c894813b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Upcoming
 
+### Improvements
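+* Improved XML parsing in the Bruker TIFF imaging extractor: stream and volumetric checks now parse the configuration file iteratively [PR #267](https://github.com/catalystneuro/roiextractors/pull/267)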
+
 
 # v0.5.5