From 4e57cc151f327247dc06ef7d1925c396c3ac099e Mon Sep 17 00:00:00 2001
From: Zach McKenzie <92116279+zm711@users.noreply.github.com>
Date: Tue, 7 May 2024 04:12:40 -0400
Subject: [PATCH] IntanRawIO: Update support of `rhs` files (#1457)

* initial fixes

* oops

* add in dtypes and channel numbers for rhs

* add test files in tests more fixes

* Fix naming of test files

* Update neo/test/rawiotest/test_intanrawio.py

* Fix pointers to test files

* oops

* dtype fixes

* split rhs and rhd + fix timestamps

* put streams in correct order

* fix test file naming.

* typo fix

* make sure stream_ids are sorted

* fix one-file-per-signal
---
 neo/rawio/intanrawio.py               | 315 +++++++++++++++++++++-----
 neo/test/iotest/test_intanio.py       |   2 +
 neo/test/rawiotest/test_intanrawio.py |   2 +
 3 files changed, 266 insertions(+), 53 deletions(-)

diff --git a/neo/rawio/intanrawio.py b/neo/rawio/intanrawio.py
index 0094c4ba3..b4c7dc320 100644
--- a/neo/rawio/intanrawio.py
+++ b/neo/rawio/intanrawio.py
@@ -22,6 +22,7 @@
 import os
 from collections import OrderedDict
 from packaging.version import Version as V
+import warnings
 
 import numpy as np
 from neo.core import NeoReadWriteError
@@ -96,23 +97,38 @@ def _parse_header(self):
             raise FileNotFoundError(f"{filename} does not exist")
 
         if self.filename.endswith(".rhs"):
-            self.file_format = "header-attached"
-            self._global_info, self._ordered_channels, data_dtype, header_size, self._block_size = read_rhs(
-                self.filename
-            )
+            if filename.name == "info.rhs":
+                if any((filename.parent / file).exists() for file in one_file_per_signal_filenames_rhs):
+                    self.file_format = "one-file-per-signal"
+                    raw_file_paths_dict = create_one_file_per_signal_dict_rhs(dirname=filename.parent)
+                else:
+                    self.file_format = "one-file-per-channel"
+                    raw_file_paths_dict = create_one_file_per_channel_dict_rhs(dirname=filename.parent)
+            else:
+                self.file_format = "header-attached"
+
+            (
+                self._global_info,
+                self._ordered_channels,
+                data_dtype,
+                header_size,
+                self._block_size,
+                channel_number_dict,
+            ) = read_rhs(self.filename, self.file_format)
+
         # 3 possibilities for rhd files, one combines the header and the data in the same file with suffix `rhd` while
         # the other two separates the data from the header which is always called `info.rhd`
         # attached to the actual binary file with data
         elif self.filename.endswith(".rhd"):
             if filename.name == "info.rhd":
                 # first we have one-file-per-signal which is where one neo stream/file is saved as .dat files
-                if any((filename.parent / file).exists() for file in one_file_per_signal_filenames):
+                if any((filename.parent / file).exists() for file in one_file_per_signal_filenames_rhd):
                     self.file_format = "one-file-per-signal"
-                    raw_file_paths_dict = create_one_file_per_signal_dict(filename.parent)
+                    raw_file_paths_dict = create_one_file_per_signal_dict_rhd(dirname=filename.parent)
                 # then there is one-file-per-channel where each channel in a neo stream is in its own .dat file
                 else:
                     self.file_format = "one-file-per-channel"
-                    raw_file_paths_dict = create_one_file_per_channel_dict(filename.parent)
+                    raw_file_paths_dict = create_one_file_per_channel_dict_rhd(dirname=filename.parent)
             # finally the format with the header-attached to the binary file as one giant file
             else:
                 self.file_format = "header-attached"
@@ -159,7 +175,8 @@ def _parse_header(self):
         # check timestamp continuity
         if self.file_format == "header-attached":
             timestamp = self._raw_data["timestamp"].flatten()
-        # timestamps are always the last stream
+
+        # timestamps are always last stream for headerless binary files
         elif self.file_format == "one-file-per-signal":
             time_stream_index = max(self._raw_data.keys())
             timestamp = self._raw_data[time_stream_index]
@@ -195,9 +212,16 @@ def _parse_header(self):
 
         stream_ids = np.unique(signal_channels["stream_id"])
         signal_streams = np.zeros(stream_ids.size, dtype=_signal_stream_dtype)
-        signal_streams["id"] = stream_ids
-        for stream_index, stream_id in enumerate(stream_ids):
-            signal_streams["name"][stream_index] = stream_type_to_name.get(int(stream_id), "")
+
+        # we need to sort the data because the string of 10 is mis-sorted.
+        stream_ids_sorted = sorted([int(stream_id) for stream_id in stream_ids])
+        signal_streams["id"] = [str(stream_id) for stream_id in stream_ids_sorted]
+
+        for stream_index, stream_id in enumerate(stream_ids_sorted):
+            if self.filename.endswith(".rhd"):
+                signal_streams["name"][stream_index] = stream_type_to_name_rhd.get(int(stream_id), "")
+            else:
+                signal_streams["name"][stream_index] = stream_type_to_name_rhs.get(int(stream_id), "")
 
         self._max_sampling_rate = np.max(signal_channels["sampling_rate"])
 
@@ -436,14 +460,29 @@ def read_variable_header(f, header):
     ("electrode_impedance_phase", "float32"),
 ]
 
+stream_type_to_name_rhs = {
+    0: "RHS2000 amplifier channel",
+    3: "USB board ADC input channel",
+    4: "USB board ADC output channel",
+    5: "USB board digital input channel",
+    6: "USB board digital output channel",
+    10: "DC Amplifier channel",
+    11: "Stim channel",
+}
 
-def read_rhs(filename):
+
+def read_rhs(filename, file_format: str):
     BLOCK_SIZE = 128  # sample per block
 
     with open(filename, mode="rb") as f:
         global_info = read_variable_header(f, rhs_global_header)
 
+        # channels_by_type is simpler than data_dtype because 0 contains 0, 10 and 11 internally
         channels_by_type = {k: [] for k in [0, 3, 4, 5, 6]}
+        if not file_format == "header-attached":
+            # data_dtype for rhs is complicated. There is not 1, 2 (supply and aux),
+            # but there are dc-amp (10) and stim (11). we make timestamps (15)
+            data_dtype = {k: [] for k in [0, 3, 4, 5, 6, 10, 11, 15]}
         for g in range(global_info["nb_signal_group"]):
             group_info = read_variable_header(f, rhs_signal_group_header)
 
@@ -455,13 +494,20 @@ def read_rhs(filename):
                     if bool(chan_info["channel_enabled"]):
                         channels_by_type[chan_info["signal_type"]].append(chan_info)
 
+        # useful dictionary for knowing the number of channels for non-header attached formats
+        channel_number_dict = {i: len(channels_by_type[i]) for i in [0, 3, 4, 5, 6]}
+
         header_size = f.tell()
 
     sr = global_info["sampling_rate"]
 
     # construct dtype by re-ordering channels by types
     ordered_channels = []
-    data_dtype = [("timestamp", "int32", BLOCK_SIZE)]
+    if file_format == "header-attached":
+        data_dtype = [("timestamp", "int32", BLOCK_SIZE)]
+    else:
+        data_dtype[15] = "int32"
+        channel_number_dict[15] = 1
 
     # 0: RHS2000 amplifier channel.
     for chan_info in channels_by_type[0]:
@@ -469,12 +515,23 @@ def read_rhs(filename):
         chan_info["sampling_rate"] = sr
         chan_info["units"] = "uV"
         chan_info["gain"] = 0.195
-        chan_info["offset"] = -32768 * 0.195
-        chan_info["dtype"] = "uint16"
+        if file_format == "header-attached":
+            chan_info["offset"] = -32768 * 0.195
+        else:
+            chan_info["offset"] = 0.0
+        if file_format == "header-attached":
+            chan_info["dtype"] = "uint16"
+        else:
+            chan_info["dtype"] = "int16"
         ordered_channels.append(chan_info)
-        data_dtype += [(name, "uint16", BLOCK_SIZE)]
+        if file_format == "header-attached":
+            data_dtype += [(name, "uint16", BLOCK_SIZE)]
+        else:
+            data_dtype[0] = "int16"
 
     if bool(global_info["dc_amplifier_data_saved"]):
+        # if we have dc amp we need to grab the correct number of channels
+        channel_number_dict[10] = channel_number_dict[0]
         for chan_info in channels_by_type[0]:
             name = chan_info["native_channel_name"]
             chan_info_dc = dict(chan_info)
@@ -486,22 +543,36 @@ def read_rhs(filename):
             chan_info_dc["signal_type"] = 10  # put it in another group
             chan_info_dc["dtype"] = "uint16"
             ordered_channels.append(chan_info_dc)
-            data_dtype += [(name + "_DC", "uint16", BLOCK_SIZE)]
+            if file_format == "header-attached":
+                data_dtype += [(name + "_DC", "uint16", BLOCK_SIZE)]
+            else:
+                data_dtype[10] = "uint16"
+    # I can't seem to get stim files to generate for one-file-per-channel
+    # so let's skip for now and can be given on request
 
-    for chan_info in channels_by_type[0]:
-        name = chan_info["native_channel_name"]
-        chan_info_stim = dict(chan_info)
-        chan_info_stim["native_channel_name"] = name + "_STIM"
-        chan_info_stim["sampling_rate"] = sr
-        # stim channel are coplicated because they are coded
-        # with bits, they do not fit the gain/offset rawio strategy
-        chan_info_stim["units"] = ""
-        chan_info_stim["gain"] = 1.0
-        chan_info_stim["offset"] = 0.0
-        chan_info_stim["signal_type"] = 11  # put it in another group
-        chan_info_stim["dtype"] = "uint16"
-        ordered_channels.append(chan_info_stim)
-        data_dtype += [(name + "_STIM", "uint16", BLOCK_SIZE)]
+    if file_format != "one-file-per-channel":
+        channel_number_dict[11] = channel_number_dict[0]  # should be one stim / amplifier channel
+        for chan_info in channels_by_type[0]:
+            name = chan_info["native_channel_name"]
+            chan_info_stim = dict(chan_info)
+            chan_info_stim["native_channel_name"] = name + "_STIM"
+            chan_info_stim["sampling_rate"] = sr
+            # stim channel are complicated because they are coded
+            # with bits, they do not fit the gain/offset rawio strategy
+            chan_info_stim["units"] = ""
+            chan_info_stim["gain"] = 1.0
+            chan_info_stim["offset"] = 0.0
+            chan_info_stim["signal_type"] = 11  # put it in another group
+            chan_info_stim["dtype"] = "uint16"
+            ordered_channels.append(chan_info_stim)
+            if file_format == "header-attached":
+                data_dtype += [(name + "_STIM", "uint16", BLOCK_SIZE)]
+            else:
+                data_dtype[11] = "uint16"
+    else:
+        warnings.warn("Stim not implemented for `one-file-per-channel` due to lack of test files")
+
+    # No supply or aux for rhs files (ie no stream 1 and 2)
 
     # 3: Analog input channel.
     # 4: Analog output channel.
@@ -517,23 +588,46 @@ def read_rhs(filename):
             chan_info["offset"] = -32768 * 0.0003125
             chan_info["dtype"] = "uint16"
             ordered_channels.append(chan_info)
-            data_dtype += [(name, "uint16", BLOCK_SIZE)]
+            if file_format == "header-attached":
+                data_dtype += [(name, "uint16", BLOCK_SIZE)]
+            else:
+                data_dtype[sig_type] = "uint16"
 
     # 5: Digital input channel.
     # 6: Digital output channel.
     for sig_type in [5, 6]:
-        # at the moment theses channel are not in sig channel list
-        # but they are in the raw memamp
         if len(channels_by_type[sig_type]) > 0:
             name = {5: "DIGITAL-IN", 6: "DIGITAL-OUT"}[sig_type]
-            data_dtype += [(name, "uint16", BLOCK_SIZE)]
+            chan_info = channels_by_type[sig_type][0]
+            # So currently until we have get_digitalsignal_chunk we need to do a tiny hack to
+            # make this memory map work correctly. So since our digital data is not organized
+            # by channel like analog/ADC are we have to overwrite the native name to create
+            # a single permanent name that we can find with channel id
+            chan_info["native_channel_name"] = name  # overwite to allow memmap to work
+            chan_info["sampling_rate"] = sr
+            chan_info["units"] = "TTL"  # arbitrary units TTL for logic
+            chan_info["gain"] = 1.0
+            chan_info["offset"] = 0.0
+            chan_info["dtype"] = "uint16"
+            ordered_channels.append(chan_info)
+            if file_format == "header-attached":
+                data_dtype += [(name, "uint16", BLOCK_SIZE)]
+            else:
+                data_dtype[sig_type] = "uint16"
 
-    if bool(global_info["notch_filter_mode"]) and global_info["major_version"] >= 3:
-        global_info["notch_filter_applied"] = True
+    if global_info["notch_filter_mode"] == 2 and global_info["major_version"] >= V("3.0"):
+        global_info["notch_filter"] = "60Hz"
+    elif global_info["notch_filter_mode"] == 1 and global_info["major_version"] >= V("3.0"):
+        global_info["notch_filter"] = "50Hz"
     else:
-        global_info["notch_filter_applied"] = False
+        global_info["notch_filter"] = False
+
+    if not file_format == "header-attached":
+        # filter out dtypes without any values
+        data_dtype = {k: v for (k, v) in data_dtype.items() if len(v) > 0}
+        channel_number_dict = {k: v for (k, v) in channel_number_dict.items() if v > 0}
 
-    return global_info, ordered_channels, data_dtype, header_size, BLOCK_SIZE
+    return global_info, ordered_channels, data_dtype, header_size, BLOCK_SIZE, channel_number_dict
 
 
 ###############
@@ -603,7 +697,7 @@ def read_rhs(filename):
     ("electrode_impedance_phase", "float32"),
 ]
 
-stream_type_to_name = {
+stream_type_to_name_rhd = {
     0: "RHD2000 amplifier channel",
     1: "RHD2000 auxiliary input channel",
     2: "RHD2000 supply voltage channel",
@@ -811,11 +905,13 @@ def read_rhd(filename, file_format: str):
     return global_info, ordered_channels, data_dtype, header_size, BLOCK_SIZE, channel_number_dict
 
 
-###########################
-# RHD Zone for Binary Files
+##########################################################################
+# RHX Zone for Binary Files
+# This section provides all possible headerless binary files in both the rhs and rhd
+# formats.
 
-# For One File Per Signal
-one_file_per_signal_filenames = [
+# RHD Binary Files for One File Per Signal
+one_file_per_signal_filenames_rhd = [
     "amplifier.dat",
     "auxiliary.dat",
     "supply.dat",
@@ -825,35 +921,148 @@ def read_rhd(filename, file_format: str):
 ]
 
 
-def create_one_file_per_signal_dict(dirname):
-    """Function for One File Per Signal Type"""
+def create_one_file_per_signal_dict_rhd(dirname):
+    """Function for One File Per Signal Type
+
+    Parameters
+    ----------
+    dirname: pathlib.Path
+        The folder to explore
+
+    Returns
+    -------
+    raw_files_paths_dict: dict
+        A dict of all the file paths
+    """
+
     raw_file_paths_dict = {}
-    for raw_index, raw_file in enumerate(one_file_per_signal_filenames):
+    for raw_index, raw_file in enumerate(one_file_per_signal_filenames_rhd):
         if Path(dirname / raw_file).is_file():
             raw_file_paths_dict[raw_index] = Path(dirname / raw_file)
+
     raw_file_paths_dict[6] = Path(dirname / "time.dat")
 
     return raw_file_paths_dict
 
 
-# For One File Per Channel
-possible_raw_file_prefixes = [
+# RHS Binary Files for One File Per Signal
+one_file_per_signal_filenames_rhs = [
+    "amplifier.dat",
+    "auxiliary.dat",
+    "supply.dat",
+    "analogin.dat",
+    "analogout.dat",
+    "digitalin.dat",
+    "digitalout.dat",
+]
+
+
+def create_one_file_per_signal_dict_rhs(dirname):
+    """Function for One File Per Signal Type
+
+    Parameters
+    ----------
+    dirname: pathlib.Path
+        The folder to explore
+
+    Returns
+    -------
+    raw_files_paths_dict: dict
+        A dict of all the file paths
+    """
+
+    raw_file_paths_dict = {}
+    for raw_index, raw_file in enumerate(one_file_per_signal_filenames_rhs):
+        if Path(dirname / raw_file).is_file():
+            raw_file_paths_dict[raw_index] = Path(dirname / raw_file)
+
+    # we need time to be the last value
+    raw_file_paths_dict[15] = Path(dirname / "time.dat")
+    # 10 and 11 are hardcoded in the rhs_reader above so hardcoded here too
+    if Path(dirname / "dcamplifier.dat").is_file():
+        raw_file_paths_dict[10] = Path(dirname / "dcamplifier.dat")
+    if Path(dirname / "stim.dat").is_file():
+        raw_file_paths_dict[11] = Path(dirname / "stim.dat")
+
+    return raw_file_paths_dict
+
+
+# RHD Binary Files for One File Per Channel
+possible_raw_file_prefixes_rhd = [
     "amp",
     "aux",
     "vdd",
-    "board-ANALOG",
+    "board-ANALOG-IN",
     "board-DIGITAL-IN",
     "board-DIGITAL-OUT",
 ]
 
 
-def create_one_file_per_channel_dict(dirname):
-    """Utility function for One File Per Channel"""
+def create_one_file_per_channel_dict_rhd(dirname):
+    """Utility function for One File Per Channel
+
+    Parameters
+    ----------
+    dirname: pathlib.Path
+        The folder to explore
+
+    Returns
+    -------
+    raw_files_paths_dict: dict
+        A dict of all the file paths
+    """
+
     file_names = dirname.glob("**/*.dat")
     files = [file for file in file_names if file.is_file()]
     raw_file_paths_dict = {}
-    for raw_index, prefix in enumerate(possible_raw_file_prefixes):
+    for raw_index, prefix in enumerate(possible_raw_file_prefixes_rhd):
         raw_file_paths_dict[raw_index] = [file for file in files if prefix in file.name]
+
     raw_file_paths_dict[6] = [Path(dirname / "time.dat")]
 
     return raw_file_paths_dict
+
+
+# RHS Binary Files for One File Per Channel
+possible_raw_file_prefixes_rhs = [
+    "amp",
+    "aux",
+    "vdd",
+    "board-ANALOG-IN",
+    "board-ANALOG-OUT",
+    "board-DIGITAL-IN",
+    "board-DIGITAL-OUT",
+]
+
+
+def create_one_file_per_channel_dict_rhs(
+    dirname,
+):
+    """Utility function for One File Per Channel
+
+    Parameters
+    ----------
+    dirname: pathlib.Path
+        The folder to explore
+
+    Returns
+    -------
+    raw_files_paths_dict: dict
+        A dict of all the file paths
+    """
+
+    file_names = dirname.glob("**/*.dat")
+    files = [file for file in file_names if file.is_file()]
+    raw_file_paths_dict = {}
+    for raw_index, prefix in enumerate(possible_raw_file_prefixes_rhs):
+        raw_file_paths_dict[raw_index] = [file for file in files if prefix in file.name]
+    
+    # we need time to be the last value
+    raw_file_paths_dict[15] = [Path(dirname / "time.dat")]
+    # 10 and 11 are hardcoded in the rhs reader so hardcoded here
+    raw_file_paths_dict[10] = [file for file in files if "dc-" in file.name]
+    # we can find the files, but I can see how to read them out of header
+    # so for now we don't expose the stim files in one-file-per-channel
+    raw_file_paths_dict[11] = [file for file in files if "stim-" in file.name]
+
+    return raw_file_paths_dict
diff --git a/neo/test/iotest/test_intanio.py b/neo/test/iotest/test_intanio.py
index a91138384..bedd65bf8 100644
--- a/neo/test/iotest/test_intanio.py
+++ b/neo/test/iotest/test_intanio.py
@@ -19,6 +19,8 @@ class TestIntanIO(
         "intan/intan_rhd_test_1.rhd",
         "intan/intan_fpc_test_231117_052630/info.rhd",
         "intan/intan_fps_test_231117_052500/info.rhd",
+        "intan/intan_fpc_rhs_test_240329_091637/info.rhs",
+        "intan/intan_fps_rhs_test_240329_091536/info.rhs",
     ]
 
 
diff --git a/neo/test/rawiotest/test_intanrawio.py b/neo/test/rawiotest/test_intanrawio.py
index 794480787..3f5ee3c7c 100644
--- a/neo/test/rawiotest/test_intanrawio.py
+++ b/neo/test/rawiotest/test_intanrawio.py
@@ -16,6 +16,8 @@ class TestIntanRawIO(
         "intan/intan_rhd_test_1.rhd",
         "intan/intan_fpc_test_231117_052630/info.rhd",
         "intan/intan_fps_test_231117_052500/info.rhd",
+        "intan/intan_fpc_rhs_test_240329_091637/info.rhs",
+        "intan/intan_fps_rhs_test_240329_091536/info.rhs",
     ]