Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IntanRawIO: Update support of rhs files #1457

Merged
merged 17 commits into from
May 7, 2024
228 changes: 183 additions & 45 deletions neo/rawio/intanrawio.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import os
from collections import OrderedDict
from packaging.version import Version as V
import warnings

import numpy as np

Expand Down Expand Up @@ -95,10 +96,25 @@ def _parse_header(self):
raise FileNotFoundError(f"{filename} does not exist")

if self.filename.endswith(".rhs"):
self.file_format = "header-attached"
self._global_info, self._ordered_channels, data_dtype, header_size, self._block_size = read_rhs(
self.filename
)
if filename.name == "info.rhs":
if any((filename.parent / file).exists for file in one_file_per_signal_filenames):
zm711 marked this conversation as resolved.
Show resolved Hide resolved
self.file_format = "one-file-per-signal"
raw_file_paths_dict = create_one_file_per_signal_dict(dirname=filename.parent, rhs=True)
else:
self.file_format = "one-file-per-channel"
raw_file_paths_dict = create_one_file_per_channel_dict(dirname=filename.parent, rhs=True)
else:
self.file_format = "header-attached"

(
self._global_info,
self._ordered_channels,
data_dtype,
header_size,
self._block_size,
channel_number_dict,
) = read_rhs(self.filename, self.file_format)

# 3 possibilities for rhd files, one combines the header and the data in the same file with suffix `rhd` while
# the other two separates the data from the header which is always called `info.rhd`
# attached to the actual binary file with data
Expand All @@ -107,11 +123,11 @@ def _parse_header(self):
# first we have one-file-per-signal which is where one neo stream/file is saved as .dat files
if any((filename.parent / file).exists() for file in one_file_per_signal_filenames):
self.file_format = "one-file-per-signal"
raw_file_paths_dict = create_one_file_per_signal_dict(filename.parent)
raw_file_paths_dict = create_one_file_per_signal_dict(dirname=filename.parent)
zm711 marked this conversation as resolved.
Show resolved Hide resolved
# then there is one-file-per-channel where each channel in a neo stream is in its own .dat file
else:
self.file_format = "one-file-per-channel"
raw_file_paths_dict = create_one_file_per_channel_dict(filename.parent)
raw_file_paths_dict = create_one_file_per_channel_dict(dirname=filename.parent)
zm711 marked this conversation as resolved.
Show resolved Hide resolved
# finally the format with the header-attached to the binary file as one giant file
else:
self.file_format = "header-attached"
Expand Down Expand Up @@ -195,7 +211,10 @@ def _parse_header(self):
signal_streams = np.zeros(stream_ids.size, dtype=_signal_stream_dtype)
signal_streams["id"] = stream_ids
for stream_index, stream_id in enumerate(stream_ids):
zm711 marked this conversation as resolved.
Show resolved Hide resolved
signal_streams["name"][stream_index] = stream_type_to_name.get(int(stream_id), "")
if self.filename.endswith('.rhd'):
signal_streams["name"][stream_index] = stream_type_to_name_rhd.get(int(stream_id), "")
else:
signal_streams["name"][stream_index] = stream_type_to_name_rhs.get(int(stream_id), "")

self._max_sampling_rate = np.max(signal_channels["sampling_rate"])

Expand Down Expand Up @@ -434,14 +453,29 @@ def read_variable_header(f, header):
("electrode_impedance_phase", "float32"),
]

stream_type_to_name_rhs= {
zm711 marked this conversation as resolved.
Show resolved Hide resolved
0: "RHS2000 amplifier channel",
3: "USB board ADC input channel",
4: "USB board ADC output channel",
5: "USB board digital input channel",
6: "USB board digital output channel",
10: "DC Amplifier channel",
11: "Stim channel"
}

def read_rhs(filename):

def read_rhs(filename, file_format: str):
BLOCK_SIZE = 128 # sample per block

with open(filename, mode="rb") as f:
global_info = read_variable_header(f, rhs_global_header)

# channels_by_type is simpler than data_dtype because 0 contains 0, 10 and 11 internally
channels_by_type = {k: [] for k in [0, 3, 4, 5, 6]}
if not file_format == "header-attached":
# data_dtype for rhs is complicated. There is not 1, 2 (supply and aux),
# but there are dc-amp (10) and stim (11). we make timestamps (7)
data_dtype = {k: [] for k in [0, 3, 4, 5, 6, 7, 10, 11]}
for g in range(global_info["nb_signal_group"]):
group_info = read_variable_header(f, rhs_signal_group_header)

Expand All @@ -452,26 +486,44 @@ def read_rhs(filename):
if bool(chan_info["channel_enabled"]):
channels_by_type[chan_info["signal_type"]].append(chan_info)

# useful dictionary for knowing the number of channels for non-header attached formats
channel_number_dict = {i: len(channels_by_type[i]) for i in [0, 3, 4, 5, 6]}

header_size = f.tell()

sr = global_info["sampling_rate"]

# construct dtype by re-ordering channels by types
ordered_channels = []
data_dtype = [("timestamp", "int32", BLOCK_SIZE)]
if file_format == "header-attached":
data_dtype = [("timestamp", "int32", BLOCK_SIZE)]
else:
data_dtype[7] = "int32"
channel_number_dict[7] = 1

# 0: RHS2000 amplifier channel.
for chan_info in channels_by_type[0]:
name = chan_info["native_channel_name"]
chan_info["sampling_rate"] = sr
chan_info["units"] = "uV"
chan_info["gain"] = 0.195
chan_info["offset"] = -32768 * 0.195
chan_info["dtype"] = "uint16"
if file_format == "header-attached":
chan_info["offset"] = -32768 * 0.195
else:
chan_info["offset"] = 0.0
if file_format == "header-attached":
chan_info["dtype"] = "uint16"
else:
chan_info["dtype"] = "int16"
ordered_channels.append(chan_info)
data_dtype += [(name, "uint16", BLOCK_SIZE)]
if file_format == "header-attached":
data_dtype += [(name, "uint16", BLOCK_SIZE)]
else:
data_dtype[0] = "int16"

if bool(global_info["dc_amplifier_data_saved"]):
# if we have dc amp we need to grab the correct number of channels
channel_number_dict[10] = channel_number_dict[0]
for chan_info in channels_by_type[0]:
name = chan_info["native_channel_name"]
chan_info_dc = dict(chan_info)
Expand All @@ -483,22 +535,36 @@ def read_rhs(filename):
chan_info_dc["signal_type"] = 10 # put it in another group
chan_info_dc["dtype"] = "uint16"
ordered_channels.append(chan_info_dc)
data_dtype += [(name + "_DC", "uint16", BLOCK_SIZE)]
if file_format == "header-attached":
data_dtype += [(name + "_DC", "uint16", BLOCK_SIZE)]
else:
data_dtype[10] = "unit16"
zm711 marked this conversation as resolved.
Show resolved Hide resolved
# I can't seem to get stim files to generate for one-file-per-channel
# so let's skip for now and can be given on request

for chan_info in channels_by_type[0]:
name = chan_info["native_channel_name"]
chan_info_stim = dict(chan_info)
chan_info_stim["native_channel_name"] = name + "_STIM"
chan_info_stim["sampling_rate"] = sr
# stim channel are coplicated because they are coded
# with bits, they do not fit the gain/offset rawio strategy
chan_info_stim["units"] = ""
chan_info_stim["gain"] = 1.0
chan_info_stim["offset"] = 0.0
chan_info_stim["signal_type"] = 11 # put it in another group
chan_info_stim["dtype"] = "uint16"
ordered_channels.append(chan_info_stim)
data_dtype += [(name + "_STIM", "uint16", BLOCK_SIZE)]
if file_format != "one-file-per-channel":
channel_number_dict[11] = channel_number_dict[0] # should be one stim / amplifier channel
for chan_info in channels_by_type[0]:
zm711 marked this conversation as resolved.
Show resolved Hide resolved
name = chan_info["native_channel_name"]
chan_info_stim = dict(chan_info)
chan_info_stim["native_channel_name"] = name + "_STIM"
chan_info_stim["sampling_rate"] = sr
# stim channel are complicated because they are coded
# with bits, they do not fit the gain/offset rawio strategy
chan_info_stim["units"] = ""
chan_info_stim["gain"] = 1.0
chan_info_stim["offset"] = 0.0
chan_info_stim["signal_type"] = 11 # put it in another group
chan_info_stim["dtype"] = "uint16"
ordered_channels.append(chan_info_stim)
if file_format == "header-attached":
data_dtype += [(name + "_STIM", "uint16", BLOCK_SIZE)]
else:
data_dtype[11] == "unit16"
zm711 marked this conversation as resolved.
Show resolved Hide resolved
else:
warnings.warn("Stim not implemented for `one-file-per-channel` due to lack of test files")

# No supply or aux for rhs files (ie no stream 1 and 2)

# 3: Analog input channel.
# 4: Analog output channel.
Expand All @@ -514,23 +580,46 @@ def read_rhs(filename):
chan_info["offset"] = -32768 * 0.0003125
chan_info["dtype"] = "uint16"
ordered_channels.append(chan_info)
data_dtype += [(name, "uint16", BLOCK_SIZE)]
if file_format == "header-attached":
data_dtype += [(name, "uint16", BLOCK_SIZE)]
else:
data_dtype[sig_type] = "unit16"
zm711 marked this conversation as resolved.
Show resolved Hide resolved

# 5: Digital input channel.
# 6: Digital output channel.
for sig_type in [5, 6]:
# at the moment theses channel are not in sig channel list
# but they are in the raw memamp
if len(channels_by_type[sig_type]) > 0:
name = {5: "DIGITAL-IN", 6: "DIGITAL-OUT"}[sig_type]
data_dtype += [(name, "uint16", BLOCK_SIZE)]
chan_info = channels_by_type[sig_type][0]
# So currently until we have get_digitalsignal_chunk we need to do a tiny hack to
# make this memory map work correctly. So since our digital data is not organized
# by channel like analog/ADC are we have to overwrite the native name to create
# a single permanent name that we can find with channel id
chan_info["native_channel_name"] = name # overwite to allow memmap to work
chan_info["sampling_rate"] = sr
chan_info["units"] = "TTL" # arbitrary units TTL for logic
chan_info["gain"] = 1.0
chan_info["offset"] = 0.0
chan_info["dtype"] = "uint16"
ordered_channels.append(chan_info)
if file_format == "header-attached":
data_dtype += [(name, "uint16", BLOCK_SIZE)]
else:
data_dtype[sig_type] = "uint16"

if bool(global_info["notch_filter_mode"]) and global_info["major_version"] >= 3:
global_info["notch_filter_applied"] = True
if global_info["notch_filter_mode"] == 2 and global_info["major_version"] >= V("3.0"):
global_info["notch_filter"] = "60Hz"
elif global_info["notch_filter_mode"] == 1 and global_info["major_version"] >= V("3.0"):
global_info["notch_filter"] = "50Hz"
else:
global_info["notch_filter_applied"] = False
global_info["notch_filter"] = False

return global_info, ordered_channels, data_dtype, header_size, BLOCK_SIZE
if not file_format == "header-attached":
# filter out dtypes without any values
data_dtype = {k: v for (k, v) in data_dtype.items() if len(v) > 0}
channel_number_dict = {k: v for (k, v) in channel_number_dict.items() if v > 0}

return global_info, ordered_channels, data_dtype, header_size, BLOCK_SIZE, channel_number_dict


###############
Expand Down Expand Up @@ -600,7 +689,7 @@ def read_rhs(filename):
("electrode_impedance_phase", "float32"),
]

stream_type_to_name = {
stream_type_to_name_rhd = {
0: "RHD2000 amplifier channel",
1: "RHD2000 auxiliary input channel",
2: "RHD2000 supply voltage channel",
Expand Down Expand Up @@ -808,8 +897,11 @@ def read_rhd(filename, file_format: str):
return global_info, ordered_channels, data_dtype, header_size, BLOCK_SIZE, channel_number_dict


###########################
# RHD Zone for Binary Files
##########################################################################
# RHX Zone for Binary Files
zm711 marked this conversation as resolved.
Show resolved Hide resolved
# This zone handles the headerless binary files for both rhs and rhd header files.
# These files are produced by the new RHX version of the Intan recording software
# as an optional setting that can be turned on.

# For One File Per Signal
one_file_per_signal_filenames = [
Expand All @@ -822,13 +914,37 @@ def read_rhd(filename, file_format: str):
]


def create_one_file_per_signal_dict(dirname):
"""Function for One File Per Signal Type"""
def create_one_file_per_signal_dict(dirname, rhs: bool = False):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personal preference, so your mileage may vary: I would rather have two functions here instead of mixing the two of them with keywords. I prefer code duplication over cyclomatic complexity, and given that these are almost dictionaries / configurations, I don't think that duplication is a cognitive burden.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that might be fair for this. Mainly because we update the rhd a bit more often than the rhs. So splitting so that we can update the one without breaking the other is a good idea. I would say let's keep this in draft and I will work on that.

"""Function for One File Per Signal Type

Parameters
----------
dirname: pathlib.Path
The folder to explore
rhs: bool, default: False
Whether this is an rhd or an rhs file
"""

# if rhs we have an extra stream to add

if rhs:
one_file_per_signal_filenames.insert(4, "analogout.dat")

raw_file_paths_dict = {}
for raw_index, raw_file in enumerate(one_file_per_signal_filenames):
if Path(dirname / raw_file).is_file():
raw_file_paths_dict[raw_index] = Path(dirname / raw_file)
raw_file_paths_dict[6] = Path(dirname / "time.dat")
if rhs:
raw_file_paths_dict[7] = Path(dirname / "time.dat")
else:
raw_file_paths_dict[6] = Path(dirname / "time.dat")

if rhs:
# 10 and 11 are hardcoded in the rhs_reader above so hardcoded here too
if Path(dirname / "dcamplifier.dat").is_file():
raw_file_paths_dict[10] = Path(dirname / "dcamplifier.dat")
if Path(dirname / "stim.dat").is_file():
raw_file_paths_dict[11] = Path(dirname / "stim.dat")

return raw_file_paths_dict

Expand All @@ -838,19 +954,41 @@ def create_one_file_per_signal_dict(dirname):
"amp",
"aux",
"vdd",
"board-ANALOG",
"board-ANALOG-IN",
zm711 marked this conversation as resolved.
Show resolved Hide resolved
"board-DIGITAL-IN",
"board-DIGITAL-OUT",
]


def create_one_file_per_channel_dict(dirname, rhs: bool = False):
    """Utility function for One File Per Channel

    Collects every ``.dat`` file found (recursively) under `dirname` and groups
    them by stream index according to the prefix contained in the file name.

    Parameters
    ----------
    dirname: pathlib.Path
        The folder to explore
    rhs: bool, default: False
        Whether this is an rhd or an rhs file

    Returns
    -------
    raw_file_paths_dict: dict
        Maps a stream index to a list of pathlib.Path. Prefix-based streams use
        their position in the prefix list as index; ``time.dat`` is stored at
        index 6 (rhd) or 7 (rhs); for rhs, indices 10 and 11 hold the files
        whose names contain ``dc-`` and ``stim-`` respectively.
    """
    # Work on a copy of the module-level list: inserting into the shared list
    # would add one more "board-ANALOG-OUT" entry on every rhs call (shifting
    # the stream indices) and would also leak into later rhd calls.
    prefixes = list(possible_raw_file_prefixes)
    if rhs and "board-ANALOG-OUT" not in prefixes:
        # if rhs we have an extra stream to add at index 4
        prefixes.insert(4, "board-ANALOG-OUT")

    files = [file for file in dirname.glob("**/*.dat") if file.is_file()]

    raw_file_paths_dict = {
        raw_index: [file for file in files if prefix in file.name]
        for raw_index, prefix in enumerate(prefixes)
    }

    # the timestamp file comes right after the prefix-based streams
    time_index = 7 if rhs else 6
    raw_file_paths_dict[time_index] = [Path(dirname / "time.dat")]

    if rhs:
        # 10 and 11 are hardcoded in the rhs reader so hardcoded here
        raw_file_paths_dict[10] = [file for file in files if "dc-" in file.name]
        # the stim files can be found on disk, but it is unclear how to read
        # them out of the header, so their paths are only collected here
        raw_file_paths_dict[11] = [file for file in files if "stim-" in file.name]

    return raw_file_paths_dict
2 changes: 2 additions & 0 deletions neo/test/iotest/test_intanio.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class TestIntanIO(
"intan/intan_rhd_test_1.rhd",
"intan/intan_fpc_test_231117_052630/info.rhd",
"intan/intan_fps_test_231117_052500/info.rhd",
"intan/intan_fpc_rhs_test_240329_091637/info.rhs",
"intan/intan_fps_rhs_test_240329_091636/info.rhs",
zm711 marked this conversation as resolved.
Show resolved Hide resolved
]


Expand Down
2 changes: 2 additions & 0 deletions neo/test/rawiotest/test_intanrawio.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ class TestIntanRawIO(
"intan/intan_rhd_test_1.rhd",
"intan/intan_fpc_test_231117_052630/info.rhd",
"intan/intan_fps_test_231117_052500/info.rhd",
"intan/intan_fpc_rhs_test_240329_091637/info.rhs",
"intan/intan_fps_rhs_test_240329_091636/info.rhs",
zm711 marked this conversation as resolved.
Show resolved Hide resolved
]


Expand Down
Loading