From 2d62f2a67ef86c10ed6e761f59ac03c750fd34ab Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 26 Nov 2024 14:07:22 -0600 Subject: [PATCH 1/5] add failing test --- neo/test/rawiotest/test_intanrawio.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/neo/test/rawiotest/test_intanrawio.py b/neo/test/rawiotest/test_intanrawio.py index df3723288..db70db6ba 100644 --- a/neo/test/rawiotest/test_intanrawio.py +++ b/neo/test/rawiotest/test_intanrawio.py @@ -1,5 +1,6 @@ import unittest import numpy as np +from pathlib import Path from neo.rawio.intanrawio import IntanRawIO from neo.test.rawiotest.common_rawio_test import BaseTestRawIO @@ -66,6 +67,23 @@ def test_annotations(self): ) np.testing.assert_array_equal(signal_array_annotations["board_stream_num"][:10], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + + def test_correct_reading_one_file_per_channel(self): + # Test reading of one-file-per-channel format file. The channels should match the raw files + file_path = Path(self.get_local_path("intan/intan_fpc_test_231117_052630/info.rhd")) + intan_reader = IntanRawIO(filename=file_path) + intan_reader.parse_header() + + # This should be the folder where the files of all the channels are stored + folder_path = file_path.parent + amplifier_file_paths = [p for p in folder_path.iterdir() if "amp" in p.name] + + channel_names = [p.name[4:-4] for p in amplifier_file_paths] + + for channel_name, amplifier_file_path in zip(channel_names, amplifier_file_paths): + data_raw = np.fromfile(amplifier_file_path, dtype=np.int16).squeeze() + data_from_neo = intan_reader.get_analogsignal_chunk(channel_ids=[channel_name], stream_index=0).squeeze() + np.testing.assert_allclose(data_raw, data_from_neo) if __name__ == "__main__": unittest.main() From b1d7a8de00e21a6adc06eaecf0b25f7fb828d094 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 28 Nov 2024 13:15:27 -0600 Subject: [PATCH 2/5] fix --- neo/rawio/intanrawio.py | 327 ++++++++++++++++++++-------------------- 1 file changed, 161 insertions(+), 166 deletions(-) diff --git a/neo/rawio/intanrawio.py b/neo/rawio/intanrawio.py index 77879e69b..0cc18761d 100644 --- a/neo/rawio/intanrawio.py +++ b/neo/rawio/intanrawio.py @@ -130,7 +130,7 @@ def _parse_header(self): # see comment below for RHD which explains the division between file types if self.filename.suffix == ".rhs": if self.filename.name == "info.rhs": - if any((self.filename.parent / file).exists() for file in one_file_per_signal_filenames_rhs): + if any((self.filename.parent / file).exists() for file in stream_name_to_file_name_rhs.values()): self.file_format = "one-file-per-signal" raw_file_paths_dict = create_one_file_per_signal_dict_rhs(dirname=self.filename.parent) else: @@ -154,7 +154,7 @@ def _parse_header(self): elif self.filename.suffix == ".rhd": if self.filename.name == "info.rhd": # first we have one-file-per-signal which is where one neo stream/file is saved as .dat files - if any((self.filename.parent / file).exists() for file in one_file_per_signal_filenames_rhd): + if any((self.filename.parent / file).exists() for file in stream_name_to_file_name_rhd.values()): self.file_format = "one-file-per-signal" raw_file_paths_dict = create_one_file_per_signal_dict_rhd(dirname=self.filename.parent) # then there is one-file-per-channel where each channel in a neo stream is in its own .dat file @@ -173,8 +173,10 @@ def _parse_header(self): self._block_size, channel_number_dict, ) = read_rhd(self.filename, self.file_format) - + + ############################################# # memmap the raw data for each format type + ############################################# # if header-attached there is one giant memory-map if self.file_format == "header-attached": self._raw_data = np.memmap(self.filename, dtype=memmap_data_dtype, mode="r", offset=header_size) @@ -183,9 +185,9 @@ def _parse_header(self): # based on https://github.com/NeuralEnsemble/python-neo/issues/1556 bug in versions 0.13.1, .2, .3 elif self.file_format == "one-file-per-signal": self._raw_data = {} - for stream_index, (stream_index_key, stream_datatype) in enumerate(memmap_data_dtype.items()): - num_channels = channel_number_dict[stream_index_key] - file_path = raw_file_paths_dict[stream_index_key] + for stream_index, (stream_name, stream_datatype) in enumerate(memmap_data_dtype.items()): + num_channels = channel_number_dict[stream_name] + file_path = raw_file_paths_dict[stream_name] size_in_bytes = file_path.stat().st_size dtype_size = np.dtype(stream_datatype).itemsize n_samples = size_in_bytes // (dtype_size * num_channels) @@ -196,18 +198,16 @@ def _parse_header(self): shape=(n_samples, num_channels), order="C", ) - self._raw_data[stream_index] = signal_stream_memmap + self._raw_data[stream_name] = signal_stream_memmap # for one-file-per-channel we have one memory map / channel stored as a list / neo stream elif self.file_format == "one-file-per-channel": self._raw_data = {} - for stream_index, (stream_index_key, stream_datatype) in enumerate(memmap_data_dtype.items()): - self._raw_data[stream_index] = [] - num_channels = channel_number_dict[stream_index_key] - for channel_index in range(num_channels): - file_path = raw_file_paths_dict[stream_index_key][channel_index] - channel_memmap = np.memmap(file_path, dtype=stream_datatype, mode="r") - self._raw_data[stream_index].append(channel_memmap) + for stream_index, (stream_name, stream_datatype) in enumerate(memmap_data_dtype.items()): + channel_file_paths = raw_file_paths_dict[stream_name] + channel_memmap_list = [np.memmap(fp, dtype=stream_datatype, mode="r") for fp in channel_file_paths] + self._raw_data[stream_name] = channel_memmap_list + # Data Integrity checks # strictness of check is controlled by ignore_integrity_checks @@ -243,15 +243,14 @@ def _parse_header(self): stream_ids = np.unique(signal_channels["stream_id"]) signal_streams = np.zeros(stream_ids.size, dtype=_signal_stream_dtype) - # we need to sort the data because the string of stream_index 10 is mis-sorted. buffer_ids = [] - stream_ids_sorted = sorted([int(stream_id) for stream_id in stream_ids]) - signal_streams["id"] = [str(stream_id) for stream_id in stream_ids_sorted] - for stream_index, stream_id in enumerate(stream_ids_sorted): + # we need to sort the data because the string of stream_index 10 is mis-sorted. + signal_streams["id"] = sorted(stream_ids, key=lambda x: int(x)) + for stream_index, stream_id in enumerate(signal_streams["id"]): if self.filename.suffix == ".rhd": - name = stream_id_to_stream_name_rhd.get(int(stream_id), "") + name = stream_id_to_stream_name_rhd.get(stream_id, "") else: - name = stream_id_to_stream_name_rhs.get(int(stream_id), "") + name = stream_id_to_stream_name_rhs.get(stream_id, "") signal_streams["name"][stream_index] = name # zach I need you help here if self.file_format == "header-attached": @@ -371,13 +370,14 @@ def _segment_t_stop(self, block_index, seg_index): def _get_signal_size(self, block_index, seg_index, stream_index): + stream_name = self.header["signal_streams"][stream_index]["name"][:] + if self.file_format == "header-attached": stream_id = self.header["signal_streams"][stream_index]["id"] mask = self.header["signal_channels"]["stream_id"] == stream_id signal_channels = self.header["signal_channels"][mask] channel_ids = signal_channels["id"] - stream_name = self.header["signal_streams"][stream_index]["name"][:] stream_is_digital = stream_name in digital_stream_names field_name = stream_name if stream_is_digital else channel_ids[0] @@ -385,11 +385,10 @@ def _get_signal_size(self, block_index, seg_index, stream_index): # one-file-per-signal is (n_samples, n_channels) elif self.file_format == "one-file-per-signal": - size = self._raw_data[stream_index].shape[0] + size = self._raw_data[stream_name].shape[0] # one-file-per-channel is (n_samples) so pull from list - else: - size = self._raw_data[stream_index][0].size + size = self._raw_data[stream_name][0].size return size @@ -474,7 +473,8 @@ def _get_analogsignal_chunk_header_attached(self, i_start, i_stop, stream_index, def _get_analogsignal_chunk_one_file_per_channel(self, i_start, i_stop, stream_index, channel_indexes): - signal_data_memmap_list = self._raw_data[stream_index] + stream_name = self.header["signal_streams"][stream_index]["name"][:] + signal_data_memmap_list = self._raw_data[stream_name] channel_indexes_are_slice = isinstance(channel_indexes, slice) if channel_indexes_are_slice: num_channels = len(signal_data_memmap_list) @@ -497,7 +497,7 @@ def _get_analogsignal_chunk_one_file_per_channel(self, i_start, i_stop, stream_i def _get_analogsignal_chunk_one_file_per_signal(self, i_start, i_stop, stream_index, channel_indexes): stream_name = self.header["signal_streams"][stream_index]["name"][:] - raw_data = self._raw_data[stream_index] + raw_data = self._raw_data[stream_name] stream_is_digital = stream_name in digital_stream_names if stream_is_digital: @@ -555,11 +555,9 @@ def _assert_timestamp_continuity(self): # timestamps are always last stream for headerless binary files elif self.file_format == "one-file-per-signal": - time_stream_index = max(self._raw_data.keys()) - timestamp = self._raw_data[time_stream_index] + timestamp = self._raw_data["timestamp"] elif self.file_format == "one-file-per-channel": - time_stream_index = max(self._raw_data.keys()) - timestamp = self._raw_data[time_stream_index][0] + timestamp = self._raw_data["timestamp"][0] discontinuous_timestamps = np.diff(timestamp) != 1 timestamps_are_not_contiguous = np.any(discontinuous_timestamps) @@ -704,13 +702,13 @@ def read_variable_header(f, header: list): ] stream_id_to_stream_name_rhs = { - 0: "RHS2000 amplifier channel", - 3: "USB board ADC input channel", - 4: "USB board ADC output channel", - 5: "USB board digital input channel", - 6: "USB board digital output channel", - 10: "DC Amplifier channel", - 11: "Stim channel", + "0": "RHS2000 amplifier channel", + "3": "USB board ADC input channel", + "4": "USB board ADC output channel", + "5": "USB board digital input channel", + "6": "USB board digital output channel", + "10": "DC Amplifier channel", + "11": "Stim channel", } @@ -725,11 +723,10 @@ def read_rhs(filename, file_format: str): # contains information for stream_id 10 and stream_id 11 so we need to # break these up. See notes throughout code base; for timestamps we always # force them to be the last stream_id. - stream_id_to_channel_info_list = {k: [] for k in [0, 3, 4, 5, 6]} + stream_names = stream_id_to_stream_name_rhs.values() + stream_name_to_channel_info_list = {name: [] for name in stream_names} if not file_format == "header-attached": - # memmap_data_dtype for rhs is complicated. There is not 1, 2 (supply and aux), - # but there are dc-amp (10) and stim (11). we make timestamps (15) - memmap_data_dtype = {k: [] for k in [0, 3, 4, 5, 6, 10, 11, 15]} + memmap_data_dtype = {name: [] for name in stream_names} for g in range(global_info["nb_signal_group"]): group_info = read_variable_header(f, rhs_signal_group_header) @@ -743,10 +740,19 @@ def read_rhs(filename, file_format: str): ) raise NeoReadWriteError(error_msg) if bool(chan_info["channel_enabled"]): - stream_id_to_channel_info_list[chan_info["signal_type"]].append(chan_info) - - # useful dictionary for knowing the number of channels for non-header attached formats - channel_number_dict = {i: len(stream_id_to_channel_info_list[i]) for i in [0, 3, 4, 5, 6]} + stream_id = str(chan_info["signal_type"]) + stream_name = stream_id_to_stream_name_rhs[stream_id] + stream_name_to_channel_info_list[stream_name].append(chan_info) + + # Build a dictionary with channel count + special_cases_for_counting = [ + "DC Amplifier channel", # Same as RHS2000 amplifier channel + "Stim channel", # Same as RHS2000 amplifier channel + ] + names_to_count = [name for name in stream_names if name not in special_cases_for_counting] + channel_number_dict = {name: len(stream_name_to_channel_info_list[name]) for name in names_to_count} + channel_number_dict["DC Amplifier channel"] = channel_number_dict["RHS2000 amplifier channel"] + channel_number_dict["Stim channel"] = channel_number_dict["RHS2000 amplifier channel"] header_size = f.tell() @@ -757,11 +763,10 @@ def read_rhs(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype = [("timestamp", "int32", BLOCK_SIZE)] else: - memmap_data_dtype[15] = "int32" - channel_number_dict[15] = 1 + memmap_data_dtype["timestamp"] = "int32" + channel_number_dict["timestamp"] = 1 - # 0: RHS2000 amplifier channel. - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHS2000 amplifier channel"]: chan_info["sampling_rate"] = sr chan_info["units"] = "uV" chan_info["gain"] = 0.195 @@ -778,13 +783,12 @@ def read_rhs(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[0] = "int16" + memmap_data_dtype["RHS2000 amplifier channel"] = "int16" if bool(global_info["dc_amplifier_data_saved"]): # if we have dc amp we need to grab the correct number of channels # the channel number is the same as the count for amplifier data - channel_number_dict[10] = channel_number_dict[0] - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHS2000 amplifier channel"]: chan_info_dc = dict(chan_info) name = chan_info["native_channel_name"] chan_info_dc["native_channel_name"] = name + "_DC" @@ -798,14 +802,12 @@ def read_rhs(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype += [(name + "_DC", "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[10] = "uint16" + memmap_data_dtype["DC Amplifier channel"] = "uint16" # I can't seem to get stim files to generate for one-file-per-channel # so let's skip for now and can be given on request - if file_format != "one-file-per-channel": - channel_number_dict[11] = channel_number_dict[0] # should be one stim / amplifier channel - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHS2000 amplifier channel"]: chan_info_stim = dict(chan_info) name = chan_info["native_channel_name"] chan_info_stim["native_channel_name"] = name + "_STIM" @@ -821,7 +823,7 @@ def read_rhs(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype += [(name + "_STIM", "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[11] = "uint16" + memmap_data_dtype["Stim channel"] = "uint16" else: warnings.warn("Stim not implemented for `one-file-per-channel` due to lack of test files") @@ -829,10 +831,8 @@ def read_rhs(filename, file_format: str): # We have an error above that requests test files to help if the spec is changed # in the future. - # 3: Analog input channel. - # 4: Analog output channel. - for stream_id in [3, 4]: - for chan_info in stream_id_to_channel_info_list[stream_id]: + for stream_name in ["USB board ADC input channel", "USB board ADC output channel"]: + for chan_info in stream_name_to_channel_info_list[stream_name]: chan_info["sampling_rate"] = sr chan_info["units"] = "V" chan_info["gain"] = 0.0003125 @@ -843,12 +843,10 @@ def read_rhs(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" - # 5: Digital input channel. - # 6: Digital output channel. - for stream_id in [5, 6]: - for chan_info in stream_id_to_channel_info_list[stream_id]: + for stream_name in ["USB board digital input channel", "USB board digital output channel"]: + for chan_info in stream_name_to_channel_info_list[stream_name]: chan_info["sampling_rate"] = sr # arbitrary units are used to indicate that Intan does not # store raw voltages but only the boolean TTL state @@ -859,14 +857,13 @@ def read_rhs(filename, file_format: str): ordered_channel_info.append(chan_info) # Note that all the channels are packed in one buffer, so the data type only needs to be added once - if len(stream_id_to_channel_info_list[stream_id]) > 0: + if len(stream_name_to_channel_info_list[stream_name]) > 0: if file_format == "header-attached": - name = stream_id_to_stream_name_rhs[stream_id] - memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] + memmap_data_dtype += [(stream_name, "uint16", BLOCK_SIZE)] elif file_format == "one-file-per-channel": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" elif file_format == "one-file-per-signal": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" # per discussion with Intan developers before version 3 of their software the 'notch_filter_mode' # was a request for postprocessing to be done in one of their scripts. From version 3+ the notch @@ -955,12 +952,12 @@ def read_rhs(filename, file_format: str): ] stream_id_to_stream_name_rhd = { - 0: "RHD2000 amplifier channel", - 1: "RHD2000 auxiliary input channel", - 2: "RHD2000 supply voltage channel", - 3: "USB board ADC input channel", - 4: "USB board digital input channel", - 5: "USB board digital output channel", + "0": "RHD2000 amplifier channel", + "1": "RHD2000 auxiliary input channel", + "2": "RHD2000 supply voltage channel", + "3": "USB board ADC input channel", + "4": "USB board digital input channel", + "5": "USB board digital output channel", } @@ -1004,9 +1001,11 @@ def read_rhd(filename, file_format: str): global_info.update(read_variable_header(f, header)) # read channel group and channel header - stream_id_to_channel_info_list = {k: [] for k in [0, 1, 2, 3, 4, 5]} + stream_names = stream_id_to_stream_name_rhd.values() + stream_name_to_channel_info_list = {name: [] for name in stream_names} if not file_format == "header-attached": - memmap_data_dtype = {k: [] for k in range(7)} # 5 streams + 6 for timestamps for not header attached + memmap_data_dtype = {name: [] for name in stream_names} + memmap_data_dtype["timestamp"] = [] for g in range(global_info["nb_signal_group"]): group_info = read_variable_header(f, rhd_signal_group_header) @@ -1014,9 +1013,11 @@ def read_rhd(filename, file_format: str): for c in range(group_info["channel_num"]): chan_info = read_variable_header(f, rhd_signal_channel_header) if bool(chan_info["channel_enabled"]): - stream_id_to_channel_info_list[chan_info["signal_type"]].append(chan_info) + stream_id = str(chan_info["signal_type"]) + stream_name = stream_id_to_stream_name_rhd[stream_id] + stream_name_to_channel_info_list[stream_name].append(chan_info) - channel_number_dict = {i: len(stream_id_to_channel_info_list[i]) for i in range(6)} + channel_number_dict = {name: len(stream_name_to_channel_info_list[name]) for name in stream_names} header_size = f.tell() @@ -1034,17 +1035,16 @@ def read_rhd(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype = [("timestamp", "int32", BLOCK_SIZE)] else: - memmap_data_dtype[6] = "int32" - channel_number_dict[6] = 1 + memmap_data_dtype["timestamp"] = "int32" + channel_number_dict["timestamp"] = 1 else: if file_format == "header-attached": memmap_data_dtype = [("timestamp", "uint32", BLOCK_SIZE)] else: - memmap_data_dtype[6] = "uint32" - channel_number_dict[6] = 1 + memmap_data_dtype["timestamp"] = "uint32" + channel_number_dict["timestamp"] = 1 - # 0: RHD2000 amplifier channel - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHD2000 amplifier channel"]: chan_info["sampling_rate"] = sr chan_info["units"] = "uV" chan_info["gain"] = 0.195 @@ -1060,10 +1060,9 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[0] = "int16" + memmap_data_dtype["RHD2000 amplifier channel"] = "int16" - # 1: RHD2000 auxiliary input channel - for chan_info in stream_id_to_channel_info_list[1]: + for chan_info in stream_name_to_channel_info_list["RHD2000 auxiliary input channel"]: chan_info["sampling_rate"] = sr / 4.0 chan_info["units"] = "V" chan_info["gain"] = 0.0000374 @@ -1074,10 +1073,9 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE // 4)] else: - memmap_data_dtype[1] = "uint16" + memmap_data_dtype["RHD2000 auxiliary input channel"] = "uint16" - # 2: RHD2000 supply voltage channel - for chan_info in stream_id_to_channel_info_list[2]: + for chan_info in stream_name_to_channel_info_list["RHD2000 supply voltage channel"]: chan_info["sampling_rate"] = sr / BLOCK_SIZE chan_info["units"] = "V" chan_info["gain"] = 0.0000748 @@ -1088,7 +1086,7 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16")] else: - memmap_data_dtype[2] = "uint16" + memmap_data_dtype["RHD2000 supply voltage channel"] = "uint16" # temperature is not an official channel in the header for i in range(global_info["num_temp_sensor_channels"]): @@ -1103,7 +1101,7 @@ def read_rhd(filename, file_format: str): memmap_data_dtype += [(name, "int16")] # 3: USB board ADC input channel - for chan_info in stream_id_to_channel_info_list[3]: + for chan_info in stream_name_to_channel_info_list["USB board ADC input channel"]: chan_info["sampling_rate"] = sr chan_info["units"] = "V" if global_info["eval_board_mode"] == 0: @@ -1121,12 +1119,10 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[3] = "uint16" + memmap_data_dtype["USB board ADC input channel"] = "uint16" - # 4: USB board digital input channel - # 5: USB board digital output channel - for stream_id in [4, 5]: - for chan_info in stream_id_to_channel_info_list[stream_id]: + for stream_name in ["USB board digital input channel", "USB board digital output channel"]: + for chan_info in stream_name_to_channel_info_list[stream_name]: chan_info["sampling_rate"] = sr # arbitrary units are used to indicate that Intan does not # store raw voltages but only the boolean TTL state @@ -1137,14 +1133,13 @@ def read_rhd(filename, file_format: str): ordered_channel_info.append(chan_info) # Note that all the channels are packed in one buffer, so the data type only needs to be added once - if len(stream_id_to_channel_info_list[stream_id]) > 0: + if len(stream_name_to_channel_info_list[stream_name]) > 0: if file_format == "header-attached": - name = stream_id_to_stream_name_rhd[stream_id] - memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] + memmap_data_dtype += [(stream_name, "uint16", BLOCK_SIZE)] elif file_format == "one-file-per-channel": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" elif file_format == "one-file-per-signal": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" # per discussion with Intan developers before version 3 of their software the 'notch_filter_mode' # was a request for postprocessing to be done in one of their scripts. From version 3+ the notch @@ -1173,15 +1168,14 @@ def read_rhd(filename, file_format: str): digital_stream_names = ["USB board digital input channel", "USB board digital output channel"] -# RHD Binary Files for One File Per Signal -one_file_per_signal_filenames_rhd = [ - "amplifier.dat", - "auxiliary.dat", - "supply.dat", - "analogin.dat", - "digitalin.dat", - "digitalout.dat", -] +stream_name_to_file_name_rhd = { + "RHD2000 amplifier channel": "amplifier.dat", + "RHD2000 auxiliary input channel": "auxiliary.dat", + "RHD2000 supply voltage channel": "supply.dat", + "USB board ADC input channel": "analogin.dat", + "USB board digital input channel": "digitalin.dat", + "USB board digital output channel": "digitalout.dat", +} def create_one_file_per_signal_dict_rhd(dirname): @@ -1199,26 +1193,23 @@ def create_one_file_per_signal_dict_rhd(dirname): """ raw_file_paths_dict = {} - for raw_index, raw_file in enumerate(one_file_per_signal_filenames_rhd): - if Path(dirname / raw_file).is_file(): - raw_file_paths_dict[raw_index] = Path(dirname / raw_file) + for stream_name, file_name in stream_name_to_file_name_rhd.items(): + if Path(dirname / file_name).is_file(): + raw_file_paths_dict[stream_name] = Path(dirname / file_name) - raw_file_paths_dict[6] = Path(dirname / "time.dat") + raw_file_paths_dict["timestamp"] = Path(dirname / "time.dat") return raw_file_paths_dict - -# RHS Binary Files for One File Per Signal -one_file_per_signal_filenames_rhs = [ - "amplifier.dat", - "auxiliary.dat", - "supply.dat", - "analogin.dat", - "analogout.dat", - "digitalin.dat", - "digitalout.dat", -] - +stream_name_to_file_name_rhs = { + "RHS2000 amplifier channel": "amplifier.dat", + "RHS2000 auxiliary input channel": "auxiliary.dat", + "RHS2000 supply voltage channel": "supply.dat", + "USB board ADC input channel": "analogin.dat", + "USB board ADC output channel": "analogout.dat", + "USB board digital input channel": "digitalin.dat", + "USB board digital output channel": "digitalout.dat", +} def create_one_file_per_signal_dict_rhs(dirname): """Function for One File Per Signal Type @@ -1235,30 +1226,28 @@ def create_one_file_per_signal_dict_rhs(dirname): """ raw_file_paths_dict = {} - for raw_index, raw_file in enumerate(one_file_per_signal_filenames_rhs): - if Path(dirname / raw_file).is_file(): - raw_file_paths_dict[raw_index] = Path(dirname / raw_file) + for stream_name, file_name in stream_name_to_file_name_rhs.items(): + if Path(dirname / file_name).is_file(): + raw_file_paths_dict[stream_name] = Path(dirname / file_name) - # we need time to be the last value - raw_file_paths_dict[15] = Path(dirname / "time.dat") - # 10 and 11 are hardcoded in the rhs_reader above so hardcoded here too + raw_file_paths_dict["timestamp"] = Path(dirname / "time.dat") if Path(dirname / "dcamplifier.dat").is_file(): - raw_file_paths_dict[10] = Path(dirname / "dcamplifier.dat") + raw_file_paths_dict["DC Amplifier channel"] = Path(dirname / "dcamplifier.dat") if Path(dirname / "stim.dat").is_file(): - raw_file_paths_dict[11] = Path(dirname / "stim.dat") + raw_file_paths_dict["Stim channel"] = Path(dirname / "stim.dat") return raw_file_paths_dict # RHD Binary Files for One File Per Channel -possible_raw_file_prefixes_rhd = [ - "amp", - "aux", - "vdd", - "board-ANALOG-IN", - "board-DIGITAL-IN", - "board-DIGITAL-OUT", -] +stream_name_to_file_prefix_rhd = { + "RHD2000 amplifier channel": "amp", + "RHD2000 auxiliary input channel": "aux", + "RHD2000 supply voltage channel": "vdd", + "USB board ADC input channel": "board-ANALOG-IN", + "USB board digital input channel": "board-DIGITAL-IN", + "USB board digital output channel": "board-DIGITAL-OUT", +} def create_one_file_per_channel_dict_rhd(dirname): @@ -1278,25 +1267,27 @@ def create_one_file_per_channel_dict_rhd(dirname): file_names = dirname.glob("**/*.dat") files = [file for file in file_names if file.is_file()] raw_file_paths_dict = {} - for raw_index, prefix in enumerate(possible_raw_file_prefixes_rhd): - raw_file_paths_dict[raw_index] = [file for file in files if prefix in file.name] - - raw_file_paths_dict[6] = [Path(dirname / "time.dat")] - + for stream_name, file_prefix in stream_name_to_file_prefix_rhd.items(): + stream_files = [file for file in files if file_prefix in file.name] + # Map file name to channel name amp-A-000.dat -> A-000 amp-B-006.dat -> B-006 etc + file_path_to_channel_name = lambda x: '-'.join(x.stem.split('-')[1:]) + sorted_stream_files = sorted(stream_files, key=file_path_to_channel_name) + raw_file_paths_dict[stream_name] = sorted_stream_files + + raw_file_paths_dict["timestamp"] = [Path(dirname / "time.dat")] return raw_file_paths_dict # RHS Binary Files for One File Per Channel -possible_raw_file_prefixes_rhs = [ - "amp", - "aux", - "vdd", - "board-ANALOG-IN", - "board-ANALOG-OUT", - "board-DIGITAL-IN", - "board-DIGITAL-OUT", -] - +stream_name_to_file_prefix_rhs = { + "RHS2000 amplifier channel": "amp", + "RHS2000 auxiliary input channel": "aux", + "RHS2000 supply voltage channel": "vdd", + "USB board ADC input channel": "board-ANALOG-IN", + "USB board ADC output channel": "board-ANALOG-OUT", + "USB board digital input channel": "board-DIGITAL-IN", + "USB board digital output channel": "board-DIGITAL-OUT", +} def create_one_file_per_channel_dict_rhs( dirname, @@ -1317,15 +1308,19 @@ def create_one_file_per_channel_dict_rhs( file_names = dirname.glob("**/*.dat") files = [file for file in file_names if file.is_file()] raw_file_paths_dict = {} - for raw_index, prefix in enumerate(possible_raw_file_prefixes_rhs): - raw_file_paths_dict[raw_index] = [file for file in files if prefix in file.name] + for stream_name, file_prefix in stream_name_to_file_prefix_rhs.items(): + stream_files = [file for file in files if file_prefix in file.name] + # Map file name to channel name amp-A-000.dat -> A-000 amp-B-006.dat -> B-006 etc + file_path_to_channel_name = lambda x: '-'.join(x.stem.split('-')[1:]) + sorted_stream_files = sorted(stream_files, key=file_path_to_channel_name) + raw_file_paths_dict[stream_name] = sorted_stream_files # we need time to be the last value - raw_file_paths_dict[15] = [Path(dirname / "time.dat")] + raw_file_paths_dict["timestamp"] = [Path(dirname / "time.dat")] # 10 and 11 are hardcoded in the rhs reader so hardcoded here - raw_file_paths_dict[10] = [file for file in files if "dc-" in file.name] + raw_file_paths_dict["DC Amplifier channel"] = [file for file in files if "dc-" in file.name] # we can find the files, but I can see how to read them out of header # so for now we don't expose the stim files in one-file-per-channel - raw_file_paths_dict[11] = [file for file in files if "stim-" in file.name] + raw_file_paths_dict["Stim channel"] = [file for file in files if "stim-" in file.name] return raw_file_paths_dict From 28fdcecfc66aecb249f5c51f69841b7c8681a3ea Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 28 Nov 2024 13:33:01 -0600 Subject: [PATCH 3/5] eliminate unecessary loop --- neo/rawio/intanrawio.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/neo/rawio/intanrawio.py b/neo/rawio/intanrawio.py index 0cc18761d..d93013125 100644 --- a/neo/rawio/intanrawio.py +++ b/neo/rawio/intanrawio.py @@ -185,15 +185,15 @@ def _parse_header(self): # based on https://github.com/NeuralEnsemble/python-neo/issues/1556 bug in versions 0.13.1, .2, .3 elif self.file_format == "one-file-per-signal": self._raw_data = {} - for stream_index, (stream_name, stream_datatype) in enumerate(memmap_data_dtype.items()): + for stream_name, stream_dtype in memmap_data_dtype.items(): num_channels = channel_number_dict[stream_name] file_path = raw_file_paths_dict[stream_name] size_in_bytes = file_path.stat().st_size - dtype_size = np.dtype(stream_datatype).itemsize + dtype_size = np.dtype(stream_dtype).itemsize n_samples = size_in_bytes // (dtype_size * num_channels) signal_stream_memmap = np.memmap( file_path, - dtype=stream_datatype, + dtype=stream_dtype, mode="r", shape=(n_samples, num_channels), order="C", @@ -203,9 +203,9 @@ def _parse_header(self): # for one-file-per-channel we have one memory map / channel stored as a list / neo stream elif self.file_format == "one-file-per-channel": self._raw_data = {} - for stream_index, (stream_name, stream_datatype) in enumerate(memmap_data_dtype.items()): + for stream_name, stream_dtype in memmap_data_dtype.items(): channel_file_paths = raw_file_paths_dict[stream_name] - channel_memmap_list = [np.memmap(fp, dtype=stream_datatype, mode="r") for fp in channel_file_paths] + channel_memmap_list = [np.memmap(fp, dtype=stream_dtype, mode="r") for fp in channel_file_paths] self._raw_data[stream_name] = channel_memmap_list @@ -216,6 +216,7 @@ def _parse_header(self): # signals signal_channels = [] + # This is used to calculate bit masks self.native_channel_order = {} for chan_info in self._ordered_channel_info: name = chan_info["custom_channel_name"] From 31b92c47e73ba886495d9ce05eed8b3f72420c09 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 4 Dec 2024 11:19:34 -0600 Subject: [PATCH 4/5] zach feedback --- neo/rawio/intanrawio.py | 10 ++++++---- neo/test/rawiotest/test_intanrawio.py | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/neo/rawio/intanrawio.py b/neo/rawio/intanrawio.py index d93013125..a7b0e56bc 100644 --- a/neo/rawio/intanrawio.py +++ b/neo/rawio/intanrawio.py @@ -747,11 +747,13 @@ def read_rhs(filename, file_format: str): # Build a dictionary with channel count special_cases_for_counting = [ - "DC Amplifier channel", # Same as RHS2000 amplifier channel - "Stim channel", # Same as RHS2000 amplifier channel + "DC Amplifier channel", + "Stim channel", ] names_to_count = [name for name in stream_names if name not in special_cases_for_counting] channel_number_dict = {name: len(stream_name_to_channel_info_list[name]) for name in names_to_count} + + # Both DC Amplifier and Stim streams have the same number of channels as the amplifier stream channel_number_dict["DC Amplifier channel"] = channel_number_dict["RHS2000 amplifier channel"] channel_number_dict["Stim channel"] = channel_number_dict["RHS2000 amplifier channel"] @@ -1264,6 +1266,8 @@ def create_one_file_per_channel_dict_rhd(dirname): raw_files_paths_dict: dict A dict of all the file paths """ + # Developer note, at the moment, the channels require to be n in order + # See https://github.com/NeuralEnsemble/python-neo/issues/1599 fo file_names = dirname.glob("**/*.dat") files = [file for file in file_names if file.is_file()] @@ -1316,9 +1320,7 @@ def create_one_file_per_channel_dict_rhs( sorted_stream_files = sorted(stream_files, key=file_path_to_channel_name) raw_file_paths_dict[stream_name] = sorted_stream_files - # we need time to be the last value raw_file_paths_dict["timestamp"] = [Path(dirname / "time.dat")] - # 10 and 11 are hardcoded in the rhs reader so hardcoded here raw_file_paths_dict["DC Amplifier channel"] = [file for file in files if "dc-" in file.name] # we can find the files, but I can see how to read them out of header # so for now we don't expose the stim files in one-file-per-channel diff --git a/neo/test/rawiotest/test_intanrawio.py b/neo/test/rawiotest/test_intanrawio.py index db70db6ba..3dfe57e4f 100644 --- a/neo/test/rawiotest/test_intanrawio.py +++ b/neo/test/rawiotest/test_intanrawio.py @@ -69,6 +69,7 @@ def test_annotations(self): def test_correct_reading_one_file_per_channel(self): + "Issue: https://github.com/NeuralEnsemble/python-neo/issues/1599" # Test reading of one-file-per-channel format file. The channels should match the raw files file_path = Path(self.get_local_path("intan/intan_fpc_test_231117_052630/info.rhd")) intan_reader = IntanRawIO(filename=file_path) From c158e555b2038a867bf7eea68fe7d8e117325d39 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 13 Dec 2024 09:57:54 -0600 Subject: [PATCH 5/5] improve tests --- neo/test/rawiotest/test_intanrawio.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neo/test/rawiotest/test_intanrawio.py b/neo/test/rawiotest/test_intanrawio.py index 3dfe57e4f..83b5f7678 100644 --- a/neo/test/rawiotest/test_intanrawio.py +++ b/neo/test/rawiotest/test_intanrawio.py @@ -77,9 +77,10 @@ def test_correct_reading_one_file_per_channel(self): # This should be the folder where the files of all the channels are stored folder_path = file_path.parent - amplifier_file_paths = [p for p in folder_path.iterdir() if "amp" in p.name] - channel_names = [p.name[4:-4] for p in amplifier_file_paths] + # The paths are named as amp-A-000.dat, amp-A-001.dat, amp-A-002.dat, ... + amplifier_file_paths = [path for path in folder_path.iterdir() if "amp" in path.name] + channel_names = [path.name[4:-4] for path in amplifier_file_paths] for channel_name, amplifier_file_path in zip(channel_names, amplifier_file_paths): data_raw = np.fromfile(amplifier_file_path, dtype=np.int16).squeeze()