diff --git a/neo/rawio/intanrawio.py b/neo/rawio/intanrawio.py index 77879e69b..d1e987964 100644 --- a/neo/rawio/intanrawio.py +++ b/neo/rawio/intanrawio.py @@ -130,7 +130,7 @@ def _parse_header(self): # see comment below for RHD which explains the division between file types if self.filename.suffix == ".rhs": if self.filename.name == "info.rhs": - if any((self.filename.parent / file).exists() for file in one_file_per_signal_filenames_rhs): + if any((self.filename.parent / file).exists() for file in stream_name_to_file_name_rhs.values()): self.file_format = "one-file-per-signal" raw_file_paths_dict = create_one_file_per_signal_dict_rhs(dirname=self.filename.parent) else: @@ -154,7 +154,7 @@ def _parse_header(self): elif self.filename.suffix == ".rhd": if self.filename.name == "info.rhd": # first we have one-file-per-signal which is where one neo stream/file is saved as .dat files - if any((self.filename.parent / file).exists() for file in one_file_per_signal_filenames_rhd): + if any((self.filename.parent / file).exists() for file in stream_name_to_file_name_rhd.values()): self.file_format = "one-file-per-signal" raw_file_paths_dict = create_one_file_per_signal_dict_rhd(dirname=self.filename.parent) # then there is one-file-per-channel where each channel in a neo stream is in its own .dat file @@ -174,7 +174,9 @@ def _parse_header(self): channel_number_dict, ) = read_rhd(self.filename, self.file_format) + ############################################# # memmap the raw data for each format type + ############################################# # if header-attached there is one giant memory-map if self.file_format == "header-attached": self._raw_data = np.memmap(self.filename, dtype=memmap_data_dtype, mode="r", offset=header_size) @@ -183,31 +185,28 @@ def _parse_header(self): # based on https://github.com/NeuralEnsemble/python-neo/issues/1556 bug in versions 0.13.1, .2, .3 elif self.file_format == "one-file-per-signal": self._raw_data = {} - for stream_index, (stream_index_key, stream_datatype) in enumerate(memmap_data_dtype.items()): - num_channels = channel_number_dict[stream_index_key] - file_path = raw_file_paths_dict[stream_index_key] + for stream_name, stream_dtype in memmap_data_dtype.items(): + num_channels = channel_number_dict[stream_name] + file_path = raw_file_paths_dict[stream_name] size_in_bytes = file_path.stat().st_size - dtype_size = np.dtype(stream_datatype).itemsize + dtype_size = np.dtype(stream_dtype).itemsize n_samples = size_in_bytes // (dtype_size * num_channels) signal_stream_memmap = np.memmap( file_path, - dtype=stream_datatype, + dtype=stream_dtype, mode="r", shape=(n_samples, num_channels), order="C", ) - self._raw_data[stream_index] = signal_stream_memmap + self._raw_data[stream_name] = signal_stream_memmap # for one-file-per-channel we have one memory map / channel stored as a list / neo stream elif self.file_format == "one-file-per-channel": self._raw_data = {} - for stream_index, (stream_index_key, stream_datatype) in enumerate(memmap_data_dtype.items()): - self._raw_data[stream_index] = [] - num_channels = channel_number_dict[stream_index_key] - for channel_index in range(num_channels): - file_path = raw_file_paths_dict[stream_index_key][channel_index] - channel_memmap = np.memmap(file_path, dtype=stream_datatype, mode="r") - self._raw_data[stream_index].append(channel_memmap) + for stream_name, stream_dtype in memmap_data_dtype.items(): + channel_file_paths = raw_file_paths_dict[stream_name] + channel_memmap_list = [np.memmap(fp, dtype=stream_dtype, mode="r") for fp in channel_file_paths] + self._raw_data[stream_name] = channel_memmap_list # Data Integrity checks # strictness of check is controlled by ignore_integrity_checks @@ -216,6 +215,7 @@ def _parse_header(self): # signals signal_channels = [] + # This is used to calculate bit masks self.native_channel_order = {} for chan_info in self._ordered_channel_info: name = chan_info["custom_channel_name"] @@ -243,15 +243,14 @@ def _parse_header(self): stream_ids = np.unique(signal_channels["stream_id"]) signal_streams = np.zeros(stream_ids.size, dtype=_signal_stream_dtype) - # we need to sort the data because the string of stream_index 10 is mis-sorted. buffer_ids = [] - stream_ids_sorted = sorted([int(stream_id) for stream_id in stream_ids]) - signal_streams["id"] = [str(stream_id) for stream_id in stream_ids_sorted] - for stream_index, stream_id in enumerate(stream_ids_sorted): + # we need to sort the data because the string of stream_index 10 is mis-sorted. + signal_streams["id"] = sorted(stream_ids, key=lambda x: int(x)) + for stream_index, stream_id in enumerate(signal_streams["id"]): if self.filename.suffix == ".rhd": - name = stream_id_to_stream_name_rhd.get(int(stream_id), "") + name = stream_id_to_stream_name_rhd.get(stream_id, "") else: - name = stream_id_to_stream_name_rhs.get(int(stream_id), "") + name = stream_id_to_stream_name_rhs.get(stream_id, "") signal_streams["name"][stream_index] = name # zach I need you help here if self.file_format == "header-attached": @@ -371,13 +370,14 @@ def _segment_t_stop(self, block_index, seg_index): def _get_signal_size(self, block_index, seg_index, stream_index): + stream_name = self.header["signal_streams"][stream_index]["name"][:] + if self.file_format == "header-attached": stream_id = self.header["signal_streams"][stream_index]["id"] mask = self.header["signal_channels"]["stream_id"] == stream_id signal_channels = self.header["signal_channels"][mask] channel_ids = signal_channels["id"] - stream_name = self.header["signal_streams"][stream_index]["name"][:] stream_is_digital = stream_name in digital_stream_names field_name = stream_name if stream_is_digital else channel_ids[0] @@ -385,11 +385,10 @@ def _get_signal_size(self, block_index, seg_index, stream_index): # one-file-per-signal is (n_samples, n_channels) elif self.file_format == "one-file-per-signal": - size = self._raw_data[stream_index].shape[0] + size = self._raw_data[stream_name].shape[0] # one-file-per-channel is (n_samples) so pull from list - else: - size = self._raw_data[stream_index][0].size + size = self._raw_data[stream_name][0].size return size @@ -474,7 +473,8 @@ def _get_analogsignal_chunk_header_attached(self, i_start, i_stop, stream_index, def _get_analogsignal_chunk_one_file_per_channel(self, i_start, i_stop, stream_index, channel_indexes): - signal_data_memmap_list = self._raw_data[stream_index] + stream_name = self.header["signal_streams"][stream_index]["name"][:] + signal_data_memmap_list = self._raw_data[stream_name] channel_indexes_are_slice = isinstance(channel_indexes, slice) if channel_indexes_are_slice: num_channels = len(signal_data_memmap_list) @@ -497,7 +497,7 @@ def _get_analogsignal_chunk_one_file_per_channel(self, i_start, i_stop, stream_i def _get_analogsignal_chunk_one_file_per_signal(self, i_start, i_stop, stream_index, channel_indexes): stream_name = self.header["signal_streams"][stream_index]["name"][:] - raw_data = self._raw_data[stream_index] + raw_data = self._raw_data[stream_name] stream_is_digital = stream_name in digital_stream_names if stream_is_digital: @@ -555,11 +555,9 @@ def _assert_timestamp_continuity(self): # timestamps are always last stream for headerless binary files elif self.file_format == "one-file-per-signal": - time_stream_index = max(self._raw_data.keys()) - timestamp = self._raw_data[time_stream_index] + timestamp = self._raw_data["timestamp"] elif self.file_format == "one-file-per-channel": - time_stream_index = max(self._raw_data.keys()) - timestamp = self._raw_data[time_stream_index][0] + timestamp = self._raw_data["timestamp"][0] discontinuous_timestamps = np.diff(timestamp) != 1 timestamps_are_not_contiguous = np.any(discontinuous_timestamps) @@ -704,13 +702,13 @@ def read_variable_header(f, header: list): ] stream_id_to_stream_name_rhs = { - 0: "RHS2000 amplifier channel", - 3: "USB board ADC input channel", - 4: "USB board ADC output channel", - 5: "USB board digital input channel", - 6: "USB board digital output channel", - 10: "DC Amplifier channel", - 11: "Stim channel", + "0": "RHS2000 amplifier channel", + "3": "USB board ADC input channel", + "4": "USB board ADC output channel", + "5": "USB board digital input channel", + "6": "USB board digital output channel", + "10": "DC Amplifier channel", + "11": "Stim channel", } @@ -725,11 +723,10 @@ def read_rhs(filename, file_format: str): # contains information for stream_id 10 and stream_id 11 so we need to # break these up. See notes throughout code base; for timestamps we always # force them to be the last stream_id. - stream_id_to_channel_info_list = {k: [] for k in [0, 3, 4, 5, 6]} + stream_names = stream_id_to_stream_name_rhs.values() + stream_name_to_channel_info_list = {name: [] for name in stream_names} if not file_format == "header-attached": - # memmap_data_dtype for rhs is complicated. There is not 1, 2 (supply and aux), - # but there are dc-amp (10) and stim (11). we make timestamps (15) - memmap_data_dtype = {k: [] for k in [0, 3, 4, 5, 6, 10, 11, 15]} + memmap_data_dtype = {name: [] for name in stream_names} for g in range(global_info["nb_signal_group"]): group_info = read_variable_header(f, rhs_signal_group_header) @@ -743,10 +740,21 @@ def read_rhs(filename, file_format: str): ) raise NeoReadWriteError(error_msg) if bool(chan_info["channel_enabled"]): - stream_id_to_channel_info_list[chan_info["signal_type"]].append(chan_info) - - # useful dictionary for knowing the number of channels for non-header attached formats - channel_number_dict = {i: len(stream_id_to_channel_info_list[i]) for i in [0, 3, 4, 5, 6]} + stream_id = str(chan_info["signal_type"]) + stream_name = stream_id_to_stream_name_rhs[stream_id] + stream_name_to_channel_info_list[stream_name].append(chan_info) + + # Build a dictionary with channel count + special_cases_for_counting = [ + "DC Amplifier channel", + "Stim channel", + ] + names_to_count = [name for name in stream_names if name not in special_cases_for_counting] + channel_number_dict = {name: len(stream_name_to_channel_info_list[name]) for name in names_to_count} + + # Both DC Amplifier and Stim streams have the same number of channels as the amplifier stream + channel_number_dict["DC Amplifier channel"] = channel_number_dict["RHS2000 amplifier channel"] + channel_number_dict["Stim channel"] = channel_number_dict["RHS2000 amplifier channel"] header_size = f.tell() @@ -757,11 +765,10 @@ def read_rhs(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype = [("timestamp", "int32", BLOCK_SIZE)] else: - memmap_data_dtype[15] = "int32" - channel_number_dict[15] = 1 + memmap_data_dtype["timestamp"] = "int32" + channel_number_dict["timestamp"] = 1 - # 0: RHS2000 amplifier channel. - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHS2000 amplifier channel"]: chan_info["sampling_rate"] = sr chan_info["units"] = "uV" chan_info["gain"] = 0.195 @@ -778,13 +785,12 @@ def read_rhs(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[0] = "int16" + memmap_data_dtype["RHS2000 amplifier channel"] = "int16" if bool(global_info["dc_amplifier_data_saved"]): # if we have dc amp we need to grab the correct number of channels # the channel number is the same as the count for amplifier data - channel_number_dict[10] = channel_number_dict[0] - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHS2000 amplifier channel"]: chan_info_dc = dict(chan_info) name = chan_info["native_channel_name"] chan_info_dc["native_channel_name"] = name + "_DC" @@ -798,14 +804,12 @@ def read_rhs(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype += [(name + "_DC", "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[10] = "uint16" + memmap_data_dtype["DC Amplifier channel"] = "uint16" # I can't seem to get stim files to generate for one-file-per-channel # so let's skip for now and can be given on request - if file_format != "one-file-per-channel": - channel_number_dict[11] = channel_number_dict[0] # should be one stim / amplifier channel - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHS2000 amplifier channel"]: chan_info_stim = dict(chan_info) name = chan_info["native_channel_name"] chan_info_stim["native_channel_name"] = name + "_STIM" @@ -821,7 +825,7 @@ def read_rhs(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype += [(name + "_STIM", "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[11] = "uint16" + memmap_data_dtype["Stim channel"] = "uint16" else: warnings.warn("Stim not implemented for `one-file-per-channel` due to lack of test files") @@ -829,10 +833,8 @@ def read_rhs(filename, file_format: str): # We have an error above that requests test files to help if the spec is changed # in the future. - # 3: Analog input channel. - # 4: Analog output channel. - for stream_id in [3, 4]: - for chan_info in stream_id_to_channel_info_list[stream_id]: + for stream_name in ["USB board ADC input channel", "USB board ADC output channel"]: + for chan_info in stream_name_to_channel_info_list[stream_name]: chan_info["sampling_rate"] = sr chan_info["units"] = "V" chan_info["gain"] = 0.0003125 @@ -843,12 +845,10 @@ def read_rhs(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" - # 5: Digital input channel. - # 6: Digital output channel. - for stream_id in [5, 6]: - for chan_info in stream_id_to_channel_info_list[stream_id]: + for stream_name in ["USB board digital input channel", "USB board digital output channel"]: + for chan_info in stream_name_to_channel_info_list[stream_name]: chan_info["sampling_rate"] = sr # arbitrary units are used to indicate that Intan does not # store raw voltages but only the boolean TTL state @@ -859,14 +859,13 @@ def read_rhs(filename, file_format: str): ordered_channel_info.append(chan_info) # Note that all the channels are packed in one buffer, so the data type only needs to be added once - if len(stream_id_to_channel_info_list[stream_id]) > 0: + if len(stream_name_to_channel_info_list[stream_name]) > 0: if file_format == "header-attached": - name = stream_id_to_stream_name_rhs[stream_id] - memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] + memmap_data_dtype += [(stream_name, "uint16", BLOCK_SIZE)] elif file_format == "one-file-per-channel": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" elif file_format == "one-file-per-signal": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" # per discussion with Intan developers before version 3 of their software the 'notch_filter_mode' # was a request for postprocessing to be done in one of their scripts. From version 3+ the notch @@ -955,12 +954,12 @@ def read_rhs(filename, file_format: str): ] stream_id_to_stream_name_rhd = { - 0: "RHD2000 amplifier channel", - 1: "RHD2000 auxiliary input channel", - 2: "RHD2000 supply voltage channel", - 3: "USB board ADC input channel", - 4: "USB board digital input channel", - 5: "USB board digital output channel", + "0": "RHD2000 amplifier channel", + "1": "RHD2000 auxiliary input channel", + "2": "RHD2000 supply voltage channel", + "3": "USB board ADC input channel", + "4": "USB board digital input channel", + "5": "USB board digital output channel", } @@ -1004,9 +1003,11 @@ def read_rhd(filename, file_format: str): global_info.update(read_variable_header(f, header)) # read channel group and channel header - stream_id_to_channel_info_list = {k: [] for k in [0, 1, 2, 3, 4, 5]} + stream_names = stream_id_to_stream_name_rhd.values() + stream_name_to_channel_info_list = {name: [] for name in stream_names} if not file_format == "header-attached": - memmap_data_dtype = {k: [] for k in range(7)} # 5 streams + 6 for timestamps for not header attached + memmap_data_dtype = {name: [] for name in stream_names} + memmap_data_dtype["timestamp"] = [] for g in range(global_info["nb_signal_group"]): group_info = read_variable_header(f, rhd_signal_group_header) @@ -1014,9 +1015,11 @@ def read_rhd(filename, file_format: str): for c in range(group_info["channel_num"]): chan_info = read_variable_header(f, rhd_signal_channel_header) if bool(chan_info["channel_enabled"]): - stream_id_to_channel_info_list[chan_info["signal_type"]].append(chan_info) + stream_id = str(chan_info["signal_type"]) + stream_name = stream_id_to_stream_name_rhd[stream_id] + stream_name_to_channel_info_list[stream_name].append(chan_info) - channel_number_dict = {i: len(stream_id_to_channel_info_list[i]) for i in range(6)} + channel_number_dict = {name: len(stream_name_to_channel_info_list[name]) for name in stream_names} header_size = f.tell() @@ -1034,17 +1037,16 @@ def read_rhd(filename, file_format: str): if file_format == "header-attached": memmap_data_dtype = [("timestamp", "int32", BLOCK_SIZE)] else: - memmap_data_dtype[6] = "int32" - channel_number_dict[6] = 1 + memmap_data_dtype["timestamp"] = "int32" + channel_number_dict["timestamp"] = 1 else: if file_format == "header-attached": memmap_data_dtype = [("timestamp", "uint32", BLOCK_SIZE)] else: - memmap_data_dtype[6] = "uint32" - channel_number_dict[6] = 1 + memmap_data_dtype["timestamp"] = "uint32" + channel_number_dict["timestamp"] = 1 - # 0: RHD2000 amplifier channel - for chan_info in stream_id_to_channel_info_list[0]: + for chan_info in stream_name_to_channel_info_list["RHD2000 amplifier channel"]: chan_info["sampling_rate"] = sr chan_info["units"] = "uV" chan_info["gain"] = 0.195 @@ -1060,10 +1062,9 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[0] = "int16" + memmap_data_dtype["RHD2000 amplifier channel"] = "int16" - # 1: RHD2000 auxiliary input channel - for chan_info in stream_id_to_channel_info_list[1]: + for chan_info in stream_name_to_channel_info_list["RHD2000 auxiliary input channel"]: chan_info["sampling_rate"] = sr / 4.0 chan_info["units"] = "V" chan_info["gain"] = 0.0000374 @@ -1074,10 +1075,9 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE // 4)] else: - memmap_data_dtype[1] = "uint16" + memmap_data_dtype["RHD2000 auxiliary input channel"] = "uint16" - # 2: RHD2000 supply voltage channel - for chan_info in stream_id_to_channel_info_list[2]: + for chan_info in stream_name_to_channel_info_list["RHD2000 supply voltage channel"]: chan_info["sampling_rate"] = sr / BLOCK_SIZE chan_info["units"] = "V" chan_info["gain"] = 0.0000748 @@ -1088,7 +1088,7 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16")] else: - memmap_data_dtype[2] = "uint16" + memmap_data_dtype["RHD2000 supply voltage channel"] = "uint16" # temperature is not an official channel in the header for i in range(global_info["num_temp_sensor_channels"]): @@ -1103,7 +1103,7 @@ def read_rhd(filename, file_format: str): memmap_data_dtype += [(name, "int16")] # 3: USB board ADC input channel - for chan_info in stream_id_to_channel_info_list[3]: + for chan_info in stream_name_to_channel_info_list["USB board ADC input channel"]: chan_info["sampling_rate"] = sr chan_info["units"] = "V" if global_info["eval_board_mode"] == 0: @@ -1121,12 +1121,10 @@ def read_rhd(filename, file_format: str): name = chan_info["native_channel_name"] memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] else: - memmap_data_dtype[3] = "uint16" + memmap_data_dtype["USB board ADC input channel"] = "uint16" - # 4: USB board digital input channel - # 5: USB board digital output channel - for stream_id in [4, 5]: - for chan_info in stream_id_to_channel_info_list[stream_id]: + for stream_name in ["USB board digital input channel", "USB board digital output channel"]: + for chan_info in stream_name_to_channel_info_list[stream_name]: chan_info["sampling_rate"] = sr # arbitrary units are used to indicate that Intan does not # store raw voltages but only the boolean TTL state @@ -1137,14 +1135,13 @@ def read_rhd(filename, file_format: str): ordered_channel_info.append(chan_info) # Note that all the channels are packed in one buffer, so the data type only needs to be added once - if len(stream_id_to_channel_info_list[stream_id]) > 0: + if len(stream_name_to_channel_info_list[stream_name]) > 0: if file_format == "header-attached": - name = stream_id_to_stream_name_rhd[stream_id] - memmap_data_dtype += [(name, "uint16", BLOCK_SIZE)] + memmap_data_dtype += [(stream_name, "uint16", BLOCK_SIZE)] elif file_format == "one-file-per-channel": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" elif file_format == "one-file-per-signal": - memmap_data_dtype[stream_id] = "uint16" + memmap_data_dtype[stream_name] = "uint16" # per discussion with Intan developers before version 3 of their software the 'notch_filter_mode' # was a request for postprocessing to be done in one of their scripts. From version 3+ the notch @@ -1173,15 +1170,14 @@ def read_rhd(filename, file_format: str): digital_stream_names = ["USB board digital input channel", "USB board digital output channel"] -# RHD Binary Files for One File Per Signal -one_file_per_signal_filenames_rhd = [ - "amplifier.dat", - "auxiliary.dat", - "supply.dat", - "analogin.dat", - "digitalin.dat", - "digitalout.dat", -] +stream_name_to_file_name_rhd = { + "RHD2000 amplifier channel": "amplifier.dat", + "RHD2000 auxiliary input channel": "auxiliary.dat", + "RHD2000 supply voltage channel": "supply.dat", + "USB board ADC input channel": "analogin.dat", + "USB board digital input channel": "digitalin.dat", + "USB board digital output channel": "digitalout.dat", +} def create_one_file_per_signal_dict_rhd(dirname): @@ -1199,25 +1195,24 @@ def create_one_file_per_signal_dict_rhd(dirname): """ raw_file_paths_dict = {} - for raw_index, raw_file in enumerate(one_file_per_signal_filenames_rhd): - if Path(dirname / raw_file).is_file(): - raw_file_paths_dict[raw_index] = Path(dirname / raw_file) + for stream_name, file_name in stream_name_to_file_name_rhd.items(): + if Path(dirname / file_name).is_file(): + raw_file_paths_dict[stream_name] = Path(dirname / file_name) - raw_file_paths_dict[6] = Path(dirname / "time.dat") + raw_file_paths_dict["timestamp"] = Path(dirname / "time.dat") return raw_file_paths_dict -# RHS Binary Files for One File Per Signal -one_file_per_signal_filenames_rhs = [ - "amplifier.dat", - "auxiliary.dat", - "supply.dat", - "analogin.dat", - "analogout.dat", - "digitalin.dat", - "digitalout.dat", -] +stream_name_to_file_name_rhs = { + "RHS2000 amplifier channel": "amplifier.dat", + "RHS2000 auxiliary input channel": "auxiliary.dat", + "RHS2000 supply voltage channel": "supply.dat", + "USB board ADC input channel": "analogin.dat", + "USB board ADC output channel": "analogout.dat", + "USB board digital input channel": "digitalin.dat", + "USB board digital output channel": "digitalout.dat", +} def create_one_file_per_signal_dict_rhs(dirname): @@ -1235,30 +1230,28 @@ def create_one_file_per_signal_dict_rhs(dirname): """ raw_file_paths_dict = {} - for raw_index, raw_file in enumerate(one_file_per_signal_filenames_rhs): - if Path(dirname / raw_file).is_file(): - raw_file_paths_dict[raw_index] = Path(dirname / raw_file) + for stream_name, file_name in stream_name_to_file_name_rhs.items(): + if Path(dirname / file_name).is_file(): + raw_file_paths_dict[stream_name] = Path(dirname / file_name) - # we need time to be the last value - raw_file_paths_dict[15] = Path(dirname / "time.dat") - # 10 and 11 are hardcoded in the rhs_reader above so hardcoded here too + raw_file_paths_dict["timestamp"] = Path(dirname / "time.dat") if Path(dirname / "dcamplifier.dat").is_file(): - raw_file_paths_dict[10] = Path(dirname / "dcamplifier.dat") + raw_file_paths_dict["DC Amplifier channel"] = Path(dirname / "dcamplifier.dat") if Path(dirname / "stim.dat").is_file(): - raw_file_paths_dict[11] = Path(dirname / "stim.dat") + raw_file_paths_dict["Stim channel"] = Path(dirname / "stim.dat") return raw_file_paths_dict # RHD Binary Files for One File Per Channel -possible_raw_file_prefixes_rhd = [ - "amp", - "aux", - "vdd", - "board-ANALOG-IN", - "board-DIGITAL-IN", - "board-DIGITAL-OUT", -] +stream_name_to_file_prefix_rhd = { + "RHD2000 amplifier channel": "amp", + "RHD2000 auxiliary input channel": "aux", + "RHD2000 supply voltage channel": "vdd", + "USB board ADC input channel": "board-ANALOG-IN", + "USB board digital input channel": "board-DIGITAL-IN", + "USB board digital output channel": "board-DIGITAL-OUT", +} def create_one_file_per_channel_dict_rhd(dirname): @@ -1274,28 +1267,33 @@ def create_one_file_per_channel_dict_rhd(dirname): raw_files_paths_dict: dict A dict of all the file paths """ + # Developer note, at the moment, the channels require to be n in order + # See https://github.com/NeuralEnsemble/python-neo/issues/1599 fo file_names = dirname.glob("**/*.dat") files = [file for file in file_names if file.is_file()] raw_file_paths_dict = {} - for raw_index, prefix in enumerate(possible_raw_file_prefixes_rhd): - raw_file_paths_dict[raw_index] = [file for file in files if prefix in file.name] - - raw_file_paths_dict[6] = [Path(dirname / "time.dat")] - + for stream_name, file_prefix in stream_name_to_file_prefix_rhd.items(): + stream_files = [file for file in files if file_prefix in file.name] + # Map file name to channel name amp-A-000.dat -> A-000 amp-B-006.dat -> B-006 etc + file_path_to_channel_name = lambda x: "-".join(x.stem.split("-")[1:]) + sorted_stream_files = sorted(stream_files, key=file_path_to_channel_name) + raw_file_paths_dict[stream_name] = sorted_stream_files + + raw_file_paths_dict["timestamp"] = [Path(dirname / "time.dat")] return raw_file_paths_dict # RHS Binary Files for One File Per Channel -possible_raw_file_prefixes_rhs = [ - "amp", - "aux", - "vdd", - "board-ANALOG-IN", - "board-ANALOG-OUT", - "board-DIGITAL-IN", - "board-DIGITAL-OUT", -] +stream_name_to_file_prefix_rhs = { + "RHS2000 amplifier channel": "amp", + "RHS2000 auxiliary input channel": "aux", + "RHS2000 supply voltage channel": "vdd", + "USB board ADC input channel": "board-ANALOG-IN", + "USB board ADC output channel": "board-ANALOG-OUT", + "USB board digital input channel": "board-DIGITAL-IN", + "USB board digital output channel": "board-DIGITAL-OUT", +} def create_one_file_per_channel_dict_rhs( @@ -1317,15 +1315,17 @@ def create_one_file_per_channel_dict_rhs( file_names = dirname.glob("**/*.dat") files = [file for file in file_names if file.is_file()] raw_file_paths_dict = {} - for raw_index, prefix in enumerate(possible_raw_file_prefixes_rhs): - raw_file_paths_dict[raw_index] = [file for file in files if prefix in file.name] - - # we need time to be the last value - raw_file_paths_dict[15] = [Path(dirname / "time.dat")] - # 10 and 11 are hardcoded in the rhs reader so hardcoded here - raw_file_paths_dict[10] = [file for file in files if "dc-" in file.name] + for stream_name, file_prefix in stream_name_to_file_prefix_rhs.items(): + stream_files = [file for file in files if file_prefix in file.name] + # Map file name to channel name amp-A-000.dat -> A-000 amp-B-006.dat -> B-006 etc + file_path_to_channel_name = lambda x: "-".join(x.stem.split("-")[1:]) + sorted_stream_files = sorted(stream_files, key=file_path_to_channel_name) + raw_file_paths_dict[stream_name] = sorted_stream_files + + raw_file_paths_dict["timestamp"] = [Path(dirname / "time.dat")] + raw_file_paths_dict["DC Amplifier channel"] = [file for file in files if "dc-" in file.name] # we can find the files, but I can see how to read them out of header # so for now we don't expose the stim files in one-file-per-channel - raw_file_paths_dict[11] = [file for file in files if "stim-" in file.name] + raw_file_paths_dict["Stim channel"] = [file for file in files if "stim-" in file.name] return raw_file_paths_dict diff --git a/neo/rawio/plexonrawio.py b/neo/rawio/plexonrawio.py index cc79417e3..ca73dc3aa 100644 --- a/neo/rawio/plexonrawio.py +++ b/neo/rawio/plexonrawio.py @@ -209,6 +209,10 @@ def _parse_header(self): for index, pos in enumerate(positions): bl_header = data[pos : pos + 16].view(DataBlockHeader)[0] + # To avoid overflow errors when doing arithmetic operations on numpy scalars + np_scalar_to_python_scalar = lambda x: x.item() if isinstance(x, np.generic) else x + bl_header = {key: np_scalar_to_python_scalar(bl_header[key]) for key in bl_header.dtype.names} + current_upper_byte_of_5_byte_timestamp = int(bl_header["UpperByteOf5ByteTimestamp"]) current_bl_timestamp = int(bl_header["TimeStamp"]) timestamp = current_upper_byte_of_5_byte_timestamp * 2**32 + current_bl_timestamp @@ -255,24 +259,29 @@ def _parse_header(self): else: chan_loop = range(nb_sig_chan) for chan_index in chan_loop: - h = slowChannelHeaders[chan_index] - name = h["Name"].decode("utf8") - chan_id = h["Channel"] + slow_channel_headers = slowChannelHeaders[chan_index] + + # To avoid overflow errors when doing arithmetic operations on numpy scalars + np_scalar_to_python_scalar = lambda x: x.item() if isinstance(x, np.generic) else x + slow_channel_headers = {key: np_scalar_to_python_scalar(slow_channel_headers[key]) for key in slow_channel_headers.dtype.names} + + name = slow_channel_headers["Name"].decode("utf8") + chan_id = slow_channel_headers["Channel"] length = self._data_blocks[5][chan_id]["size"].sum() // 2 if length == 0: continue # channel not added - source_id.append(h["SrcId"]) + source_id.append(slow_channel_headers["SrcId"]) channel_num_samples.append(length) - sampling_rate = float(h["ADFreq"]) + sampling_rate = float(slow_channel_headers["ADFreq"]) sig_dtype = "int16" units = "" # I don't know units if global_header["Version"] in [100, 101]: - gain = 5000.0 / (2048 * h["Gain"] * 1000.0) + gain = 5000.0 / (2048 * slow_channel_headers["Gain"] * 1000.0) elif global_header["Version"] in [102]: - gain = 5000.0 / (2048 * h["Gain"] * h["PreampGain"]) + gain = 5000.0 / (2048 * slow_channel_headers["Gain"] * slow_channel_headers["PreampGain"]) elif global_header["Version"] >= 103: gain = global_header["SlowMaxMagnitudeMV"] / ( - 0.5 * (2 ** global_header["BitsPerSpikeSample"]) * h["Gain"] * h["PreampGain"] + 0.5 * (2 ** global_header["BitsPerSpikeSample"]) * slow_channel_headers["Gain"] * slow_channel_headers["PreampGain"] ) offset = 0.0 @@ -358,21 +367,21 @@ def _parse_header(self): unit_loop = enumerate(self.internal_unit_ids) for unit_index, (chan_id, unit_id) in unit_loop: - c = np.nonzero(dspChannelHeaders["Channel"] == chan_id)[0][0] - h = dspChannelHeaders[c] + channel_index = np.nonzero(dspChannelHeaders["Channel"] == chan_id)[0][0] + dsp_channel_headers = dspChannelHeaders[channel_index] - name = h["Name"].decode("utf8") + name = dsp_channel_headers["Name"].decode("utf8") _id = f"ch{chan_id}#{unit_id}" wf_units = "" if global_header["Version"] < 103: - wf_gain = 3000.0 / (2048 * h["Gain"] * 1000.0) + wf_gain = 3000.0 / (2048 * dsp_channel_headers["Gain"] * 1000.0) elif 103 <= global_header["Version"] < 105: wf_gain = global_header["SpikeMaxMagnitudeMV"] / ( - 0.5 * 2.0 ** (global_header["BitsPerSpikeSample"]) * h["Gain"] * 1000.0 + 0.5 * 2.0 ** (global_header["BitsPerSpikeSample"]) * dsp_channel_headers["Gain"] * 1000.0 ) elif global_header["Version"] >= 105: wf_gain = global_header["SpikeMaxMagnitudeMV"] / ( - 0.5 * 2.0 ** (global_header["BitsPerSpikeSample"]) * h["Gain"] * global_header["SpikePreAmpGain"] + 0.5 * 2.0 ** (global_header["BitsPerSpikeSample"]) * dsp_channel_headers["Gain"] * global_header["SpikePreAmpGain"] ) wf_offset = 0.0 wf_left_sweep = -1 # DONT KNOWN @@ -576,7 +585,7 @@ def read_as_dict(fid, dtype, offset=None): v = v.replace("\x03", "") v = v.replace("\x00", "") - info[k] = v + info[k] = v.item() if isinstance(v, np.generic) else v return info diff --git a/neo/test/rawiotest/test_intanrawio.py b/neo/test/rawiotest/test_intanrawio.py index df3723288..3d86e5089 100644 --- a/neo/test/rawiotest/test_intanrawio.py +++ b/neo/test/rawiotest/test_intanrawio.py @@ -1,5 +1,6 @@ import unittest import numpy as np +from pathlib import Path from neo.rawio.intanrawio import IntanRawIO from neo.test.rawiotest.common_rawio_test import BaseTestRawIO @@ -66,6 +67,25 @@ def test_annotations(self): ) np.testing.assert_array_equal(signal_array_annotations["board_stream_num"][:10], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + def test_correct_reading_one_file_per_channel(self): + "Issue: https://github.com/NeuralEnsemble/python-neo/issues/1599" + # Test reading of one-file-per-channel format file. The channels should match the raw files + file_path = Path(self.get_local_path("intan/intan_fpc_test_231117_052630/info.rhd")) + intan_reader = IntanRawIO(filename=file_path) + intan_reader.parse_header() + + # This should be the folder where the files of all the channels are stored + folder_path = file_path.parent + + # The paths are named as amp-A-000.dat, amp-A-001.dat, amp-A-002.dat, ... + amplifier_file_paths = [path for path in folder_path.iterdir() if "amp" in path.name] + channel_names = [path.name[4:-4] for path in amplifier_file_paths] + + for channel_name, amplifier_file_path in zip(channel_names, amplifier_file_paths): + data_raw = np.fromfile(amplifier_file_path, dtype=np.int16).squeeze() + data_from_neo = intan_reader.get_analogsignal_chunk(channel_ids=[channel_name], stream_index=0).squeeze() + np.testing.assert_allclose(data_raw, data_from_neo) + if __name__ == "__main__": unittest.main()