diff --git a/CHANGELOG.md b/CHANGELOG.md index a544a7a9e..ffe6cc14b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ * Added alignment methods support to `MockRecordingInterface` [PR #611](https://github.com/catalystneuro/neuroconv/pull/611) * Added `NeuralynxNvtInterface`, which can read position tracking NVT files. [PR #580](https://github.com/catalystneuro/neuroconv/pull/580) * Adding radius as a conversion factor in `FicTracDataInterface`. [PR #619](https://github.com/catalystneuro/neuroconv/pull/619) +* Coerce `FicTracDataInterface` original timestamps to start from 0. [PR #619](https://github.com/catalystneuro/neuroconv/pull/619) ### Fixes * Remove `starting_time` reset to default value (0.0) when adding the rate and updating the `photon_series_kwargs` or `roi_response_series_kwargs`, in `add_photon_series` or `add_fluorescence_traces`. [PR #595](https://github.com/catalystneuro/neuroconv/pull/595) diff --git a/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py b/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py index 9ec9bd978..a7d163011 100644 --- a/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py @@ -1,8 +1,8 @@ -import importlib.util from datetime import datetime, timezone from pathlib import Path -from typing import Optional +from typing import Optional, Union +import numpy as np from pynwb.behavior import Position, SpatialSeries from pynwb.file import NWBFile @@ -22,6 +22,7 @@ class FicTracDataInterface(BaseTemporalAlignmentInterface): "visual fixation", ] + timestamps_column = 21 # Columns in the .dat binary file with the data. The full description of the header can be found in: # https://github.com/rjdmoore/fictrac/blob/master/doc/data_header.txt columns_in_dat_file = [ @@ -163,6 +164,7 @@ def __init__( self.verbose = verbose self._timestamps = None self.radius = radius + self.configuration_file_path = None super().__init__(file_path=file_path) self._timestamps = None @@ -171,8 +173,12 @@ def __init__( def get_metadata(self): metadata = super().get_metadata() - session_start_time = extract_session_start_time(self.file_path) - metadata["NWBFile"].update(session_start_time=session_start_time) + session_start_time = extract_session_start_time( + file_path=self.file_path, + configuration_file_path=self.configuration_file_path, + ) + if session_start_time: + metadata["NWBFile"].update(session_start_time=session_start_time) return metadata @@ -192,12 +198,10 @@ def add_to_nwbfile( import pandas as pd - # The first row only contains the session start time and invalid data - fictrac_data_df = pd.read_csv(self.file_path, sep=",", skiprows=1, header=None, names=self.columns_in_dat_file) + fictrac_data_df = pd.read_csv(self.file_path, sep=",", header=None, names=self.columns_in_dat_file) # Get the timestamps timestamps = self.get_timestamps() - starting_time = timestamps[0] # Note: The last values of the timestamps look very irregular for the sample file in catalyst neuro gin repo @@ -243,19 +247,67 @@ def add_to_nwbfile( processing_module.add_data_interface(position_container) def get_original_timestamps(self): + """ + Retrieve and correct timestamps from a FicTrac data file. + + This function addresses two specific issues with timestamps in FicTrac data: + + 1. Resetting Initial Timestamp: + In some instances, FicTrac replaces the initial timestamp (0) with the system time. This commonly occurs + when the data source is a video file, and OpenCV reports the first timestamp as 0. Since OpenCV also + uses 0 as a marker for invalid values, FicTrac defaults to system time in that case. This leads to + inconsistent timestamps like [system_time, t1, t2, t3, ...]. The function corrects this by resetting the + first timestamp back to 0 when a negative difference is detected between the first two timestamps. + + 2. Re-centering Unix Epoch Time: + If timestamps are in Unix epoch time format (time since 1970-01-01 00:00:00 UTC), this function re-centers + the time series by subtracting the first timestamp. This adjustment ensures that timestamps represent the + elapsed time since the start of the experiment rather than the Unix epoch. This case appears when one of the + sources of data in FicTrac (such as PGR or Basler) lacks a timestamp extraction method. FicTrac + then falls back to using the system time, which is in Unix epoch format. + + Returns + ------- + np.ndarray + An array of corrected timestamps, in seconds. + + Notes + ----- + - The issue of the initial timestamp replacement appears in FicTrac 2.1.1 and earlier versions. + - Re-centering is essential for timestamps in Unix epoch format as timestamps in an NWB file must be relative + to the start of the session. The heuristic here is to check if the first timestamp is larger than the length + of a 10-year experiment in seconds. If so, it's assumed that the timestamps are in Unix epoch format. + + References + ---------- + Issue discussion on FicTrac's timestamp inconsistencies: + https://github.com/rjdmoore/fictrac/issues/29 + """ + import pandas as pd - timestamp_index = self.columns_in_dat_file.index("timestamp") + fictrac_data_df = pd.read_csv(self.file_path, sep=",", header=None, usecols=[self.timestamps_column]) + + timestamps = fictrac_data_df[self.timestamps_column].values / 1000.0 # Transform to seconds - fictrac_data_df = pd.read_csv(self.file_path, sep=",", skiprows=1, header=None, usecols=[timestamp_index]) + # Correct for the case when only the first timestamp was replaced by system time + first_difference = timestamps[1] - timestamps[0] + if first_difference < 0: + timestamps[0] = 0.0 - return fictrac_data_df[timestamp_index].values / 1000.0 + # Heuristic to test if timestamps are in Unix epoch + length_in_seconds_of_a_10_year_experiment = 10 * 365 * 24 * 60 * 60 + all_timestamps_are_in_unix_epoch = np.all(timestamps > length_in_seconds_of_a_10_year_experiment) + if all_timestamps_are_in_unix_epoch: + timestamps = timestamps - timestamps[0] + # TODO: If we agree to ALWAYS constrain timestamps to be relative to the start of the session, we can + # Always shift here and remove the heuristic above. + + return timestamps def get_timestamps(self): timestamps = self._timestamps if self._timestamps is not None else self.get_original_timestamps() if self._starting_time is not None: - # Shift the timestamps to the starting time such that timestamps[0] == self._starting_time - # timestamps = timestamps - timestamps[0] + self._starting_time timestamps = timestamps + self._starting_time return timestamps @@ -267,24 +319,60 @@ def set_aligned_starting_time(self, aligned_starting_time): self._starting_time = aligned_starting_time -def extract_session_start_time(file_path: FilePathType) -> datetime: +def extract_session_start_time( + file_path: FilePathType, + configuration_file_path: Optional[FilePathType] = None, +) -> Union[datetime, None]: """ - Lazily extract the session start datetime from a FicTrac data file. + Extract the session start time from a FicTrac data file or its configuration file. - In FicTrac the column 22 in the data has the timestamps which are given in milliseconds since the epoch. + The session start time is determined from the data file if the timestamps are in Unix epoch format. If not, the + function defaults to extracting the date from the configuration file and assuming that the start time is midnight. + If neither of these methods works, the function returns None. + + The session start time, has two different meanings depending on the source of the FicTrac data: + - For video file sources (.avi, .mp4, etc), the session start time corresponds to the time when the + FicTrac analysis commenced. That is, the session start time reflects the analysis time rather than + the actual start of the experiment. + - For camera sources (such as PGR or Basler), the session start time is either the time reported by the camera + or the system time if the camera's SDK does not provide timestamps to Fictrac. In both cases, this time is + the experiment start time, barring synchronization issues. + + Parameters + ---------- + file_path : FilePathType + Path to the FicTrac data file. + configuration_file_path : Optional[FilePathType] + Path to the FicTrac configuration file. If omitted, the function defaults to searching for + "fictrac_config.txt" in the same directory as the data file. + + Returns + ------- + datetime | None + The session start time of in UTC as a datetime object. `None` if the session start time cannot be extracted. - The epoch in Linux is 1970-01-01 00:00:00 UTC. """ with open(file_path, "r") as file: - # Read the first data line first_line = file.readline() - # Split by comma and extract the timestamp (the 22nd column) - utc_timestamp = float(first_line.split(",")[21]) / 1000.0 # Transform to seconds + timestamps_column = FicTracDataInterface.timestamps_column + first_timestamp = float(first_line.split(",")[timestamps_column]) / 1000.0 # Convert to seconds + + # Heuristic to test if timestamps are in Unix epoch + length_in_seconds_of_a_10_year_experiment = 10 * 365 * 24 * 60 * 60 + if first_timestamp > length_in_seconds_of_a_10_year_experiment: + utc_timestamp = first_timestamp + return datetime.utcfromtimestamp(utc_timestamp).replace(tzinfo=timezone.utc) - utc_datetime = datetime.utcfromtimestamp(utc_timestamp).replace(tzinfo=timezone.utc) + if configuration_file_path is None: + configuration_file_path = file_path.parent / "fictrac_config.txt" + if configuration_file_path.is_file(): + configuration_file = parse_fictrac_config(configuration_file_path) + session_start_time = datetime.strptime(configuration_file.get("build_date", ""), "%b %d %Y") + # Set the time to midnight UTC from the extracted date + return session_start_time.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc) - return utc_datetime + return None # TODO: Parse probably will do this in a simpler way. diff --git a/tests/test_on_data/test_behavior_interfaces.py b/tests/test_on_data/test_behavior_interfaces.py index 1f1ebfb6a..a465dd925 100644 --- a/tests/test_on_data/test_behavior_interfaces.py +++ b/tests/test_on_data/test_behavior_interfaces.py @@ -65,6 +65,8 @@ def check_read_nwb(self, nwbfile_path: str): # This is currently structured to assert spatial_series.unit == expected_units assert spatial_series.conversion == 1.0 + assert spatial_series.timestamps[0] == 0.0 + class TestFicTracDataInterfaceWithRadius(DataInterfaceTestMixin, unittest.TestCase): data_interface_cls = FicTracDataInterface @@ -99,6 +101,8 @@ def check_read_nwb(self, nwbfile_path: str): # This is currently structured to assert spatial_series.unit == expected_units assert spatial_series.conversion == self.interface.radius + assert spatial_series.timestamps[0] == 0.0 + class TestFicTracDataInterfaceTiming(TemporalAlignmentMixin, unittest.TestCase): data_interface_cls = FicTracDataInterface