fictract interface

catalystneuro · Oct 31, 2023 · b6ec116 · b6ec116
1 parent f0b2bdb
commit b6ec116
Showing 1 changed file with 55 additions and 39 deletions.
diff --git a/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py b/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
+import numpy as np
 from pynwb.behavior import Position, SpatialSeries
 from pynwb.file import NWBFile
 
@@ -21,6 +22,7 @@ class FicTracDataInterface(BaseTemporalAlignmentInterface):
         "visual fixation",
     ]
 
+    timestamps_column = 21
     # Columns in the .dat binary file with the data. The full description of the header can be found in:
     # https://github.com/rjdmoore/fictrac/blob/master/doc/data_header.txt
     columns_in_dat_file = [
@@ -162,6 +164,7 @@ def __init__(
         self.verbose = verbose
         self._timestamps = None
         self.radius = radius
+        self.configuration_file_path = None
         super().__init__(file_path=file_path)
 
         self._timestamps = None
@@ -170,7 +173,10 @@ def __init__(
     def get_metadata(self):
         metadata = super().get_metadata()
 
-        session_start_time = extract_session_start_time(self.file_path)
+        session_start_time = extract_session_start_time(
+            file_path=self.file_path,
+            configuration_file_path=self.configuration_file_path,
+        )
         if session_start_time:
             metadata["NWBFile"].update(session_start_time=session_start_time)
 
@@ -192,7 +198,6 @@ def add_to_nwbfile(
 
         import pandas as pd
 
-        # The first row only contains the session start time and invalid data
         fictrac_data_df = pd.read_csv(self.file_path, sep=",", header=None, names=self.columns_in_dat_file)
 
         # Get the timestamps
@@ -243,55 +248,60 @@ def add_to_nwbfile(
 
     def get_original_timestamps(self):
         """
-        Retrieve and correct the original timestamps from a FicTrac data file.
+        Retrieve and correct timestamps from a FicTrac data file.
 
-        This function performs two key corrections:
-        1. If the first timestamp is replaced with system time (e.g., in the case of the first timestamp being 0),
-        it is reset back to 0. This issue has been identified in FicTrac version 2.1.1 and possibly in earlier versions.
+        This function addresses two specific issues with timestamps in FicTrac data:
 
-        2. If timestamps are recorded in Unix epoch time format, indicating an absolute time reference since
-        the Unix epoch (1970-01-01 00:00:00 UTC), the entire series is re-centered by
-        subtracting the first timestamp value.
+        1. Resetting Initial Timestamp:
+        In some instances, FicTrac replaces the initial timestamp (0) with the system time. This commonly occurs
+        when the data source is a video file, and OpenCV reports the first timestamp as 0. Since OpenCV also
+        uses 0 as a marker for invalid values, FicTrac defaults to system time in that case. This leads to
+        inconsistent timestamps like [system_time, t1, t2, t3, ...]. The function corrects this by resetting the
+        first timestamp back to 0 when a negative difference is detected between the first two timestamps.
 
-        Notes
-        -----
-        - The case of the first timestamp being replaced with 0 is prominent in data whose source is a video file.
-        In that case, if OpenCV returns the first timestamps as 0, FicTrac replaces it with the system time.
-        This action results in an inconsistent sequence like [system_time, t1, t2, t3, ...].
-        The function adjusts this by changing the first timestamp back to 0.
-        - The function also detects timestamps in Unix epoch format, which might be uncharacteristically large numbers,
-        representing the time elapsed since the Unix epoch. Such timestamps are re-centered to the start of the experiment
-        for consistency.
+        2. Re-centering Unix Epoch Time:
+        If timestamps are in Unix epoch time format (time since 1970-01-01 00:00:00 UTC), this function re-centers
+        the time series by subtracting the first timestamp. This adjustment ensures that timestamps represent the
+        elapsed time since the start of the experiment rather than the Unix epoch. This case appears when one of the
+        sources of data in FicTrac (such as PGR or Basler) lacks a timestamp extraction method. FicTrac
+        then falls back to using the system time, which is in Unix epoch format.
 
         Returns
         -------
         np.ndarray
             An array of corrected timestamps, in seconds.
 
+        Notes
+        -----
+        - The issue of the initial timestamp replacement appears in FicTrac 2.1.1 and earlier versions.
+        - Re-centering is essential for timestamps in Unix epoch format as timestamps in an NWB file must be relative
+        to the start of the session. The heuristic here is to check if the first timestamp is larger than the length
+        of a 10-year experiment in seconds. If so, it's assumed that the timestamps are in Unix epoch format.
+
         References
         ----------
         Issue discussion on FicTrac's timestamp inconsistencies:
         https://github.com/rjdmoore/fictrac/issues/29
-
         """
 
         import pandas as pd
 
-        timestamp_index = self.columns_in_dat_file.index("timestamp")
-        fictrac_data_df = pd.read_csv(self.file_path, sep=",", header=None, usecols=[timestamp_index])
+        fictrac_data_df = pd.read_csv(self.file_path, sep=",", header=None, usecols=[self.timestamps_column])
 
-        timestamps = fictrac_data_df[timestamp_index].values / 1000.0  # Transform to seconds
+        timestamps = fictrac_data_df[self.timestamps_column].values / 1000.0  # Transform to seconds
 
-        # Correct for the case when the first timestamp was replaced by system time
+        # Correct for the case when only the first timestamp was replaced by system time
         first_difference = timestamps[1] - timestamps[0]
         if first_difference < 0:
             timestamps[0] = 0.0
 
         # Heuristic to test if timestamps are in Unix epoch
-        first_timestamp = timestamps[0]
         length_in_seconds_of_a_10_year_experiment = 10 * 365 * 24 * 60 * 60
-        if first_timestamp > length_in_seconds_of_a_10_year_experiment:
+        all_timestamps_are_in_unix_epoch = np.all(timestamps > length_in_seconds_of_a_10_year_experiment)
+        if all_timestamps_are_in_unix_epoch:
             timestamps = timestamps - timestamps[0]
+        # TODO: If we agree to ALWAYS constrain timestamps to be relative to the start of the session, we can
+        # always shift here and remove the heuristic above.
 
         return timestamps
 
@@ -314,33 +324,39 @@ def extract_session_start_time(
     configuration_file_path: Optional[FilePathType] = None,
 ) -> Union[datetime, None]:
     """
-    Extract the session start time from FicTrac data or configuration file.
+    Extract the session start time from a FicTrac data file or its configuration file.
+
+    The session start time is determined from the data file if the timestamps are in Unix epoch format. If not, the
+    function defaults to extracting the date from the configuration file and assuming that the start time is midnight.
+    If neither of these methods works, the function returns None.
+
+    The session start time has two different meanings depending on the source of the FicTrac data:
+    - For video file sources (.avi, .mp4, etc), the session start time corresponds to the time when the
+    FicTrac analysis commenced. That is, the session start time reflects the analysis time rather than
+    the actual start of the experiment.
+    - For camera sources (such as PGR or Basler), the session start time is either the time reported by the camera
+    or the system time if the camera's SDK does not provide timestamps to FicTrac. In both cases, this time is
+    the experiment start time, barring synchronization issues.
 
     Parameters
     ----------
-    file_path : Path
+    file_path : FilePathType
         Path to the FicTrac data file.
-    configuration_file_path : Optional[Path]
-        Path to the FicTrac configuration file. If not provided, the function will look for "fictrac_config.txt"
-        in the same directory as the data file.
+    configuration_file_path : Optional[FilePathType]
+        Path to the FicTrac configuration file. If omitted, the function defaults to searching for
+        "fictrac_config.txt" in the same directory as the data file.
 
     Returns
     -------
     datetime | None
         The session start time of in UTC as a datetime object. `None` if the session start time cannot be extracted.
 
-    Notes
-    -----
-    - In FicTrac, column 22 of the data file contains timestamps in milliseconds.
-    - If the timestamp is since the Unix epoch (1970-01-01 00:00:00 UTC), it's transformed to UTC.
-    - If timestamps are since the start of the session, the session start time is extracted from the configuration file.
-    - If neither applies or the relevant files are not found or correctly formatted, returns None.
-
     """
     with open(file_path, "r") as file:
         first_line = file.readline()
 
-    first_timestamp = float(first_line.split(",")[21]) / 1000.0  # Convert to seconds
+    timestamps_column = FicTracDataInterface.timestamps_column
+    first_timestamp = float(first_line.split(",")[timestamps_column]) / 1000.0  # Convert to seconds
 
     # Heuristic to test if timestamps are in Unix epoch
     length_in_seconds_of_a_10_year_experiment = 10 * 365 * 24 * 60 * 60
@@ -353,9 +369,9 @@ def extract_session_start_time(
     if configuration_file_path.is_file():
         configuration_file = parse_fictrac_config(configuration_file_path)
         session_start_time = datetime.strptime(configuration_file.get("build_date", ""), "%b %d %Y")
+        # Set the time to midnight UTC from the extracted date
         return session_start_time.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)
 
-    # If neither of the the methods above work, return None
     return None