From 11ebc59f59f3c2b8ca3e05659f0507039f3c5f23 Mon Sep 17 00:00:00 2001
From: h-mayorquin <h.mayorquin@gmail.com>
Date: Tue, 18 Oct 2022 23:26:42 +0200
Subject: [PATCH 1/3] refactor: extract movie fps/timestamps once per movie, before the external-mode branch

---
 .../behavior/movie/moviedatainterface.py      | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py b/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py
index e98add2a1..d018573e6 100644
--- a/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py
+++ b/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py
@@ -156,7 +156,7 @@ def run_conversion(
         starting_times : list, optional
             List of start times for each movie. If unspecified, assumes that the movies in the file_paths list are in
             sequential order and are contiguous.
-        starting_times : list, optional
+        starting_frames : list, optional
             List of start frames for each movie written using external mode.
             Required if more than one path is specified per ImageSeries in external mode.
         timestamps : list, optional
@@ -220,8 +220,13 @@ def run_conversion(
             else:
                 raise ValueError("provide starting times as a list of len " f"{len(movies_metadata_unique)}")
 
+        # Iterate over unique movies
         for j, (image_series_kwargs, file_list) in enumerate(zip(movies_metadata_unique, file_paths_list)):
 
+            with VideoCaptureContext(str(file_list[0])) as vc:
+                fps = vc.get_movie_fps()
+                movie_timestamps = starting_times[j] + vc.get_movie_timestamps() if timestamps is None else timestamps
+
             if external_mode:
                 num_files = len(file_list)
                 if num_files > 1 and starting_frames is None:
@@ -236,10 +241,6 @@ def run_conversion(
                 elif num_files > 1:
                     image_series_kwargs.update(starting_frame=starting_frames[j])
 
-                with VideoCaptureContext(str(file_list[0])) as vc:
-                    fps = vc.get_movie_fps()
-                    if timestamps is None:
-                        timestamps = starting_times[j] + vc.get_movie_timestamps()
                 image_series_kwargs.update(
                     format="external",
                     external_file=file_list,
@@ -259,8 +260,7 @@ def run_conversion(
                         video_capture_ob.frame_count = 10
                     total_frames = video_capture_ob.get_movie_frame_count()
                     frame_shape = video_capture_ob.get_frame_shape()
-                    timestamps = starting_times[j] + video_capture_ob.get_movie_timestamps()
-                    fps = video_capture_ob.get_movie_fps()
+
                 maxshape = (total_frames, *frame_shape)
                 best_gzip_chunk = (1, frame_shape[0], frame_shape[1], 3)
                 tqdm_pos, tqdm_mininterval = (0, 10)
@@ -315,7 +315,9 @@ def run_conversion(
                         chunks=best_gzip_chunk,
                     )
                 image_series_kwargs.update(data=data)
-            rate = calculate_regular_series_rate(series=timestamps)
+
+            # Store sampling rate if timestamps are regular
+            rate = calculate_regular_series_rate(series=movie_timestamps)
             if rate is not None:
                 if fps != rate:
                     warn(
@@ -325,7 +327,7 @@ def run_conversion(
                     )
                 image_series_kwargs.update(starting_time=starting_times[j], rate=rate)
             else:
-                image_series_kwargs.update(timestamps=timestamps)
+                image_series_kwargs.update(timestamps=movie_timestamps)
 
             if module_name is None:
                 nwbfile.add_acquisition(ImageSeries(**image_series_kwargs))

From bccf3c915e576aa18767daa6301a77ae806fc579 Mon Sep 17 00:00:00 2001
From: h-mayorquin <h.mayorquin@gmail.com>
Date: Tue, 18 Oct 2022 23:44:50 +0200
Subject: [PATCH 2/3] added support for stub test in timestamps

---
 .../datainterfaces/behavior/movie/movie_utils.py    |  6 ++++--
 .../behavior/movie/moviedatainterface.py            | 13 +++++++++----
 tests/test_behavior/test_movie_interface.py         |  6 +++++-
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/neuroconv/datainterfaces/behavior/movie/movie_utils.py b/src/neuroconv/datainterfaces/behavior/movie/movie_utils.py
index 69ce7356d..58f9036c6 100644
--- a/src/neuroconv/datainterfaces/behavior/movie/movie_utils.py
+++ b/src/neuroconv/datainterfaces/behavior/movie/movie_utils.py
@@ -21,12 +21,14 @@ def __init__(self, file_path: FilePathType):
         self._frame_count = None
         self._movie_open_msg = "The Movie file is not open!"
 
-    def get_movie_timestamps(self):
+    def get_movie_timestamps(self, max_frames=None):
         """Return numpy array of the timestamps(s) for a movie file."""
         cv2 = get_package(package_name="cv2", installation_instructions="pip install opencv-python")
 
         timestamps = []
-        for _ in tqdm(range(self.get_movie_frame_count()), desc="retrieving timestamps"):
+        total_frames = self.get_movie_frame_count()
+        frames_to_extract = min(total_frames, max_frames) if max_frames else total_frames
+        for _ in tqdm(range(frames_to_extract), desc="retrieving timestamps"):
             success, _ = self.vc.read()
             if not success:
                 break
diff --git a/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py b/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py
index d018573e6..8416ecda7 100644
--- a/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py
+++ b/src/neuroconv/datainterfaces/behavior/movie/moviedatainterface.py
@@ -221,11 +221,16 @@ def run_conversion(
                 raise ValueError("provide starting times as a list of len " f"{len(movies_metadata_unique)}")
 
         # Iterate over unique movies
+        stub_frames = 10
         for j, (image_series_kwargs, file_list) in enumerate(zip(movies_metadata_unique, file_paths_list)):
 
             with VideoCaptureContext(str(file_list[0])) as vc:
                 fps = vc.get_movie_fps()
-                movie_timestamps = starting_times[j] + vc.get_movie_timestamps() if timestamps is None else timestamps
+                max_frames = stub_frames if stub_test else None
+                extracted_timestamps = vc.get_movie_timestamps(max_frames)
+                movie_timestamps = (
+                    starting_times[j] + extracted_timestamps if timestamps is None else timestamps[:max_frames]
+                )
 
             if external_mode:
                 num_files = len(file_list)
@@ -257,7 +262,7 @@ def run_conversion(
                     chunk_data = True
                 with VideoCaptureContext(str(file)) as video_capture_ob:
                     if stub_test:
-                        video_capture_ob.frame_count = 10
+                        video_capture_ob.frame_count = stub_frames
                     total_frames = video_capture_ob.get_movie_frame_count()
                     frame_shape = video_capture_ob.get_frame_shape()
 
@@ -267,7 +272,7 @@ def run_conversion(
                 if chunk_data:
                     video_capture_ob = VideoCaptureContext(str(file))
                     if stub_test:
-                        video_capture_ob.frame_count = 10
+                        video_capture_ob.frame_count = stub_frames
                     iterable = DataChunkIterator(
                         data=tqdm(
                             iterable=video_capture_ob,
@@ -289,7 +294,7 @@ def run_conversion(
                     iterable = np.zeros(shape=maxshape, dtype="uint8")
                     with VideoCaptureContext(str(file)) as video_capture_ob:
                         if stub_test:
-                            video_capture_ob.frame_count = 10
+                            video_capture_ob.frame_count = stub_frames
                         with tqdm(
                             desc=f"Reading movie data for {Path(file).name}",
                             position=tqdm_pos,
diff --git a/tests/test_behavior/test_movie_interface.py b/tests/test_behavior/test_movie_interface.py
index f9e9ffbee..57b678eee 100644
--- a/tests/test_behavior/test_movie_interface.py
+++ b/tests/test_behavior/test_movie_interface.py
@@ -207,7 +207,10 @@ def test_external_mode_assertion_with_movie_name_duplication(self):
             )
 
     def test_movie_stub(self):
-        conversion_opts = dict(Movie=dict(starting_times=self.starting_times, external_mode=False, stub_test=True))
+        timestamps = [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15]
+        conversion_opts = dict(
+            Movie=dict(starting_times=self.starting_times, timestamps=timestamps, external_mode=False, stub_test=True)
+        )
         self.nwb_converter.run_conversion(
             nwbfile_path=self.nwbfile_path,
             overwrite=True,
@@ -221,6 +224,7 @@ def test_movie_stub(self):
             for no in range(len(metadata["Behavior"]["Movies"])):
                 movie_interface_name = metadata["Behavior"]["Movies"][no]["name"]
                 assert mod[movie_interface_name].data.shape[0] == 10
+                assert mod[movie_interface_name].timestamps.shape[0] == 10
 
     def test_movie_irregular_timestamps(self):
         timestamps = [1, 2, 4]

From 004fa487e1850906909f92bad291c5c6aeac5933 Mon Sep 17 00:00:00 2001
From: h-mayorquin <h.mayorquin@gmail.com>
Date: Tue, 18 Oct 2022 23:54:11 +0200
Subject: [PATCH 3/3] changelog

---
 CHANGELOG.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7c0630b73..4d9732dd7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,11 +13,12 @@
 ### Documentation and tutorial enhancements
 * Added a note in User Guide/DataInterfaces to help installing custom dependencies for users who use Z-shell (`zsh`). [PR #180](https://github.com/catalystneuro/neuroconv/pull/180)
 
-
-# v0.2.1
-
 ### Features
 * Added `ConverterPipe`, a class that allows chaining previously intialized interfaces for batch conversion and corresponding tests [PR #169](https://github.com/catalystneuro/neuroconv/pull/169)
+* Added stubbing capabilities to timestamp extraction in the `MovieInterface`, which avoids scanning through the whole file when `stub_test=True`. [PR #181](https://github.com/catalystneuro/neuroconv/pull/181)
+
+
+# v0.2.1
 
 ### Fixes