feat(ml): optimization

jolibrain · Sep 18, 2024 · 31428b3 · 31428b3
1 parent c7eea16
commit 31428b3
Showing 1 changed file with 17 additions and 27 deletions.
diff --git a/data/self_supervised_temporal_labeled_mask_online_dataset.py b/data/self_supervised_temporal_labeled_mask_online_dataset.py
@@ -62,37 +62,27 @@ def __init__(self, opt, phase, name=""):
         self.A_size = len(self.A_img_paths)  # get the size of dataset A
 
         # dataset form img(bbox)/vid_series/vid_series_#frame.png(.txt)
-        # Dict to track the number of frames in each video series
-        self.frames_counts = OrderedDict()
+        # an ordered list of all video series paths (first-seen order, no duplicates)
+        self.vid_series_paths = list(
+            OrderedDict.fromkeys(os.path.dirname(path) for path in self.A_img_paths)
+        )
+        # Per-series counter of available frames: each entry starts at -num_frames * frame_step and is incremented once per frame path in that directory
+        self.frames_counts = {
+            vid_serie: -self.num_frames * self.frame_step
+            for vid_serie in self.vid_series_paths
+        }
+        # Loop through self.A_img_paths and count the occurrences of each directory
         for path in self.A_img_paths:
-            vid_series_paths = os.path.dirname(path)
-            # If this video series path hasn't been processed yet, initialize it in frames_counts.
-            # The value is a tuple (count, count_minus), where:
-            # - 'count' will store the number of frames in the series.
-            # - 'count_minus' is calculated as a negative offset based on the number of frames and step size.
-            # This offset acts as a limit to determine which frames to choose from the series.
-            if vid_series_paths not in self.frames_counts:
-                self.frames_counts[vid_series_paths] = (
-                    0,
-                    -self.num_frames * self.frame_step,
-                )
-            # Retrieve the current count and count_minus for the video series.
-            count, count_minus = self.frames_counts[vid_series_paths]
-            count += 1
-            # Update frames_counts with the new count and recalculate count_minus.
-            # Count is the total number of frames in the video series
-            # count_minus is the number of available frames in this video series
-            self.frames_counts[vid_series_paths] = (
-                count,
-                count - self.num_frames * self.frame_step,
-            )
+            dirname = os.path.dirname(path)
+            if dirname in self.vid_series_paths:
+                self.frames_counts[dirname] += 1
+
         # Store cumulative sums of available frames in the order of video series.
         self.cumulative_sums = []
         cumulative_sum = 0
-        # Create a list of video series paths for tracking later
-        self.vid_series_keys = list(self.frames_counts.keys())
         # Iterate through each video series in frames_counts to compute the cumulative sum of available frames.
-        for _, (_, count_minus) in self.frames_counts.items():
+        for vid_serie in self.vid_series_paths:
+            count_minus = self.frames_counts[vid_serie]
             if count_minus > 0:
                 cumulative_sum += count_minus
             self.cumulative_sums.append(cumulative_sum)
@@ -120,7 +110,7 @@ def get_img(
 
             # according to the selected_index, get the video series and frame number
             selected_index = bisect.bisect_left(self.cumulative_sums, random_A)
-            selected_vid = self.vid_series_keys[selected_index]
+            selected_vid = self.vid_series_paths[selected_index]
             if selected_index > 0:
                 frame_num = random_A - self.cumulative_sums[selected_index - 1]
             else: