feat: Enhance video_motion_score_filter capabilities (#361)
- Support float-based sampling FPS for finer frame rate control
- Resize frames before optical flow for better efficiency
- Normalize optical flow magnitude relative to frame dimensions
- Improve robustness in frame reading and resizing
- Expand tests to cover new features
drcege authored Jul 26, 2024
1 parent 47b54a2 commit e8dbba9
Showing 4 changed files with 134 additions and 26 deletions.
3 changes: 3 additions & 0 deletions configs/config_all.yaml
@@ -406,6 +406,9 @@ process:
min_score: 0.25 # the minimum motion score to keep samples
max_score: 10000.0 # the maximum motion score to keep samples
sampling_fps: 2 # the sampling rate in frames_per_second to compute optical flow
size: null # resize frames along the smaller edge before computing optical flow, or a sequence like (h, w)
max_size: null # maximum allowed for the longer edge of resized frames
relative: false # whether to normalize the optical flow magnitude to [0, 1], relative to the frame's diagonal length
any_or_all: any # keep this sample when any/all videos meet the filter condition
- video_nsfw_filter: # filter samples according to the nsfw scores of videos in them
hf_nsfw_model: Falconsai/nsfw_image_detection # Huggingface model name for nsfw classification
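For readers wiring these options into their own pipeline, a minimal Python sketch of the new constructor arguments follows. The threshold values are placeholders; only the parameter names and defaults come from this commit.

from data_juicer.ops.filter.video_motion_score_filter import \
    VideoMotionScoreFilter

# Downscale each frame so its shorter edge is 240 px (longer edge capped
# at 320 px) before optical flow, sample at 1.5 frames per second, and
# normalize the motion score by the frame diagonal.
op = VideoMotionScoreFilter(
    min_score=0.005,
    sampling_fps=1.5,   # float sampling rates are now accepted
    size=240,
    max_size=320,
    relative=True,
    any_or_all='any',
)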
97 changes: 80 additions & 17 deletions data_juicer/ops/filter/video_motion_score_filter.py
@@ -1,8 +1,9 @@
import sys
from contextlib import contextmanager
from typing import List, Optional, Sequence, Tuple, Union

import numpy as np
from jsonargparse.typing import PositiveInt
from jsonargparse.typing import PositiveFloat, PositiveInt

from data_juicer.utils.availability_utils import AvailabilityChecking
from data_juicer.utils.constant import Fields, StatsKeys
@@ -44,7 +45,11 @@ class VideoMotionScoreFilter(Filter):
def __init__(self,
min_score: float = 0.25,
max_score: float = sys.float_info.max,
sampling_fps: PositiveInt = 2,
sampling_fps: PositiveFloat = 2,
size: Optional[Union[PositiveInt,
Sequence[PositiveInt]]] = None,
max_size: Optional[PositiveInt] = None,
relative: bool = False,
any_or_all: str = 'any',
*args,
**kwargs):
@@ -53,8 +58,20 @@ def __init__(self,
:param min_score: The minimum motion score to keep samples.
:param max_score: The maximum motion score to keep samples.
:param sampling_fps: The samplig rate of frames_per_second to
compute optical flow.
:param sampling_fps: The sampling rate in frames_per_second for
optical flow calculations.
:param size: Resize frames before computing optical flow. If size is a
sequence like (h, w), frame size will be matched to this. If size
is an int, smaller edge of frames will be matched to this number.
i.e., if height > width, then frame will be rescaled to (size *
height / width, size). Default `None` to keep the original size.
:param max_size: The maximum allowed size for the longer edge of resized
frames. If the longer edge of frames is greater than max_size after
being resized according to size, size will be overruled so that the
longer edge is equal to max_size. As a result, the smaller edge may
be shorter than size. This is only supported if size is an int.
:param relative: If `True`, the optical flow magnitude is normalized to
a [0, 1] range, relative to the frame's diagonal length.
:param any_or_all: keep this sample with 'any' or 'all' strategy of
all videos. 'any': keep this sample if any videos meet the
condition. 'all': keep this sample only if all videos meet the
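To make the resize rules above concrete, here is an illustrative use of the _compute_resized_output_size helper added later in this diff. The 480x640 frame size is hypothetical; the expected results in the comments follow directly from the helper's code.

from data_juicer.ops.filter.video_motion_score_filter import \
    _compute_resized_output_size

# (h, w) = (480, 640): the shorter edge is 480, the longer edge is 640.
_compute_resized_output_size((480, 640), [240])       # -> (240, 320)
# max_size=300 overrules size: 320 > 300, so the longer edge is clamped
# and the shorter edge shrinks below 240 accordingly.
_compute_resized_output_size((480, 640), [240], 300)  # -> (225, 300)
# A two-element size gives both target extents directly.
_compute_resized_output_size((480, 640), [120, 160])  # -> (120, 160)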
@@ -67,6 +84,17 @@ def __init__(self,
self.max_score = max_score
self.sampling_fps = sampling_fps

if isinstance(size, (list, tuple)):
if len(size) not in [1, 2]:
raise ValueError(
f'Size must be an int or a 1 or 2 element tuple/list, '
f'not a {len(size)} element tuple/list.')
if isinstance(size, int):
size = [size]
self.size = size
self.max_size = max_size
self.relative = relative

self.extra_kwargs = self._default_kwargs
for key in kwargs:
if key in self.extra_kwargs:
@@ -98,27 +126,31 @@ def compute_stats(self, sample, context=False):

video_motion_scores = []
with VideoCapture(video_key) as cap:
fps = cap.get(cv2.CAP_PROP_FPS)
valid_fps = min(max(self.sampling_fps, 1), fps)
frame_interval = int(fps / valid_fps)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# if cannot get the second frame, use the last one
frame_interval = min(frame_interval, total_frames - 1)
if cap.isOpened():
fps = cap.get(cv2.CAP_PROP_FPS)
sampling_fps = min(self.sampling_fps, fps)
sampling_step = round(fps / sampling_fps)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# at least two frames for computing optical flow
sampling_step = max(min(sampling_step, total_frames - 1),
1)

prev_frame = None
frame_count = -1
frame_count = 0
while cap.isOpened():
ret, frame = cap.read()
frame_count += 1

if not ret:
# If the frame can't be read, it could be due to
# a corrupt frame or reaching the end of the video.
break

# skip middle frames
if frame_count % frame_interval != 0:
continue
height, width, _ = frame.shape
new_size = _compute_resized_output_size(
(height, width), self.size, self.max_size)
if new_size != (height, width):
frame = cv2.resize(frame,
new_size,
interpolation=cv2.INTER_AREA)

gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if prev_frame is None:
@@ -129,14 +161,21 @@ def compute_stats(self, sample, context=False):
prev_frame, gray_frame, None, **self.extra_kwargs)
mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
frame_motion_score = np.mean(mag)
if self.relative:
frame_motion_score /= np.hypot(*flow.shape[:2])
video_motion_scores.append(frame_motion_score)
prev_frame = gray_frame

# quickly skip frames
frame_count += sampling_step
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)

# may be due to frame corruption
if not video_motion_scores:
unique_motion_scores[video_key] = -1
else:
unique_motion_scores[video_key] = np.mean(video_motion_scores)
unique_motion_scores[video_key] = np.mean(video_motion_scores
or [-1])

sample[Fields.stats][StatsKeys.video_motion_score] = [
unique_motion_scores[key] for key in loaded_video_keys
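The core of the new relative mode is easiest to see in isolation. The sketch below is a standalone approximation rather than the op itself: random frames stand in for real video, and the Farneback parameters are typical values, not necessarily the op's defaults.

import cv2
import numpy as np

# Two consecutive grayscale frames; random data stands in for real video.
prev_frame = np.random.randint(0, 256, (240, 320), dtype=np.uint8)
gray_frame = np.random.randint(0, 256, (240, 320), dtype=np.uint8)

flow = cv2.calcOpticalFlowFarneback(
    prev_frame, gray_frame, None,
    pyr_scale=0.5, levels=3, winsize=15, iterations=3,
    poly_n=5, poly_sigma=1.2, flags=0)
mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
frame_motion_score = np.mean(mag)
# relative=True divides by the frame diagonal (np.hypot(h, w)), so scores
# land roughly in [0, 1] and stay comparable across resolutions.
frame_motion_score /= np.hypot(*flow.shape[:2])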
@@ -159,3 +198,27 @@ def process(self, sample):
return keep_bools.any()
else:
return keep_bools.all()


def _compute_resized_output_size(
frame_size: Tuple[int, int],
size: Optional[List[int]],
max_size: Optional[int] = None,
) -> List[int]:
h, w = frame_size
short, long = (w, h) if w <= h else (h, w)

if size is None: # no change
new_short, new_long = short, long
elif len(size) == 1: # specified size only for the smallest edge
new_short = size[0]
new_long = int(new_short * long / short)
else: # specified both h and w
new_short, new_long = min(size), max(size)

if max_size is not None and new_long > max_size:
new_short = int(max_size * new_short / new_long)
new_long = max_size

new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
return new_h, new_w
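Finally, the reworked frame-reading loop above can be summarized outside the op. This is a condensed sketch with a placeholder file name; it mirrors the seek-based sampling that replaced the old modulo skip.

import cv2

cap = cv2.VideoCapture('video1.mp4')  # placeholder path
if cap.isOpened():
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sampling_fps = min(1.5, fps)  # requested rate is clamped to the video fps
    # keep at least two sampled frames so optical flow has a pair to work on
    sampling_step = max(min(round(fps / sampling_fps), total_frames - 1), 1)

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # corrupt frame or end of video
        # ... resize, convert to gray, accumulate optical flow here ...
        frame_count += sampling_step
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)  # jump to next sample
cap.release()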
4 changes: 2 additions & 2 deletions data_juicer/utils/unittest_utils.py
@@ -33,9 +33,9 @@ def setUpClass(cls):
max_diff = os.getenv('TEST_MAX_DIFF', 'None')
cls.maxDiff = None if max_diff == 'None' else int(max_diff)

import multiprocess
cls.original_mp_method = multiprocess.get_start_method()
if is_cuda_available():
import multiprocess
cls.original_mp_method = multiprocess.get_start_method()
multiprocess.set_start_method('spawn', force=True)

@classmethod
56 changes: 49 additions & 7 deletions tests/ops/filter/test_video_motion_score_filter.py
@@ -1,7 +1,7 @@
import os
import unittest

from data_juicer.core.data import NestedDataset as Dataset
from datasets import Dataset

from data_juicer.ops.filter.video_motion_score_filter import \
VideoMotionScoreFilter
@@ -29,7 +29,6 @@ def _run_helper(self, op, source_list, target_list, np=1):
self.assertEqual(res_list, target_list)

def test_default(self):

ds_list = [{
'videos': [self.vid1_path]
}, {
@@ -47,8 +46,55 @@ def test_default(self):
op = VideoMotionScoreFilter()
self._run_helper(op, ds_list, tgt_list)

def test_high(self):
def test_downscale(self):
ds_list = [{
'videos': [self.vid1_path]
}, {
'videos': [self.vid2_path]
}, {
'videos': [self.vid3_path]
}]
tgt_list = [{
'videos': [self.vid1_path]
}, {
'videos': [self.vid2_path]
}]
op = VideoMotionScoreFilter(min_score=1.0, size=120)
self._run_helper(op, ds_list, tgt_list)

def test_downscale_max(self):
ds_list = [{
'videos': [self.vid1_path]
}, {
'videos': [self.vid2_path]
}, {
'videos': [self.vid3_path]
}]
tgt_list = [{
'videos': [self.vid1_path]
}, {
'videos': [self.vid2_path]
}]
op = VideoMotionScoreFilter(min_score=1.0, size=120, max_size=160)
self._run_helper(op, ds_list, tgt_list)

def test_downscale_relative(self):
ds_list = [{
'videos': [self.vid1_path]
}, {
'videos': [self.vid2_path]
}, {
'videos': [self.vid3_path]
}]
tgt_list = [{
'videos': [self.vid1_path]
}, {
'videos': [self.vid2_path]
}]
op = VideoMotionScoreFilter(min_score=0.005, size=(120, 160), relative=True)
self._run_helper(op, ds_list, tgt_list)

def test_high(self):
ds_list = [{
'videos': [self.vid1_path]
}, {
@@ -61,7 +107,6 @@ def test_high(self):
self._run_helper(op, ds_list, tgt_list)

def test_low(self):

ds_list = [{
'videos': [self.vid1_path]
}, {
@@ -74,7 +119,6 @@ def test_low(self):
self._run_helper(op, ds_list, tgt_list)

def test_middle(self):

ds_list = [{
'videos': [self.vid1_path]
}, {
@@ -87,7 +131,6 @@ def test_middle(self):
self._run_helper(op, ds_list, tgt_list)

def test_any(self):

ds_list = [{
'videos': [self.vid1_path, self.vid2_path]
}, {
@@ -106,7 +149,6 @@ def test_any(self):
self._run_helper(op, ds_list, tgt_list)

def test_all(self):

ds_list = [{
'videos': [self.vid1_path, self.vid2_path]
}, {