From 57df24c5344766039016a4098c55a378b5e39353 Mon Sep 17 00:00:00 2001 From: panxuchen Date: Thu, 12 Dec 2024 17:11:09 +0800 Subject: [PATCH] fix comments --- data_juicer/ops/filter/flagged_words_filter.py | 6 +++--- data_juicer/ops/filter/image_aspect_ratio_filter.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/data_juicer/ops/filter/flagged_words_filter.py b/data_juicer/ops/filter/flagged_words_filter.py index 499ea596..406ae1a2 100644 --- a/data_juicer/ops/filter/flagged_words_filter.py +++ b/data_juicer/ops/filter/flagged_words_filter.py @@ -126,7 +126,7 @@ def compute_stats_batched(self, samples, context=False): def process_batched(self, samples): return list( map( - lambda stat: stat[StatsKeys.flagged_words_ratio] <= self.max_ratio, + lambda stat: stat[StatsKeys.flagged_words_ratio] <= self. + max_ratio, samples[Fields.stats], - ) - ) \ No newline at end of file + )) diff --git a/data_juicer/ops/filter/image_aspect_ratio_filter.py b/data_juicer/ops/filter/image_aspect_ratio_filter.py index 390c510b..d3b3785e 100644 --- a/data_juicer/ops/filter/image_aspect_ratio_filter.py +++ b/data_juicer/ops/filter/image_aspect_ratio_filter.py @@ -76,9 +76,9 @@ def compute_stats_batched(self, samples, context=False): def process_batched(self, samples): def process_single(values): - keep_bools = np.array( - [self.min_ratio <= value <= self.max_ratio for value in values] - ) + keep_bools = np.array([ + self.min_ratio <= value <= self.max_ratio for value in values + ]) if len(keep_bools) <= 0: return True