From 2a1a1a72bde67b4cf0f86ad9b83c7ffcb0835aa8 Mon Sep 17 00:00:00 2001 From: panxuchen Date: Wed, 20 Nov 2024 14:11:16 +0800 Subject: [PATCH] fix pre-comment --- data_juicer/ops/deduplicator/ray_basic_deduplicator.py | 2 -- data_juicer/ops/deduplicator/ray_document_deduplicator.py | 5 +---- data_juicer/ops/deduplicator/ray_image_deduplicator.py | 5 +---- data_juicer/ops/deduplicator/ray_video_deduplicator.py | 6 +----- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/data_juicer/ops/deduplicator/ray_basic_deduplicator.py b/data_juicer/ops/deduplicator/ray_basic_deduplicator.py index 3fb902386..58343e98d 100644 --- a/data_juicer/ops/deduplicator/ray_basic_deduplicator.py +++ b/data_juicer/ops/deduplicator/ray_basic_deduplicator.py @@ -1,5 +1,3 @@ -from pydantic import PositiveInt - from data_juicer.utils.constant import HashKeys from data_juicer.utils.lazy_loader import LazyLoader diff --git a/data_juicer/ops/deduplicator/ray_document_deduplicator.py b/data_juicer/ops/deduplicator/ray_document_deduplicator.py index 667f86e38..7720e8810 100644 --- a/data_juicer/ops/deduplicator/ray_document_deduplicator.py +++ b/data_juicer/ops/deduplicator/ray_document_deduplicator.py @@ -2,7 +2,6 @@ import string import regex as re -from pydantic import PositiveInt from ..base_op import OPERATORS from .ray_basic_deduplicator import RayBasicDeduplicator @@ -31,9 +30,7 @@ def __init__(self, :param args: extra args :param kwargs: extra args. """ - super().__init__(redis_address=redis_address, - *args, - **kwargs) + super().__init__(redis_address=redis_address, *args, **kwargs) self.lowercase = lowercase self.remove_non_character_regex = re.compile( f'\s+|\d+|[{re.escape(string.punctuation)}]' # noqa: W605 diff --git a/data_juicer/ops/deduplicator/ray_image_deduplicator.py b/data_juicer/ops/deduplicator/ray_image_deduplicator.py index 7610dc30d..d2c85cc59 100644 --- a/data_juicer/ops/deduplicator/ray_image_deduplicator.py +++ b/data_juicer/ops/deduplicator/ray_image_deduplicator.py @@ -1,5 +1,4 @@ import numpy as np -from pydantic import PositiveInt from data_juicer.utils.lazy_loader import LazyLoader from data_juicer.utils.mm_utils import load_data_with_context, load_image @@ -46,9 +45,7 @@ def __init__(self, :param args: extra args :param kwargs: extra args """ - super().__init__(redis_address=redis_address, - *args, - **kwargs) + super().__init__(redis_address=redis_address, *args, **kwargs) if method not in HASH_METHOD: raise ValueError(f'Keep strategy [{method}] is not supported. ' f'Can only be one of {HASH_METHOD}.') diff --git a/data_juicer/ops/deduplicator/ray_video_deduplicator.py b/data_juicer/ops/deduplicator/ray_video_deduplicator.py index 342abf7a1..e3deb23b3 100644 --- a/data_juicer/ops/deduplicator/ray_video_deduplicator.py +++ b/data_juicer/ops/deduplicator/ray_video_deduplicator.py @@ -1,7 +1,5 @@ import hashlib -from pydantic import PositiveInt - from data_juicer.utils.mm_utils import (close_video, load_data_with_context, load_video) @@ -30,9 +28,7 @@ def __init__(self, :param args: extra args :param kwargs: extra args """ - super().__init__(redis_address=redis_address, - *args, - **kwargs) + super().__init__(redis_address=redis_address, *args, **kwargs) def calculate_hash(self, sample, context=False): if self.video_key not in sample or not sample[self.video_key]: