From 991e2906615c8fdd67179f41dd5ae9b808ba23e6 Mon Sep 17 00:00:00 2001
From: panxuchen
Date: Fri, 15 Nov 2024 16:28:18 +0800
Subject: [PATCH] fix redis prefix

---
 data_juicer/ops/deduplicator/ray_redis_minhash_deduplicator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/data_juicer/ops/deduplicator/ray_redis_minhash_deduplicator.py b/data_juicer/ops/deduplicator/ray_redis_minhash_deduplicator.py
index 4a414fa00..72c250af1 100644
--- a/data_juicer/ops/deduplicator/ray_redis_minhash_deduplicator.py
+++ b/data_juicer/ops/deduplicator/ray_redis_minhash_deduplicator.py
@@ -239,7 +239,8 @@ def __init__(
     def run(self, dataset):
         from ray.data.aggregate import AggregateFn
 
-        union_find = RedisUnionFind(prefix=uuid.uuid4().hex[:8], redis_address=self.redis_address)
+        union_find = RedisUnionFind(prefix=uuid.uuid4().hex[:8],
+                                    redis_address=self.redis_address)
 
         def add_uid_column(table: pa.Table) -> pa.Table:
             new_column_data = [union_find.get_uid() for _ in range(len(table))]