From 9d2473c0c547e27ca42544a096454cbe0fe81852 Mon Sep 17 00:00:00 2001 From: Tom Forbes Date: Sun, 20 Oct 2024 15:37:11 +0100 Subject: [PATCH] Use md5? --- src/pypi_data/combine_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypi_data/combine_parquet.py b/src/pypi_data/combine_parquet.py index 26eedb5e..719f2e40 100644 --- a/src/pypi_data/combine_parquet.py +++ b/src/pypi_data/combine_parquet.py @@ -124,7 +124,7 @@ async def fill_buffer( # Hash the path column with zero copies. data_buffer = batch.column("path").cast(pyarrow.large_binary()).buffers()[1] - digest = hashlib.sha256(memoryview(data_buffer)).hexdigest() + digest = hashlib.md5(memoryview(data_buffer)).hexdigest() time_hashing_ns += time.perf_counter_ns() - start_hash_time