From 4536c07fa8237539675e382e9e27d7c3f6604fa0 Mon Sep 17 00:00:00 2001
From: CodyCBakerPhD
Date: Mon, 12 Aug 2024 13:06:29 -0400
Subject: [PATCH] release limit and shuffle

---
 src/dandi_s3_log_parser/_dandi_s3_log_file_parser.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/dandi_s3_log_parser/_dandi_s3_log_file_parser.py b/src/dandi_s3_log_parser/_dandi_s3_log_file_parser.py
index 2db203a..bba509c 100644
--- a/src/dandi_s3_log_parser/_dandi_s3_log_file_parser.py
+++ b/src/dandi_s3_log_parser/_dandi_s3_log_file_parser.py
@@ -5,6 +5,7 @@
 import importlib.metadata
 import os
 import pathlib
+import random
 import shutil
 import traceback
 import uuid
@@ -81,8 +82,11 @@ def asset_id_handler(*, raw_asset_id: str) -> str:
         split_by_slash = raw_asset_id.split("/")
         return split_by_slash[0] + "_" + split_by_slash[-1]
 
+    # The .rglob is not naturally sorted; shuffle for more uniform progress updates
+    # NOTE: random.shuffle works in place and returns None, so its result must not be assigned
     daily_raw_s3_log_file_paths = set(base_raw_s3_log_folder_path.rglob(pattern="*.log")) - excluded_log_files
-    daily_raw_s3_log_file_paths = list(daily_raw_s3_log_file_paths)[:10]
+    daily_raw_s3_log_file_paths = list(daily_raw_s3_log_file_paths)
+    random.shuffle(daily_raw_s3_log_file_paths)
 
     if maximum_number_of_workers == 1:
         for raw_s3_log_file_path in tqdm.tqdm(