From f8760eaec4c2b0e6e43b617e3fe3866ab6ba97d7 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 22 Aug 2024 12:26:59 -0400 Subject: [PATCH 1/6] debugging skip --- .../_bin_all_reduced_s3_logs_by_object_key.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py index 21b3522..89648a4 100644 --- a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py +++ b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py @@ -54,6 +54,11 @@ def bin_all_reduced_s3_logs_by_object_key( completed = completed or set() reduced_s3_log_files = list(set(reduced_s3_logs_folder_path.rglob("*.tsv")) - completed)[:file_limit] + + print(f"{reduced_s3_log_files=}") + print(f"{set(reduced_s3_logs_folder_path.rglob('*.tsv'))=}") + print(f"{completed=}") + for reduced_s3_log_file in tqdm.tqdm( iterable=reduced_s3_log_files, total=len(reduced_s3_log_files), From 71eeed1758c789ad6c0f1ed1e2eb03ac6669e27c Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 22 Aug 2024 12:28:39 -0400 Subject: [PATCH 2/6] debugging skip --- .../_bin_all_reduced_s3_logs_by_object_key.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py index 89648a4..e144aac 100644 --- a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py +++ b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py @@ -55,9 +55,12 @@ def bin_all_reduced_s3_logs_by_object_key( reduced_s3_log_files = list(set(reduced_s3_logs_folder_path.rglob("*.tsv")) - completed)[:file_limit] - print(f"{reduced_s3_log_files=}") - print(f"{set(reduced_s3_logs_folder_path.rglob('*.tsv'))=}") - print(f"{completed=}") + print(f"{reduced_s3_log_files[:5]=}") + print("\n") + print(f"{set(reduced_s3_logs_folder_path.rglob('*.tsv'))[:5]=}") + print("\n") + print(f"{completed[:5]=}") + assert False for reduced_s3_log_file in tqdm.tqdm( iterable=reduced_s3_log_files, From 7a68247f2c8b475822a47b34e39d3572aa3218a6 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 22 Aug 2024 12:29:09 -0400 Subject: [PATCH 3/6] debugging skip --- .../_bin_all_reduced_s3_logs_by_object_key.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py index e144aac..4f8754d 100644 --- a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py +++ b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py @@ -57,9 +57,9 @@ def bin_all_reduced_s3_logs_by_object_key( print(f"{reduced_s3_log_files[:5]=}") print("\n") - print(f"{set(reduced_s3_logs_folder_path.rglob('*.tsv'))[:5]=}") + print(f"{list(set(reduced_s3_logs_folder_path.rglob('*.tsv')))[:5]=}") print("\n") - print(f"{completed[:5]=}") + print(f"{list(completed)[:5]=}") assert False for reduced_s3_log_file in tqdm.tqdm( From 9705442fe5265800b5e3081e2072201abc18b3cf Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 22 Aug 2024 12:30:52 -0400 Subject: [PATCH 4/6] debugging skip --- .../_bin_all_reduced_s3_logs_by_object_key.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py index 4f8754d..c328d9c 100644 --- a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py +++ b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py @@ -44,7 +44,7 @@ def bin_all_reduced_s3_logs_by_object_key( with open(file=started_tracking_file_path, mode="r") as io: started = set(pathlib.Path(path) for path in io.readlines()) with open(file=completed_tracking_file_path, mode="r") as io: - completed = set(pathlib.Path(path) for path in io.readlines()) + completed = set(pathlib.Path(path.rstrip("\n")) for path in io.readlines()) if started != completed: raise ValueError( @@ -55,6 +55,7 @@ def bin_all_reduced_s3_logs_by_object_key( reduced_s3_log_files = list(set(reduced_s3_logs_folder_path.rglob("*.tsv")) - completed)[:file_limit] + print(f"{len(reduced_s3_log_files)=}") print(f"{reduced_s3_log_files[:5]=}") print("\n") print(f"{list(set(reduced_s3_logs_folder_path.rglob('*.tsv')))[:5]=}") From 98afe2d7b79dcd1f1665fee8ffa18a67c9500746 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 22 Aug 2024 12:31:44 -0400 Subject: [PATCH 5/6] debugging skip --- .../_bin_all_reduced_s3_logs_by_object_key.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py index c328d9c..753f538 100644 --- a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py +++ b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py @@ -42,7 +42,7 @@ def bin_all_reduced_s3_logs_by_object_key( completed_tracking_file_path.touch() else: with open(file=started_tracking_file_path, mode="r") as io: - started = set(pathlib.Path(path) for path in io.readlines()) + started = set(pathlib.Path(path.rstrip("\n")) for path in io.readlines()) with open(file=completed_tracking_file_path, mode="r") as io: completed = set(pathlib.Path(path.rstrip("\n")) for path in io.readlines()) From 4b4099e892788d87bd2023fec89923f7f4a5abfb Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 22 Aug 2024 12:32:20 -0400 Subject: [PATCH 6/6] debugging skip --- .../_bin_all_reduced_s3_logs_by_object_key.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py index 753f538..8162b0e 100644 --- a/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py +++ b/src/dandi_s3_log_parser/_bin_all_reduced_s3_logs_by_object_key.py @@ -54,15 +54,6 @@ def bin_all_reduced_s3_logs_by_object_key( completed = completed or set() reduced_s3_log_files = list(set(reduced_s3_logs_folder_path.rglob("*.tsv")) - completed)[:file_limit] - - print(f"{len(reduced_s3_log_files)=}") - print(f"{reduced_s3_log_files[:5]=}") - print("\n") - print(f"{list(set(reduced_s3_logs_folder_path.rglob('*.tsv')))[:5]=}") - print("\n") - print(f"{list(completed)[:5]=}") - assert False - for reduced_s3_log_file in tqdm.tqdm( iterable=reduced_s3_log_files, total=len(reduced_s3_log_files),