Skip to content

Commit

Permalink
debugging
Browse files (browse the repository at this point in the history)
  • Loading branch information
CodyCBakerPhD committed Aug 12, 2024
1 parent 81bbe1d commit 8655bbb
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 20 deletions.
6 changes: 3 additions & 3 deletions src/dandi_s3_log_parser/_dandi_s3_log_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@ def asset_id_handler(*, raw_asset_id: str) -> str:
split_by_slash = raw_asset_id.split("/")
return split_by_slash[0] + "_" + split_by_slash[-1]

daily_raw_s3_log_file_paths = set(base_raw_s3_log_folder_path.rglob(pattern="*.log"))

# Workaround to particular issue with current repo storage structure on Drogon
daily_raw_s3_log_file_paths.remove(pathlib.Path("/mnt/backup/dandi/dandiarchive-logs/stats/start-end.log"))
daily_raw_s3_log_file_paths = set(base_raw_s3_log_folder_path.rglob(pattern="*.log")) - set(
[pathlib.Path("/mnt/backup/dandi/dandiarchive-logs/stats/start-end.log")]
)

if maximum_number_of_workers == 1:
for raw_s3_log_file_path in tqdm.tqdm(
Expand Down
2 changes: 1 addition & 1 deletion src/dandi_s3_log_parser/_order_parsed_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def order_parsed_logs(

unordered_file_paths = list(unordered_parsed_s3_log_folder_path.glob("*.tsv"))
for unordered_parsed_s3_log_file_path in tqdm.tqdm(
iterable=unordered_parsed_s3_log_folder_path.glob("*.tsv"),
iterable=unordered_file_paths,
total=len(unordered_file_paths),
desc="Ordering parsed logs...",
position=0,
Expand Down
7 changes: 3 additions & 4 deletions src/dandi_s3_log_parser/_s3_log_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,9 @@ def asset_id_handler(*, raw_asset_id: str) -> str:
if order_results is True:
# Create a fresh temporary directory in the home folder and then fresh subfolders for each job
temporary_base_folder_path = parsed_s3_log_folder_path / ".temp"
shutil.rmtree(path=temporary_base_folder_path, ignore_errors=True)
temporary_base_folder_path.mkdir(exist_ok=True)

# Clean up any previous tasks that failed to clean themselves up
for previous_task_folder_path in temporary_base_folder_path.iterdir():
shutil.rmtree(path=previous_task_folder_path, ignore_errors=True)

task_id = str(uuid.uuid4())[:5]
temporary_folder_path = temporary_base_folder_path / task_id
temporary_folder_path.mkdir(exist_ok=True)
Expand All @@ -108,6 +105,8 @@ def asset_id_handler(*, raw_asset_id: str) -> str:
ip_hash_to_region_file_path=ip_hash_to_region_file_path,
)

print(reduced_logs)

reduced_logs_binned_by_unparsed_asset = dict()
for reduced_log in reduced_logs:
raw_asset_id = reduced_log.asset_id
Expand Down
4 changes: 2 additions & 2 deletions tests/test_parse_dandi_raw_s3_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_parse_dandi_raw_s3_log_example_0(tmpdir: py.path.local):
dandi_s3_log_parser.parse_dandi_raw_s3_log(
raw_s3_log_file_path=example_raw_s3_log_file_path, parsed_s3_log_folder_path=test_parsed_s3_log_folder_path
)
test_output_file_paths = list(test_parsed_s3_log_folder_path.iterdir())
test_output_file_paths = [path for path in test_parsed_s3_log_folder_path.iterdir() if path.is_file()]

number_of_output_files = len(test_output_file_paths)
assert number_of_output_files != 0, f"Test expected_output folder ({test_parsed_s3_log_folder_path}) is empty!"
Expand All @@ -35,7 +35,7 @@ def test_parse_dandi_raw_s3_log_example_0(tmpdir: py.path.local):
number_of_output_files == expected_number_of_output_files
), f"The number of asset files ({number_of_output_files}) does not match expectation!"

expected_asset_ids = [file_path.stem for file_path in expected_parsed_s3_log_folder_path.iterdir()]
expected_asset_ids = [path.stem for path in expected_parsed_s3_log_folder_path.iterdir() if path.is_file()]
for test_parsed_s3_log_file_path in test_output_file_paths:
assert (
test_parsed_s3_log_file_path.stem in expected_asset_ids
Expand Down
15 changes: 5 additions & 10 deletions tests/test_parse_dandi_raw_s3_log_bad_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import dandi_s3_log_parser


def test_parse_dandi_raw_s3_log_example_2(tmpdir: py.path.local):
def test_parse_dandi_raw_s3_log_bad_lines(tmpdir: py.path.local):
"""
'ordered_example_2' contains the basic test cases as well as a collection of 'bad lines' contributed over time.
"""
Expand All @@ -26,18 +26,13 @@ def test_parse_dandi_raw_s3_log_example_2(tmpdir: py.path.local):
dandi_s3_log_parser.parse_dandi_raw_s3_log(
raw_s3_log_file_path=example_raw_s3_log_file_path, parsed_s3_log_folder_path=test_parsed_s3_log_folder_path
)
test_output_file_paths = list(test_parsed_s3_log_folder_path.iterdir())
test_output_file_paths = [path for path in test_parsed_s3_log_folder_path.iterdir() if path.is_file()]

number_of_output_files = len(test_output_file_paths)
assert number_of_output_files != 0, f"Test expected_output folder ({test_parsed_s3_log_folder_path}) is empty!"
expected_number_of_output_files = 3
assert number_of_output_files == expected_number_of_output_files

# Increment this over time as more examples are added
expected_number_of_output_files = 2
assert (
number_of_output_files == expected_number_of_output_files
), f"The number of asset files ({number_of_output_files}) does not match expectation!"

expected_asset_ids = [file_path.stem for file_path in expected_parsed_s3_log_folder_path.iterdir()]
expected_asset_ids = [path.stem for path in expected_parsed_s3_log_folder_path.iterdir() if path.is_file()]
for test_parsed_s3_log_file_path in test_output_file_paths:
assert (
test_parsed_s3_log_file_path.stem in expected_asset_ids
Expand Down

0 comments on commit 8655bbb

Please sign in to comment.