From c7861f460c2ebee78dd975b1da6131ee840e442b Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 6 Sep 2024 13:59:03 -0400 Subject: [PATCH] Remove robust_ros3_retry (#506) * remove robuse_ros3_retry * changelog * Update CHANGELOG.md Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --------- Co-authored-by: CodyCBakerPhD Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- CHANGELOG.md | 2 + src/nwbinspector/_nwb_inspection.py | 66 +++++++++++++++++----------- src/nwbinspector/utils/__init__.py | 2 - src/nwbinspector/utils/_utils.py | 20 +-------- tests/unit_tests/test_time_series.py | 13 +++--- 5 files changed, 50 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad94e109a..3f12a2f34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ### Deprecation (API) * The `inspect_nwb` method has been removed. Please use `inspect_nwbfile` or `inspect_nwbfile_object` instead. [#505](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/505) +### Improvements +* Removed the `robust_ros3_read` utility helper. [#506](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/506) diff --git a/src/nwbinspector/_nwb_inspection.py b/src/nwbinspector/_nwb_inspection.py index e4da1d5db..b119c7ebb 100644 --- a/src/nwbinspector/_nwb_inspection.py +++ b/src/nwbinspector/_nwb_inspection.py @@ -21,7 +21,6 @@ PathType, calculate_number_of_cpu, get_package_version, - robust_s3_read, ) @@ -145,8 +144,17 @@ def inspect_all( identifiers = defaultdict(list) for nwbfile_path in nwbfiles: with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: - nwbfile = robust_s3_read(io.read) - identifiers[nwbfile.identifier].append(nwbfile_path) + try: + nwbfile = io.read() + identifiers[nwbfile.identifier].append(nwbfile_path) + except Exception as exception: + yield InspectorMessage( + message=traceback.format_exc(), + importance=Importance.ERROR, + check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", + file_path=nwbfile_path, + ) + if len(identifiers) != len(nwbfiles): for identifier, nwbfiles_with_identifier in identifiers.items(): if len(nwbfiles_with_identifier) > 1: @@ -286,7 +294,7 @@ def inspect_nwbfile( ) try: - in_memory_nwbfile = robust_s3_read(command=io.read) + in_memory_nwbfile = io.read() for inspector_message in inspect_nwbfile_object( nwbfile_object=in_memory_nwbfile, @@ -298,11 +306,11 @@ def inspect_nwbfile( ): inspector_message.file_path = nwbfile_path yield inspector_message - except Exception as ex: + except Exception as exception: yield InspectorMessage( message=traceback.format_exc(), importance=Importance.ERROR, - check_function_name=f"During io.read() - {type(ex)}: {str(ex)}", + check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", file_path=nwbfile_path, ) @@ -426,22 +434,30 @@ def run_checks( for check_function in check_progress: for nwbfile_object in nwbfile.objects.values(): - if check_function.neurodata_type is None or issubclass(type(nwbfile_object), check_function.neurodata_type): - try: - output = robust_s3_read(command=check_function, command_args=[nwbfile_object]) - # if an individual check fails, include it in the report and continue with the inspection - except Exception: - output = InspectorMessage( - message=traceback.format_exc(), - importance=Importance.ERROR, - check_function_name=check_function.__name__, - ) - if isinstance(output, InspectorMessage): - # temporary solution to https://github.com/dandi/dandi-cli/issues/1031 - if output.importance != Importance.ERROR: - output.importance = check_function.importance - yield output - elif output is not None: - for x in output: - x.importance = check_function.importance - yield x + if check_function.neurodata_type is not None and not issubclass( + type(nwbfile_object), check_function.neurodata_type + ): + continue + + try: + output = check_function(nwbfile_object) + # if an individual check fails, include it in the report and continue with the inspection + except Exception as exception: + check_function_name = ( + f"During evaluation of '{check_function.__name__}' - {type(exception)}: {str(exception)}" + ) + output = InspectorMessage( + message=traceback.format_exc(), + importance=Importance.ERROR, + check_function_name=check_function_name, + file_path=nwbfile_path, + ) + if isinstance(output, InspectorMessage): + # temporary solution to https://github.com/dandi/dandi-cli/issues/1031 + if output.importance != Importance.ERROR: + output.importance = check_function.importance + yield output + elif output is not None: + for x in output: + x.importance = check_function.importance + yield x diff --git a/src/nwbinspector/utils/__init__.py b/src/nwbinspector/utils/__init__.py index 3828286be..905622e98 100644 --- a/src/nwbinspector/utils/__init__.py +++ b/src/nwbinspector/utils/__init__.py @@ -9,7 +9,6 @@ is_string_json_loadable, is_module_installed, get_package_version, - robust_s3_read, calculate_number_of_cpu, get_data_shape, PathType, # TODO: deprecate in favor of explicit typing @@ -28,7 +27,6 @@ "is_string_json_loadable", "is_module_installed", "get_package_version", - "robust_s3_read", "calculate_number_of_cpu", "get_data_shape", ] diff --git a/src/nwbinspector/utils/_utils.py b/src/nwbinspector/utils/_utils.py index 87f148f59..5f0224205 100644 --- a/src/nwbinspector/utils/_utils.py +++ b/src/nwbinspector/utils/_utils.py @@ -6,8 +6,7 @@ from functools import lru_cache from importlib import import_module from pathlib import Path -from time import sleep -from typing import Callable, Dict, List, Optional, Tuple, TypeVar, Union +from typing import List, Optional, Tuple, TypeVar, Union import h5py import numpy as np @@ -165,23 +164,6 @@ def get_package_version(name: str) -> version.Version: return version.parse(package_version) -def robust_s3_read( - command: Callable, max_retries: int = 10, command_args: Optional[list] = None, command_kwargs: Optional[Dict] = None -): - """Attempt the command (usually acting on an S3 IO) up to the number of max_retries using exponential backoff.""" - command_args = command_args or [] - command_kwargs = command_kwargs or dict() - for retry in range(max_retries): - try: - return command(*command_args, **command_kwargs) - except Exception as exc: - if "curl" in str(exc): # 'cannot curl request' can show up in potentially many different return error types - sleep(0.1 * 2**retry) - else: - raise exc - raise TimeoutError(f"Unable to complete the command ({command.__name__}) after {max_retries} attempts!") - - def calculate_number_of_cpu(requested_cpu: int = 1) -> int: """ Calculate the number CPUs to use with respect to negative slicing and check against maximal available resources. diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 999d3f45b..231179036 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -17,7 +17,7 @@ check_timestamps_without_nans, ) from nwbinspector.testing import check_streaming_tests_enabled, make_minimal_nwbfile -from nwbinspector.utils import get_package_version, robust_s3_read +from nwbinspector.utils import get_package_version STREAMING_TESTS_ENABLED, DISABLED_STREAMING_TESTS_REASON = check_streaming_tests_enabled() @@ -353,6 +353,7 @@ def test_check_unknown_resolution_pass(): assert check_resolution(time_series) is None +# TODO: remove test when past version HDMF/PyNWB testing is removed @pytest.mark.skipif( not STREAMING_TESTS_ENABLED or get_package_version("hdmf") >= version.parse("3.3.1"), reason=f"{DISABLED_STREAMING_TESTS_REASON or ''}. Also needs 'hdmf<3.3.1'.", @@ -371,12 +372,10 @@ def test_check_none_matnwb_resolution_pass(): load_namespaces=True, driver="ros3", ) as io: - nwbfile = robust_s3_read(command=io.read) - time_series = robust_s3_read( - command=nwbfile.processing["video_files"]["video"].time_series.get, - command_args=["20170203_KIB_01_s1.1.h264"], - ) - assert check_resolution(time_series) is None + nwbfile = io.read() + time_series = nwbfile.processing["video_files"]["video"]["20170203_KIB_01_s1.1.h264"] + + assert check_resolution(time_series=time_series) is None def test_check_resolution_fail():