From c85cb0afc83ce8580deac4ca5356f0f21a776667 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 20 Jul 2022 13:07:55 -0400 Subject: [PATCH] [New Check] Entire column of a table is not NaN (#231) * saving state * added test and debug * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactor logic call * add early data access skip * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add flatten for indexed cols * generalized to util function; added None slicing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * swapped util to return only slice * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update nwbinspector/utils.py Co-authored-by: Ben Dichter * Update nwbinspector/checks/tables.py Co-authored-by: Ben Dichter * Update nwbinspector/utils.py Co-authored-by: Ben Dichter * Update nwbinspector/checks/tables.py Co-authored-by: Ben Dichter * debug Co-authored-by: Cody Baker Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ben Dichter --- nwbinspector/checks/tables.py | 19 +++++++++ tests/unit_tests/test_tables.py | 71 ++++++++++++++++++++++++++------- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/nwbinspector/checks/tables.py b/nwbinspector/checks/tables.py index 3d28a4728..df83a3bb5 100644 --- a/nwbinspector/checks/tables.py +++ b/nwbinspector/checks/tables.py @@ -188,3 +188,22 @@ def check_table_values_for_dict(table: DynamicTable, nelems: int = 200): if is_string_json_loadable(string=string): message += " This string is also JSON loadable, so call `json.loads(...)` on the string to unpack." yield InspectorMessage(message=message) + + +@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable) +def check_col_not_nan(table: DynamicTable, nelems: Optional[int] = 200): + """Check if all of the values in a single column of a table are NaN.""" + for column in table.columns: + if not hasattr(column, "data") or isinstance(column, VectorIndex) or isinstance(column.data[0], str): + continue + if nelems is not None and not all(np.isnan(column[:nelems]).flatten()): + continue + + if all( + np.isnan( + column[slice(0, None, np.ceil(len(column.data) / nelems).astype(int) if nelems else None)] + ).flatten() + ): + yield InspectorMessage( + message=f"Column {column.name} has all NaN values. Consider removing it from the table." + ) diff --git a/tests/unit_tests/test_tables.py b/tests/unit_tests/test_tables.py index 4f1a47279..d25f98483 100644 --- a/tests/unit_tests/test_tables.py +++ b/tests/unit_tests/test_tables.py @@ -17,6 +17,7 @@ check_column_binary_capability, check_single_row, check_table_values_for_dict, + check_col_not_nan, ) from nwbinspector.utils import get_package_version @@ -237,8 +238,8 @@ def test_check_single_row_ignore_units(): def test_check_single_row_ignore_electrodes(): table = ElectrodeTable( - name="electrodes", # default name when building through nwbfile - ) + name="electrodes", + ) # default name when building through nwbfile if get_package_version(name="pynwb") >= version.Version("2.1.0"): table.add_row( location="unknown", @@ -291,7 +292,7 @@ def test_check_table_values_for_dict_pass(): assert check_table_values_for_dict(table=table) is None -def test_check_table_values_for_dict(): +def test_check_table_values_for_dict_fail(): table = DynamicTable(name="test_table", description="") table.add_column(name="test_column", description="") table.add_row(test_column=str(dict(a=1))) @@ -308,19 +309,59 @@ def test_check_table_values_for_dict(): ) -def test_check_table_values_for_dict_json_case(): +def test_check_table_values_for_dict_json_case_fail(): table = DynamicTable(name="test_table", description="") table.add_column(name="test_column", description="") table.add_row(test_column=json.dumps(dict(a=1))) - assert check_table_values_for_dict(table=table)[0] == InspectorMessage( - message=( - "The column 'test_column' contains a string value that contains a dictionary! Please unpack " - "dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call " - "`json.loads(...)` on the string to unpack." + assert check_table_values_for_dict(table=table) == [ + InspectorMessage( + message=( + "The column 'test_column' contains a string value that contains a dictionary! Please unpack " + "dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call " + "`json.loads(...)` on the string to unpack." + ), + importance=Importance.BEST_PRACTICE_VIOLATION, + check_function_name="check_table_values_for_dict", + object_type="DynamicTable", + object_name="test_table", + location="/", + ) + ] + + +def test_check_col_not_nan_pass(): + table = DynamicTable(name="test_table", description="") + for name in ["test_column_not_nan", "test_column_string"]: + table.add_column(name=name, description="") + table.add_row(test_column_not_nan=1.0, test_column_string="abc") + assert check_col_not_nan(table=table) is None + + +def test_check_col_not_nan_fail(): + table = DynamicTable(name="test_table", description="") + for name in ["test_column_not_nan_1", "test_column_nan_1", "test_column_not_nan_2", "test_column_nan_2"]: + table.add_column(name=name, description="") + for _ in range(400): + table.add_row( + test_column_not_nan_1=1.0, test_column_nan_1=np.nan, test_column_not_nan_2=1.0, test_column_nan_2=np.nan + ) + assert check_col_not_nan(table=table) == [ + InspectorMessage( + message="Column test_column_nan_1 has all NaN values. Consider removing it from the table.", + importance=Importance.BEST_PRACTICE_SUGGESTION, + check_function_name="check_col_not_nan", + object_type="DynamicTable", + object_name="test_table", + location="/", + file_path=None, ), - importance=Importance.BEST_PRACTICE_VIOLATION, - check_function_name="check_table_values_for_dict", - object_type="DynamicTable", - object_name="test_table", - location="/", - ) + InspectorMessage( + message="Column test_column_nan_2 has all NaN values. Consider removing it from the table.", + importance=Importance.BEST_PRACTICE_SUGGESTION, + check_function_name="check_col_not_nan", + object_type="DynamicTable", + object_name="test_table", + location="/", + file_path=None, + ), + ]