Skip to content

Commit

Permalink
[New Check] Entire column of a table is not NaN (#231)
Browse files Browse the repository at this point in the history
* saving state

* added test and debug

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* refactor logic call

* add early data access skip

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add flatten for indexed cols

* generalized to util function; added None slicing

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* swapped util to return only slice

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update nwbinspector/utils.py

Co-authored-by: Ben Dichter <[email protected]>

* Update nwbinspector/checks/tables.py

Co-authored-by: Ben Dichter <[email protected]>

* Update nwbinspector/utils.py

Co-authored-by: Ben Dichter <[email protected]>

* Update nwbinspector/checks/tables.py

Co-authored-by: Ben Dichter <[email protected]>

* debug

Co-authored-by: Cody Baker <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ben Dichter <[email protected]>
  • Loading branch information
4 people authored Jul 20, 2022
1 parent 2ee62e2 commit c85cb0a
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 15 deletions.
19 changes: 19 additions & 0 deletions nwbinspector/checks/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,22 @@ def check_table_values_for_dict(table: DynamicTable, nelems: int = 200):
if is_string_json_loadable(string=string):
message += " This string is also JSON loadable, so call `json.loads(...)` on the string to unpack."
yield InspectorMessage(message=message)


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable)
def check_col_not_nan(table: DynamicTable, nelems: Optional[int] = 200):
"""Check if all of the values in a single column of a table are NaN."""
for column in table.columns:
if not hasattr(column, "data") or isinstance(column, VectorIndex) or isinstance(column.data[0], str):
continue
if nelems is not None and not all(np.isnan(column[:nelems]).flatten()):
continue

if all(
np.isnan(
column[slice(0, None, np.ceil(len(column.data) / nelems).astype(int) if nelems else None)]
).flatten()
):
yield InspectorMessage(
message=f"Column {column.name} has all NaN values. Consider removing it from the table."
)
71 changes: 56 additions & 15 deletions tests/unit_tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
check_column_binary_capability,
check_single_row,
check_table_values_for_dict,
check_col_not_nan,
)
from nwbinspector.utils import get_package_version

Expand Down Expand Up @@ -237,8 +238,8 @@ def test_check_single_row_ignore_units():

def test_check_single_row_ignore_electrodes():
table = ElectrodeTable(
name="electrodes", # default name when building through nwbfile
)
name="electrodes",
) # default name when building through nwbfile
if get_package_version(name="pynwb") >= version.Version("2.1.0"):
table.add_row(
location="unknown",
Expand Down Expand Up @@ -291,7 +292,7 @@ def test_check_table_values_for_dict_pass():
assert check_table_values_for_dict(table=table) is None


def test_check_table_values_for_dict():
def test_check_table_values_for_dict_fail():
table = DynamicTable(name="test_table", description="")
table.add_column(name="test_column", description="")
table.add_row(test_column=str(dict(a=1)))
Expand All @@ -308,19 +309,59 @@ def test_check_table_values_for_dict():
)


def test_check_table_values_for_dict_json_case():
def test_check_table_values_for_dict_json_case_fail():
table = DynamicTable(name="test_table", description="")
table.add_column(name="test_column", description="")
table.add_row(test_column=json.dumps(dict(a=1)))
assert check_table_values_for_dict(table=table)[0] == InspectorMessage(
message=(
"The column 'test_column' contains a string value that contains a dictionary! Please unpack "
"dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call "
"`json.loads(...)` on the string to unpack."
assert check_table_values_for_dict(table=table) == [
InspectorMessage(
message=(
"The column 'test_column' contains a string value that contains a dictionary! Please unpack "
"dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call "
"`json.loads(...)` on the string to unpack."
),
importance=Importance.BEST_PRACTICE_VIOLATION,
check_function_name="check_table_values_for_dict",
object_type="DynamicTable",
object_name="test_table",
location="/",
)
]


def test_check_col_not_nan_pass():
table = DynamicTable(name="test_table", description="")
for name in ["test_column_not_nan", "test_column_string"]:
table.add_column(name=name, description="")
table.add_row(test_column_not_nan=1.0, test_column_string="abc")
assert check_col_not_nan(table=table) is None


def test_check_col_not_nan_fail():
table = DynamicTable(name="test_table", description="")
for name in ["test_column_not_nan_1", "test_column_nan_1", "test_column_not_nan_2", "test_column_nan_2"]:
table.add_column(name=name, description="")
for _ in range(400):
table.add_row(
test_column_not_nan_1=1.0, test_column_nan_1=np.nan, test_column_not_nan_2=1.0, test_column_nan_2=np.nan
)
assert check_col_not_nan(table=table) == [
InspectorMessage(
message="Column test_column_nan_1 has all NaN values. Consider removing it from the table.",
importance=Importance.BEST_PRACTICE_SUGGESTION,
check_function_name="check_col_not_nan",
object_type="DynamicTable",
object_name="test_table",
location="/",
file_path=None,
),
importance=Importance.BEST_PRACTICE_VIOLATION,
check_function_name="check_table_values_for_dict",
object_type="DynamicTable",
object_name="test_table",
location="/",
)
InspectorMessage(
message="Column test_column_nan_2 has all NaN values. Consider removing it from the table.",
importance=Importance.BEST_PRACTICE_SUGGESTION,
check_function_name="check_col_not_nan",
object_type="DynamicTable",
object_name="test_table",
location="/",
file_path=None,
),
]

0 comments on commit c85cb0a

Please sign in to comment.