Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[New Check] Entire column of a table is not NaN #231

Merged
merged 23 commits into from
Jul 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
abbe8f1
saving state
Jul 13, 2022
2a6516b
added test and debug
Jul 13, 2022
a322c04
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 13, 2022
db1b426
refactor logic call
Jul 13, 2022
28fc042
Merge branch 'check_table_col_not_nan' of https://github.com/neurodat…
Jul 13, 2022
29a171d
add early data access skip
Jul 13, 2022
2ac54f8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 13, 2022
73d4978
add flatten for indexed cols
Jul 13, 2022
c72a095
Merge branch 'check_table_col_not_nan' of https://github.com/neurodat…
Jul 13, 2022
48c5e60
generalized to util function; added None slicing
Jul 13, 2022
b67f2c8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 13, 2022
e5f2618
Merge branch 'dev' into check_table_col_not_nan
CodyCBakerPhD Jul 18, 2022
75a687e
swapped util to return only slice
CodyCBakerPhD Jul 18, 2022
62350db
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 18, 2022
fd35f34
Merge branch 'dev' into check_table_col_not_nan
CodyCBakerPhD Jul 18, 2022
50f81ac
Merge branch 'dev' into check_table_col_not_nan
bendichter Jul 18, 2022
44cec05
Merge branch 'dev' into check_table_col_not_nan
CodyCBakerPhD Jul 18, 2022
1e83c6b
Update nwbinspector/utils.py
CodyCBakerPhD Jul 18, 2022
ecc129d
Merge branch 'dev' into check_table_col_not_nan
bendichter Jul 19, 2022
09b0474
Update nwbinspector/checks/tables.py
CodyCBakerPhD Jul 19, 2022
10c2073
Update nwbinspector/utils.py
CodyCBakerPhD Jul 19, 2022
41056ea
Update nwbinspector/checks/tables.py
CodyCBakerPhD Jul 19, 2022
8def458
debug
CodyCBakerPhD Jul 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions nwbinspector/checks/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,22 @@ def check_table_values_for_dict(table: DynamicTable, nelems: int = 200):
if is_string_json_loadable(string=string):
message += " This string is also JSON loadable, so call `json.loads(...)` on the string to unpack."
yield InspectorMessage(message=message)


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable)
def check_col_not_nan(table: DynamicTable, nelems: Optional[int] = 200):
    """
    Check if all of the values in a single column of a table are NaN.

    Parameters
    ----------
    table : DynamicTable
        The table whose columns are inspected.
    nelems : int, optional
        Approximate number of elements to read per column. The first ``nelems`` values are
        checked first; only if all of them are NaN is the column then subsampled with a
        stride of ``ceil(len(column.data) / nelems)``.
        If None (or 0), the entire column is read.

    Yields
    ------
    InspectorMessage
        One message for every column whose inspected values are all NaN.
    """
    for column in table.columns:
        if not hasattr(column, "data") or isinstance(column, VectorIndex):
            continue

        n_rows = len(column.data)
        if n_rows == 0:
            # An empty column has no first element to probe, and "all NaN" would be
            # vacuously true; it would also produce a zero slice step below.
            continue

        # NaN only exists for floating/complex data. Gating on the dtype skips strings as the
        # original did, and also bytes/object values for which np.isnan raises TypeError.
        if not np.issubdtype(np.asarray(column.data[0]).dtype, np.inexact):
            continue

        # Cheap early exit: a single non-NaN value among the leading elements is enough.
        if nelems is not None and not np.all(np.isnan(column[:nelems])):
            continue

        # Stride chosen so that roughly `nelems` values spread over the whole column are read.
        step = int(np.ceil(n_rows / nelems)) if nelems else None
        if np.all(np.isnan(column[slice(0, None, step)])):
            yield InspectorMessage(
                message=f"Column {column.name} has all NaN values. Consider removing it from the table."
            )
71 changes: 56 additions & 15 deletions tests/unit_tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
check_column_binary_capability,
check_single_row,
check_table_values_for_dict,
check_col_not_nan,
)
from nwbinspector.utils import get_package_version

Expand Down Expand Up @@ -237,8 +238,8 @@ def test_check_single_row_ignore_units():

def test_check_single_row_ignore_electrodes():
table = ElectrodeTable(
name="electrodes", # default name when building through nwbfile
)
name="electrodes",
) # default name when building through nwbfile
if get_package_version(name="pynwb") >= version.Version("2.1.0"):
table.add_row(
location="unknown",
Expand Down Expand Up @@ -291,7 +292,7 @@ def test_check_table_values_for_dict_pass():
assert check_table_values_for_dict(table=table) is None


def test_check_table_values_for_dict():
def test_check_table_values_for_dict_fail():
table = DynamicTable(name="test_table", description="")
table.add_column(name="test_column", description="")
table.add_row(test_column=str(dict(a=1)))
Expand All @@ -308,19 +309,59 @@ def test_check_table_values_for_dict():
)


def test_check_table_values_for_dict_json_case_fail():
    """A JSON-loadable dictionary string in a column yields the message with the `json.loads` hint."""
    table = DynamicTable(name="test_table", description="")
    table.add_column(name="test_column", description="")
    table.add_row(test_column=json.dumps(dict(a=1)))
    # Compare against the full list so that any unexpected extra message also fails the test.
    assert check_table_values_for_dict(table=table) == [
        InspectorMessage(
            message=(
                "The column 'test_column' contains a string value that contains a dictionary! Please unpack "
                "dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call "
                "`json.loads(...)` on the string to unpack."
            ),
            importance=Importance.BEST_PRACTICE_VIOLATION,
            check_function_name="check_table_values_for_dict",
            object_type="DynamicTable",
            object_name="test_table",
            location="/",
        )
    ]


def test_check_col_not_nan_pass():
    """A table holding a non-NaN float column and a string column produces no message."""
    table = DynamicTable(name="test_table", description="")
    table.add_column(name="test_column_not_nan", description="")
    table.add_column(name="test_column_string", description="")
    row_values = dict(test_column_not_nan=1.0, test_column_string="abc")
    table.add_row(**row_values)
    result = check_col_not_nan(table=table)
    assert result is None


def test_check_col_not_nan_fail():
    """Every all-NaN column is reported, in column order, while non-NaN columns are not."""
    table = DynamicTable(name="test_table", description="")
    for name in ["test_column_not_nan_1", "test_column_nan_1", "test_column_not_nan_2", "test_column_nan_2"]:
        table.add_column(name=name, description="")
    # 400 rows with no explicit `nelems` passed, so the check runs with its default setting.
    for _ in range(400):
        table.add_row(
            test_column_not_nan_1=1.0, test_column_nan_1=np.nan, test_column_not_nan_2=1.0, test_column_nan_2=np.nan
        )
    assert check_col_not_nan(table=table) == [
        InspectorMessage(
            message="Column test_column_nan_1 has all NaN values. Consider removing it from the table.",
            importance=Importance.BEST_PRACTICE_SUGGESTION,
            check_function_name="check_col_not_nan",
            object_type="DynamicTable",
            object_name="test_table",
            location="/",
            file_path=None,
        ),
        InspectorMessage(
            message="Column test_column_nan_2 has all NaN values. Consider removing it from the table.",
            importance=Importance.BEST_PRACTICE_SUGGESTION,
            check_function_name="check_col_not_nan",
            object_type="DynamicTable",
            object_name="test_table",
            location="/",
            file_path=None,
        ),
    ]