Merge develop with master in preparation for release #788

Merged: 12 commits, Oct 27, 2023
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -1,11 +1,19 @@
Release 0.4.0 October 27, 2023
- Refactored the model classes to be based on DataFrame.
- Added additional command line options for remodeling tools.
- Restructured summaries for better reporting.
- Minor refactoring to reduce code complexity.
- Finalized and automated SPEC tests.
- Improvements to GitHub automation -- including adding CodeSpell.
- Improvements to API-Docs.

Release 0.3.1 July 3, 2023
- Pinned the versions of the pydantic and inflect libraries due to a conflict.
- Reorganized the JSON output of remodeling summaries so that all have a consistent form.
- Fixed summarize_hed_tags_op so that tags were correctly categorized for output.
- Minor refactoring to reduce code complexity.
- BaseInput and Sidecar now raise HedFileError if input could not be read.


Release 0.3.0 June 20, 2023
- Introduction of partnered schema.
- Improved error handling for schema validation.
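The DataFrame refactor is the headline change of 0.4.0. As a rough, hypothetical sketch of what a DataFrame-backed model buys (plain pandas only, not the library's actual model classes), per-row HED annotations can be transformed column-wise:

```python
import pandas as pd

# Hypothetical illustration only: the changelog says the model classes are now
# DataFrame-based; this shows the column-wise style that enables, not real API.
events = pd.DataFrame({
    "onset": [0.5, 1.2, 3.4],
    "HED": ["Sensory-event, Visual-presentation",
            "Agent-action",
            "Sensory-event, Auditory-presentation"],
})

def normalize(hed_string):
    # Toy transform standing in for operations such as def expansion or sorting.
    return ", ".join(sorted(tag.strip() for tag in hed_string.split(",")))

events["HED"] = events["HED"].apply(normalize)
print(events)
```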
6 changes: 3 additions & 3 deletions docs/source/conf.py
@@ -24,8 +24,8 @@
author = 'HED Working Group'

# The full version, including alpha/beta/rc tags
version = '0.3.1'
release = '0.3.1'
version = '0.4.0'
release = '0.4.0'

currentdir = os.path.realpath(os.path.dirname(__file__))

@@ -89,7 +89,7 @@
# Toc options
'collapse_navigation': False,
'sticky_navigation': True,
'navigation_depth': 4,
'navigation_depth': 7,
'includehidden': True,
'titles_only': False
}
2 changes: 1 addition & 1 deletion hed/errors/error_messages.py
@@ -321,7 +321,7 @@ def def_error_no_takes_value(def_name, placeholder_tag):

@hed_tag_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID)
def def_error_no_takes_value(tag, def_name):
return f"Tag '{str(tag)}' in Definition '{def_name}' has has a tag with the unique or required attribute."
return f"Tag '{str(tag)}' in Definition '{def_name}' has has a the unique or required attribute."


@hed_tag_error(DefinitionErrors.BAD_DEFINITION_LOCATION, actual_code=ValidationErrors.DEFINITION_INVALID)
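For readers unfamiliar with this module: each message function is a plain formatter registered under an error code by a decorator such as @hed_tag_error. The sketch below shows that registration pattern in isolation (hypothetical names; it is not the hed.errors implementation):

```python
# Illustrative registry pattern only; hed.errors defines the real decorators.
_registry = {}

def hed_tag_error_sketch(error_code, actual_code=None):
    def decorator(func):
        _registry[error_code] = (func, actual_code or error_code)
        return func
    return decorator

@hed_tag_error_sketch("BAD_PROP_IN_DEFINITION", actual_code="DEFINITION_INVALID")
def bad_prop_in_definition(tag, def_name):
    return f"Tag '{tag}' in Definition '{def_name}' has the unique or required attribute."

formatter, reported_code = _registry["BAD_PROP_IN_DEFINITION"]
print(reported_code, formatter("Unique-tag", "MyDef"))
```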
18 changes: 14 additions & 4 deletions hed/models/column_metadata.py
@@ -1,6 +1,7 @@
from enum import Enum
from hed.errors.error_types import SidecarErrors
import pandas as pd
import copy


class ColumnType(Enum):
@@ -102,13 +103,15 @@ def set_hed_strings(self, new_strings):
return True

@staticmethod
def _detect_column_type(dict_for_entry):
def _detect_column_type(dict_for_entry, basic_validation=True):
""" Determine the ColumnType of a given json entry.
Parameters:
dict_for_entry (dict): The loaded json entry for a specific column.
Generally has a "HED" entry among other optional ones.
basic_validation (bool): If False, skip the detailed checks on the "HED" entry
(such as value types and the presence of "#") so that a column type is still assigned
and more precise errors can be reported later instead of being silently ignored.
Returns:
ColumnType: The determined type of given column. Returns None if unknown.
@@ -122,14 +125,14 @@ def _detect_column_type(dict_for_entry):

hed_entry = dict_for_entry["HED"]
if isinstance(hed_entry, dict):
if not all(isinstance(entry, str) for entry in hed_entry.values()):
if basic_validation and not all(isinstance(entry, str) for entry in hed_entry.values()):
return None
return ColumnType.Categorical

if not isinstance(hed_entry, str):
return None

if "#" not in dict_for_entry["HED"]:
if basic_validation and "#" not in dict_for_entry["HED"]:
return None

return ColumnType.Value
@@ -155,3 +158,10 @@ def expected_pound_sign_count(column_type):
else:
return 0, None
return expected_count, error_type

def _get_unvalidated_data(self):
"""Returns a copy with less preliminary validation done(such as verifying all data types)"""
return_copy = copy.deepcopy(self)
return_copy.column_type = ColumnMetadata._detect_column_type(dict_for_entry=return_copy.source_dict,
basic_validation=False)
return return_copy
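The effect of the new basic_validation flag is easiest to see on a plain dict. This is a standalone restatement of the detection rules shown in the hunk above (simplified: the surrounding checks for a missing "HED" entry are omitted), not a call into the library:

```python
from enum import Enum

class ColumnType(Enum):
    Categorical = "categorical"
    Value = "value"

def detect_column_type(dict_for_entry, basic_validation=True):
    # Simplified restatement of ColumnMetadata._detect_column_type from the diff.
    hed_entry = dict_for_entry.get("HED")
    if isinstance(hed_entry, dict):
        # Strict mode rejects the whole column if any category value is not a
        # string; relaxed mode keeps it Categorical so the sidecar validator can
        # point at the exact bad entry instead of reporting an unknown column.
        if basic_validation and not all(isinstance(v, str) for v in hed_entry.values()):
            return None
        return ColumnType.Categorical
    if not isinstance(hed_entry, str):
        return None
    if basic_validation and "#" not in hed_entry:
        return None
    return ColumnType.Value

entry = {"HED": {"go": "Sensory-event", "stop": 3}}         # 3 is the bad value
print(detect_column_type(entry))                             # None
print(detect_column_type(entry, basic_validation=False))     # ColumnType.Categorical
```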
26 changes: 0 additions & 26 deletions hed/models/df_util.py
@@ -120,26 +120,6 @@ def expand_defs(df, hed_schema, def_dict, columns=None):
df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))


def sort_strings(df, hed_schema, tag_form="short_tag", columns=None):
""" Expands any def tags found in the dataframe.
Converts in place
Parameters:
df (pd.Dataframe or pd.Series): The dataframe or series to modify
hed_schema (HedSchema or None): The schema to use to identify defs
columns (list or None): The columns to modify on the dataframe
"""
if isinstance(df, pd.Series):
df[:] = df.apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form))
else:
if columns is None:
columns = df.columns

for column in columns:
df.loc[column] = df.loc[column].apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form))


def _convert_to_form(hed_string, hed_schema, tag_form):
return str(HedString(hed_string, hed_schema).get_as_form(tag_form))

@@ -152,12 +132,6 @@ def _expand_defs(hed_string, hed_schema, def_dict):
return str(HedString(hed_string, hed_schema, def_dict).expand_defs())


def _sort(hed_string, hed_schema, tag_form):
sorted_string = HedString(hed_string, hed_schema)
sorted_string.sort()
return sorted_string.get_as_form(tag_form)


def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None):
""" Gather def-expand tags in the strings/compare with known definitions to find any differences
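One note on the deleted sort_strings: its DataFrame branch indexed rows with df.loc[column] instead of selecting the column, so it would typically fail with a KeyError. If column-wise sorting is still wanted, a corrected standalone sketch looks like this (sort_tags is a toy stand-in for the removed HedString.sort/get_as_form path):

```python
import pandas as pd

def sort_tags(hed_string):
    # Toy stand-in for building a HedString, sorting it, and re-serializing it.
    return ", ".join(sorted(tag.strip() for tag in hed_string.split(",")))

def sort_strings(df, columns=None):
    # Converts in place; uses df[column] rather than the removed df.loc[column].
    if isinstance(df, pd.Series):
        df[:] = df.apply(sort_tags)
        return
    if columns is None:
        columns = df.columns
    for column in columns:
        df[column] = df[column].apply(sort_tags)

frame = pd.DataFrame({"HED": ["B-tag, A-tag", "D-tag, C-tag"]})
sort_strings(frame)
print(frame)   # tags within each string are now alphabetized
```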
39 changes: 0 additions & 39 deletions hed/models/indexed_df.py

This file was deleted.

2 changes: 1 addition & 1 deletion hed/schema/schema_attribute_validators.py
@@ -209,7 +209,7 @@ def in_library_check(hed_schema, tag_entry, attribute_name):

library = tag_entry.attributes.get(attribute_name, "")
if hed_schema.library != library:
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
tag_entry.name,
library)
return issues
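The one-line fix above only swaps the reported error code; the check itself compares a tag's declared library with the schema's library. A simplified sketch of that intent (plain values instead of the real schema-entry and ErrorHandler objects):

```python
# Simplified sketch of the inLibrary consistency check; illustrative types only.
def in_library_check(schema_library, tag_name, declared_library):
    issues = []
    if schema_library != declared_library:
        issues.append({
            # Before this fix the issue was mislabeled SCHEMA_ALLOWED_CHARACTERS_INVALID.
            "code": "SCHEMA_IN_LIBRARY_INVALID",
            "message": f"Tag '{tag_name}' declares library '{declared_library}' "
                       f"but the schema library is '{schema_library}'.",
        })
    return issues

print(in_library_check("score", "Example-tag", declared_library="lang"))
```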
71 changes: 41 additions & 30 deletions hed/validator/sidecar_validator.py
@@ -59,6 +59,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
definition_checks = {}
for column_data in sidecar:
column_name = column_data.column_name
column_data = column_data._get_unvalidated_data()
hed_strings = column_data.get_hed_strings()
error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
for key_name, hed_string in hed_strings.items():
@@ -180,20 +181,28 @@

@staticmethod
def _check_for_key(key, data):
# Probably can be cleaned up more -> Return True if any data or subdata is key
if isinstance(data, dict):
if key in data:
return bool(data[key])
else:
for sub_data in data.values():
result = SidecarValidator._check_for_key(key, sub_data)
if result is not None:
return result
return SidecarValidator._check_dict(key, data)
elif isinstance(data, list):
for sub_data in data:
result = SidecarValidator._check_for_key(key, sub_data)
if result is not None:
return result
return None
return SidecarValidator._check_list(key, data)
return False

@staticmethod
def _check_dict(key, data_dict):
if key in data_dict:
return True
for sub_data in data_dict.values():
if SidecarValidator._check_for_key(key, sub_data):
return True
return False

@staticmethod
def _check_list(key, data_list):
for sub_data in data_list:
if SidecarValidator._check_for_key(key, sub_data):
return True
return False
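The refactored key search is now split into a dict helper and a list helper that recurse through each other. A standalone copy of that logic, runnable outside the class, behaves like this:

```python
# Standalone copy of the refactored search: True if `key` occurs anywhere in a
# nested structure of dicts and lists, False otherwise.
def check_for_key(key, data):
    if isinstance(data, dict):
        return check_dict(key, data)
    elif isinstance(data, list):
        return check_list(key, data)
    return False

def check_dict(key, data_dict):
    if key in data_dict:
        return True
    return any(check_for_key(key, value) for value in data_dict.values())

def check_list(key, data_list):
    return any(check_for_key(key, item) for item in data_list)

print(check_for_key("HED", {"Levels": {"go": "a go trial"}, "HED": {"go": "Sensory-event"}}))  # True
print(check_for_key("HED", {"nested": [{"Description": "no annotations here"}]}))              # False
```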

def _validate_column_structure(self, column_name, dict_for_entry, error_handler):
""" Checks primarily for type errors such as expecting a string and getting a list in a json sidecar.
@@ -210,7 +219,7 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler)
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN)
return val_issues

column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry)
column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry, basic_validation=False)
if column_type is None:
val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE,
column_name=column_name)
@@ -219,25 +228,27 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler)
if found_hed:
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED)
elif column_type == ColumnType.Categorical:
raw_hed_dict = dict_for_entry["HED"]
if not raw_hed_dict:
val_issues += self._validate_categorical_column(column_name, dict_for_entry, error_handler)

return val_issues

def _validate_categorical_column(self, column_name, dict_for_entry, error_handler):
"""Validates a categorical column in a json sidecar."""
val_issues = []
raw_hed_dict = dict_for_entry["HED"]
if not raw_hed_dict:
val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
for key_name, hed_string in raw_hed_dict.items():
error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
if not hed_string:
val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
if not isinstance(raw_hed_dict, dict):
elif not isinstance(hed_string, str):
val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE,
given_type=type(raw_hed_dict),
expected_type="dict")
for key_name, hed_string in raw_hed_dict.items():
error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
if not isinstance(hed_string, str):
val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE,
given_type=type(hed_string),
expected_type="str")
if not hed_string:
val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
if key_name in self.reserved_category_values:
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name)
error_handler.pop_error_context()

given_type=type(hed_string),
expected_type="str")
elif key_name in self.reserved_category_values:
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name)
error_handler.pop_error_context()
return val_issues

def _validate_pound_sign_count(self, hed_string, column_type):
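To make the restructured categorical checks concrete, here is an illustrative sidecar column (written as a Python dict) annotated with the SidecarErrors each entry would trigger; the n/a case assumes n/a is among reserved_category_values:

```python
# Illustrative data only, mirroring the checks in _validate_categorical_column.
bad_column = {
    "HED": {
        "go":   "Sensory-event, Visual-presentation",   # fine
        "stop": "",                                     # BLANK_HED_STRING
        "skip": ["oops"],                               # WRONG_HED_DATA_TYPE (expected str)
        "n/a":  "Event",                                # SIDECAR_NA_USED (assumed reserved value)
    }
}

# An empty category dict instead triggers BLANK_HED_STRING for the whole column.
empty_column = {"HED": {}}
```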
2 changes: 1 addition & 1 deletion tests/models/test_sidecar.py
@@ -94,7 +94,7 @@ def test__iter__(self):

def test_validate_column_group(self):
validation_issues = self.errors_sidecar.validate(self.hed_schema)
self.assertEqual(len(validation_issues), 5)
self.assertEqual(len(validation_issues), 4)

validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema)
self.assertEqual(len(validation_issues2), 1)