Skip to content

Commit

Permalink
Merge pull request #788 from hed-standard/develop
Browse files Browse the repository at this point in the history
Merge develop with master in preparation for release
  • Loading branch information
VisLab authored Oct 27, 2023
2 parents 4eaca61 + c6bfcf1 commit 6557bdf
Show file tree
Hide file tree
Showing 13 changed files with 307 additions and 109 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
Release 0.4.0 October 27, 2023
- Refactored the model classes to be based on DataFrame.
- Added additional command line options for remodeling tools.
- Restructured summaries for better reporting.
- Minor refactoring to reduce code complexity.
- Finalized and automated SPEC tests.
- Improvements to GitHub automation -- including adding CodeSpell.
- Improvements to API-Docs.

Release 0.3.1 July 3, 2023
- Pinned the version of the pydantic and inflect libraries due to a conflict.
- Reorganized JSON output of remodeling summaries so that all are of consistent form.
- Fixed summarize_hed_tags_op so that tags were correctly categorized for output.
- Minor refactoring to reduce code complexity.
- BaseInput and Sidecar now raise HedFileError if input could not be read.


Release 0.3.0 June 20, 2023
- Introduction of partnered schema.
- Improved error handling for schema validation.
Expand Down
6 changes: 3 additions & 3 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
author = 'HED Working Group'

# The full version, including alpha/beta/rc tags
version = '0.3.1'
release = '0.3.1'
version = '0.4.0'
release = '0.4.0'

currentdir = os.path.realpath(os.path.dirname(__file__))

Expand Down Expand Up @@ -89,7 +89,7 @@
# Toc options
'collapse_navigation': False,
'sticky_navigation': True,
'navigation_depth': 4,
'navigation_depth': 7,
'includehidden': True,
'titles_only': False
}
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def def_error_no_takes_value(def_name, placeholder_tag):

@hed_tag_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID)
def def_error_no_takes_value(tag, def_name):
    """ Build the message for a tag inside a definition that carries a disallowed attribute.

    Parameters:
        tag: The offending tag; it is rendered into the message with str().
        def_name: The name of the definition that contains the tag.

    Returns:
        str: The formatted error message.
    """
    # NOTE(review): this function reuses the name of the handler declared immediately before it
    # (the one taking def_name, placeholder_tag) -- presumably a copy-paste leftover. Confirm that
    # nothing refers to either handler by name before renaming.
    return f"Tag '{str(tag)}' in Definition '{def_name}' has the unique or required attribute."


@hed_tag_error(DefinitionErrors.BAD_DEFINITION_LOCATION, actual_code=ValidationErrors.DEFINITION_INVALID)
Expand Down
18 changes: 14 additions & 4 deletions hed/models/column_metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from enum import Enum
from hed.errors.error_types import SidecarErrors
import pandas as pd
import copy


class ColumnType(Enum):
Expand Down Expand Up @@ -102,13 +103,15 @@ def set_hed_strings(self, new_strings):
return True

@staticmethod
def _detect_column_type(dict_for_entry):
def _detect_column_type(dict_for_entry, basic_validation=True):
""" Determine the ColumnType of a given json entry.
Parameters:
dict_for_entry (dict): The loaded json entry a specific column.
Generally has a "HED" entry among other optional ones.
basic_validation (bool): If False, does not verify past "HED" exists and the type
This is used to issue more precise errors that are normally just silently ignored,
but also not crash.
Returns:
ColumnType: The determined type of given column. Returns None if unknown.
Expand All @@ -122,14 +125,14 @@ def _detect_column_type(dict_for_entry):

hed_entry = dict_for_entry["HED"]
if isinstance(hed_entry, dict):
if not all(isinstance(entry, str) for entry in hed_entry.values()):
if basic_validation and not all(isinstance(entry, str) for entry in hed_entry.values()):
return None
return ColumnType.Categorical

if not isinstance(hed_entry, str):
return None

if "#" not in dict_for_entry["HED"]:
if basic_validation and "#" not in dict_for_entry["HED"]:
return None

return ColumnType.Value
Expand All @@ -155,3 +158,10 @@ def expected_pound_sign_count(column_type):
else:
return 0, None
return expected_count, error_type

def _get_unvalidated_data(self):
    """ Return a deep copy of this object whose column type was detected without basic validation.

    The copy's column_type is recomputed from its source_dict with basic_validation=False.
    This object itself is left unmodified.

    Returns:
        A deep copy of self with column_type re-detected.
    """
    unvalidated = copy.deepcopy(self)
    detected_type = ColumnMetadata._detect_column_type(dict_for_entry=unvalidated.source_dict,
                                                       basic_validation=False)
    unvalidated.column_type = detected_type
    return unvalidated
26 changes: 0 additions & 26 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,26 +120,6 @@ def expand_defs(df, hed_schema, def_dict, columns=None):
df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))


def sort_strings(df, hed_schema, tag_form="short_tag", columns=None):
""" Expands any def tags found in the dataframe.
Converts in place
Parameters:
df (pd.Dataframe or pd.Series): The dataframe or series to modify
hed_schema (HedSchema or None): The schema to use to identify defs
columns (list or None): The columns to modify on the dataframe
"""
if isinstance(df, pd.Series):
df[:] = df.apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form))
else:
if columns is None:
columns = df.columns

for column in columns:
df.loc[column] = df.loc[column].apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form))


def _convert_to_form(hed_string, hed_schema, tag_form):
    """ Return hed_string rendered in the requested tag form, converted to a plain str. """
    parsed = HedString(hed_string, hed_schema)
    converted = parsed.get_as_form(tag_form)
    return str(converted)

Expand All @@ -152,12 +132,6 @@ def _expand_defs(hed_string, hed_schema, def_dict):
return str(HedString(hed_string, hed_schema, def_dict).expand_defs())


def _sort(hed_string, hed_schema, tag_form):
sorted_string = HedString(hed_string, hed_schema)
sorted_string.sort()
return sorted_string.get_as_form(tag_form)


def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None):
""" Gather def-expand tags in the strings/compare with known definitions to find any differences
Expand Down
39 changes: 0 additions & 39 deletions hed/models/indexed_df.py

This file was deleted.

2 changes: 1 addition & 1 deletion hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def in_library_check(hed_schema, tag_entry, attribute_name):

library = tag_entry.attributes.get(attribute_name, "")
if hed_schema.library != library:
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
tag_entry.name,
library)
return issues
71 changes: 41 additions & 30 deletions hed/validator/sidecar_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
definition_checks = {}
for column_data in sidecar:
column_name = column_data.column_name
column_data = column_data._get_unvalidated_data()
hed_strings = column_data.get_hed_strings()
error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
for key_name, hed_string in hed_strings.items():
Expand Down Expand Up @@ -180,20 +181,28 @@ def _find_non_matching_braces(hed_string):

@staticmethod
def _check_for_key(key, data):
# Probably can be cleaned up more -> Return True if any data or subdata is key
if isinstance(data, dict):
if key in data:
return bool(data[key])
else:
for sub_data in data.values():
result = SidecarValidator._check_for_key(key, sub_data)
if result is not None:
return result
return SidecarValidator._check_dict(key, data)
elif isinstance(data, list):
for sub_data in data:
result = SidecarValidator._check_for_key(key, sub_data)
if result is not None:
return result
return None
return SidecarValidator._check_list(key, data)
return False

@staticmethod
def _check_dict(key, data_dict):
    """ Report whether key is a key of data_dict or is found beneath any of its values.

    Parameters:
        key: The key to look for.
        data_dict (dict): The dictionary to search.

    Returns:
        bool: True if key is directly in data_dict, or if _check_for_key finds it in any value.
    """
    if key in data_dict:
        return True
    # Hand each value back to the general entry point so nested containers are searched too.
    return any(SidecarValidator._check_for_key(key, value) for value in data_dict.values())

@staticmethod
def _check_list(key, data_list):
    """ Report whether key is found inside any element of data_list.

    Parameters:
        key: The key to look for.
        data_list (list): The list whose elements are searched.

    Returns:
        bool: True if _check_for_key finds key in at least one element, otherwise False.
    """
    return any(SidecarValidator._check_for_key(key, item) for item in data_list)

def _validate_column_structure(self, column_name, dict_for_entry, error_handler):
""" Checks primarily for type errors such as expecting a string and getting a list in a json sidecar.
Expand All @@ -210,7 +219,7 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler)
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN)
return val_issues

column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry)
column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry, basic_validation=False)
if column_type is None:
val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE,
column_name=column_name)
Expand All @@ -219,25 +228,27 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler)
if found_hed:
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED)
elif column_type == ColumnType.Categorical:
raw_hed_dict = dict_for_entry["HED"]
if not raw_hed_dict:
val_issues += self._validate_categorical_column(column_name, dict_for_entry, error_handler)

return val_issues

def _validate_categorical_column(self, column_name, dict_for_entry, error_handler):
"""Validates a categorical column in a json sidecar."""
val_issues = []
raw_hed_dict = dict_for_entry["HED"]
if not raw_hed_dict:
val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
for key_name, hed_string in raw_hed_dict.items():
error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
if not hed_string:
val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
if not isinstance(raw_hed_dict, dict):
elif not isinstance(hed_string, str):
val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE,
given_type=type(raw_hed_dict),
expected_type="dict")
for key_name, hed_string in raw_hed_dict.items():
error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
if not isinstance(hed_string, str):
val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE,
given_type=type(hed_string),
expected_type="str")
if not hed_string:
val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
if key_name in self.reserved_category_values:
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name)
error_handler.pop_error_context()

given_type=type(hed_string),
expected_type="str")
elif key_name in self.reserved_category_values:
val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name)
error_handler.pop_error_context()
return val_issues

def _validate_pound_sign_count(self, hed_string, column_type):
Expand Down
2 changes: 1 addition & 1 deletion tests/models/test_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test__iter__(self):

def test_validate_column_group(self):
validation_issues = self.errors_sidecar.validate(self.hed_schema)
self.assertEqual(len(validation_issues), 5)
self.assertEqual(len(validation_issues), 4)

validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema)
self.assertEqual(len(validation_issues2), 1)
Expand Down
Loading

0 comments on commit 6557bdf

Please sign in to comment.