Skip to content

Commit

Permalink
Updated formatting and used ID for sidecar extraction
Browse files: browse the repository at this point in the history
  • Loading branch information
VisLab committed Sep 27, 2024
1 parent 78c29c5 commit 9e0a8f6
Show file tree
Hide file tree
Showing 19 changed files with 40 additions and 59 deletions.
2 changes: 1 addition & 1 deletion hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def val_error_invalid_char(source_string, char_index):


@hed_tag_error(ValidationErrors.ELEMENT_DEPRECATED, default_severity=ErrorSeverity.WARNING)
def val_error_element_deprecatedr(tag):
def val_error_element_deprecated(tag):
return f"Element '{tag}' has been deprecated and an alternative method of tagging should be used"


Expand Down
16 changes: 8 additions & 8 deletions hed/errors/schema_error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,22 @@ def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list):

@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' " + \
"was not defined in the schema, or was used outside of it's defined class."
return (f"Attribute '{attribute_name}' used by '{source_tag}' " +
"was not defined in the schema, or was used outside of it's defined class.")


@hed_error(SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED, default_severity=ErrorSeverity.WARNING)
def schema_error_SCHEMA_PRERELEASE_VERSION_USED(current_version, known_versions):
return f"Schema version {current_version} used, which is prerelease or unofficial. " + \
f"Known versions are: {', '.join(known_versions)}"
return (f"Schema version {current_version} used, which is prerelease or unofficial. " +
f"Known versions are: {', '.join(known_versions)}")


@hed_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, default_severity=ErrorSeverity.WARNING,
actual_code=SchemaWarnings.SCHEMA_CHARACTER_INVALID)
def schema_error_invalid_character_prologue(char_index, source_string, section_name):
invalid_char = source_string[char_index]
return f"'{section_name}' has invalid character '{invalid_char}' at " + \
f"position {char_index} of string: {source_string}"
return (f"'{section_name}' has invalid character '{invalid_char}' at " +
f"position {char_index} of string: {source_string}")


@hed_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, default_severity=ErrorSeverity.WARNING,
Expand Down Expand Up @@ -91,8 +91,8 @@ def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child
@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_DEPRECATED,
actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATION_ERROR)
def schema_error_SCHEMA_ATTRIBUTE_VALUE_DEPRECATED(tag, deprecated_suggestion, attribute_name):
return (f"Tag '{tag}' {attribute_name} uses '{deprecated_suggestion}' which has been deprecated " + \
f"and an alternative method of tagging should be used.")
return (f"Tag '{tag}' {attribute_name} uses '{deprecated_suggestion}' which has been deprecated " +
"and an alternative method of tagging should be used.")


@hed_error(SchemaAttributeErrors.SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID,
Expand Down
2 changes: 1 addition & 1 deletion hed/schema/schema_io/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,4 +182,4 @@ def get_attributes_from_row(row):
if constants.subclass_of in row.index and row[constants.subclass_of] == "HedHeader":
header_attributes, _ = _parse_header_attributes_line(attr_string)
return header_attributes
return parse_attribute_string(attr_string)
return parse_attribute_string(attr_string)
2 changes: 1 addition & 1 deletion hed/schema/schema_io/ontology_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _verify_hedid_matches(section, df, unused_tag_ids):
if id_int not in unused_tag_ids:
hedid_errors += schema_util.format_error(
row_number, row, f"'{label}' has id {id_int} which is outside " +
f"of the valid range for this type. Valid range is: " +
"of the valid range for this type. Valid range is: " +
f"{min(unused_tag_ids)} to {max(unused_tag_ids)}")
continue
except ValueError:
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from .util.hed_logger import HedLogger
from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns
from .util.io_util import check_filename, clean_filename, extract_suffix_path, get_file_list, make_path
from .util.io_util import get_dir_dictionary, get_file_list, get_path_components, parse_bids_filename
from .util.io_util import get_dir_dictionary, get_path_components, parse_bids_filename

from .analysis.annotation_util import \
check_df_columns, extract_tags, generate_sidecar_entry, get_bids_dataset, hed_to_df, df_to_hed, merge_hed_dict, \
Expand Down
4 changes: 2 additions & 2 deletions hed/tools/analysis/annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def generate_sidecar_entry(column_name, column_values=None):
name_label = re.sub(r'[^A-Za-z0-9-]+', '_', column_name)
sidecar_entry = {"Description": f"Description for {column_name}", "HED": ""}
if not column_values:
sidecar_entry["HED"] = f"(Label/{name_label}, Label/#)"
sidecar_entry["HED"] = f"(Label/{name_label}, ID/#)"
else:
levels = {}
hed = {}
Expand All @@ -112,7 +112,7 @@ def generate_sidecar_entry(column_name, column_values=None):
continue
value_label = re.sub(r'[^A-Za-z0-9-]+', '_', column_value)
levels[column_value] = f"Here describe column value {column_value} of column {column_name}"
hed[column_value] = f"(Label/{name_label}, Label/{value_label})"
hed[column_value] = f"(Label/{name_label}, ID/{value_label})"
sidecar_entry["Levels"] = levels
sidecar_entry["HED"] = hed
return sidecar_entry
Expand Down
10 changes: 5 additions & 5 deletions hed/tools/remodeling/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
""" Remodeling tools for revising and summarizing tabular files."""

from .backup_manager import BackupManager
from .dispatcher import Dispatcher
from .remodeler_validator import RemodelerValidator
""" Remodeling tools for revising and summarizing tabular files."""

from .backup_manager import BackupManager
from .dispatcher import Dispatcher
from .remodeler_validator import RemodelerValidator
2 changes: 1 addition & 1 deletion hed/tools/remodeling/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
""" Command-line interface for remodeling tools. """
""" Command-line interface for remodeling tools. """
2 changes: 2 additions & 0 deletions hed/tools/util/data_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def make_info_dataframe(col_info, selected_col):
df = pd.DataFrame(sorted(list(col_values)), columns=[selected_col])
return df


def replace_na(df):
""" Replace (in place) the n/a with np.nan taking care of categorical columns. """
for column in df.columns:
Expand All @@ -221,6 +222,7 @@ def replace_na(df):
df[column] = df[column].replace('n/a', np.nan)
df[column] = pd.Categorical(df[column])


def replace_values(df, values=None, replace_value='n/a', column_list=None):
""" Replace string values in specified columns.
Expand Down
7 changes: 3 additions & 4 deletions hed/tools/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
""" Visualization tools for HED. """

from .tag_word_cloud import create_wordcloud, word_cloud_to_svg

""" Visualization tools for HED. """

from .tag_word_cloud import create_wordcloud, word_cloud_to_svg
2 changes: 1 addition & 1 deletion hed/validator/util/class_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code
char_errors = {}
for class_name in classes:
char_errors[class_name] = self._get_problem_indices(stripped_value, class_name, start_index=start_index)
if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class
if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class
return []
index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag)
validation_issues = self.report_value_errors(char_errors, class_valid, report_as, index_adj)
Expand Down
4 changes: 2 additions & 2 deletions spec_tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _run_single_events_test(self, info, schema, def_dict, error_code, all_codes,
for row in test:
if not isinstance(row, list):
print(f"Improper grouping in test: {error_code}:{name}")
print(f"This is probably a missing set of square brackets.")
print("This is probably a missing set of square brackets.")
break
string += "\t".join(str(x) for x in row) + "\n"

Expand All @@ -169,7 +169,7 @@ def _run_single_combo_test(self, info, schema, def_dict, error_code, all_codes,
if not isinstance(row, list):
print(f"Improper grouping in test: {error_code}:{name}")
print(f"Improper data for test {name}: {test}")
print(f"This is probably a missing set of square brackets.")
print("This is probably a missing set of square brackets.")
break
string += "\t".join(str(x) for x in row) + "\n"

Expand Down
2 changes: 1 addition & 1 deletion tests/errors/test_error_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def test_replace_tag_references(self):
self.assertEqual(nested_list, ['Hed1', {'a': 2, 'b': [3, {'c': 'Hed2'}]}])

# Test with mixed data types and HedString in a list within a dict
mixed = {'a': HedString('Hed1', self._schema),
mixed = {'a': HedString('Hed1', self._schema),
'b': [2, 3, {'c': HedString('Hed2', self._schema)}, 4]}
replace_tag_references(mixed)
self.assertEqual(mixed, {'a': 'Hed1', 'b': [2, 3, {'c': 'Hed2'}, 4]})
Expand Down
2 changes: 1 addition & 1 deletion tests/models/test_hed_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class TestHedStrings(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.schema = load_schema_version("8.3.0")

def validator_scalar(self, test_strings, expected_results, test_function):
for test_key in test_strings:
test_result = test_function(test_strings[test_key])
Expand Down
25 changes: 2 additions & 23 deletions tests/tools/analysis/test_annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,12 @@ def test_generate_sidecar_entry_non_letters(self):
self.assertIn('HED', entry1,
"generate_sidecar_entry has a HED key when column values and special chars")
hed_entry1 = entry1['HED']
self.assertEqual(hed_entry1['apple 1'], '(Label/my_-123_10, Label/apple_1)',
self.assertEqual(hed_entry1['apple 1'], '(Label/my_-123_10, ID/apple_1)',
"generate_sidecar_entry HED entry should convert labels correctly when column values")
entry2 = annotation_util.generate_sidecar_entry('my !#$-123_10')
self.assertIsInstance(entry2, dict,
"generate_sidecar_entry is a dictionary when no column values and special chars.")
self.assertEqual(entry2['HED'], '(Label/my_-123_10, Label/#)',
self.assertEqual(entry2['HED'], '(Label/my_-123_10, ID/#)',
"generate_sidecar_entry HED entry has correct label when no column values and special chars.")

def test_hed_to_df(self):
Expand Down Expand Up @@ -295,27 +295,6 @@ def test_merge_hed_dict_full(self):
annotation_util.merge_hed_dict(example_sidecar, spreadsheet_sidecar)
self.assertEqual(6, len(example_sidecar), 'merge_hed_dict merges with the correct length')

def test_to_factor(self):
series1 = Series([1.0, 2.0, 3.0, 4.0])
factor1 = annotation_util.to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
data = {
'Name': ['Alice', '', 'n/a', 1.0], # Contains a space
'Age': [25, np.nan, 35, 0]
}
df = DataFrame(data)
factor3 = annotation_util.to_factor(df, column='Name')
self.assertEqual(sum(factor3), 2)
factor4 = annotation_util.to_factor(df)
self.assertEqual(sum(factor4), 2)
with self.assertRaises(HedFileError):
annotation_util.to_factor(data)

def test_series_to_factor(self):
series1 = Series([1.0, 2.0, 3.0, 4.0])
factor1 = annotation_util.series_to_factor(series1)
Expand Down
2 changes: 1 addition & 1 deletion tests/tools/remodeling/operations/test_base_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self, parameters):

def do_op(self, dispatcher, df, name, sidecar=None):
return df.copy()

@staticmethod
def validate_input_data(parameters):
return []
Expand Down
5 changes: 3 additions & 2 deletions tests/tools/remodeling/operations/test_number_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,10 @@ def tearDownClass(cls):

# test working
def test_number_groups_new_column(self):
pass
# Test when new column name is given with overwrite unspecified (=False)
parms = json.loads(self.json_parms)
op = NumberGroupsOp(parms)
# parms = json.loads(self.json_parms)
# op = NumberGroupsOp(parms)
# df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
# df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns)
# df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns)
Expand Down
2 changes: 1 addition & 1 deletion tests/validator/test_def_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def test_expand_def_tags_placeholder(self):
expand_defs=False, shrink_defs=False,
remove_definitions=False, basic_definition_string=self.placeholder_definition_string)

self.base_def_validator(basic_def_strings, basic_def_strings,
self.base_def_validator(basic_def_strings, basic_def_strings,
expand_defs=False, shrink_defs=True,
remove_definitions=False, basic_definition_string=self.placeholder_definition_string)

Expand Down
6 changes: 3 additions & 3 deletions tests/validator/test_tag_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,8 @@ def test_topLevelTagGroup_validation(self):
'valid1': [],
'valid2': [],
'invalid2': self.format_error(
ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=
ValidationErrors.DEFINITION_INVALID) + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1),
ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) + \
self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1),
'invalidTwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0,
multiple_tags="Definition/InvalidDef3".split(", ")),
'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0,
Expand Down Expand Up @@ -1045,7 +1045,7 @@ def test_special_units(self):
expected_issues = {
'ascii': [],
'illegalTab': self.format_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, tag=0,
index_in_tag=13, index_in_tag_end=14, value_class="textClass"),
index_in_tag=13, index_in_tag_end=14, value_class="textClass"),
'allowTab': []
}
self.validator_semantic(test_strings, expected_results, expected_issues, True)
Expand Down

0 comments on commit 9e0a8f6

Please sign in to comment.