diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index a84e9304..e86a1ac4 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -42,7 +42,7 @@ def val_error_invalid_char(source_string, char_index): @hed_tag_error(ValidationErrors.ELEMENT_DEPRECATED, default_severity=ErrorSeverity.WARNING) -def val_error_element_deprecatedr(tag): +def val_error_element_deprecated(tag): return f"Element '{tag}' has been deprecated and an alternative method of tagging should be used" diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index 7c7c1fa0..ee5704f7 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -33,22 +33,22 @@ def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list): @hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID) def schema_error_unknown_attribute(attribute_name, source_tag): - return f"Attribute '{attribute_name}' used by '{source_tag}' " + \ - "was not defined in the schema, or was used outside of it's defined class." + return (f"Attribute '{attribute_name}' used by '{source_tag}' " + + "was not defined in the schema, or was used outside of it's defined class.") @hed_error(SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED, default_severity=ErrorSeverity.WARNING) def schema_error_SCHEMA_PRERELEASE_VERSION_USED(current_version, known_versions): - return f"Schema version {current_version} used, which is prerelease or unofficial. " + \ - f"Known versions are: {', '.join(known_versions)}" + return (f"Schema version {current_version} used, which is prerelease or unofficial. " + + f"Known versions are: {', '.join(known_versions)}") @hed_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, default_severity=ErrorSeverity.WARNING, actual_code=SchemaWarnings.SCHEMA_CHARACTER_INVALID) def schema_error_invalid_character_prologue(char_index, source_string, section_name): invalid_char = source_string[char_index] - return f"'{section_name}' has invalid character '{invalid_char}' at " + \ - f"position {char_index} of string: {source_string}" + return (f"'{section_name}' has invalid character '{invalid_char}' at " + + f"position {char_index} of string: {source_string}") @hed_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, default_severity=ErrorSeverity.WARNING, @@ -91,8 +91,8 @@ def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child @hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_DEPRECATED, actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATION_ERROR) def schema_error_SCHEMA_ATTRIBUTE_VALUE_DEPRECATED(tag, deprecated_suggestion, attribute_name): - return (f"Tag '{tag}' {attribute_name} uses '{deprecated_suggestion}' which has been deprecated " + \ - f"and an alternative method of tagging should be used.") + return (f"Tag '{tag}' {attribute_name} uses '{deprecated_suggestion}' which has been deprecated " + + "and an alternative method of tagging should be used.") @hed_error(SchemaAttributeErrors.SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID, diff --git a/hed/schema/schema_io/df_util.py b/hed/schema/schema_io/df_util.py index 00e2011b..0f4927a6 100644 --- a/hed/schema/schema_io/df_util.py +++ b/hed/schema/schema_io/df_util.py @@ -182,4 +182,4 @@ def get_attributes_from_row(row): if constants.subclass_of in row.index and row[constants.subclass_of] == "HedHeader": header_attributes, _ = _parse_header_attributes_line(attr_string) return header_attributes - return parse_attribute_string(attr_string) \ No newline at end of file + return parse_attribute_string(attr_string) diff --git a/hed/schema/schema_io/ontology_util.py b/hed/schema/schema_io/ontology_util.py index 278f5217..28c05d5d 100644 --- a/hed/schema/schema_io/ontology_util.py +++ b/hed/schema/schema_io/ontology_util.py @@ -163,7 +163,7 @@ def _verify_hedid_matches(section, df, unused_tag_ids): if id_int not in unused_tag_ids: hedid_errors += schema_util.format_error( row_number, row, f"'{label}' has id {id_int} which is outside " + - f"of the valid range for this type. Valid range is: " + + "of the valid range for this type. Valid range is: " + f"{min(unused_tag_ids)} to {max(unused_tag_ids)}") continue except ValueError: diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index 2d0bd977..042d3cac 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -43,7 +43,7 @@ from .util.hed_logger import HedLogger from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns from .util.io_util import check_filename, clean_filename, extract_suffix_path, get_file_list, make_path -from .util.io_util import get_dir_dictionary, get_file_list, get_path_components, parse_bids_filename +from .util.io_util import get_dir_dictionary, get_path_components, parse_bids_filename from .analysis.annotation_util import \ check_df_columns, extract_tags, generate_sidecar_entry, get_bids_dataset, hed_to_df, df_to_hed, merge_hed_dict, \ diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py index 3b91efe4..22f93a17 100644 --- a/hed/tools/analysis/annotation_util.py +++ b/hed/tools/analysis/annotation_util.py @@ -103,7 +103,7 @@ def generate_sidecar_entry(column_name, column_values=None): name_label = re.sub(r'[^A-Za-z0-9-]+', '_', column_name) sidecar_entry = {"Description": f"Description for {column_name}", "HED": ""} if not column_values: - sidecar_entry["HED"] = f"(Label/{name_label}, Label/#)" + sidecar_entry["HED"] = f"(Label/{name_label}, ID/#)" else: levels = {} hed = {} @@ -112,7 +112,7 @@ def generate_sidecar_entry(column_name, column_values=None): continue value_label = re.sub(r'[^A-Za-z0-9-]+', '_', column_value) levels[column_value] = f"Here describe column value {column_value} of column {column_name}" - hed[column_value] = f"(Label/{name_label}, Label/{value_label})" + hed[column_value] = f"(Label/{name_label}, ID/{value_label})" sidecar_entry["Levels"] = levels sidecar_entry["HED"] = hed return sidecar_entry diff --git a/hed/tools/remodeling/__init__.py b/hed/tools/remodeling/__init__.py index e6a9e2ea..5c2f44ab 100644 --- a/hed/tools/remodeling/__init__.py +++ b/hed/tools/remodeling/__init__.py @@ -1,5 +1,5 @@ -""" Remodeling tools for revising and summarizing tabular files.""" - -from .backup_manager import BackupManager -from .dispatcher import Dispatcher -from .remodeler_validator import RemodelerValidator \ No newline at end of file +""" Remodeling tools for revising and summarizing tabular files.""" + +from .backup_manager import BackupManager +from .dispatcher import Dispatcher +from .remodeler_validator import RemodelerValidator diff --git a/hed/tools/remodeling/cli/__init__.py b/hed/tools/remodeling/cli/__init__.py index d975fb81..4d0c5fc6 100644 --- a/hed/tools/remodeling/cli/__init__.py +++ b/hed/tools/remodeling/cli/__init__.py @@ -1 +1 @@ -""" Command-line interface for remodeling tools. """ \ No newline at end of file +""" Command-line interface for remodeling tools. """ diff --git a/hed/tools/util/data_util.py b/hed/tools/util/data_util.py index d7465aba..64d26a04 100644 --- a/hed/tools/util/data_util.py +++ b/hed/tools/util/data_util.py @@ -211,6 +211,7 @@ def make_info_dataframe(col_info, selected_col): df = pd.DataFrame(sorted(list(col_values)), columns=[selected_col]) return df + def replace_na(df): """ Replace (in place) the n/a with np.nan taking care of categorical columns. """ for column in df.columns: @@ -221,6 +222,7 @@ def replace_na(df): df[column] = df[column].replace('n/a', np.nan) df[column] = pd.Categorical(df[column]) + def replace_values(df, values=None, replace_value='n/a', column_list=None): """ Replace string values in specified columns. diff --git a/hed/tools/visualization/__init__.py b/hed/tools/visualization/__init__.py index aa2e73ea..a5a5a047 100644 --- a/hed/tools/visualization/__init__.py +++ b/hed/tools/visualization/__init__.py @@ -1,4 +1,3 @@ -""" Visualization tools for HED. """ - -from .tag_word_cloud import create_wordcloud, word_cloud_to_svg - +""" Visualization tools for HED. """ + +from .tag_word_cloud import create_wordcloud, word_cloud_to_svg diff --git a/hed/validator/util/class_util.py b/hed/validator/util/class_util.py index 9152234b..310f63c9 100644 --- a/hed/validator/util/class_util.py +++ b/hed/validator/util/class_util.py @@ -195,7 +195,7 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code char_errors = {} for class_name in classes: char_errors[class_name] = self._get_problem_indices(stripped_value, class_name, start_index=start_index) - if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class + if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class return [] index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag) validation_issues = self.report_value_errors(char_errors, class_valid, report_as, index_adj) diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 73fef09f..0d0602cf 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -142,7 +142,7 @@ def _run_single_events_test(self, info, schema, def_dict, error_code, all_codes, for row in test: if not isinstance(row, list): print(f"Improper grouping in test: {error_code}:{name}") - print(f"This is probably a missing set of square brackets.") + print("This is probably a missing set of square brackets.") break string += "\t".join(str(x) for x in row) + "\n" @@ -169,7 +169,7 @@ def _run_single_combo_test(self, info, schema, def_dict, error_code, all_codes, if not isinstance(row, list): print(f"Improper grouping in test: {error_code}:{name}") print(f"Improper data for test {name}: {test}") - print(f"This is probably a missing set of square brackets.") + print("This is probably a missing set of square brackets.") break string += "\t".join(str(x) for x in row) + "\n" diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index 28daa121..61dc7a58 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -176,7 +176,7 @@ def test_replace_tag_references(self): self.assertEqual(nested_list, ['Hed1', {'a': 2, 'b': [3, {'c': 'Hed2'}]}]) # Test with mixed data types and HedString in a list within a dict - mixed = {'a': HedString('Hed1', self._schema), + mixed = {'a': HedString('Hed1', self._schema), 'b': [2, 3, {'c': HedString('Hed2', self._schema)}, 4]} replace_tag_references(mixed) self.assertEqual(mixed, {'a': 'Hed1', 'b': [2, 3, {'c': 'Hed2'}, 4]}) diff --git a/tests/models/test_hed_string.py b/tests/models/test_hed_string.py index f1ede36d..fba92013 100644 --- a/tests/models/test_hed_string.py +++ b/tests/models/test_hed_string.py @@ -8,7 +8,7 @@ class TestHedStrings(unittest.TestCase): @classmethod def setUpClass(cls): cls.schema = load_schema_version("8.3.0") - + def validator_scalar(self, test_strings, expected_results, test_function): for test_key in test_strings: test_result = test_function(test_strings[test_key]) diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index c4570a56..b6cbdfa9 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -205,12 +205,12 @@ def test_generate_sidecar_entry_non_letters(self): self.assertIn('HED', entry1, "generate_sidecar_entry has a HED key when column values and special chars") hed_entry1 = entry1['HED'] - self.assertEqual(hed_entry1['apple 1'], '(Label/my_-123_10, Label/apple_1)', + self.assertEqual(hed_entry1['apple 1'], '(Label/my_-123_10, ID/apple_1)', "generate_sidecar_entry HED entry should convert labels correctly when column values") entry2 = annotation_util.generate_sidecar_entry('my !#$-123_10') self.assertIsInstance(entry2, dict, "generate_sidecar_entry is a dictionary when no column values and special chars.") - self.assertEqual(entry2['HED'], '(Label/my_-123_10, Label/#)', + self.assertEqual(entry2['HED'], '(Label/my_-123_10, ID/#)', "generate_sidecar_entry HED entry has correct label when no column values and special chars.") def test_hed_to_df(self): @@ -295,27 +295,6 @@ def test_merge_hed_dict_full(self): annotation_util.merge_hed_dict(example_sidecar, spreadsheet_sidecar) self.assertEqual(6, len(example_sidecar), 'merge_hed_dict merges with the correct length') - def test_to_factor(self): - series1 = Series([1.0, 2.0, 3.0, 4.0]) - factor1 = annotation_util.to_factor(series1) - self.assertEqual(len(series1), len(factor1)) - self.assertEqual(sum(factor1), len(factor1)) - series2 = Series(['a', '', None, np.nan, 'n/a']) - factor2 = annotation_util.to_factor(series2) - self.assertEqual(len(series2), len(factor2)) - self.assertEqual(sum(factor2), 1) - data = { - 'Name': ['Alice', '', 'n/a', 1.0], # Contains a space - 'Age': [25, np.nan, 35, 0] - } - df = DataFrame(data) - factor3 = annotation_util.to_factor(df, column='Name') - self.assertEqual(sum(factor3), 2) - factor4 = annotation_util.to_factor(df) - self.assertEqual(sum(factor4), 2) - with self.assertRaises(HedFileError): - annotation_util.to_factor(data) - def test_series_to_factor(self): series1 = Series([1.0, 2.0, 3.0, 4.0]) factor1 = annotation_util.series_to_factor(series1) diff --git a/tests/tools/remodeling/operations/test_base_summary.py b/tests/tools/remodeling/operations/test_base_summary.py index e45d620a..fbf4c35b 100644 --- a/tests/tools/remodeling/operations/test_base_summary.py +++ b/tests/tools/remodeling/operations/test_base_summary.py @@ -28,7 +28,7 @@ def __init__(self, parameters): def do_op(self, dispatcher, df, name, sidecar=None): return df.copy() - + @staticmethod def validate_input_data(parameters): return [] diff --git a/tests/tools/remodeling/operations/test_number_groups.py b/tests/tools/remodeling/operations/test_number_groups.py index 2dcfc8be..6aa4cad9 100644 --- a/tests/tools/remodeling/operations/test_number_groups.py +++ b/tests/tools/remodeling/operations/test_number_groups.py @@ -151,9 +151,10 @@ def tearDownClass(cls): # test working def test_number_groups_new_column(self): + pass # Test when new column name is given with overwrite unspecified (=False) - parms = json.loads(self.json_parms) - op = NumberGroupsOp(parms) + # parms = json.loads(self.json_parms) + # op = NumberGroupsOp(parms) # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) # df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns) # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) diff --git a/tests/validator/test_def_validator.py b/tests/validator/test_def_validator.py index f3f834f6..73a89073 100644 --- a/tests/validator/test_def_validator.py +++ b/tests/validator/test_def_validator.py @@ -230,7 +230,7 @@ def test_expand_def_tags_placeholder(self): expand_defs=False, shrink_defs=False, remove_definitions=False, basic_definition_string=self.placeholder_definition_string) - self.base_def_validator(basic_def_strings, basic_def_strings, + self.base_def_validator(basic_def_strings, basic_def_strings, expand_defs=False, shrink_defs=True, remove_definitions=False, basic_definition_string=self.placeholder_definition_string) diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 03200afb..557660d3 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -503,8 +503,8 @@ def test_topLevelTagGroup_validation(self): 'valid1': [], 'valid2': [], 'invalid2': self.format_error( - ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error= - ValidationErrors.DEFINITION_INVALID) + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), + ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) + \ + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), 'invalidTwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, multiple_tags="Definition/InvalidDef3".split(", ")), 'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, @@ -1045,7 +1045,7 @@ def test_special_units(self): expected_issues = { 'ascii': [], 'illegalTab': self.format_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, tag=0, - index_in_tag=13, index_in_tag_end=14, value_class="textClass"), + index_in_tag=13, index_in_tag_end=14, value_class="textClass"), 'allowTab': [] } self.validator_semantic(test_strings, expected_results, expected_issues, True)