From f36ecc5fcd57576c606117f23be3817375ab16f1 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:11:41 -0500 Subject: [PATCH] Cleaned up some of the lints --- .github/workflows/ci.yaml | 24 +- tests/errors/test_error_reporter.py | 405 ++--- tests/models/test_base_input.py | 377 ++-- tests/models/test_basic_search.py | 727 ++++---- tests/models/test_basic_search_util.py | 16 +- tests/models/test_column_mapper.py | 444 ++--- tests/models/test_definition_dict.py | 9 +- tests/models/test_definition_entry.py | 28 +- tests/models/test_df_util.py | 1201 ++++++------- tests/models/test_hed_group.py | 273 +-- tests/models/test_hed_string.py | 619 +++---- tests/models/test_hed_tag.py | 380 ++-- tests/models/test_query_handler.py | 1549 +++++++++-------- tests/models/test_sidecar.py | 345 ++-- tests/models/test_spreadsheet_input.py | 459 +++-- tests/models/test_string_util.py | 382 ++-- tests/models/test_tabular_input.py | 204 +-- tests/schema/test_hed_schema.py | 277 ++- tests/schema/test_hed_schema_io.py | 21 +- tests/schema/test_hed_schema_io_df.py | 12 +- tests/schema/test_ontology_util.py | 2 +- .../test_schema_attribute_validators.py | 358 ++-- tests/schema/test_schema_converters.py | 418 ++--- tests/schema/test_schema_entry.py | 92 +- tests/schema/test_schema_util.py | 85 +- tests/schema/test_schema_validation_util.py | 175 +- .../test_schema_validation_util_deprecated.py | 137 +- tests/schema/test_schema_validator_hed_id.py | 6 +- tests/schema/test_schema_wiki_fatal_errors.py | 231 ++- tests/schema/util_create_schemas.py | 130 +- .../scripts/test_convert_and_update_schema.py | 189 +- tests/scripts/test_script_util.py | 355 ++-- .../tools/remodeling/cli/test_run_remodel.py | 1 + .../operations/test_factor_hed_tags_op.py | 4 +- .../operations/test_split_rows_op.py | 1 - tests/tools/util/test_io_util.py | 3 +- .../visualization/test_tag_word_cloud.py | 355 ++-- tests/validator/test_char_validator.py | 2 +- tests/validator/test_def_validator.py | 504 +++--- tests/validator/test_hed_validator.py | 392 ++--- tests/validator/test_onset_validator.py | 10 +- tests/validator/test_sidecar_validator.py | 361 ++-- tests/validator/test_spreadsheet_validator.py | 195 +-- tests/validator/test_tag_validator.py | 49 +- tests/validator/test_tag_validator_base.py | 3 +- tests/validator/test_tag_validator_library.py | 12 +- 46 files changed, 5954 insertions(+), 5868 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 19fd526e..309e4ae3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -56,28 +56,34 @@ jobs: pip install -r requirements.txt pip install -r docs/requirements.txt + # Run flake8 only for Python 3.9 - name: Lint with flake8 + if: matrix.python-version == '3.9' run: | - # stop the build if there are Python syntax errors or undefined names - + pip install flake8 flake8 . --count --show-source --statistics --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - name: Test with unittest env: HED_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - coverage run -m unittest + if [ "${{ matrix.python-version }}" == "3.9" ]; then + pip install coverage + coverage run -m unittest discover tests + else + python -m unittest tests + fi - name: Run spec_test coverage env: HED_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: coverage run --append -m unittest spec_tests/test_errors.py - continue-on-error: true + run: | + if ["${{ matrix.python-version }}" == "3.9" ]; then + coverage run --append -m unittest discover tests/spec_tests + ls -a + continue-on-error: true + if - - name: List files in workspace - run: ls -a - - name: Archive code coverage results if: ${{matrix.python-version == '3.9'}} uses: actions/upload-artifact@v4 diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index ba8de3fa..8f6ef7c2 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -1,200 +1,205 @@ -import unittest -from hed.errors import ErrorHandler, ErrorContext, ErrorSeverity, ValidationErrors, SchemaWarnings, \ - get_printable_issue_string, sort_issues, replace_tag_references -from hed.errors.error_reporter import hed_tag_error, get_printable_issue_string_html -from hed import HedString -from hed import load_schema_version - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.error_handler = ErrorHandler() - cls._schema = load_schema_version("8.3.0") - pass - - def test_push_error_context(self): - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - name = "DummyFileName.txt" - self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) - column_name = "DummyColumnName" - self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME]) - self.error_handler.reset_error_context() - self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) - self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) - self.error_handler.push_error_context(ErrorContext.COLUMN, column_name) - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) - self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME]) - self.assertTrue(column_name == error_list[0][ErrorContext.COLUMN]) - self.assertTrue(len(error_list) == 1) - self.error_handler.reset_error_context() - self.error_handler.push_error_context(ErrorContext.ROW, None) - self.assertTrue(self.error_handler.error_context[0][1] == 0) - self.error_handler.reset_error_context() - - def test_pop_error_context(self): - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - name = "DummyFileName.txt" - self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) - self.error_handler.pop_error_context() - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - column_name = "DummyColumnName" - self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) - self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) - self.error_handler.push_error_context(ErrorContext.COLUMN, column_name) - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) - self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME]) - self.assertTrue(column_name == error_list[0][ErrorContext.COLUMN]) - self.error_handler.pop_error_context() - self.error_handler.pop_error_context() - self.error_handler.pop_error_context() - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - self.assertTrue(ErrorContext.COLUMN not in error_list[0]) - self.error_handler.pop_error_context() - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - self.assertTrue(len(error_list) == 1) - self.error_handler.reset_error_context() - - def test_filter_issues_by_severity(self): - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - error_list += self.error_handler.format_error_with_context(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, - "dummy", problem_char="#", char_index=0) - self.assertTrue(len(error_list) == 2) - filtered_list = self.error_handler.filter_issues_by_severity(issues_list=error_list, - severity=ErrorSeverity.ERROR) - self.assertTrue(len(filtered_list) == 1) - - def test_printable_issue_string(self): - self.error_handler.push_error_context(ErrorContext.CUSTOM_TITLE, "Default Custom Title") - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - error_list += self.error_handler.format_error_with_context(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, - "dummy", problem_char="#", char_index=0) - - printable_issues = get_printable_issue_string(error_list) - self.assertTrue(len(printable_issues) > 10) - - printable_issues2 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR) - self.assertTrue(len(printable_issues) > len(printable_issues2)) - - printable_issues3 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, - title="Later added custom title that is longer") - self.assertTrue(len(printable_issues3) > len(printable_issues2)) - - self.error_handler.reset_error_context() - - def test_printable_issue_string_with_filenames(self): - myfile = 'my_file.txt' - self.error_handler.push_error_context(ErrorContext.CUSTOM_TITLE, "Default Custom Title") - self.error_handler.push_error_context(ErrorContext.FILE_NAME, myfile) - error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") - error_list += self.error_handler.format_error_with_context(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, - "dummy", problem_char="#", char_index=0) - - printable_issues = get_printable_issue_string(error_list, skip_filename=False) - self.assertTrue(len(printable_issues) > 10) - self.assertEqual(printable_issues.count(myfile), 1) - - printable_issues2 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, skip_filename=False) - self.assertTrue(len(printable_issues) > len(printable_issues2)) - self.assertEqual(printable_issues2.count(myfile), 1) - printable_issues3 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, skip_filename=False, - title="Later added custom title that is longer") - self.assertTrue(len(printable_issues3) > len(printable_issues2)) - self.assertEqual(printable_issues3.count(myfile), 1) - - printable_issues = get_printable_issue_string_html(error_list, skip_filename=False) - self.assertTrue(len(printable_issues) > 10) - self.assertEqual(printable_issues.count(myfile), 1) - - printable_issues2 = get_printable_issue_string_html(error_list, severity=ErrorSeverity.ERROR, skip_filename=False) - self.assertTrue(len(printable_issues) > len(printable_issues2)) - self.assertEqual(printable_issues2.count(myfile), 1) - printable_issues3 = get_printable_issue_string_html(error_list, severity=ErrorSeverity.ERROR, skip_filename=False, - title="Later added custom title that is longer") - self.assertTrue(len(printable_issues3) > len(printable_issues2)) - self.assertEqual(printable_issues3.count(myfile), 1) - - self.error_handler.reset_error_context() - - def test_sort_issues(self): - schema = load_schema_version("8.1.0") - issues = [ - {ErrorContext.CUSTOM_TITLE: 'issue3', ErrorContext.FILE_NAME: 'File2', ErrorContext.ROW: 5, - ErrorContext.HED_STRING: HedString('Test C', schema)}, - {ErrorContext.CUSTOM_TITLE: 'issue1', ErrorContext.FILE_NAME: 'File1', ErrorContext.ROW: 10, - ErrorContext.HED_STRING: HedString('Test A', schema)}, - {ErrorContext.CUSTOM_TITLE: 'issue2', ErrorContext.FILE_NAME: 'File1', ErrorContext.ROW: 2}, - {ErrorContext.CUSTOM_TITLE: 'issue4', ErrorContext.FILE_NAME: 'File2', ErrorContext.ROW: 1, - ErrorContext.HED_STRING: HedString('Test D', schema)}, - {ErrorContext.CUSTOM_TITLE: 'issue5', ErrorContext.FILE_NAME: 'File3', ErrorContext.ROW: 15} - ] - - sorted_issues = sort_issues(issues) - self.assertEqual(sorted_issues[0][ErrorContext.CUSTOM_TITLE], 'issue1') - self.assertEqual(sorted_issues[1][ErrorContext.CUSTOM_TITLE], 'issue2') - self.assertEqual(sorted_issues[2][ErrorContext.CUSTOM_TITLE], 'issue3') - self.assertEqual(sorted_issues[3][ErrorContext.CUSTOM_TITLE], 'issue4') - self.assertEqual(sorted_issues[4][ErrorContext.CUSTOM_TITLE], 'issue5') - - reversed_issues = sort_issues(issues, reverse=True) - self.assertEqual(reversed_issues[0][ErrorContext.CUSTOM_TITLE], 'issue5') - self.assertEqual(reversed_issues[1][ErrorContext.CUSTOM_TITLE], 'issue4') - self.assertEqual(reversed_issues[2][ErrorContext.CUSTOM_TITLE], 'issue3') - self.assertEqual(reversed_issues[3][ErrorContext.CUSTOM_TITLE], 'issue2') - self.assertEqual(reversed_issues[4][ErrorContext.CUSTOM_TITLE], 'issue1') - - - def test_replace_tag_references(self): - # Test with mixed data types and HedString in a nested dict - nested_dict = {'a': HedString('Hed1', self._schema), 'b': {'c': 2, 'd': [3, {'e': HedString('Hed2', self._schema)}]}, 'f': [5, 6]} - replace_tag_references(nested_dict) - self.assertEqual(nested_dict, {'a': 'Hed1', 'b': {'c': 2, 'd': [3, {'e': 'Hed2'}]}, 'f': [5, 6]}) - - # Test with mixed data types and HedString in a nested list - nested_list = [HedString('Hed1', self._schema), {'a': 2, 'b': [3, {'c': HedString('Hed2', self._schema)}]}] - replace_tag_references(nested_list) - self.assertEqual(nested_list, ['Hed1', {'a': 2, 'b': [3, {'c': 'Hed2'}]}]) - - # Test with mixed data types and HedString in a list within a dict - mixed = {'a': HedString('Hed1', self._schema), 'b': [2, 3, {'c': HedString('Hed2', self._schema)}, 4]} - replace_tag_references(mixed) - self.assertEqual(mixed, {'a': 'Hed1', 'b': [2, 3, {'c': 'Hed2'}, 4]}) - - - def test_register_error_twice(self): - test_code = "test_error_code" - @hed_tag_error(test_code) - def test_error_code(tag): - pass - - with self.assertRaises(KeyError): - @hed_tag_error(test_code) - def test_error_code(tag): - pass - - def test_format_unknown_error(self): - error_code = "Unknown error type" - error_list = self.error_handler.format_error(error_code, "param1", param2=0) - self.assertEqual(error_list[0]['code'], error_code) - - actual_code = "Actual unknown error type" - error_list = self.error_handler.format_error_from_context(error_code, self.error_handler.error_context, "param1", param2=0, - actual_error=actual_code) - self.assertEqual(error_list[0]['code'], actual_code) +import unittest +from hed.errors import ErrorHandler, ErrorContext, ErrorSeverity, ValidationErrors, SchemaWarnings, \ + get_printable_issue_string, sort_issues, replace_tag_references +from hed.errors.error_reporter import hed_tag_error, get_printable_issue_string_html +from hed import HedString +from hed import load_schema_version + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.error_handler = ErrorHandler() + cls._schema = load_schema_version("8.3.0") + pass + + def test_push_error_context(self): + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + name = "DummyFileName.txt" + self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) + column_name = "DummyColumnName" + self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME]) + self.error_handler.reset_error_context() + self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) + self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) + self.error_handler.push_error_context(ErrorContext.COLUMN, column_name) + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) + self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME]) + self.assertTrue(column_name == error_list[0][ErrorContext.COLUMN]) + self.assertTrue(len(error_list) == 1) + self.error_handler.reset_error_context() + self.error_handler.push_error_context(ErrorContext.ROW, None) + self.assertTrue(self.error_handler.error_context[0][1] == 0) + self.error_handler.reset_error_context() + + def test_pop_error_context(self): + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + name = "DummyFileName.txt" + self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) + self.error_handler.pop_error_context() + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + column_name = "DummyColumnName" + self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + self.error_handler.push_error_context(ErrorContext.FILE_NAME, name) + self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) + self.error_handler.push_error_context(ErrorContext.COLUMN, column_name) + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME]) + self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME]) + self.assertTrue(column_name == error_list[0][ErrorContext.COLUMN]) + self.error_handler.pop_error_context() + self.error_handler.pop_error_context() + self.error_handler.pop_error_context() + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + self.assertTrue(ErrorContext.COLUMN not in error_list[0]) + self.error_handler.pop_error_context() + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + self.assertTrue(len(error_list) == 1) + self.error_handler.reset_error_context() + + def test_filter_issues_by_severity(self): + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + error_list += self.error_handler.format_error_with_context(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, + "dummy", problem_char="#", char_index=0) + self.assertTrue(len(error_list) == 2) + filtered_list = self.error_handler.filter_issues_by_severity(issues_list=error_list, + severity=ErrorSeverity.ERROR) + self.assertTrue(len(filtered_list) == 1) + + def test_printable_issue_string(self): + self.error_handler.push_error_context(ErrorContext.CUSTOM_TITLE, "Default Custom Title") + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + error_list += self.error_handler.format_error_with_context(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, + "dummy", problem_char="#", char_index=0) + + printable_issues = get_printable_issue_string(error_list) + self.assertTrue(len(printable_issues) > 10) + + printable_issues2 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR) + self.assertTrue(len(printable_issues) > len(printable_issues2)) + + printable_issues3 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, + title="Later added custom title that is longer") + self.assertTrue(len(printable_issues3) > len(printable_issues2)) + + self.error_handler.reset_error_context() + + def test_printable_issue_string_with_filenames(self): + my_file = 'my_file.txt' + self.error_handler.push_error_context(ErrorContext.CUSTOM_TITLE, "Default Custom Title") + self.error_handler.push_error_context(ErrorContext.FILE_NAME, my_file) + error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") + error_list += self.error_handler.format_error_with_context(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, + "dummy", problem_char="#", char_index=0) + + printable_issues = get_printable_issue_string(error_list, skip_filename=False) + self.assertTrue(len(printable_issues) > 10) + self.assertEqual(printable_issues.count(my_file), 1) + + printable_issues2 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, skip_filename=False) + self.assertTrue(len(printable_issues) > len(printable_issues2)) + self.assertEqual(printable_issues2.count(my_file), 1) + printable_issues3 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, skip_filename=False, + title="Later added custom title that is longer") + self.assertTrue(len(printable_issues3) > len(printable_issues2)) + self.assertEqual(printable_issues3.count(my_file), 1) + + printable_issues = get_printable_issue_string_html(error_list, skip_filename=False) + self.assertTrue(len(printable_issues) > 10) + self.assertEqual(printable_issues.count(my_file), 1) + + printable_issues2 = get_printable_issue_string_html(error_list, severity=ErrorSeverity.ERROR, + skip_filename=False) + self.assertTrue(len(printable_issues) > len(printable_issues2)) + self.assertEqual(printable_issues2.count(my_file), 1) + printable_issues3 = get_printable_issue_string_html(error_list, severity=ErrorSeverity.ERROR, + skip_filename=False, + title="Later added custom title that is longer") + self.assertTrue(len(printable_issues3) > len(printable_issues2)) + self.assertEqual(printable_issues3.count(my_file), 1) + + self.error_handler.reset_error_context() + + def test_sort_issues(self): + schema = load_schema_version("8.1.0") + issues = [ + {ErrorContext.CUSTOM_TITLE: 'issue3', ErrorContext.FILE_NAME: 'File2', ErrorContext.ROW: 5, + ErrorContext.HED_STRING: HedString('Test C', schema)}, + {ErrorContext.CUSTOM_TITLE: 'issue1', ErrorContext.FILE_NAME: 'File1', ErrorContext.ROW: 10, + ErrorContext.HED_STRING: HedString('Test A', schema)}, + {ErrorContext.CUSTOM_TITLE: 'issue2', ErrorContext.FILE_NAME: 'File1', ErrorContext.ROW: 2}, + {ErrorContext.CUSTOM_TITLE: 'issue4', ErrorContext.FILE_NAME: 'File2', ErrorContext.ROW: 1, + ErrorContext.HED_STRING: HedString('Test D', schema)}, + {ErrorContext.CUSTOM_TITLE: 'issue5', ErrorContext.FILE_NAME: 'File3', ErrorContext.ROW: 15} + ] + + sorted_issues = sort_issues(issues) + self.assertEqual(sorted_issues[0][ErrorContext.CUSTOM_TITLE], 'issue1') + self.assertEqual(sorted_issues[1][ErrorContext.CUSTOM_TITLE], 'issue2') + self.assertEqual(sorted_issues[2][ErrorContext.CUSTOM_TITLE], 'issue3') + self.assertEqual(sorted_issues[3][ErrorContext.CUSTOM_TITLE], 'issue4') + self.assertEqual(sorted_issues[4][ErrorContext.CUSTOM_TITLE], 'issue5') + + reversed_issues = sort_issues(issues, reverse=True) + self.assertEqual(reversed_issues[0][ErrorContext.CUSTOM_TITLE], 'issue5') + self.assertEqual(reversed_issues[1][ErrorContext.CUSTOM_TITLE], 'issue4') + self.assertEqual(reversed_issues[2][ErrorContext.CUSTOM_TITLE], 'issue3') + self.assertEqual(reversed_issues[3][ErrorContext.CUSTOM_TITLE], 'issue2') + self.assertEqual(reversed_issues[4][ErrorContext.CUSTOM_TITLE], 'issue1') + + def test_replace_tag_references(self): + # Test with mixed data types and HedString in a nested dict + nested_dict = {'a': HedString('Hed1', self._schema), + 'b': {'c': 2, 'd': [3, {'e': HedString('Hed2', self._schema)}]}, 'f': [5, 6]} + replace_tag_references(nested_dict) + self.assertEqual(nested_dict, {'a': 'Hed1', 'b': {'c': 2, 'd': [3, {'e': 'Hed2'}]}, 'f': [5, 6]}) + + # Test with mixed data types and HedString in a nested list + nested_list = [HedString('Hed1', self._schema), + {'a': 2, 'b': [3, {'c': HedString('Hed2', self._schema)}]}] + replace_tag_references(nested_list) + self.assertEqual(nested_list, ['Hed1', {'a': 2, 'b': [3, {'c': 'Hed2'}]}]) + + # Test with mixed data types and HedString in a list within a dict + mixed = {'a': HedString('Hed1', self._schema), + 'b': [2, 3, {'c': HedString('Hed2', self._schema)}, 4]} + replace_tag_references(mixed) + self.assertEqual(mixed, {'a': 'Hed1', 'b': [2, 3, {'c': 'Hed2'}, 4]}) + + def test_register_error_twice(self): + test_code = "test_error_code" + + @hed_tag_error(test_code) + def test_error_code(tag): + pass + + with self.assertRaises(KeyError): + @hed_tag_error(test_code) + def test_error_code(tag): + pass + + def test_format_unknown_error(self): + error_code = "Unknown error type" + error_list = self.error_handler.format_error(error_code, "param1", param2=0) + self.assertEqual(error_list[0]['code'], error_code) + + actual_code = "Actual unknown error type" + error_list = self.error_handler.format_error_from_context(error_code, self.error_handler.error_context, + "param1", param2=0, + actual_error=actual_code) + self.assertEqual(error_list[0]['code'], actual_code) diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 755fc554..dba97140 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -1,190 +1,187 @@ -import io -import unittest -import os -import shutil -from hed.models.sidecar import Sidecar -from hed.schema.hed_schema_io import load_schema_version -from hed.models.base_input import BaseInput -from hed.models.tabular_input import TabularInput -from hed.models.column_mapper import ColumnMapper -from hed.models.definition_dict import DefinitionDict -from hed import schema -from hed.errors.exceptions import HedFileError -from hed.errors.error_types import ErrorContext, ValidationErrors - - -import pandas as pd -import numpy as np - - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - # todo: clean up these unit tests/add more - base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) - cls.base_data_dir = base_data_dir - json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") - # cls.json_def_filename = json_def_filename - json_def_sidecar = Sidecar(json_def_filename) - events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') - cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) - - base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - cls.base_output_folder = base_output - os.makedirs(base_output, exist_ok=True) - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.2.0.xml')) - cls.bids_root_path = bids_root_path - json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - - cls.hed_schema = schema.load_schema(schema_path) - sidecar1 = Sidecar(json_path, name='face_sub1_json') - mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) - cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, - name="face_sub1_events", mapper=mapper1, allow_blank_names=False) - cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_gathered_defs(self): - # todo: probably remove this test? - # todo: add unit tests for definitions in tsv file - defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) - expected_defs = { - 'jsonfiledef': '(Acceleration/#,Item/JsonDef1)', - 'jsonfiledef2': '(Age/#,Item/JsonDef2)', - 'jsonfiledef3': '(Age/#)', - 'takesvaluedef': '(Age/#)', - 'valueclassdef': '(Acceleration/#)' - } - self.assertEqual(defs, expected_defs) - - def test_file_not_found(self): - with self.assertRaises(HedFileError): - BaseInput('nonexistent_file.tsv') - - def test_invalid_input_type_int(self): - with self.assertRaises(HedFileError): - BaseInput(123) - - def test_invalid_input_type_dict(self): - with self.assertRaises(HedFileError): - BaseInput({'key': 'value'}) - -class TestSortingByOnset(unittest.TestCase): - @staticmethod - def generate_test_dataframe(): - data = { - 'onset': [0.5, 1.0, 1.5, 2.0, 2.5], - 'HED': [ - 'Age/1', - 'Age/2', - 'Age/3', - 'NotATag', - 'Age/5' - ] - } - - df = pd.DataFrame(data) - - return df - - def test_needs_sort(self): - df = self.generate_test_dataframe() - opened_file = TabularInput(df) - self.assertFalse(opened_file.needs_sorting) - - issues = opened_file.validate(load_schema_version("8.3.0")) - self.assertEqual(issues[1][ErrorContext.ROW], 5) - df.at[3, "onset"] = 1.5 - opened_file = TabularInput(df) - self.assertFalse(opened_file.needs_sorting) - - df.at[3, "onset"] = 1.0 - opened_file = TabularInput(df) - self.assertTrue(opened_file.needs_sorting) - issues = opened_file.validate(load_schema_version("8.3.0")) - # Should still report the same issue row despite needing sorting for validation - self.assertEqual(issues[1]['code'], ValidationErrors.ONSETS_OUT_OF_ORDER) - self.assertEqual(issues[2][ErrorContext.ROW], 5) - - def test_sort(self): - from hed.models.df_util import sort_dataframe_by_onsets - df = self.generate_test_dataframe() - df2 = sort_dataframe_by_onsets(df) - self.assertTrue(df.equals(df2)) - - df.at[3, "onset"] = 1.5 - df2 = sort_dataframe_by_onsets(df) - self.assertTrue(df.equals(df2)) - - df.at[3, "onset"] = 1.0 - df2 = sort_dataframe_by_onsets(df) - self.assertFalse(df.equals(df2)) - - -class TestCombineDataframe(unittest.TestCase): - def test_combine_dataframe_with_strings(self): - data = { - 'A': ['apple', 'banana', 'cherry'], - 'B': ['dog', 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', 'piano'] - } - df = pd.DataFrame(data) - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, piano']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_nan_values(self): - data = { - 'A': ['apple', np.nan, 'cherry'], - 'B': [np.nan, 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', np.nan] - } - df = pd.DataFrame(data) - # this is called on load normally - df = df.fillna("n/a") - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_empty_values(self): - data = { - 'A': ['apple', '', 'cherry'], - 'B': ['', 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', ''] - } - df = pd.DataFrame(data) - - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_mixed_values(self): - data = { - 'A': ['apple', np.nan, 'cherry', 'n/a', ''], - 'B': [np.nan, 'elephant', 'fox', 'n/a', ''], - 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] - } - df = pd.DataFrame(data) - # this is called on load normally - df = df.fillna("n/a") - csv_buffer = io.StringIO() - df.to_csv(csv_buffer, header=False, index=False) - csv_buffer.seek(0) - - # Use the same loading function we normally use to verify n/a translates right. - loaded_df = pd.read_csv(csv_buffer, header=None) - loaded_df = loaded_df.fillna("n/a") - result = BaseInput.combine_dataframe(loaded_df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) - self.assertTrue(result.equals(expected)) - +import io +import unittest +import os +import shutil +from hed.models.sidecar import Sidecar +from hed.schema.hed_schema_io import load_schema_version +from hed.models.base_input import BaseInput +from hed.models.tabular_input import TabularInput +from hed.models.column_mapper import ColumnMapper +from hed.models.definition_dict import DefinitionDict +from hed import schema +from hed.errors.exceptions import HedFileError +from hed.errors.error_types import ErrorContext, ValidationErrors +import pandas as pd +import numpy as np + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # todo: clean up these unit tests/add more + base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) + cls.base_data_dir = base_data_dir + json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") + # cls.json_def_filename = json_def_filename + json_def_sidecar = Sidecar(json_def_filename) + events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') + cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) + + base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + cls.base_output_folder = base_output + os.makedirs(base_output, exist_ok=True) + + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.2.0.xml')) + cls.bids_root_path = bids_root_path + json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + + cls.hed_schema = schema.load_schema(schema_path) + sidecar1 = Sidecar(json_path, name='face_sub1_json') + mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) + cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, + name="face_sub1_events", mapper=mapper1, allow_blank_names=False) + cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_gathered_defs(self): + # todo: probably remove this test? + # todo: add unit tests for definitions in tsv file + defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) + expected_defs = { + 'jsonfiledef': '(Acceleration/#,Item/JsonDef1)', + 'jsonfiledef2': '(Age/#,Item/JsonDef2)', + 'jsonfiledef3': '(Age/#)', + 'takesvaluedef': '(Age/#)', + 'valueclassdef': '(Acceleration/#)' + } + self.assertEqual(defs, expected_defs) + + def test_file_not_found(self): + with self.assertRaises(HedFileError): + BaseInput('nonexistent_file.tsv') + + def test_invalid_input_type_int(self): + with self.assertRaises(HedFileError): + BaseInput(123) + + def test_invalid_input_type_dict(self): + with self.assertRaises(HedFileError): + BaseInput({'key': 'value'}) + + +class TestSortingByOnset(unittest.TestCase): + @staticmethod + def generate_test_dataframe(): + data = { + 'onset': [0.5, 1.0, 1.5, 2.0, 2.5], + 'HED': [ + 'Age/1', + 'Age/2', + 'Age/3', + 'NotATag', + 'Age/5' + ] + } + + df = pd.DataFrame(data) + + return df + + def test_needs_sort(self): + df = self.generate_test_dataframe() + opened_file = TabularInput(df) + self.assertFalse(opened_file.needs_sorting) + + issues = opened_file.validate(load_schema_version("8.3.0")) + self.assertEqual(issues[1][ErrorContext.ROW], 5) + df.at[3, "onset"] = 1.5 + opened_file = TabularInput(df) + self.assertFalse(opened_file.needs_sorting) + + df.at[3, "onset"] = 1.0 + opened_file = TabularInput(df) + self.assertTrue(opened_file.needs_sorting) + issues = opened_file.validate(load_schema_version("8.3.0")) + # Should still report the same issue row despite needing sorting for validation + self.assertEqual(issues[1]['code'], ValidationErrors.ONSETS_OUT_OF_ORDER) + self.assertEqual(issues[2][ErrorContext.ROW], 5) + + def test_sort(self): + from hed.models.df_util import sort_dataframe_by_onsets + df = self.generate_test_dataframe() + df2 = sort_dataframe_by_onsets(df) + self.assertTrue(df.equals(df2)) + + df.at[3, "onset"] = 1.5 + df2 = sort_dataframe_by_onsets(df) + self.assertTrue(df.equals(df2)) + + df.at[3, "onset"] = 1.0 + df2 = sort_dataframe_by_onsets(df) + self.assertFalse(df.equals(df2)) + + +class TestCombineDataframe(unittest.TestCase): + def test_combine_dataframe_with_strings(self): + data = { + 'A': ['apple', 'banana', 'cherry'], + 'B': ['dog', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', 'piano'] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, piano']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_nan_values(self): + data = { + 'A': ['apple', np.nan, 'cherry'], + 'B': [np.nan, 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', np.nan] + } + df = pd.DataFrame(data) + # this is called on load normally + df = df.fillna("n/a") + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_empty_values(self): + data = { + 'A': ['apple', '', 'cherry'], + 'B': ['', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', ''] + } + df = pd.DataFrame(data) + + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_mixed_values(self): + data = { + 'A': ['apple', np.nan, 'cherry', 'n/a', ''], + 'B': [np.nan, 'elephant', 'fox', 'n/a', ''], + 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] + } + df = pd.DataFrame(data) + # this is called on load normally + df = df.fillna("n/a") + csv_buffer = io.StringIO() + df.to_csv(csv_buffer, header=False, index=False) + csv_buffer.seek(0) + + # Use the same loading function we normally use to verify n/a translates right. + loaded_df = pd.read_csv(csv_buffer, header=None) + loaded_df = loaded_df.fillna("n/a") + result = BaseInput.combine_dataframe(loaded_df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) + self.assertTrue(result.equals(expected)) diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py index 2268fa41..3d4084cf 100644 --- a/tests/models/test_basic_search.py +++ b/tests/models/test_basic_search.py @@ -1,363 +1,364 @@ -import unittest -import pandas as pd -from hed import load_schema_version - -import os -from hed import TabularInput -from hed.models import df_util, basic_search -from hed.models.basic_search import find_words, check_parentheses, reverse_and_flip_parentheses, \ - construct_delimiter_map, verify_search_delimiters, find_matching -import numpy as np -from hed.models.df_util import convert_to_form - -class TestNewSearch(unittest.TestCase): - @classmethod - def setUpClass(cls): - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - cls.events_path = os.path.realpath( - os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - cls.base_input = TabularInput(cls.events_path, sidecar1_path) - cls.schema = load_schema_version("8.3.0") - cls.df = cls.base_input.series_filtered - - def test_find_matching_results(self): - result1 = basic_search.find_matching(self.df, "(Face, Item-interval/1)") - result2 = basic_search.find_matching(self.df, "(Face, Item-interval/1*)") - - self.assertTrue(np.sum(result1) > 0, "result1 should have some true values") - self.assertTrue(np.sum(result2) > 0, "result2 should have some true values") - self.assertTrue(np.sum(result1) < np.sum(result2), "result1 should have fewer true values than result2") - - # Verify we get the same results in both tag forms - df_copy = self.df.copy() - convert_to_form(df_copy, self.schema, "long_tag") - - result1b = basic_search.find_matching(self.df, "(Face, Item-interval/1)") - result2b = basic_search.find_matching(self.df, "(Face, Item-interval/1*)") - - self.assertTrue(np.sum(result1b) > 0, "result1 should have some true values") - self.assertTrue(np.sum(result2b) > 0, "result2 should have some true values") - self.assertTrue(np.sum(result1b) < np.sum(result2b), "result1 should have fewer true values than result2") - self.assertTrue(result1.equals(result1b)) - self.assertTrue(result2.equals(result2b)) - - convert_to_form(df_copy, self.schema, "short_tag") - - result1b = basic_search.find_matching(self.df, "(Face, Item-interval/1)") - result2b = basic_search.find_matching(self.df, "(Face, Item-interval/1*)") - - self.assertTrue(np.sum(result1b) > 0, "result1 should have some true values") - self.assertTrue(np.sum(result2b) > 0, "result2 should have some true values") - self.assertTrue(np.sum(result1b) < np.sum(result2b), "result1 should have fewer true values than result2") - self.assertTrue(result1.equals(result1b)) - self.assertTrue(result2.equals(result2b)) - - -class TestFindWords(unittest.TestCase): - def test_basic(self): - search_string = "@global (local1, local2)" - anywhere_words, _, specific_words = find_words(search_string) - self.assertEqual(anywhere_words, ['global']) - self.assertEqual(specific_words, ['local1', 'local2']) - - def test_no_anywhere_words(self): - search_string = "(local1, local2)" - anywhere_words, _, specific_words = find_words(search_string) - self.assertEqual(anywhere_words, []) - self.assertEqual(specific_words, ['local1', 'local2']) - - def test_no_specific_words(self): - search_string = "@global1, @global2" - anywhere_words, _, specific_words = find_words(search_string) - self.assertEqual(anywhere_words, ['global1', 'global2']) - self.assertEqual(specific_words, []) - - def test_empty_string(self): - search_string = "" - anywhere_words, _, specific_words = find_words(search_string) - self.assertEqual(anywhere_words, []) - self.assertEqual(specific_words, []) - - def test_mixed_words(self): - search_string = "@global (local1, local2), @another_global" - anywhere_words, _, specific_words = find_words(search_string) - self.assertEqual(anywhere_words, ['global', 'another_global']) - self.assertEqual(specific_words, ['local1', 'local2']) - - def test_whitespace(self): - search_string = " @Global , ( local1 , local2 ) " - anywhere_words, _, specific_words = find_words(search_string) - self.assertEqual(anywhere_words, ['Global']) - self.assertEqual(specific_words, ['local1', 'local2']) - - -class TestCheckParentheses(unittest.TestCase): - def test_balanced_parentheses(self): - self.assertEqual(check_parentheses("(())"), "") - self.assertEqual(check_parentheses("(someText())"), "") - self.assertEqual(check_parentheses("((some)text())"), "") - self.assertEqual(check_parentheses("()"), "") - - def test_unbalanced_parentheses(self): - self.assertEqual(check_parentheses("(()"), "(") - self.assertEqual(check_parentheses("()someText("), "(") - self.assertEqual(check_parentheses("(text)text)"), ")") - self.assertEqual(check_parentheses("text)"), ")") - - def test_mixed_parentheses(self): - self.assertEqual(check_parentheses("(()(())"), "(") - self.assertEqual(check_parentheses("(someText))((someText)"), ")(") - self.assertEqual(check_parentheses("((someText))someText"), "") - self.assertEqual(check_parentheses("(someText(someText))someText"), "") - - def test_special_cases(self): - self.assertEqual(check_parentheses(""), "") - self.assertEqual(check_parentheses("abc"), "") - self.assertEqual(check_parentheses("((()))("), "(") - self.assertEqual(check_parentheses("text"), "") - - def test_reverse_and_flip_parentheses(self): - self.assertEqual(reverse_and_flip_parentheses("(abc)"), "(cba)") - self.assertEqual(reverse_and_flip_parentheses("Hello()"), "()olleH") - self.assertEqual(reverse_and_flip_parentheses(")("), ")(") - self.assertEqual(reverse_and_flip_parentheses("((()))"), "((()))") - self.assertEqual(reverse_and_flip_parentheses("()()()"), "()()()") - self.assertEqual(reverse_and_flip_parentheses("abc"), "cba") - self.assertEqual(reverse_and_flip_parentheses("123(abc)321"), "123(cba)321") - self.assertEqual(reverse_and_flip_parentheses("a(bc)d"), "d(cb)a") - - -class TestConstructDelimiterMap(unittest.TestCase): - def test_empty_text(self): - self.assertEqual(construct_delimiter_map("", ["word1", "word2"]), {}) - - def test_empty_words(self): - self.assertEqual(construct_delimiter_map("word1,word2", []), {}) - - def test_single_occurrence(self): - text = "word1,word2" - expected_result = { - ("word1", "word2"): "", - ("word2", "word1"): "" - } - self.assertEqual(construct_delimiter_map(text, ["word1", "word2"]), expected_result) - - def test_multiple_words(self): - text = "word0,((word1),word2)" - expected_result = { - ("word0", "word1"): "((", - ("word0", "word2"): "(", - ("word1", "word0"): "))", - ("word1", "word2"): ")", - ("word2", "word1"): "(", - ("word2", "word0"): ")" - } - self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result) - - text = "word0 , ( (word1 ), word2)" - self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result) - - -class TestVerifyDelimiters(unittest.TestCase): - def base_verify_func(self, query_text, text, specific_words, expected_result): - delimiter_map = construct_delimiter_map(query_text, specific_words) - actual_result = verify_search_delimiters(text, specific_words, delimiter_map) - self.assertEqual(actual_result, expected_result) - - def test_all_conditions_met(self): - query_text = "word0,((word1),word2)" - specific_words = ["word0", "word1", "word2"] - text = "word0,((word1),word2)" - self.base_verify_func(query_text, text, specific_words, True) - text = "((word1),word2), word0" - self.base_verify_func(query_text, text, specific_words, True) - text = "word0,(word2, (word1))" - self.base_verify_func(query_text, text, specific_words, True) - text = "word0,((word1),(ExtraGroup),word2)" - self.base_verify_func(query_text, text, specific_words, True) - text = "word0,((word2),word1)" - self.base_verify_func(query_text, text, specific_words, False) - text = "((word1),word0), word2" - self.base_verify_func(query_text, text, specific_words, False) - text = "word0,((word1))" - self.base_verify_func(query_text, text, specific_words, False) - text = "(word1),(ExtraGroup),word2)" - self.base_verify_func(query_text, text, specific_words, False) - - def test_wildcard_matching_verify_delimiters(self): - query_text = "word0, ((word1.*?)), word2.*?" - delimiter_map = construct_delimiter_map(query_text, ["word0", "word1.*?", "word2.*?"]) - - # Positive test cases - text = "((word1)), word0, word2X" - self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map)) - - text = "word0, ((word1Y)), word2Z" - self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map)) - - # Negative test cases - text = "word0, (word1), word2" - self.assertFalse(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map)) - - -class TestFindMatching(unittest.TestCase): - def base_find_matching(self, series, search_string, expected): - mask = find_matching(series, search_string) - self.assertTrue(all(mask == expected), f"Expected {expected}, got {mask}") - - def test_basic_matching(self): - series = pd.Series([ - "word0, word1, word2", - "word0, (word1, word2)" - ]) - search_string = "word0, word1" - expected = pd.Series([True, True]) - self.base_find_matching(series, search_string, expected) - search_string = "(word0, word1)" - expected = pd.Series([True, False]) - self.base_find_matching(series, search_string, expected) - - def test_group_matching(self): - series = pd.Series([ - "(word1), word0, ((word2))", - "word0, ((word1)), word2", - "(word1), word0, (word2)" - ]) - search_string = "word0, ((word1)), word2" - expected = pd.Series([False, True, False]) - self.base_find_matching(series, search_string, expected) - - def test_anywhere_words(self): - series = pd.Series([ - "(word1), word0, ((word2))", - "word0, ((word1)), word2", - "word0, (word3), ((word1)), word2" - ]) - search_string = "@word3, word0, ((word1)), word2" - expected = pd.Series([False, False, True]) - self.base_find_matching(series, search_string, expected) - - def test_mismatched_parentheses(self): - series = pd.Series([ - "(word1), word0, ((word2))", - "word0, ((word1)), word2", - "word0, (word1)), word2", - "word0, ((word1), word2" - ]) - search_string = "word0, ((word1)), word2" - expected = pd.Series([False, True, False, False]) - self.base_find_matching(series, search_string, expected) - - def test_wildcard_matching(self): - series = pd.Series([ - "word2, word0, ((word1X))", - "word0, ((word1Y)), word2Z", - "word0, ((word1)), word2", - "word0, (word1), word2" - ]) - search_string = "word0, ((word1*)), word2*" - expected = pd.Series([True, True, True, False]) - self.base_find_matching(series, search_string, expected) - - def test_complex_case_with_word_identifiers(self): - query_text = "word0, ((word1), @word2, @word3, word4)" - series = pd.Series([ - "word0, ((word1), word2, word3, word4)", - "word2, word0, ((word1), word3, word4)", - "word3, ((word1), word2, word4), word0", - "word0, ((word1), word4), word2, word3", - "word0, word1, word4, word2", - "word2, word3" - ]) - expected = pd.Series([True, True, True, True, False, False]) - - self.base_find_matching(series, query_text, expected) - - def test_very_complex_case_with_word_identifiers(self): - query_text = "word0, (((word1, word2), @word3)), ((word4, word5)))" - series = pd.Series([ - "word0, (((word1, word2), word3)), ((word4, word5)))", - "word3, word0, (((word1, word2))), ((word4, word5)))", - "word0, ((word1, word2), word3), (word4, word5)", - "word0, (((word1, word2), word3)), (word4)", - "word0, (((word1, word2))), ((word4, word5)))" - ]) - expected = pd.Series([True, True, False, False, False]) - - self.base_find_matching(series, query_text, expected) - - def test_incorrect_single_delimiter(self): - query_text = "word0, ((word1)), word2" - series = pd.Series([ - "word0, ((word1)), word2", - "(word0, ((word1)), word2)", - "word0, ((word1), word2)", - "word0, (word1)), word2" - ]) - expected = pd.Series([True, True, False, False]) - self.base_find_matching(series, query_text, expected) - - def test_mismatched_parentheses2(self): - query_text = "word0, ((word1)), (word2, word3)" - series = pd.Series([ - "word0, ((word1)), (word2, word3)", - "(word2, word3), word0, ((word1))", - "word0, someExtraText, ((word1)), someMoreText, (word2, word3)", - "word0, ((word1), (word2, word3))", - "word0, ((word1), ((word2, word3)" - ]) - expected = pd.Series([True, True, True, False, False]) - self.base_find_matching(series, query_text, expected) - - def test_negative_words(self): - series = pd.Series([ - "word0, word1", - "word0, word2", - "word0, word2, word3", - "word0, (word1), word2", - "word0, (word2, word3), word1", - "word0, word1suffix", - ]) - - # 1. Basic Negative Test Case - search_string1 = "~word1, word0" - expected1 = pd.Series([False, True, True, False, False, True]) - - # 2. Two Negative Words - search_string2 = "~word1, ~word3, word0" - expected2 = pd.Series([False, True, False, False, False, True]) - - # 3. Combination of Negative and Mandatory Words - search_string3 = "@word2, ~word1, word0" - expected3 = pd.Series([False, True, True, False, False, False]) - - # 4. Negative Words with Wildcards - search_string4 = "word0, ~word1*" - expected4 = pd.Series([False, True, True, False, False, False]) - - # Running tests - self.base_find_matching(series, search_string1, expected1) - self.base_find_matching(series, search_string2, expected2) - self.base_find_matching(series, search_string3, expected3) - self.base_find_matching(series, search_string4, expected4) - - def test_negative_words_group(self): - series = pd.Series([ - "word0, (word1, (word2))", - "word0, (word1, (word2)), word3", - "word0, (word1, (word2), word3)", - "word0, (word1, (word2, word3))", - ]) - search_string = "word0, (word1, (word2))" - expected = pd.Series([True, True, True, True]) - self.base_find_matching(series, search_string, expected) - - search_string = "word0, (word1, (word2)), ~word3" - expected = pd.Series([True, False, False, False]) - self.base_find_matching(series, search_string, expected) - - search_string = "word0, (word1, (word2), ~word3)" - expected = pd.Series([True, False, False, False]) - self.base_find_matching(series, search_string, expected) +import unittest +import pandas as pd +from hed import load_schema_version + +import os +from hed import TabularInput +from hed.models import basic_search +from hed.models.basic_search import find_words, check_parentheses, reverse_and_flip_parentheses, \ + construct_delimiter_map, verify_search_delimiters, find_matching +import numpy as np +from hed.models.df_util import convert_to_form + + +class TestNewSearch(unittest.TestCase): + @classmethod + def setUpClass(cls): + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + cls.events_path = os.path.realpath( + os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + cls.base_input = TabularInput(cls.events_path, sidecar1_path) + cls.schema = load_schema_version("8.3.0") + cls.df = cls.base_input.series_filtered + + def test_find_matching_results(self): + result1 = basic_search.find_matching(self.df, "(Face, Item-interval/1)") + result2 = basic_search.find_matching(self.df, "(Face, Item-interval/1*)") + + self.assertTrue(np.sum(result1) > 0, "result1 should have some true values") + self.assertTrue(np.sum(result2) > 0, "result2 should have some true values") + self.assertTrue(np.sum(result1) < np.sum(result2), "result1 should have fewer true values than result2") + + # Verify we get the same results in both tag forms + df_copy = self.df.copy() + convert_to_form(df_copy, self.schema, "long_tag") + + result1b = basic_search.find_matching(self.df, "(Face, Item-interval/1)") + result2b = basic_search.find_matching(self.df, "(Face, Item-interval/1*)") + + self.assertTrue(np.sum(result1b) > 0, "result1 should have some true values") + self.assertTrue(np.sum(result2b) > 0, "result2 should have some true values") + self.assertTrue(np.sum(result1b) < np.sum(result2b), "result1 should have fewer true values than result2") + self.assertTrue(result1.equals(result1b)) + self.assertTrue(result2.equals(result2b)) + + convert_to_form(df_copy, self.schema, "short_tag") + + result1b = basic_search.find_matching(self.df, "(Face, Item-interval/1)") + result2b = basic_search.find_matching(self.df, "(Face, Item-interval/1*)") + + self.assertTrue(np.sum(result1b) > 0, "result1 should have some true values") + self.assertTrue(np.sum(result2b) > 0, "result2 should have some true values") + self.assertTrue(np.sum(result1b) < np.sum(result2b), "result1 should have fewer true values than result2") + self.assertTrue(result1.equals(result1b)) + self.assertTrue(result2.equals(result2b)) + + +class TestFindWords(unittest.TestCase): + def test_basic(self): + search_string = "@global (local1, local2)" + anywhere_words, _, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['global']) + self.assertEqual(specific_words, ['local1', 'local2']) + + def test_no_anywhere_words(self): + search_string = "(local1, local2)" + anywhere_words, _, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, []) + self.assertEqual(specific_words, ['local1', 'local2']) + + def test_no_specific_words(self): + search_string = "@global1, @global2" + anywhere_words, _, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['global1', 'global2']) + self.assertEqual(specific_words, []) + + def test_empty_string(self): + search_string = "" + anywhere_words, _, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, []) + self.assertEqual(specific_words, []) + + def test_mixed_words(self): + search_string = "@global (local1, local2), @another_global" + anywhere_words, _, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['global', 'another_global']) + self.assertEqual(specific_words, ['local1', 'local2']) + + def test_whitespace(self): + search_string = " @Global , ( local1 , local2 ) " + anywhere_words, _, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['Global']) + self.assertEqual(specific_words, ['local1', 'local2']) + + +class TestCheckParentheses(unittest.TestCase): + def test_balanced_parentheses(self): + self.assertEqual(check_parentheses("(())"), "") + self.assertEqual(check_parentheses("(someText())"), "") + self.assertEqual(check_parentheses("((some)text())"), "") + self.assertEqual(check_parentheses("()"), "") + + def test_unbalanced_parentheses(self): + self.assertEqual(check_parentheses("(()"), "(") + self.assertEqual(check_parentheses("()someText("), "(") + self.assertEqual(check_parentheses("(text)text)"), ")") + self.assertEqual(check_parentheses("text)"), ")") + + def test_mixed_parentheses(self): + self.assertEqual(check_parentheses("(()(())"), "(") + self.assertEqual(check_parentheses("(someText))((someText)"), ")(") + self.assertEqual(check_parentheses("((someText))someText"), "") + self.assertEqual(check_parentheses("(someText(someText))someText"), "") + + def test_special_cases(self): + self.assertEqual(check_parentheses(""), "") + self.assertEqual(check_parentheses("abc"), "") + self.assertEqual(check_parentheses("((()))("), "(") + self.assertEqual(check_parentheses("text"), "") + + def test_reverse_and_flip_parentheses(self): + self.assertEqual(reverse_and_flip_parentheses("(abc)"), "(cba)") + self.assertEqual(reverse_and_flip_parentheses("Hello()"), "()olleH") + self.assertEqual(reverse_and_flip_parentheses(")("), ")(") + self.assertEqual(reverse_and_flip_parentheses("((()))"), "((()))") + self.assertEqual(reverse_and_flip_parentheses("()()()"), "()()()") + self.assertEqual(reverse_and_flip_parentheses("abc"), "cba") + self.assertEqual(reverse_and_flip_parentheses("123(abc)321"), "123(cba)321") + self.assertEqual(reverse_and_flip_parentheses("a(bc)d"), "d(cb)a") + + +class TestConstructDelimiterMap(unittest.TestCase): + def test_empty_text(self): + self.assertEqual(construct_delimiter_map("", ["word1", "word2"]), {}) + + def test_empty_words(self): + self.assertEqual(construct_delimiter_map("word1,word2", []), {}) + + def test_single_occurrence(self): + text = "word1,word2" + expected_result = { + ("word1", "word2"): "", + ("word2", "word1"): "" + } + self.assertEqual(construct_delimiter_map(text, ["word1", "word2"]), expected_result) + + def test_multiple_words(self): + text = "word0,((word1),word2)" + expected_result = { + ("word0", "word1"): "((", + ("word0", "word2"): "(", + ("word1", "word0"): "))", + ("word1", "word2"): ")", + ("word2", "word1"): "(", + ("word2", "word0"): ")" + } + self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result) + + text = "word0 , ( (word1 ), word2)" + self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result) + + +class TestVerifyDelimiters(unittest.TestCase): + def base_verify_func(self, query_text, text, specific_words, expected_result): + delimiter_map = construct_delimiter_map(query_text, specific_words) + actual_result = verify_search_delimiters(text, specific_words, delimiter_map) + self.assertEqual(actual_result, expected_result) + + def test_all_conditions_met(self): + query_text = "word0,((word1),word2)" + specific_words = ["word0", "word1", "word2"] + text = "word0,((word1),word2)" + self.base_verify_func(query_text, text, specific_words, True) + text = "((word1),word2), word0" + self.base_verify_func(query_text, text, specific_words, True) + text = "word0,(word2, (word1))" + self.base_verify_func(query_text, text, specific_words, True) + text = "word0,((word1),(ExtraGroup),word2)" + self.base_verify_func(query_text, text, specific_words, True) + text = "word0,((word2),word1)" + self.base_verify_func(query_text, text, specific_words, False) + text = "((word1),word0), word2" + self.base_verify_func(query_text, text, specific_words, False) + text = "word0,((word1))" + self.base_verify_func(query_text, text, specific_words, False) + text = "(word1),(ExtraGroup),word2)" + self.base_verify_func(query_text, text, specific_words, False) + + def test_wildcard_matching_verify_delimiters(self): + query_text = "word0, ((word1.*?)), word2.*?" + delimiter_map = construct_delimiter_map(query_text, ["word0", "word1.*?", "word2.*?"]) + + # Positive test cases + text = "((word1)), word0, word2X" + self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map)) + + text = "word0, ((word1Y)), word2Z" + self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map)) + + # Negative test cases + text = "word0, (word1), word2" + self.assertFalse(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map)) + + +class TestFindMatching(unittest.TestCase): + def base_find_matching(self, series, search_string, expected): + mask = find_matching(series, search_string) + self.assertTrue(all(mask == expected), f"Expected {expected}, got {mask}") + + def test_basic_matching(self): + series = pd.Series([ + "word0, word1, word2", + "word0, (word1, word2)" + ]) + search_string = "word0, word1" + expected = pd.Series([True, True]) + self.base_find_matching(series, search_string, expected) + search_string = "(word0, word1)" + expected = pd.Series([True, False]) + self.base_find_matching(series, search_string, expected) + + def test_group_matching(self): + series = pd.Series([ + "(word1), word0, ((word2))", + "word0, ((word1)), word2", + "(word1), word0, (word2)" + ]) + search_string = "word0, ((word1)), word2" + expected = pd.Series([False, True, False]) + self.base_find_matching(series, search_string, expected) + + def test_anywhere_words(self): + series = pd.Series([ + "(word1), word0, ((word2))", + "word0, ((word1)), word2", + "word0, (word3), ((word1)), word2" + ]) + search_string = "@word3, word0, ((word1)), word2" + expected = pd.Series([False, False, True]) + self.base_find_matching(series, search_string, expected) + + def test_mismatched_parentheses(self): + series = pd.Series([ + "(word1), word0, ((word2))", + "word0, ((word1)), word2", + "word0, (word1)), word2", + "word0, ((word1), word2" + ]) + search_string = "word0, ((word1)), word2" + expected = pd.Series([False, True, False, False]) + self.base_find_matching(series, search_string, expected) + + def test_wildcard_matching(self): + series = pd.Series([ + "word2, word0, ((word1X))", + "word0, ((word1Y)), word2Z", + "word0, ((word1)), word2", + "word0, (word1), word2" + ]) + search_string = "word0, ((word1*)), word2*" + expected = pd.Series([True, True, True, False]) + self.base_find_matching(series, search_string, expected) + + def test_complex_case_with_word_identifiers(self): + query_text = "word0, ((word1), @word2, @word3, word4)" + series = pd.Series([ + "word0, ((word1), word2, word3, word4)", + "word2, word0, ((word1), word3, word4)", + "word3, ((word1), word2, word4), word0", + "word0, ((word1), word4), word2, word3", + "word0, word1, word4, word2", + "word2, word3" + ]) + expected = pd.Series([True, True, True, True, False, False]) + + self.base_find_matching(series, query_text, expected) + + def test_very_complex_case_with_word_identifiers(self): + query_text = "word0, (((word1, word2), @word3)), ((word4, word5)))" + series = pd.Series([ + "word0, (((word1, word2), word3)), ((word4, word5)))", + "word3, word0, (((word1, word2))), ((word4, word5)))", + "word0, ((word1, word2), word3), (word4, word5)", + "word0, (((word1, word2), word3)), (word4)", + "word0, (((word1, word2))), ((word4, word5)))" + ]) + expected = pd.Series([True, True, False, False, False]) + + self.base_find_matching(series, query_text, expected) + + def test_incorrect_single_delimiter(self): + query_text = "word0, ((word1)), word2" + series = pd.Series([ + "word0, ((word1)), word2", + "(word0, ((word1)), word2)", + "word0, ((word1), word2)", + "word0, (word1)), word2" + ]) + expected = pd.Series([True, True, False, False]) + self.base_find_matching(series, query_text, expected) + + def test_mismatched_parentheses2(self): + query_text = "word0, ((word1)), (word2, word3)" + series = pd.Series([ + "word0, ((word1)), (word2, word3)", + "(word2, word3), word0, ((word1))", + "word0, someExtraText, ((word1)), someMoreText, (word2, word3)", + "word0, ((word1), (word2, word3))", + "word0, ((word1), ((word2, word3)" + ]) + expected = pd.Series([True, True, True, False, False]) + self.base_find_matching(series, query_text, expected) + + def test_negative_words(self): + series = pd.Series([ + "word0, word1", + "word0, word2", + "word0, word2, word3", + "word0, (word1), word2", + "word0, (word2, word3), word1", + "word0, word1suffix", + ]) + + # 1. Basic Negative Test Case + search_string1 = "~word1, word0" + expected1 = pd.Series([False, True, True, False, False, True]) + + # 2. Two Negative Words + search_string2 = "~word1, ~word3, word0" + expected2 = pd.Series([False, True, False, False, False, True]) + + # 3. Combination of Negative and Mandatory Words + search_string3 = "@word2, ~word1, word0" + expected3 = pd.Series([False, True, True, False, False, False]) + + # 4. Negative Words with Wildcards + search_string4 = "word0, ~word1*" + expected4 = pd.Series([False, True, True, False, False, False]) + + # Running tests + self.base_find_matching(series, search_string1, expected1) + self.base_find_matching(series, search_string2, expected2) + self.base_find_matching(series, search_string3, expected3) + self.base_find_matching(series, search_string4, expected4) + + def test_negative_words_group(self): + series = pd.Series([ + "word0, (word1, (word2))", + "word0, (word1, (word2)), word3", + "word0, (word1, (word2), word3)", + "word0, (word1, (word2, word3))", + ]) + search_string = "word0, (word1, (word2))" + expected = pd.Series([True, True, True, True]) + self.base_find_matching(series, search_string, expected) + + search_string = "word0, (word1, (word2)), ~word3" + expected = pd.Series([True, False, False, False]) + self.base_find_matching(series, search_string, expected) + + search_string = "word0, (word1, (word2), ~word3)" + expected = pd.Series([True, False, False, False]) + self.base_find_matching(series, search_string, expected) diff --git a/tests/models/test_basic_search_util.py b/tests/models/test_basic_search_util.py index 9701ca65..3ccb01f7 100644 --- a/tests/models/test_basic_search_util.py +++ b/tests/models/test_basic_search_util.py @@ -7,14 +7,16 @@ class TestConvertQueryToForm(unittest.TestCase): schema = load_schema_version("8.3.0") def test_basic_convert(self): - input = "@Event, Head-part*, Time-interval/1" - expected_output = "@Event, Item/Biological-item/Anatomical-item/Body-part/Head-part*, Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1" + this_input = "@Event, Head-part*, Time-interval/1" + expected_output = "@Event, Item/Biological-item/Anatomical-item/Body-part/Head-part*, " + \ + "Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1" - actual_output = convert_query(input, self.schema) + actual_output = convert_query(this_input, self.schema) self.assertEqual(expected_output, actual_output) - input = "@Head-part*, Event, Time-interval/1" - expected_output = "@Item/Biological-item/Anatomical-item/Body-part/Head-part*, Event, Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1" + this_input = "@Head-part*, Event, Time-interval/1" + expected_output = "@Item/Biological-item/Anatomical-item/Body-part/Head-part*, " + \ + "Event, Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1" - actual_output = convert_query(input, self.schema) - self.assertEqual(expected_output, actual_output) \ No newline at end of file + actual_output = convert_query(this_input, self.schema) + self.assertEqual(expected_output, actual_output) diff --git a/tests/models/test_column_mapper.py b/tests/models/test_column_mapper.py index cf33315f..760cf4b7 100644 --- a/tests/models/test_column_mapper.py +++ b/tests/models/test_column_mapper.py @@ -1,220 +1,224 @@ -import unittest -import os - -from hed.models import ColumnMapper, ColumnType, HedString -from hed.models.sidecar import Sidecar, DefinitionDict -from hed.errors import ValidationErrors -from hed import load_schema - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') - schema_file = 'schema_tests/HED8.0.0t.xml' - - cls.hed_schema = load_schema(os.path.join(base_data_dir, schema_file)) - cls.integer_key_dictionary = {0: 'one', 1: 'two', 2: 'three'} - cls.zero_based_row_column_count = 3 - cls.column_prefix_dictionary = {2: 'Event/Description/', 3: 'Event/Label/', 4: 'Event/Category/'} - cls.category_key = 'Event/Category/' - cls.category_participant_and_stimulus_tags = \ - HedString('Event/Category/Participant response, Event/Category/Stimulus', cls.hed_schema) - - cls.row_with_hed_tags = ['event1', 'tag1', 'tag2'] - - cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') - cls.basic_events_json = os.path.join(cls.base_data_dir, "sidecar_tests/both_types_events.json") - cls.bids_events_defs = os.path.join(cls.base_data_dir, "validator_tests/bids_events.json") - cls.basic_event_name = "trial_type" - cls.basic_event_type = ColumnType.Categorical - cls.basic_hed_tags_column = "onset" - cls.basic_column_map = ["onset", "duration", "trial_type", "response_time", " stim_file"] - cls.basic_event_row = ["1.2", "0.6", "go", "1.435", "images/red_square.jpg"] - cls.basic_event_row_invalid = ["1.2", "0.6", "invalid_category_key", "1.435", "images/red_square.jpg"] - - - def test_set_tag_columns(self): - mapper = ColumnMapper() - zero_based_tag_columns = [0, 1, 2] - mapper.set_tag_columns(zero_based_tag_columns, finalize_mapping=True) - self.assertTrue(len(mapper._final_column_map) == 3) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) - - def test_set_tag_columns_named(self): - mapper = ColumnMapper(warn_on_missing_column=True) - named_columns = ["Col1", "Col2", "Col3"] - mapper.set_tag_columns(named_columns) - mapper.set_column_map(named_columns) - self.assertTrue(len(mapper._final_column_map) == 3) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) - - def test_set_tag_columns_named_unknown(self): - mapper = ColumnMapper(warn_on_missing_column=True) - two_columns = ["Col1", "Col2"] - named_columns = ["Col1", "Col2", "Col3"] - mapper.set_tag_columns(two_columns) - mapper.set_column_map(named_columns) - self.assertTrue(len(mapper._final_column_map) == 2) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[0]['code'] == ValidationErrors.HED_UNKNOWN_COLUMN) - - def test_set_tag_columns_mixed(self): - mapper = ColumnMapper() - mixed_columns = ["Col1", "Col2", 2] - column_map = ["Col1", "Col2", "Col3"] - mapper.set_tag_columns(mixed_columns) - mapper.set_column_map(column_map) - self.assertTrue(len(mapper._final_column_map) == 3) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) - - def test_set_tag_column_missing(self): - mapper = ColumnMapper() - column_map = ["Col1", "Col2", "Col3"] - mapper.set_tag_columns(["Col1", "Col4"]) - mapper.set_column_map(column_map) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[0]['code'] == ValidationErrors.HED_MISSING_REQUIRED_COLUMN) - - column_map = ["Col1", "Col2", "Col3"] - mapper.set_tag_columns(optional_tag_columns=["Col1", "Col4"]) - mapper.set_column_map(column_map) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) - - - def test_sidecar_and_columns(self): - mapper = ColumnMapper(Sidecar(self.basic_events_json)) - mapper.set_tag_columns(["Invalid", "Invalid2"]) - mapper.set_column_map(["Invalid", "Invalid2"]) - self.assertTrue(len(mapper._final_column_map) == 2) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[0]['code'] == ValidationErrors.SIDECAR_AND_OTHER_COLUMNS) - - def test_duplicate_list(self): - mapper = ColumnMapper() - mapper.set_tag_columns(["Invalid", "Invalid"]) - self.assertTrue(len(mapper._final_column_map) == 0) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 3) - self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) - - mapper.set_tag_columns([0, 0]) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) - - mapper.set_tag_columns([0, "Column1"]) - mapper.set_column_map(["Column1"]) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) - - def test_duplicate_prefix(self): - mapper = ColumnMapper() - prefix_dict = { - 0: "Label/", - "Column1": "Description" - } - mapper.set_column_prefix_dictionary(prefix_dict) - mapper.set_column_map(["Column1"]) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) - - def test_duplicate_cross_lists(self): - mapper = ColumnMapper() - prefix_dict = { - 0: "Label/" - } - mapper.set_tag_columns([0]) - mapper.set_column_prefix_dictionary(prefix_dict) - mapper.set_column_map(["Column1"]) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) - - mapper = ColumnMapper() - prefix_dict = { - "Column1": "Label/" - } - mapper.set_tag_columns([0]) - mapper.set_column_prefix_dictionary(prefix_dict) - mapper.set_column_map(["Column1"]) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) - - - mapper.set_tag_columns(["Column1"]) - self.assertTrue(len(mapper._final_column_map) == 1) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) - self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) - - def test_blank_column(self): - mapper = ColumnMapper() - mapper.set_column_map(["", None]) - self.assertTrue(len(mapper.check_for_mapping_issues()) == 2) - self.assertTrue(mapper.check_for_mapping_issues(allow_blank_names=False)[1]['code'] == ValidationErrors.HED_BLANK_COLUMN) - self.assertTrue(mapper.check_for_mapping_issues(allow_blank_names=False)[1]['code'] == ValidationErrors.HED_BLANK_COLUMN) - - def test_optional_column(self): - mapper = ColumnMapper() - mapper.set_tag_columns(tag_columns=["HED"]) - mapper.set_column_map({1: "HED"}) - self.assertTrue(len(mapper._final_column_map) == 1) - - mapper = ColumnMapper() - mapper.set_tag_columns(optional_tag_columns=["HED"]) - mapper.set_column_map({1: "HED"}) - self.assertTrue(len(mapper._final_column_map) == 1) - - mapper = ColumnMapper() - mapper.set_tag_columns(tag_columns=["HED"]) - self.assertTrue(len(mapper._final_column_map) == 0) - self.assertTrue(len(mapper.get_column_mapping_issues()) == 1) - - mapper = ColumnMapper() - mapper.set_tag_columns(optional_tag_columns=["HED"]) - self.assertTrue(len(mapper._final_column_map) == 0) - self.assertTrue(len(mapper.get_column_mapping_issues()) == 0) - - def test_add_json_file_events(self): - mapper = ColumnMapper() - mapper._set_sidecar(Sidecar(self.basic_events_json)) - self.assertTrue(len(mapper.sidecar_column_data) >= 2) - - def test__detect_event_type(self): - mapper = ColumnMapper() - mapper._set_sidecar(Sidecar(self.basic_events_json)) - self.assertTrue(mapper.sidecar_column_data[self.basic_event_name].column_type == self.basic_event_type) - - def test_tag_mapping_complex(self): - tag_columns = [0] - column_prefix_dictionary = {1: "Label/"} - optional_tag_columns = [2] - mapper = ColumnMapper(tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary, optional_tag_columns=optional_tag_columns) - self.assertEqual(list(mapper._final_column_map), [0, 1, 2]) - self.assertEqual(mapper._final_column_map[0].column_type, ColumnType.HEDTags) - self.assertEqual(mapper._final_column_map[1].column_type, ColumnType.Value) - self.assertEqual(mapper._final_column_map[1].hed_dict, "Label/#") - self.assertEqual(mapper._final_column_map[2].column_type, ColumnType.HEDTags) - - def test_get_def_dict(self): - mapper = ColumnMapper() - def_dict_empty = mapper.get_def_dict(self.hed_schema) - self.assertIsInstance(def_dict_empty, DefinitionDict) - def_dict_base = DefinitionDict("(Definition/TestDef, (Event))", self.hed_schema) - self.assertIsInstance(def_dict_base, DefinitionDict) - self.assertEqual(len(def_dict_base.defs), 1) - def_dict = mapper.get_def_dict(self.hed_schema, extra_def_dicts=def_dict_base) - self.assertIsInstance(def_dict, DefinitionDict) - self.assertEqual(len(def_dict.defs), 1) - - mapper._set_sidecar(Sidecar(self.bids_events_defs)) - def_dict_combined = mapper.get_def_dict(self.hed_schema, extra_def_dicts=def_dict_base) - self.assertIsInstance(def_dict_combined, DefinitionDict) - self.assertEqual(len(def_dict_combined.defs), 4) - - -if __name__ == '__main__': - unittest.main() +import unittest +import os + +from hed.models import ColumnMapper, ColumnType, HedString +from hed.models.sidecar import Sidecar, DefinitionDict +from hed.errors import ValidationErrors +from hed import load_schema + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + schema_file = 'schema_tests/HED8.0.0t.xml' + + cls.hed_schema = load_schema(os.path.join(base_data_dir, schema_file)) + cls.integer_key_dictionary = {0: 'one', 1: 'two', 2: 'three'} + cls.zero_based_row_column_count = 3 + cls.column_prefix_dictionary = {2: 'Event/Description/', 3: 'Event/Label/', 4: 'Event/Category/'} + cls.category_key = 'Event/Category/' + cls.category_participant_and_stimulus_tags = \ + HedString('Event/Category/Participant response, Event/Category/Stimulus', cls.hed_schema) + + cls.row_with_hed_tags = ['event1', 'tag1', 'tag2'] + + cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + cls.basic_events_json = os.path.join(cls.base_data_dir, "sidecar_tests/both_types_events.json") + cls.bids_events_defs = os.path.join(cls.base_data_dir, "validator_tests/bids_events.json") + cls.basic_event_name = "trial_type" + cls.basic_event_type = ColumnType.Categorical + cls.basic_hed_tags_column = "onset" + cls.basic_column_map = ["onset", "duration", "trial_type", "response_time", " stim_file"] + cls.basic_event_row = ["1.2", "0.6", "go", "1.435", "images/red_square.jpg"] + cls.basic_event_row_invalid = ["1.2", "0.6", "invalid_category_key", "1.435", "images/red_square.jpg"] + + def test_set_tag_columns(self): + mapper = ColumnMapper() + zero_based_tag_columns = [0, 1, 2] + mapper.set_tag_columns(zero_based_tag_columns, finalize_mapping=True) + self.assertTrue(len(mapper._final_column_map) == 3) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) + + def test_set_tag_columns_named(self): + mapper = ColumnMapper(warn_on_missing_column=True) + named_columns = ["Col1", "Col2", "Col3"] + mapper.set_tag_columns(named_columns) + mapper.set_column_map(named_columns) + self.assertTrue(len(mapper._final_column_map) == 3) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) + + def test_set_tag_columns_named_unknown(self): + mapper = ColumnMapper(warn_on_missing_column=True) + two_columns = ["Col1", "Col2"] + named_columns = ["Col1", "Col2", "Col3"] + mapper.set_tag_columns(two_columns) + mapper.set_column_map(named_columns) + self.assertTrue(len(mapper._final_column_map) == 2) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[0]['code'] == ValidationErrors.HED_UNKNOWN_COLUMN) + + def test_set_tag_columns_mixed(self): + mapper = ColumnMapper() + mixed_columns = ["Col1", "Col2", 2] + column_map = ["Col1", "Col2", "Col3"] + mapper.set_tag_columns(mixed_columns) + mapper.set_column_map(column_map) + self.assertTrue(len(mapper._final_column_map) == 3) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) + + def test_set_tag_column_missing(self): + mapper = ColumnMapper() + column_map = ["Col1", "Col2", "Col3"] + mapper.set_tag_columns(["Col1", "Col4"]) + mapper.set_column_map(column_map) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[0]['code'] == ValidationErrors.HED_MISSING_REQUIRED_COLUMN) + + column_map = ["Col1", "Col2", "Col3"] + mapper.set_tag_columns(optional_tag_columns=["Col1", "Col4"]) + mapper.set_column_map(column_map) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 0) + + def test_sidecar_and_columns(self): + mapper = ColumnMapper(Sidecar(self.basic_events_json)) + mapper.set_tag_columns(["Invalid", "Invalid2"]) + mapper.set_column_map(["Invalid", "Invalid2"]) + self.assertTrue(len(mapper._final_column_map) == 2) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[0]['code'] == ValidationErrors.SIDECAR_AND_OTHER_COLUMNS) + + def test_duplicate_list(self): + mapper = ColumnMapper() + mapper.set_tag_columns(["Invalid", "Invalid"]) + self.assertTrue(len(mapper._final_column_map) == 0) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 3) + self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) + + mapper.set_tag_columns([0, 0]) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) + + mapper.set_tag_columns([0, "Column1"]) + mapper.set_column_map(["Column1"]) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) + + def test_duplicate_prefix(self): + mapper = ColumnMapper() + prefix_dict = { + 0: "Label/", + "Column1": "Description" + } + mapper.set_column_prefix_dictionary(prefix_dict) + mapper.set_column_map(["Column1"]) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == ValidationErrors.DUPLICATE_COLUMN_IN_LIST) + + def test_duplicate_cross_lists(self): + mapper = ColumnMapper() + prefix_dict = { + 0: "Label/" + } + mapper.set_tag_columns([0]) + mapper.set_column_prefix_dictionary(prefix_dict) + mapper.set_column_map(["Column1"]) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == + ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) + + mapper = ColumnMapper() + prefix_dict = { + "Column1": "Label/" + } + mapper.set_tag_columns([0]) + mapper.set_column_prefix_dictionary(prefix_dict) + mapper.set_column_map(["Column1"]) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == + ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) + + mapper.set_tag_columns(["Column1"]) + self.assertTrue(len(mapper._final_column_map) == 1) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 1) + self.assertTrue(mapper.check_for_mapping_issues()[-1]['code'] == + ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) + + def test_blank_column(self): + mapper = ColumnMapper() + mapper.set_column_map(["", None]) + self.assertTrue(len(mapper.check_for_mapping_issues()) == 2) + self.assertTrue(mapper.check_for_mapping_issues(allow_blank_names=False)[1]['code'] == + ValidationErrors.HED_BLANK_COLUMN) + self.assertTrue(mapper.check_for_mapping_issues(allow_blank_names=False)[1]['code'] == + ValidationErrors.HED_BLANK_COLUMN) + + def test_optional_column(self): + mapper = ColumnMapper() + mapper.set_tag_columns(tag_columns=["HED"]) + mapper.set_column_map({1: "HED"}) + self.assertTrue(len(mapper._final_column_map) == 1) + + mapper = ColumnMapper() + mapper.set_tag_columns(optional_tag_columns=["HED"]) + mapper.set_column_map({1: "HED"}) + self.assertTrue(len(mapper._final_column_map) == 1) + + mapper = ColumnMapper() + mapper.set_tag_columns(tag_columns=["HED"]) + self.assertTrue(len(mapper._final_column_map) == 0) + self.assertTrue(len(mapper.get_column_mapping_issues()) == 1) + + mapper = ColumnMapper() + mapper.set_tag_columns(optional_tag_columns=["HED"]) + self.assertTrue(len(mapper._final_column_map) == 0) + self.assertTrue(len(mapper.get_column_mapping_issues()) == 0) + + def test_add_json_file_events(self): + mapper = ColumnMapper() + mapper._set_sidecar(Sidecar(self.basic_events_json)) + self.assertTrue(len(mapper.sidecar_column_data) >= 2) + + def test__detect_event_type(self): + mapper = ColumnMapper() + mapper._set_sidecar(Sidecar(self.basic_events_json)) + self.assertTrue(mapper.sidecar_column_data[self.basic_event_name].column_type == self.basic_event_type) + + def test_tag_mapping_complex(self): + tag_columns = [0] + column_prefix_dictionary = {1: "Label/"} + optional_tag_columns = [2] + mapper = ColumnMapper(tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary, + optional_tag_columns=optional_tag_columns) + self.assertEqual(list(mapper._final_column_map), [0, 1, 2]) + self.assertEqual(mapper._final_column_map[0].column_type, ColumnType.HEDTags) + self.assertEqual(mapper._final_column_map[1].column_type, ColumnType.Value) + self.assertEqual(mapper._final_column_map[1].hed_dict, "Label/#") + self.assertEqual(mapper._final_column_map[2].column_type, ColumnType.HEDTags) + + def test_get_def_dict(self): + mapper = ColumnMapper() + def_dict_empty = mapper.get_def_dict(self.hed_schema) + self.assertIsInstance(def_dict_empty, DefinitionDict) + def_dict_base = DefinitionDict("(Definition/TestDef, (Event))", self.hed_schema) + self.assertIsInstance(def_dict_base, DefinitionDict) + self.assertEqual(len(def_dict_base.defs), 1) + def_dict = mapper.get_def_dict(self.hed_schema, extra_def_dicts=def_dict_base) + self.assertIsInstance(def_dict, DefinitionDict) + self.assertEqual(len(def_dict.defs), 1) + + mapper._set_sidecar(Sidecar(self.bids_events_defs)) + def_dict_combined = mapper.get_def_dict(self.hed_schema, extra_def_dicts=def_dict_base) + self.assertIsInstance(def_dict_combined, DefinitionDict) + self.assertEqual(len(def_dict_combined.defs), 4) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index 0f957d72..fc0eebfb 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -66,11 +66,14 @@ def test_definitions(self): 'twoGroupTags': f"(Definition/InvalidDef1,{self.def_contents_string},{self.def_contents_string2})", 'extraValidTags': "(Definition/InvalidDefA, Red, Blue)", 'extraOtherTags': "(Definition/InvalidDef1, InvalidContents)", - 'duplicateDef': f"(Definition/Def1, {self.def_contents_string}), (Definition/Def1, {self.def_contents_string})", - 'duplicateDef2': f"(Definition/Def1, {self.def_contents_string}), (Definition/Def1/#, {self.placeholder_def_contents})", + 'duplicateDef': (f"(Definition/Def1, {self.def_contents_string}), " + f"(Definition/Def1, {self.def_contents_string})"), + 'duplicateDef2': (f"(Definition/Def1, {self.def_contents_string}), " + f"(Definition/Def1/#, {self.placeholder_def_contents})"), 'defTooManyPlaceholders': self.placeholder_invalid_def_string, 'invalidPlaceholder': f"(Definition/InvalidDef1/InvalidPlaceholder, {self.def_contents_string})", - 'invalidPlaceholderExtension': f"(Definition/InvalidDef1/this-part-is-not-allowed/#, {self.def_contents_string})", + 'invalidPlaceholderExtension': + f"(Definition/InvalidDef1/this-part-is-not-allowed/#, {self.def_contents_string})", 'defInGroup': "(Definition/ValidDefName, (Def/ImproperlyPlacedDef))", 'defExpandInGroup': "(Definition/ValidDefName, (Def-expand/ImproperlyPlacedDef, (ImproperContents)))", 'doublePoundSignPlaceholder': f"(Definition/InvalidDef/##, {self.placeholder_def_contents})", diff --git a/tests/models/test_definition_entry.py b/tests/models/test_definition_entry.py index 07854ad8..ead69c87 100644 --- a/tests/models/test_definition_entry.py +++ b/tests/models/test_definition_entry.py @@ -1,16 +1,12 @@ -import unittest - -from hed.models.definition_entry import DefinitionEntry -from hed.models.hed_string import HedString -from hed.schema.hed_schema_io import load_schema_version - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - pass - - -if __name__ == '__main__': - unittest.main() +import unittest + + +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index 434f5d90..247a073b 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -1,584 +1,617 @@ -import unittest -import pandas as pd - - -from hed import load_schema_version -from hed.models.df_util import shrink_defs, expand_defs, convert_to_form, process_def_expands -from hed import DefinitionDict -from hed.models.df_util import _handle_curly_braces_refs, _indexed_dict_from_onsets, _filter_by_index_list, split_delay_tags - - -class TestShrinkDefs(unittest.TestCase): - def setUp(self): - self.schema = load_schema_version("8.3.0") - - def test_shrink_defs_normal(self): - df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) - expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) - shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_shrink_defs_placeholder(self): - df = pd.DataFrame({"column1": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) - expected_df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) - shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_shrink_defs_no_matching_tags(self): - df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]}) - expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]}) - shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_shrink_defs_multiple_columns(self): - df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"], - "column2": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) - expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], - "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) - shrink_defs(df, self.schema, ['column1', 'column2']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_shrink_defs_multiple_defs_same_line(self): - df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Acceleration/30"]}) - expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Acceleration/30"]}) - shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_shrink_defs_mixed_tags(self): - df = pd.DataFrame({"column1": [ - "(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent,(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem,Acceleration/25"]}) - expected_df = pd.DataFrame( - {"column1": ["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Acceleration/25"]}) - shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_shrink_defs_series_normal(self): - series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]) - expected_series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) - shrink_defs(series, self.schema, None) - pd.testing.assert_series_equal(series, expected_series) - - def test_shrink_defs_series_placeholder(self): - series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]) - expected_series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) - shrink_defs(series, self.schema, None) - pd.testing.assert_series_equal(series, expected_series) - - -class TestExpandDefs(unittest.TestCase): - def setUp(self): - self.schema = load_schema_version("8.3.0") - self.def_dict = DefinitionDict(["(Definition/TestDefNormal,(Acceleration/2471,Action/TestDef2))", - "(Definition/TestDefPlaceholder/#,(Acceleration/#,Action/TestDef2))"], - hed_schema=self.schema) - - def test_expand_defs_normal(self): - df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) - expected_df = pd.DataFrame( - {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) - expand_defs(df, self.schema, self.def_dict, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_expand_defs_placeholder(self): - df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) - expected_df = pd.DataFrame({"column1": [ - "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) - expand_defs(df, self.schema, self.def_dict, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_expand_defs_no_matching_tags(self): - df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]}) - expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]}) - expand_defs(df, self.schema, self.def_dict, ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_expand_defs_multiple_columns(self): - df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], - "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) - expected_df = pd.DataFrame( - {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"], - "column2": [ - "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) - expand_defs(df, self.schema, self.def_dict, ['column1', 'column2']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_expand_defs_series_normal(self): - series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) - expected_series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]) - expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(series, expected_series) - - def test_expand_defs_series_placeholder(self): - series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) - expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]) - expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(series, expected_series) - - -class TestConvertToForm(unittest.TestCase): - def setUp(self): - self.schema = load_schema_version("8.2.0") - - def test_convert_to_form_short_tags(self): - df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) - expected_df = pd.DataFrame({"column1": ["Azure,See"]}) - convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_convert_to_form_long_tags(self): - df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) - expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) - convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_convert_to_form_series_short_tags(self): - series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) - expected_series = pd.Series(["Azure,See"]) - convert_to_form(series, self.schema, "short_tag") - pd.testing.assert_series_equal(series, expected_series) - - def test_convert_to_form_series_long_tags(self): - series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) - expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) - convert_to_form(series, self.schema, "long_tag") - pd.testing.assert_series_equal(series, expected_series) - - def test_convert_to_form_multiple_tags_short(self): - df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]}) - convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_convert_to_form_multiple_tags_long(self): - df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(df, expected_df) - - def test_basic_expand_detection(self): - # all simple cases with no duplicates - test_strings = [ - "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/3))", - "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/3))", - "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", - "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", - "(Def-expand/C3/5, (Action/5, Joyful, Event))", - "(Def-expand/C3/6, (Action/6, Joyful, Event))" - ] - process_def_expands(test_strings, self.schema) - - def test_mixed_detection(self): - # Cases where you can only retroactively identify the first def-expand - test_strings = [ - # Basic example first just to verify - "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/2))", - "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/2))", - # Out of order ambiguous - "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", - "(Def-expand/B2/4, (Action/4, Collection/animals, Acceleration/3))", - # Multiple tags - "(Def-expand/C3/5, (Action/5, Acceleration/5, Item-count/5))", - "(Def-expand/C3/6, (Action/6, Acceleration/5, Item-count/5))", - # Multiple tags2 - "(Def-expand/D4/7, (Action/7, Acceleration/7, Item-count/8))", - "(Def-expand/D4/8, (Action/8, Acceleration/7, Item-count/8))" - # Multiple tags3 - "(Def-expand/D5/7, (Action/7, Acceleration/7, Item-count/8, Event))", - "(Def-expand/D5/8, (Action/8, Acceleration/7, Item-count/8, Event))" - ] - def_dict, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) - self.assertEqual(len(def_dict), 5) - - def test_ambiguous_defs(self): - # Cases that can't be identified - test_strings = [ - "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/2))", - "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", - "(Def-expand/C3/5, (Action/5, Acceleration/5, Item-count/5))", - "(Def-expand/D4/7, (Action/7, Acceleration/7, Item-count/8))", - "(Def-expand/D5/7, (Action/7, Acceleration/7, Item-count/8, Event))", - ] - _, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) - self.assertEqual(len(ambiguous_defs), 5) - - def test_ambiguous_conflicting_defs(self): - # This is invalid due to conflicting defs - test_strings = [ - "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", - "(Def-expand/A1/3, (Action/3, Age/4, Item-count/3))", - - # This could be identified, but fails due to the above raising errors - "(Def-expand/A1/4, (Action/4, Age/5, Item-count/2))", - ] - defs, ambiguous, errors = process_def_expands(test_strings, self.schema) - self.assertEqual(len(defs), 0) - self.assertEqual(len(ambiguous), 0) - self.assertEqual(len(errors["a1"]), 3) - - def test_errors(self): - # Basic recognition of conflicting errors - test_strings = [ - "(Def-expand/A1/1, (Action/1, Age/5, Item-count/2))", - "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", - "(Def-expand/A1/3, (Action/3, Age/5, Item-count/3))", - ] - _, _, errors = process_def_expands(test_strings, self.schema) - self.assertEqual(len(errors), 1) - - def test_errors_ambiguous(self): - # Verify we recognize errors when we had a def that can't be resolved. - test_strings = [ - "(Def-expand/A1/1, (Action/1, Age/5, Item-count/1))", - "(Def-expand/A1/2, (Action/2, Age/5, Item-count/3))", - "(Def-expand/A1/3, (Action/3, Age/5, Item-count/3))", - ] - known, ambiguous, errors = process_def_expands(test_strings, self.schema) - self.assertEqual(len(errors), 1) - self.assertEqual(len(errors["a1"]), 3) - - def test_errors_unresolved(self): - # Verify we recognize errors when we had a def that can't be resolved. - test_strings = [ - "(Def-expand/A1/1, (Action/1, Age/5, Item-count/1))", - "(Def-expand/A1/2, (Action/2, Age/5, Item-count/3))", - ] - known, ambiguous, errors = process_def_expands(test_strings, self.schema) - self.assertEqual(len(errors), 1) - self.assertEqual(len(errors["a1"]), 2) - - def test_def_expand_detection(self): - test_strings = [ - "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-Count/2))", - "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-Count/2))", - "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", - "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", - "(Def-expand/C3/5, (Action/5, Joyful, Event))", - "(Def-expand/C3/6, (Action/6, Joyful, Event))", - "((Def-expand/A1/7, (Action/7, Acceleration/5, Item-Count/2)), Event, Acceleration/10)", - "((Def-expand/A1/8, (Action/8, Acceleration/5, Item-Count/2)), Collection/toys, Item-Count/5)", - "((Def-expand/B2/9, (Action/9, Collection/animals, Alert)), Event, Collection/plants)", - "((Def-expand/B2/10, (Action/10, Collection/animals, Alert)), Joyful, Item-Count/3)", - "((Def-expand/C3/11, (Action/11, Joyful, Event)), Collection/vehicles, Acceleration/20)", - "((Def-expand/C3/12, (Action/12, Joyful, Event)), Alert, Item-Count/8)", - "((Def-expand/A1/13, (Action/13, Acceleration/5, Item-Count/2)), (Def-expand/B2/13, (Action/13, Collection/animals, Alert)), Event)", - "((Def-expand/A1/14, (Action/14, Acceleration/5, Item-Count/2)), Joyful, (Def-expand/C3/14, (Action/14, Joyful, Event)))", - "(Def-expand/B2/15, (Action/15, Collection/animals, Alert)), (Def-expand/C3/15, (Action/15, Joyful, Event)), Acceleration/30", - "((Def-expand/A1/16, (Action/16, Acceleration/5, Item-Count/2)), (Def-expand/B2/16, (Action/16, Collection/animals, Alert)), Collection/food)", - "(Def-expand/C3/17, (Action/17, Joyful, Event)), (Def-expand/A1/17, (Action/17, Acceleration/5, Item-Count/2)), Item-Count/6", - "((Def-expand/B2/18, (Action/18, Collection/animals, Alert)), (Def-expand/C3/18, (Action/18, Joyful, Event)), Alert)", - "(Def-expand/D1/Apple, (Task/Apple, Collection/cars, Attribute/color))", - "(Def-expand/D1/Banana, (Task/Banana, Collection/cars, Attribute/color))", - "(Def-expand/E2/Carrot, (Collection/Carrot, Collection/plants, Attribute/type))", - "(Def-expand/E2/Dog, (Collection/Dog, Collection/plants, Attribute/type))", - "((Def-expand/D1/Elephant, (Task/Elephant, Collection/cars, Attribute/color)), (Def-expand/E2/Fox, (Collection/Fox, Collection/plants, Attribute/type)), Event)", - "((Def-expand/D1/Giraffe, (Task/Giraffe, Collection/cars, Attribute/color)), Joyful, (Def-expand/E2/Horse, (Collection/Horse, Collection/plants, Attribute/type)))", - "(Def-expand/D1/Iguana, (Task/Iguana, Collection/cars, Attribute/color)), (Def-expand/E2/Jaguar, (Collection/Jaguar, Collection/plants, Attribute/type)), Acceleration/30", - "(Def-expand/F1/Lion, (Task/Lion, Collection/boats, Attribute/length))", - "(Def-expand/F1/Monkey, (Task/Monkey, Collection/boats, Attribute/length))", - "(Def-expand/G2/Nest, (Collection/Nest, Collection/instruments, Attribute/material))", - "(Def-expand/G2/Octopus, (Collection/Octopus, Collection/instruments, Attribute/material))", - "((Def-expand/F1/Panda, (Task/Panda, Collection/boats, Attribute/length)), (Def-expand/G2/Quail, (Collection/Quail, Collection/instruments, Attribute/material)), Event)", - "((Def-expand/F1/Rabbit, (Task/Rabbit, Collection/boats, Attribute/length)), Joyful, (Def-expand/G2/Snake, (Collection/Snake, Collection/instruments, Attribute/material)))", - "(Def-expand/F1/Turtle, (Task/Turtle, Collection/boats, Attribute/length)), (Def-expand/G2/Umbrella, (Collection/Umbrella, Collection/instruments, Attribute/material))" - ] - - def_dict, ambiguous, errors = process_def_expands(test_strings, self.schema) - self.assertEqual(len(def_dict), 7) - self.assertEqual(len(ambiguous), 0) - self.assertEqual(len(errors), 0) - -class TestInsertColumns(unittest.TestCase): - - def test_insert_columns_simple(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, Action"], - "column2": ["Item"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_multiple_rows(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, Action", "Event, Action"], - "column2": ["Item", "Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action", "Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_multiple_columns(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, {column3}, Action"], - "column2": ["Item"], - "column3": ["Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_four_columns(self): - df = pd.DataFrame({ - "column1": ["{column2}, Event, {column3}, Action"], - "column2": ["Item"], - "column3": ["Subject"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"], - "column4": ["Data"] - }) - result = _handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["Item"], - "column3": ["Subject"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["(Item, (Subject, Data)), Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["Data"], - "column3": ["n/a"], - "column4": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["(Data), Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_na_values2(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["n/a"], - "column3": ["n/a"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["((Data)), Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_mixed_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["n/a"], - "column3": ["Subject"], - "column4": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["((Subject)), Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_nested_parentheses_all_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}, ({column3}, {column4})), Event, Action"], - "column2": ["n/a"], - "column3": ["n/a"], - "column4": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_parentheses(self): - df = pd.DataFrame({ - "column1": ["({column2}), Event, Action"], - "column2": ["Item"] - }) - expected_df = pd.DataFrame({ - "column1": ["(Item), Event, Action"] - }) - result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_with_parentheses_na_values(self): - df = pd.DataFrame({ - "column1": ["({column2}), Event, Action"], - "column2": ["n/a"], - "column3": ["n/a"] - }) - expected_df = pd.DataFrame({ - "column1": ["Event, Action"], - "column3": ["n/a"] - }) - result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) - pd.testing.assert_frame_equal(result, expected_df) - -class TestOnsetDict(unittest.TestCase): - def test_empty_and_single_onset(self): - self.assertEqual(_indexed_dict_from_onsets([]), {}) - self.assertEqual(_indexed_dict_from_onsets([3.5]), {3.5: [0]}) - - def test_identical_and_approx_equal_onsets(self): - self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5]), {3.5: [0, 1]}) - self.assertEqual(_indexed_dict_from_onsets([3.5, 3.500000001]), {3.5: [0], 3.500000001: [1]}) - self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5000000000001]), {3.5: [0, 1]}) - - def test_distinct_and_mixed_onsets(self): - self.assertEqual(_indexed_dict_from_onsets([3.5, 4.0, 4.4]), {3.5: [0], 4.0: [1], 4.4: [2]}) - self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5, 4.0, 4.4]), {3.5: [0, 1], 4.0: [2], 4.4: [3]}) - self.assertEqual(_indexed_dict_from_onsets([4.0, 3.5, 4.4, 4.4]), {4.0: [0], 3.5: [1], 4.4: [2, 3]}) - - def test_complex_onsets(self): - # Negative, zero, and positive onsets - self.assertEqual(_indexed_dict_from_onsets([-1.0, 0.0, 1.0]), {-1.0: [0], 0.0: [1], 1.0: [2]}) - - # Very close but distinct onsets - self.assertEqual(_indexed_dict_from_onsets([1.0, 1.0 + 1e-8, 1.0 + 2e-8]), - {1.0: [0], 1.0 + 1e-8: [1], 1.0 + 2e-8: [2]}) - # Very close - self.assertEqual(_indexed_dict_from_onsets([1.0, 1.0 + 1e-10, 1.0 + 2e-10]), - {1.0: [0, 1, 2]}) - - # Mixed scenario - self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5, 4.0, 4.4, 4.4, -1.0]), - {3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]}) - - def test_empty_and_single_item_series(self): - self.assertTrue(_filter_by_index_list(pd.Series([], dtype=str), {}).equals(pd.Series([], dtype=str))) - self.assertTrue(_filter_by_index_list(pd.Series(["apple"]), {0: [0]}).equals(pd.Series(["apple"]))) - - def test_two_item_series_with_same_onset(self): - input_series = pd.Series(["apple", "orange"]) - expected_series = pd.Series(["apple,orange", ""]) - self.assertTrue(_filter_by_index_list(input_series, {0: [0, 1]}).equals(expected_series)) - - def test_multiple_item_series(self): - input_series = pd.Series(["apple", "orange", "banana", "mango"]) - indexed_dict = {0: [0, 1], 1: [2], 2: [3]} - expected_series = pd.Series(["apple,orange", "", "banana", "mango"]) - self.assertTrue(_filter_by_index_list(input_series, indexed_dict).equals(expected_series)) - - def test_complex_scenarios(self): - # Test with negative, zero and positive onsets - original = pd.Series(["negative", "zero", "positive"]) - indexed_dict = {-1: [0], 0: [1], 1: [2]} - expected_series1 = pd.Series(["negative", "zero", "positive"]) - self.assertTrue(_filter_by_index_list(original, indexed_dict).equals(expected_series1)) - - # Test with more complex indexed_dict - original2 = pd.Series(["apple", "orange", "banana", "mango", "grape"]) - indexed_dict2= {0: [0, 1], 1: [2], 2: [3, 4]} - expected_series2 = pd.Series(["apple,orange", "", "banana", "mango,grape", ""]) - self.assertTrue(_filter_by_index_list(original2, indexed_dict2).equals(expected_series2)) - - def test_empty_and_single_item_series_df(self): - self.assertTrue(_filter_by_index_list(pd.DataFrame([], columns=["HED", "Extra"]), {}).equals( - pd.DataFrame([], columns=["HED", "Extra"]))) - self.assertTrue( - _filter_by_index_list(pd.DataFrame([["apple", "extra1"]], columns=["HED", "Extra"]), {0: [0]}).equals( - pd.DataFrame([["apple", "extra1"]], columns=["HED", "Extra"]))) - - def test_two_item_series_with_same_onset_df(self): - input_df = pd.DataFrame([["apple", "extra1"], ["orange", "extra2"]], columns=["HED", "Extra"]) - expected_df = pd.DataFrame([["apple,orange", "extra1"], ["", "extra2"]], columns=["HED", "Extra"]) - self.assertTrue(_filter_by_index_list(input_df, {0: [0, 1]}).equals(expected_df)) - - def test_multiple_item_series_df(self): - input_df = pd.DataFrame([["apple", "extra1"], ["orange", "extra2"], ["banana", "extra3"], ["mango", "extra4"]], - columns=["HED", "Extra"]) - indexed_dict = {0: [0, 1], 1: [2], 2: [3]} - expected_df = pd.DataFrame( - [["apple,orange", "extra1"], ["", "extra2"], ["banana", "extra3"], ["mango", "extra4"]], - columns=["HED", "Extra"]) - self.assertTrue(_filter_by_index_list(input_df, indexed_dict).equals(expected_df)) - - def test_complex_scenarios_df(self): - # Test with negative, zero, and positive onsets - original = pd.DataFrame([["negative", "extra1"], ["zero", "extra2"], ["positive", "extra3"]], - columns=["HED", "Extra"]) - indexed_dict = {-1: [0], 0: [1], 1: [2]} - expected_df = pd.DataFrame([["negative", "extra1"], ["zero", "extra2"], ["positive", "extra3"]], - columns=["HED", "Extra"]) - self.assertTrue(_filter_by_index_list(original, indexed_dict).equals(expected_df)) - - # Test with more complex indexed_dict - original2 = pd.DataFrame( - [["apple", "extra1"], ["orange", "extra2"], ["banana", "extra3"], ["mango", "extra4"], ["grape", "extra5"]], - columns=["HED", "Extra"]) - indexed_dict2 = {0: [0, 1], 1: [2], 2: [3, 4]} - expected_df2 = pd.DataFrame( - [["apple,orange", "extra1"], ["", "extra2"], ["banana", "extra3"], ["mango,grape", "extra4"], - ["", "extra5"]], columns=["HED", "Extra"]) - self.assertTrue(_filter_by_index_list(original2, indexed_dict2).equals(expected_df2)) - - - -class TestSplitDelayTags(unittest.TestCase): - schema = load_schema_version("8.3.0") - def test_empty_series_and_onsets(self): - empty_series = pd.Series([], dtype="object") - empty_onsets = pd.Series([], dtype="float") - result = split_delay_tags(empty_series, self.schema, empty_onsets) - self.assertIsInstance(result, pd.DataFrame) - - def test_None_series_and_onsets(self): - result = split_delay_tags(None, self.schema, None) - self.assertIsNone(result) - - def test_normal_ordered_series(self): - series = pd.Series([ - "Tag1,Tag2", - "Tag3,Tag4" - ]) - onsets = pd.Series([1.0, 2.0]) - result = split_delay_tags(series, self.schema, onsets) - self.assertTrue(result.onset.equals(pd.Series([1.0, 2.0]))) - self.assertTrue(result.HED.equals(pd.Series([ - "Tag1,Tag2", - "Tag3,Tag4" - ]))) - - def test_normal_ordered_series_with_delays(self): - series = pd.Series([ - "Tag1,Tag2,(Delay/3.0 s,(Tag5))", - "Tag3,Tag4" - ]) - onsets = pd.Series([1.0, 2.0]) - result = split_delay_tags(series, self.schema, onsets) - self.assertTrue(result.onset.equals(pd.Series([1.0, 2.0, 4.0]))) - self.assertTrue(result.HED.equals(pd.Series([ - "Tag1,Tag2", - "Tag3,Tag4", - "(Delay/3.0 s,(Tag5))" - ]))) - - def test_normal_ordered_series_with_double_delays(self): - series = pd.Series([ - "Tag1,Tag2,(Delay/3.0 s,(Tag5))", - "Tag6,(Delay/2.0 s,(Tag7))", - "Tag3,Tag4" - ]) - onsets = pd.Series([1.0, 2.0, 3.0]) - result = split_delay_tags(series, self.schema, onsets) - self.assertTrue(result.onset.equals(pd.Series([1.0, 2.0, 3.0, 4.0, 4.0]))) - self.assertTrue(result.HED.equals(pd.Series([ - "Tag1,Tag2", - "Tag6", - "Tag3,Tag4", - "(Delay/3.0 s,(Tag5)),(Delay/2.0 s,(Tag7))", - "" - ]))) - self.assertTrue(result.original_index.equals(pd.Series([0, 1, 2, 0, 1]))) \ No newline at end of file +import unittest +import pandas as pd + + +from hed import load_schema_version +from hed.models.df_util import shrink_defs, expand_defs, convert_to_form, process_def_expands +from hed import DefinitionDict +from hed.models.df_util import (_handle_curly_braces_refs, _indexed_dict_from_onsets, + _filter_by_index_list, split_delay_tags) + + +class TestShrinkDefs(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version("8.3.0") + + def test_shrink_defs_normal(self): + df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_shrink_defs_placeholder(self): + df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_shrink_defs_no_matching_tags(self): + df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]}) + expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]}) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_shrink_defs_multiple_columns(self): + df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"], + "column2": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], + "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + shrink_defs(df, self.schema, ['column1', 'column2']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_shrink_defs_multiple_defs_same_line(self): + df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2))," + + "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Acceleration/30"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Acceleration/30"]}) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_shrink_defs_mixed_tags(self): + df = pd.DataFrame({"column1": [ + "(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent," + + "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem,Acceleration/25"]}) + expected_df = pd.DataFrame( + {"column1": ["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Acceleration/25"]}) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_shrink_defs_series_normal(self): + series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]) + expected_series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) + shrink_defs(series, self.schema, None) + pd.testing.assert_series_equal(series, expected_series) + + def test_shrink_defs_series_placeholder(self): + series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]) + expected_series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) + shrink_defs(series, self.schema, None) + pd.testing.assert_series_equal(series, expected_series) + + +class TestExpandDefs(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version("8.3.0") + self.def_dict = DefinitionDict(["(Definition/TestDefNormal,(Acceleration/2471,Action/TestDef2))", + "(Definition/TestDefPlaceholder/#,(Acceleration/#,Action/TestDef2))"], + hed_schema=self.schema) + + def test_expand_defs_normal(self): + df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) + expected_df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) + expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_expand_defs_placeholder(self): + df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + expected_df = pd.DataFrame({"column1": [ + "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) + expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_expand_defs_no_matching_tags(self): + df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]}) + expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]}) + expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_expand_defs_multiple_columns(self): + df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], + "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + expected_df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"], + "column2": [ + "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) + expand_defs(df, self.schema, self.def_dict, ['column1', 'column2']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_expand_defs_series_normal(self): + series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) + expected_series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]) + expand_defs(series, self.schema, self.def_dict, None) + pd.testing.assert_series_equal(series, expected_series) + + def test_expand_defs_series_placeholder(self): + series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) + expected_series = pd.Series( + ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]) + expand_defs(series, self.schema, self.def_dict, None) + pd.testing.assert_series_equal(series, expected_series) + + +class TestConvertToForm(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version("8.2.0") + + def test_convert_to_form_short_tags(self): + df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/" + + "Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Azure,See"]}) + convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_convert_to_form_long_tags(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/" + + "Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_convert_to_form_series_short_tags(self): + series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/" + + "Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Azure,See"]) + convert_to_form(series, self.schema, "short_tag") + pd.testing.assert_series_equal(series, expected_series) + + def test_convert_to_form_series_long_tags(self): + series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/" + + "Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + convert_to_form(series, self.schema, "long_tag") + pd.testing.assert_series_equal(series, expected_series) + + def test_convert_to_form_multiple_tags_short(self): + df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/" + + "Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/" + + "Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]}) + convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_convert_to_form_multiple_tags_long(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/" + + "Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/" + + "Color/CSS-color/White-color/Azure,Item/Biological-item/" + + "Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/" + + "Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/" + + "4.5 m-per-s^2"]}) + convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) + + def test_basic_expand_detection(self): + # all simple cases with no duplicates + test_strings = [ + "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/3))", + "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/3))", + "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", + "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", + "(Def-expand/C3/5, (Action/5, Joyful, Event))", + "(Def-expand/C3/6, (Action/6, Joyful, Event))" + ] + process_def_expands(test_strings, self.schema) + + def test_mixed_detection(self): + # Cases where you can only retroactively identify the first def-expand + test_strings = [ + # Basic example first just to verify + "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/2))", + "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/2))", + # Out of order ambiguous + "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", + "(Def-expand/B2/4, (Action/4, Collection/animals, Acceleration/3))", + # Multiple tags + "(Def-expand/C3/5, (Action/5, Acceleration/5, Item-count/5))", + "(Def-expand/C3/6, (Action/6, Acceleration/5, Item-count/5))", + # Multiple tags2 + "(Def-expand/D4/7, (Action/7, Acceleration/7, Item-count/8))", + "(Def-expand/D4/8, (Action/8, Acceleration/7, Item-count/8))" + # Multiple tags3 + "(Def-expand/D5/7, (Action/7, Acceleration/7, Item-count/8, Event))", + "(Def-expand/D5/8, (Action/8, Acceleration/7, Item-count/8, Event))" + ] + def_dict, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) + self.assertEqual(len(def_dict), 5) + + def test_ambiguous_defs(self): + # Cases that can't be identified + test_strings = [ + "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/2))", + "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", + "(Def-expand/C3/5, (Action/5, Acceleration/5, Item-count/5))", + "(Def-expand/D4/7, (Action/7, Acceleration/7, Item-count/8))", + "(Def-expand/D5/7, (Action/7, Acceleration/7, Item-count/8, Event))", + ] + _, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) + self.assertEqual(len(ambiguous_defs), 5) + + def test_ambiguous_conflicting_defs(self): + # This is invalid due to conflicting defs + test_strings = [ + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", + "(Def-expand/A1/3, (Action/3, Age/4, Item-count/3))", + + # This could be identified, but fails due to the above raising errors + "(Def-expand/A1/4, (Action/4, Age/5, Item-count/2))", + ] + defs, ambiguous, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(defs), 0) + self.assertEqual(len(ambiguous), 0) + self.assertEqual(len(errors["a1"]), 3) + + def test_errors(self): + # Basic recognition of conflicting errors + test_strings = [ + "(Def-expand/A1/1, (Action/1, Age/5, Item-count/2))", + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", + "(Def-expand/A1/3, (Action/3, Age/5, Item-count/3))", + ] + _, _, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(errors), 1) + + def test_errors_ambiguous(self): + # Verify we recognize errors when we had a def that can't be resolved. + test_strings = [ + "(Def-expand/A1/1, (Action/1, Age/5, Item-count/1))", + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/3))", + "(Def-expand/A1/3, (Action/3, Age/5, Item-count/3))", + ] + known, ambiguous, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(errors), 1) + self.assertEqual(len(errors["a1"]), 3) + + def test_errors_unresolved(self): + # Verify we recognize errors when we had a def that can't be resolved. + test_strings = [ + "(Def-expand/A1/1, (Action/1, Age/5, Item-count/1))", + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/3))", + ] + known, ambiguous, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(errors), 1) + self.assertEqual(len(errors["a1"]), 2) + + def test_def_expand_detection(self): + test_strings = [ + "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-Count/2))", + "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-Count/2))", + "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", + "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", + "(Def-expand/C3/5, (Action/5, Joyful, Event))", + "(Def-expand/C3/6, (Action/6, Joyful, Event))", + "((Def-expand/A1/7, (Action/7, Acceleration/5, Item-Count/2)), Event, Acceleration/10)", + "((Def-expand/A1/8, (Action/8, Acceleration/5, Item-Count/2)), Collection/toys, Item-Count/5)", + "((Def-expand/B2/9, (Action/9, Collection/animals, Alert)), Event, Collection/plants)", + "((Def-expand/B2/10, (Action/10, Collection/animals, Alert)), Joyful, Item-Count/3)", + "((Def-expand/C3/11, (Action/11, Joyful, Event)), Collection/vehicles, Acceleration/20)", + "((Def-expand/C3/12, (Action/12, Joyful, Event)), Alert, Item-Count/8)", + "((Def-expand/A1/13, (Action/13, Acceleration/5, Item-Count/2)), " + + "(Def-expand/B2/13, (Action/13, Collection/animals, Alert)), Event)", + "((Def-expand/A1/14, (Action/14, Acceleration/5, Item-Count/2)), Joyful, " + + "(Def-expand/C3/14, (Action/14, Joyful, Event)))", + "(Def-expand/B2/15, (Action/15, Collection/animals, Alert)), (Def-expand/C3/15, " + + "(Action/15, Joyful, Event)), Acceleration/30", + "((Def-expand/A1/16, (Action/16, Acceleration/5, Item-Count/2)), " + + "(Def-expand/B2/16, (Action/16, Collection/animals, Alert)), Collection/food)", + "(Def-expand/C3/17, (Action/17, Joyful, Event)), (Def-expand/A1/17, " + + "(Action/17, Acceleration/5, Item-Count/2)), Item-Count/6", + "((Def-expand/B2/18, (Action/18, Collection/animals, Alert)), " + + "(Def-expand/C3/18, (Action/18, Joyful, Event)), Alert)", + "(Def-expand/D1/Apple, (Task/Apple, Collection/cars, Attribute/color))", + "(Def-expand/D1/Banana, (Task/Banana, Collection/cars, Attribute/color))", + "(Def-expand/E2/Carrot, (Collection/Carrot, Collection/plants, Attribute/type))", + "(Def-expand/E2/Dog, (Collection/Dog, Collection/plants, Attribute/type))", + "((Def-expand/D1/Elephant, (Task/Elephant, Collection/cars, Attribute/color)), " + + "(Def-expand/E2/Fox, (Collection/Fox, Collection/plants, Attribute/type)), Event)", + "((Def-expand/D1/Giraffe, (Task/Giraffe, Collection/cars, Attribute/color)), " + + "Joyful, (Def-expand/E2/Horse, (Collection/Horse, Collection/plants, Attribute/type)))", + "(Def-expand/D1/Iguana, (Task/Iguana, Collection/cars, Attribute/color)), " + + "(Def-expand/E2/Jaguar, (Collection/Jaguar, Collection/plants, Attribute/type)), Acceleration/30", + "(Def-expand/F1/Lion, (Task/Lion, Collection/boats, Attribute/length))", + "(Def-expand/F1/Monkey, (Task/Monkey, Collection/boats, Attribute/length))", + "(Def-expand/G2/Nest, (Collection/Nest, Collection/instruments, Attribute/material))", + "(Def-expand/G2/Octopus, (Collection/Octopus, Collection/instruments, Attribute/material))", + "((Def-expand/F1/Panda, (Task/Panda, Collection/boats, Attribute/length)), " + + "(Def-expand/G2/Quail, (Collection/Quail, Collection/instruments, Attribute/material)), Event)", + "((Def-expand/F1/Rabbit, (Task/Rabbit, Collection/boats, Attribute/length)), Joyful, " + + "(Def-expand/G2/Snake, (Collection/Snake, Collection/instruments, Attribute/material)))", + "(Def-expand/F1/Turtle, (Task/Turtle, Collection/boats, Attribute/length)), " + + "(Def-expand/G2/Umbrella, (Collection/Umbrella, Collection/instruments, Attribute/material))" + ] + + def_dict, ambiguous, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(def_dict), 7) + self.assertEqual(len(ambiguous), 0) + self.assertEqual(len(errors), 0) + + +class TestInsertColumns(unittest.TestCase): + + def test_insert_columns_simple(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, Action"], + "column2": ["Item"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_multiple_rows(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, Action", "Event, Action"], + "column2": ["Item", "Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action", "Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_multiple_columns(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, {column3}, Action"], + "column2": ["Item"], + "column3": ["Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_four_columns(self): + df = pd.DataFrame({ + "column1": ["{column2}, Event, {column3}, Action"], + "column2": ["Item"], + "column3": ["Subject"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"], + "column4": ["Data"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["Item"], + "column3": ["Subject"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["(Item, (Subject, Data)), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["Data"], + "column3": ["n/a"], + "column4": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["(Data), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_na_values2(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["n/a"], + "column3": ["n/a"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["((Data)), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_mixed_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["n/a"], + "column3": ["Subject"], + "column4": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["((Subject)), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_nested_parentheses_all_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}, ({column3}, {column4})), Event, Action"], + "column2": ["n/a"], + "column3": ["n/a"], + "column4": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2", "column3", "column4"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_parentheses(self): + df = pd.DataFrame({ + "column1": ["({column2}), Event, Action"], + "column2": ["Item"] + }) + expected_df = pd.DataFrame({ + "column1": ["(Item), Event, Action"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_with_parentheses_na_values(self): + df = pd.DataFrame({ + "column1": ["({column2}), Event, Action"], + "column2": ["n/a"], + "column3": ["n/a"] + }) + expected_df = pd.DataFrame({ + "column1": ["Event, Action"], + "column3": ["n/a"] + }) + result = _handle_curly_braces_refs(df, refs=["column2"], column_names=df.columns) + pd.testing.assert_frame_equal(result, expected_df) + + +class TestOnsetDict(unittest.TestCase): + def test_empty_and_single_onset(self): + self.assertEqual(_indexed_dict_from_onsets([]), {}) + self.assertEqual(_indexed_dict_from_onsets([3.5]), {3.5: [0]}) + + def test_identical_and_approx_equal_onsets(self): + self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5]), {3.5: [0, 1]}) + self.assertEqual(_indexed_dict_from_onsets([3.5, 3.500000001]), {3.5: [0], 3.500000001: [1]}) + self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5000000000001]), {3.5: [0, 1]}) + + def test_distinct_and_mixed_onsets(self): + self.assertEqual(_indexed_dict_from_onsets([3.5, 4.0, 4.4]), {3.5: [0], 4.0: [1], 4.4: [2]}) + self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5, 4.0, 4.4]), {3.5: [0, 1], 4.0: [2], 4.4: [3]}) + self.assertEqual(_indexed_dict_from_onsets([4.0, 3.5, 4.4, 4.4]), {4.0: [0], 3.5: [1], 4.4: [2, 3]}) + + def test_complex_onsets(self): + # Negative, zero, and positive onsets + self.assertEqual(_indexed_dict_from_onsets([-1.0, 0.0, 1.0]), {-1.0: [0], 0.0: [1], 1.0: [2]}) + + # Very close but distinct onsets + self.assertEqual(_indexed_dict_from_onsets([1.0, 1.0 + 1e-8, 1.0 + 2e-8]), + {1.0: [0], 1.0 + 1e-8: [1], 1.0 + 2e-8: [2]}) + # Very close + self.assertEqual(_indexed_dict_from_onsets([1.0, 1.0 + 1e-10, 1.0 + 2e-10]), + {1.0: [0, 1, 2]}) + + # Mixed scenario + self.assertEqual(_indexed_dict_from_onsets([3.5, 3.5, 4.0, 4.4, 4.4, -1.0]), + {3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]}) + + def test_empty_and_single_item_series(self): + self.assertTrue(_filter_by_index_list(pd.Series([], dtype=str), {}).equals(pd.Series([], dtype=str))) + self.assertTrue(_filter_by_index_list(pd.Series(["apple"]), {0: [0]}).equals(pd.Series(["apple"]))) + + def test_two_item_series_with_same_onset(self): + input_series = pd.Series(["apple", "orange"]) + expected_series = pd.Series(["apple,orange", ""]) + self.assertTrue(_filter_by_index_list(input_series, {0: [0, 1]}).equals(expected_series)) + + def test_multiple_item_series(self): + input_series = pd.Series(["apple", "orange", "banana", "mango"]) + indexed_dict = {0: [0, 1], 1: [2], 2: [3]} + expected_series = pd.Series(["apple,orange", "", "banana", "mango"]) + self.assertTrue(_filter_by_index_list(input_series, indexed_dict).equals(expected_series)) + + def test_complex_scenarios(self): + # Test with negative, zero and positive onsets + original = pd.Series(["negative", "zero", "positive"]) + indexed_dict = {-1: [0], 0: [1], 1: [2]} + expected_series1 = pd.Series(["negative", "zero", "positive"]) + self.assertTrue(_filter_by_index_list(original, indexed_dict).equals(expected_series1)) + + # Test with more complex indexed_dict + original2 = pd.Series(["apple", "orange", "banana", "mango", "grape"]) + indexed_dict2 = {0: [0, 1], 1: [2], 2: [3, 4]} + expected_series2 = pd.Series(["apple,orange", "", "banana", "mango,grape", ""]) + self.assertTrue(_filter_by_index_list(original2, indexed_dict2).equals(expected_series2)) + + def test_empty_and_single_item_series_df(self): + self.assertTrue(_filter_by_index_list(pd.DataFrame([], columns=["HED", "Extra"]), {}).equals( + pd.DataFrame([], columns=["HED", "Extra"]))) + self.assertTrue( + _filter_by_index_list(pd.DataFrame([["apple", "extra1"]], columns=["HED", "Extra"]), {0: [0]}).equals( + pd.DataFrame([["apple", "extra1"]], columns=["HED", "Extra"]))) + + def test_two_item_series_with_same_onset_df(self): + input_df = pd.DataFrame([["apple", "extra1"], ["orange", "extra2"]], columns=["HED", "Extra"]) + expected_df = pd.DataFrame([["apple,orange", "extra1"], ["", "extra2"]], columns=["HED", "Extra"]) + self.assertTrue(_filter_by_index_list(input_df, {0: [0, 1]}).equals(expected_df)) + + def test_multiple_item_series_df(self): + input_df = pd.DataFrame([["apple", "extra1"], ["orange", "extra2"], ["banana", "extra3"], ["mango", "extra4"]], + columns=["HED", "Extra"]) + indexed_dict = {0: [0, 1], 1: [2], 2: [3]} + expected_df = pd.DataFrame( + [["apple,orange", "extra1"], ["", "extra2"], ["banana", "extra3"], ["mango", "extra4"]], + columns=["HED", "Extra"]) + self.assertTrue(_filter_by_index_list(input_df, indexed_dict).equals(expected_df)) + + def test_complex_scenarios_df(self): + # Test with negative, zero, and positive onsets + original = pd.DataFrame([["negative", "extra1"], ["zero", "extra2"], ["positive", "extra3"]], + columns=["HED", "Extra"]) + indexed_dict = {-1: [0], 0: [1], 1: [2]} + expected_df = pd.DataFrame([["negative", "extra1"], ["zero", "extra2"], ["positive", "extra3"]], + columns=["HED", "Extra"]) + self.assertTrue(_filter_by_index_list(original, indexed_dict).equals(expected_df)) + + # Test with more complex indexed_dict + original2 = pd.DataFrame( + [["apple", "extra1"], ["orange", "extra2"], ["banana", "extra3"], ["mango", "extra4"], ["grape", "extra5"]], + columns=["HED", "Extra"]) + indexed_dict2 = {0: [0, 1], 1: [2], 2: [3, 4]} + expected_df2 = pd.DataFrame( + [["apple,orange", "extra1"], ["", "extra2"], ["banana", "extra3"], ["mango,grape", "extra4"], + ["", "extra5"]], columns=["HED", "Extra"]) + self.assertTrue(_filter_by_index_list(original2, indexed_dict2).equals(expected_df2)) + + +class TestSplitDelayTags(unittest.TestCase): + schema = load_schema_version("8.3.0") + + def test_empty_series_and_onsets(self): + empty_series = pd.Series([], dtype="object") + empty_onsets = pd.Series([], dtype="float") + result = split_delay_tags(empty_series, self.schema, empty_onsets) + self.assertIsInstance(result, pd.DataFrame) + + def test_None_series_and_onsets(self): + result = split_delay_tags(None, self.schema, None) + self.assertIsNone(result) + + def test_normal_ordered_series(self): + series = pd.Series([ + "Tag1,Tag2", + "Tag3,Tag4" + ]) + onsets = pd.Series([1.0, 2.0]) + result = split_delay_tags(series, self.schema, onsets) + self.assertTrue(result.onset.equals(pd.Series([1.0, 2.0]))) + self.assertTrue(result.HED.equals(pd.Series([ + "Tag1,Tag2", + "Tag3,Tag4" + ]))) + + def test_normal_ordered_series_with_delays(self): + series = pd.Series([ + "Tag1,Tag2,(Delay/3.0 s,(Tag5))", + "Tag3,Tag4" + ]) + onsets = pd.Series([1.0, 2.0]) + result = split_delay_tags(series, self.schema, onsets) + self.assertTrue(result.onset.equals(pd.Series([1.0, 2.0, 4.0]))) + self.assertTrue(result.HED.equals(pd.Series([ + "Tag1,Tag2", + "Tag3,Tag4", + "(Delay/3.0 s,(Tag5))" + ]))) + + def test_normal_ordered_series_with_double_delays(self): + series = pd.Series([ + "Tag1,Tag2,(Delay/3.0 s,(Tag5))", + "Tag6,(Delay/2.0 s,(Tag7))", + "Tag3,Tag4" + ]) + onsets = pd.Series([1.0, 2.0, 3.0]) + result = split_delay_tags(series, self.schema, onsets) + self.assertTrue(result.onset.equals(pd.Series([1.0, 2.0, 3.0, 4.0, 4.0]))) + self.assertTrue(result.HED.equals(pd.Series([ + "Tag1,Tag2", + "Tag6", + "Tag3,Tag4", + "(Delay/3.0 s,(Tag5)),(Delay/2.0 s,(Tag7))", + "" + ]))) + self.assertTrue(result.original_index.equals(pd.Series([0, 1, 2, 0, 1]))) diff --git a/tests/models/test_hed_group.py b/tests/models/test_hed_group.py index 1a3632bf..22d7939c 100644 --- a/tests/models/test_hed_group.py +++ b/tests/models/test_hed_group.py @@ -1,136 +1,137 @@ -import unittest -import os - -from hed import schema -from hed.models import HedString -import copy - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') - hed_xml_file = os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml") - cls.hed_schema = schema.load_schema(hed_xml_file) - - def test_remove_groups(self): - from hed.models.definition_dict import DefTagNames - basic_definition_string = "(Definition/TestDef, (Keypad-key/TestDef1,Keyboard-key/TestDef2))" - basic_definition_string_repeated = f"{basic_definition_string},{basic_definition_string}" - def_string_with_repeat = HedString(basic_definition_string_repeated, self.hed_schema) - definition_tags = def_string_with_repeat.find_tags({DefTagNames.DEFINITION_KEY}, - recursive=True, include_groups=1) - definition_tag2 = definition_tags[1] - def_string_with_repeat.remove([definition_tag2]) - remaining_children = def_string_with_repeat.get_all_groups() - for child in remaining_children: - if child is definition_tag2: - self.assertFalse(False, "Definition tag not removed successfully") - - basic_definition_string_repeated_subgroup = \ - f"{basic_definition_string},{basic_definition_string}, ({basic_definition_string})" - def_string_with_repeat = HedString(basic_definition_string_repeated_subgroup, self.hed_schema) - definition_tags = def_string_with_repeat.find_tags({DefTagNames.DEFINITION_KEY}, - recursive=True, include_groups=1) - definition_tag3 = definition_tags[2] - def_string_with_repeat.remove([definition_tag3]) - remaining_children = def_string_with_repeat.get_all_groups() - for child in remaining_children: - if child is definition_tag3: - self.assertFalse(False, "Nested definition tag not removed successfully") - - basic_definition_string_repeated_subgroup = \ - f"{basic_definition_string},{basic_definition_string}, ({basic_definition_string})" - def_string_with_repeat = HedString(basic_definition_string_repeated_subgroup, self.hed_schema) - definition_tags = def_string_with_repeat.find_tags({DefTagNames.DEFINITION_KEY}, - recursive=True, include_groups=1) - definition_tag2 = definition_tags[1] - def_string_with_repeat.remove([definition_tag2]) - remaining_children = def_string_with_repeat.get_all_groups() - for child in remaining_children: - if child is definition_tag2: - self.assertFalse(False, "Nested definition tag not removed successfully") - - def test_find_tags_with_term(self): - basic_hed_string = \ - "(Keypad-key/TestDef1,Keyboard-key/TestDef2, Item/Object, Event), Event, Object, Geometric-object" - basic_hed_string_obj = HedString(basic_hed_string, self.hed_schema) - # works - located_tags = basic_hed_string_obj.find_tags_with_term("Object", recursive=True, include_groups=0) - self.assertEqual(len(located_tags), 5) - # located tags now has found all 5 hed tags - - # This will find no tags - located_tags = basic_hed_string_obj.find_tags_with_term("reject", recursive=True, include_groups=0) - self.assertEqual(len(located_tags), 0) - - # this will also find no tags - located_tags = basic_hed_string_obj.find_tags_with_term("Item/Object", recursive=True, include_groups=0) - self.assertEqual(len(located_tags), 0) - - def _compare_strings(self, hed_strings): - str1 = HedString(hed_strings[0], self.hed_schema) - str1.sort() - for hed_string in hed_strings: - str2 = HedString(hed_string, self.hed_schema) - str2.sort() - self.assertEqual(str1, str2) - - def _compare_strings2(self, hed_strings): - str1 = HedString(hed_strings[0], self.hed_schema) - for hed_string in hed_strings: - str2 = HedString(hed_string, self.hed_schema) - self.assertEqual(str1.sorted(), str2.sorted()) - - def test_sort_and_sorted(self): - hed_strings = [ - "A, B, C", - "A, C, B", - "B, C, A", - "C, B, A" - ] - self._compare_strings(hed_strings) - self._compare_strings2(hed_strings) - hed_strings = [ - "A, (B, C)", - "(B, C), A" - ] - self._compare_strings(hed_strings) - self._compare_strings2(hed_strings) - hed_strings = [ - "A, (A, (B, C))", - "(A, (B, C)), A", - "((B, C), A), A", - "A, ((B, C), A)" - ] - self._compare_strings(hed_strings) - self._compare_strings2(hed_strings) - hed_strings = [ - "D, A, (A, (B, C))", - "(A, (B, C)), A, D", - "((B, C), A), A, D", - "A, D, ((B, C), A)" - ] - self._compare_strings(hed_strings) - self._compare_strings2(hed_strings) - hed_strings = [ - "D, (E, F), A, (A, (B, C))", - "(A, (B, C)), A, D, (F, E)", - "((B, C), A), (E, F), A, D", - "A, D, ((B, C), A), (F, E)" - ] - self._compare_strings(hed_strings) - - def test_sorted_structure(self): - hed_string = HedString("(Tag3, Tag1, Tag5, Tag2, Tag4)", self.hed_schema) - original_hed_string = copy.deepcopy(hed_string) - - sorted_hed_string = hed_string.sorted() - - self.assertIsInstance(sorted_hed_string, HedString) - self.assertEqual(str(original_hed_string), str(hed_string)) - self.assertIsNot(sorted_hed_string, hed_string) - -if __name__ == '__main__': - unittest.main() +import unittest +import os + +from hed import schema +from hed.models import HedString +import copy + + +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + hed_xml_file = os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml") + cls.hed_schema = schema.load_schema(hed_xml_file) + + def test_remove_groups(self): + from hed.models.definition_dict import DefTagNames + basic_definition_string = "(Definition/TestDef, (Keypad-key/TestDef1,Keyboard-key/TestDef2))" + basic_definition_string_repeated = f"{basic_definition_string},{basic_definition_string}" + def_string_with_repeat = HedString(basic_definition_string_repeated, self.hed_schema) + definition_tags = def_string_with_repeat.find_tags({DefTagNames.DEFINITION_KEY}, + recursive=True, include_groups=1) + definition_tag2 = definition_tags[1] + def_string_with_repeat.remove([definition_tag2]) + remaining_children = def_string_with_repeat.get_all_groups() + for child in remaining_children: + if child is definition_tag2: + self.assertFalse(False, "Definition tag not removed successfully") + + basic_definition_string_repeated_subgroup = \ + f"{basic_definition_string},{basic_definition_string}, ({basic_definition_string})" + def_string_with_repeat = HedString(basic_definition_string_repeated_subgroup, self.hed_schema) + definition_tags = def_string_with_repeat.find_tags({DefTagNames.DEFINITION_KEY}, + recursive=True, include_groups=1) + definition_tag3 = definition_tags[2] + def_string_with_repeat.remove([definition_tag3]) + remaining_children = def_string_with_repeat.get_all_groups() + for child in remaining_children: + if child is definition_tag3: + self.assertFalse(False, "Nested definition tag not removed successfully") + + basic_definition_string_repeated_subgroup = \ + f"{basic_definition_string},{basic_definition_string}, ({basic_definition_string})" + def_string_with_repeat = HedString(basic_definition_string_repeated_subgroup, self.hed_schema) + definition_tags = def_string_with_repeat.find_tags({DefTagNames.DEFINITION_KEY}, + recursive=True, include_groups=1) + definition_tag2 = definition_tags[1] + def_string_with_repeat.remove([definition_tag2]) + remaining_children = def_string_with_repeat.get_all_groups() + for child in remaining_children: + if child is definition_tag2: + self.assertFalse(False, "Nested definition tag not removed successfully") + + def test_find_tags_with_term(self): + basic_hed_string = \ + "(Keypad-key/TestDef1,Keyboard-key/TestDef2, Item/Object, Event), Event, Object, Geometric-object" + basic_hed_string_obj = HedString(basic_hed_string, self.hed_schema) + # works + located_tags = basic_hed_string_obj.find_tags_with_term("Object", recursive=True, include_groups=0) + self.assertEqual(len(located_tags), 5) + # located tags now has found all 5 hed tags + + # This will find no tags + located_tags = basic_hed_string_obj.find_tags_with_term("reject", recursive=True, include_groups=0) + self.assertEqual(len(located_tags), 0) + + # this will also find no tags + located_tags = basic_hed_string_obj.find_tags_with_term("Item/Object", recursive=True, include_groups=0) + self.assertEqual(len(located_tags), 0) + + def _compare_strings(self, hed_strings): + str1 = HedString(hed_strings[0], self.hed_schema) + str1.sort() + for hed_string in hed_strings: + str2 = HedString(hed_string, self.hed_schema) + str2.sort() + self.assertEqual(str1, str2) + + def _compare_strings2(self, hed_strings): + str1 = HedString(hed_strings[0], self.hed_schema) + for hed_string in hed_strings: + str2 = HedString(hed_string, self.hed_schema) + self.assertEqual(str1.sorted(), str2.sorted()) + + def test_sort_and_sorted(self): + hed_strings = [ + "A, B, C", + "A, C, B", + "B, C, A", + "C, B, A" + ] + self._compare_strings(hed_strings) + self._compare_strings2(hed_strings) + hed_strings = [ + "A, (B, C)", + "(B, C), A" + ] + self._compare_strings(hed_strings) + self._compare_strings2(hed_strings) + hed_strings = [ + "A, (A, (B, C))", + "(A, (B, C)), A", + "((B, C), A), A", + "A, ((B, C), A)" + ] + self._compare_strings(hed_strings) + self._compare_strings2(hed_strings) + hed_strings = [ + "D, A, (A, (B, C))", + "(A, (B, C)), A, D", + "((B, C), A), A, D", + "A, D, ((B, C), A)" + ] + self._compare_strings(hed_strings) + self._compare_strings2(hed_strings) + hed_strings = [ + "D, (E, F), A, (A, (B, C))", + "(A, (B, C)), A, D, (F, E)", + "((B, C), A), (E, F), A, D", + "A, D, ((B, C), A), (F, E)" + ] + self._compare_strings(hed_strings) + + def test_sorted_structure(self): + hed_string = HedString("(Tag3, Tag1, Tag5, Tag2, Tag4)", self.hed_schema) + original_hed_string = copy.deepcopy(hed_string) + + sorted_hed_string = hed_string.sorted() + + self.assertIsInstance(sorted_hed_string, HedString) + self.assertEqual(str(original_hed_string), str(hed_string)) + self.assertIsNot(sorted_hed_string, hed_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_hed_string.py b/tests/models/test_hed_string.py index aa3ec2ca..f1ede36d 100644 --- a/tests/models/test_hed_string.py +++ b/tests/models/test_hed_string.py @@ -1,309 +1,310 @@ -from hed.models import HedString -import unittest -from hed import load_schema_version -import copy - - -class TestHedStrings(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.schema = load_schema_version("8.3.0") - - def validator_scalar(self, test_strings, expected_results, test_function): - for test_key in test_strings: - test_result = test_function(test_strings[test_key]) - expected_result = expected_results[test_key] - self.assertEqual(test_result, expected_result, test_strings[test_key]) - - def validator_list(self, test_strings, expected_results, test_function): - for test_key in test_strings: - test_result = test_function(test_strings[test_key]) - expected_result = expected_results[test_key] - self.assertCountEqual(test_result, expected_result, test_strings[test_key]) - - -class TestHedString(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.schema = load_schema_version("8.0.0") - pass - - def test_constructor(self): - test_strings = { - 'normal': "Tag1,Tag2", - 'normalParen': "(Tag1,Tag2)", - 'normalDoubleParen': "(Tag1,Tag2,(Tag3,Tag4))", - 'extraOpeningParen': "((Tag1,Tag2,(Tag3,Tag4))", - 'extra2OpeningParen': "(((Tag1,Tag2,(Tag3,Tag4))", - 'extraClosingParen': "(Tag1,Tag2,(Tag3,Tag4)))", - 'extra2ClosingParen': "(Tag1,Tag2,(Tag3,Tag4))))" - } - expected_result = { - 'normal': True, - 'normalParen': True, - 'normalDoubleParen': True, - 'extraOpeningParen': False, - 'extra2OpeningParen': False, - 'extraClosingParen': False, - 'extra2ClosingParen': False - } - - # Just make sure it doesn't crash while parsing super invalid strings. - for name, string in test_strings.items(): - hed_string = HedString(string, self.schema) - - self.assertEqual(bool(hed_string), expected_result[name]) - if bool(hed_string): - _ = hed_string.get_all_groups() - _ = hed_string.get_all_tags() - - -class HedTagLists(TestHedStrings): - def test_type(self): - hed_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' - result = HedString.split_into_groups(hed_string, self.schema) - self.assertIsInstance(result, list) - - def test_top_level_tags(self): - hed_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' - result = HedString.split_into_groups(hed_string, self.schema) - tags_as_strings = [str(tag) for tag in result] - self.assertCountEqual(tags_as_strings, ['Event/Category/Experimental stimulus', 'Item/Object/Vehicle/Train', - 'Attribute/Visual/Color/Purple']) - - def test_group_tags(self): - hed_string = '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' \ - '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px ' - string_obj = HedString(hed_string, self.schema) - tags_as_strings = [str(tag) for tag in string_obj.children] - self.assertCountEqual(tags_as_strings, - ['/Action/Reach/To touch', - '(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm)', - '/Attribute/Location/Screen/Top/70 px', '/Attribute/Location/Screen/Left/23 px']) - - def test_square_brackets_in_string(self): - # just verifying this parses, square brackets do not validate - hed_string = '[test_ref], Event/Sensory-event, Participant, ([test_ref2], Event)' - string_obj = HedString(hed_string, self.schema) - tags_as_strings = [str(tag) for tag in string_obj.children] - self.assertCountEqual(tags_as_strings, - ['[test_ref]', 'Sensory-event', 'Participant', '([test_ref2],Event)']) - - # Potentially restore some similar behavior later if desired. - # We no longer automatically remove things like quotes. - # def test_double_quotes(self): - # double_quote_string = 'Event/Category/Experimental stimulus,"Item/Object/Vehicle/Train",' \ - # 'Attribute/Visual/Color/Purple ' - # normal_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' - # double_quote_result = HedString.split_into_groups(double_quote_string) - # normal_result = HedString.split_into_groups(normal_string) - # self.assertEqual(double_quote_result, normal_result) - - def test_blanks(self): - test_strings = { - 'doubleTilde': - '/Item/Object/Vehicle/Car~~/Attribute/Object control/Perturb', - 'doubleComma': - '/Item/Object/Vehicle/Car,,/Attribute/Object control/Perturb', - 'doubleInvalidCharacter': - '/Item/Object/Vehicle/Car[]/Attribute/Object control/Perturb', - 'trailingBlank': - '/Item/Object/Vehicle/Car,/Attribute/Object control/Perturb,', - } - expected_list = [ - '/Item/Object/Vehicle/Car', - '/Attribute/Object control/Perturb', - ] - expected_results = { - 'doubleTilde': [ - '/Item/Object/Vehicle/Car~~/Attribute/Object control/Perturb', - ], - 'doubleComma': expected_list, - 'doubleInvalidCharacter': ['/Item/Object/Vehicle/Car[]/Attribute/Object control/Perturb'], - 'trailingBlank': expected_list, - } - - def test_function(string): - return [str(child) for child in HedString.split_into_groups(string, self.schema)] - - self.validator_list(test_strings, expected_results, test_function) - - -class ProcessedHedTags(TestHedStrings): - def test_parsed_tags(self): - hed_string = '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' \ - '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px ' - parsed_string = HedString(hed_string, self.schema) - self.assertCountEqual([str(tag) for tag in parsed_string.get_all_tags()], [ - '/Action/Reach/To touch', - '/Attribute/Object side/Left', - '/Participant/Effect/Body part/Arm', - '/Attribute/Location/Screen/Top/70 px', - '/Attribute/Location/Screen/Left/23 px', - ]) - self.assertCountEqual([str(group) for group in parsed_string.get_all_groups()], - ['/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' - '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', - '(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm)']) - - -class TestHedStringUtil(unittest.TestCase): - def compare_split_results(self, test_strings, expected_results): - for test_key in test_strings: - test_string = test_strings[test_key] - expected_result = expected_results[test_key] - actual_results = HedString.split_hed_string(test_string) - decoded_results = [test_string[start:end] for (is_tag, (start, end)) in actual_results] - self.assertEqual(decoded_results, expected_result) - - def test_split_hed_string(self): - test_strings = { - 'single': 'Event', - 'double': 'Event, Event/Extension', - 'singleAndGroup': 'Event/Extension, (Event/Extension2, Event/Extension3)', - 'singleAndGroupWithBlank': 'Event/Extension, (Event, ,Event/Extension3)', - 'manyParens': 'Event/Extension,(((Event/Extension2, )(Event)', - 'manyParensEndingSpace': 'Event/Extension,(((Event/Extension2, )(Event) ', - 'manyParensOpeningSpace': ' Event/Extension,(((Event/Extension2, )(Event)', - 'manyParensBothSpace': ' Event/Extension,(((Event/Extension2, )(Event ', - 'manyClosingParens': 'Event/Extension, (Event/Extension2, ))(Event)', - } - expected_results = { - 'single': ['Event'], - 'double': ['Event', ', ', 'Event/Extension'], - 'singleAndGroup': ['Event/Extension', ', ', '(', 'Event/Extension2', ', ', 'Event/Extension3', ')'], - 'singleAndGroupWithBlank': ['Event/Extension', ', ', '(', 'Event', ', ', ',', 'Event/Extension3', ')'], - 'manyParens': ['Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ')'], - 'manyParensEndingSpace': - ['Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ') '], - 'manyParensOpeningSpace': - [' ', 'Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ')'], - 'manyParensBothSpace': - [' ', 'Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ' '], - 'manyClosingParens': ['Event/Extension', ', ', '(', 'Event/Extension2', ', ', ')', ')', '(', 'Event', ')'] - } - - self.compare_split_results(test_strings, expected_results) - -class TestHedStringShrinkDefs(unittest.TestCase): - hed_schema = load_schema_version("8.0.0") - - def test_shrink_defs(self): - test_strings = { - 1: "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event", - 2: "Event, ((Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event)", - # this one shouldn't change as it doesn't have a parent - 3: "Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", - # This one is an obviously invalid def, but still shrinks - 4: "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2), ThisDefIsInvalid),Event", - } - - expected_results = { - 1: "Def/TestDefPlaceholder/2471,Event", - 2: "Event,(Def/TestDefPlaceholder/2471,Event)", - 3: "Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", - 4: "Def/TestDefPlaceholder/2471,Event", - } - - for key, test_string in test_strings.items(): - hed_string = HedString(test_string, hed_schema=self.hed_schema) - hed_string.shrink_defs() - self.assertEqual(str(hed_string), expected_results[key]) - - -class TestFromHedStrings(unittest.TestCase): - def setUp(self): - self.schema = load_schema_version("8.1.0") - self.hed_strings = [ - HedString('Event', self.schema), - HedString('Action', self.schema), - HedString('Age/20', self.schema), - HedString('Item', self.schema), - ] - - def test_from_hed_strings(self): - combined_hed_string = HedString.from_hed_strings(self.hed_strings) - - # Test that the combined hed string is as expected - self.assertEqual(combined_hed_string._hed_string, 'Event,Action,Age/20,Item') - - # Test that the schema of the combined hed string is the same as the first hed string - self.assertEqual(combined_hed_string._schema, self.schema) - - # Test that the contents of the combined hed string is the concatenation of the contents of all hed strings - expected_contents = [child for hed_string in self.hed_strings for child in hed_string.children] - self.assertEqual(combined_hed_string.children, expected_contents) - - # Test that the _from_strings attribute of the combined hed string is the list of original hed strings - self.assertEqual(combined_hed_string._from_strings, self.hed_strings) - - def test_empty_hed_strings_list(self): - with self.assertRaises(TypeError): - HedString.from_hed_strings([]) - - def test_none_hed_strings_list(self): - with self.assertRaises(TypeError): - HedString.from_hed_strings(None) - - def test_complex_hed_strings(self): - complex_hed_strings = [ - HedString('Event,Action', self.schema), - HedString('Age/20,Hand', self.schema), - HedString('Item,(Leg, Nose)', self.schema), - ] - - combined_hed_string = HedString.from_hed_strings(complex_hed_strings) - - # Test that the combined hed string is as expected - self.assertEqual(combined_hed_string._hed_string, 'Event,Action,Age/20,Hand,Item,(Leg, Nose)') - - # Test that the schema of the combined hed string is the same as the first hed string - self.assertEqual(combined_hed_string._schema, self.schema) - - # Test that the contents of the combined hed string is the concatenation of the contents of all hed strings - expected_contents = [child for hed_string in complex_hed_strings for child in hed_string.children] - self.assertEqual(combined_hed_string.children, expected_contents) - - # Test that the _from_strings attribute of the combined hed string is the list of original hed strings - self.assertEqual(combined_hed_string._from_strings, complex_hed_strings) - - def _verify_copied_string(self, original_hed_string): - # Make a deepcopy of the original HedString - copied_hed_string = copy.deepcopy(original_hed_string) - - # The copied HedString should not be the same object as the original - self.assertNotEqual(id(original_hed_string), id(copied_hed_string)) - - # The copied HedString should have the same _hed_string as the original - self.assertEqual(copied_hed_string._hed_string, original_hed_string._hed_string) - - # The _children attribute of copied HedString should not be the same object as the original - self.assertNotEqual(id(original_hed_string.children), id(copied_hed_string.children)) - - # The _children attribute of copied HedString should have the same contents as the original - self.assertEqual(copied_hed_string.children, original_hed_string.children) - - # The parent of each child in copied_hed_string._children should point to copied_hed_string - for child in copied_hed_string.children: - self.assertEqual(child._parent, copied_hed_string) - - # The _original_children and _from_strings attributes should also be deepcopied - self.assertNotEqual(id(original_hed_string._original_children), id(copied_hed_string._original_children)) - self.assertEqual(copied_hed_string._original_children, original_hed_string._original_children) - if original_hed_string._from_strings: - self.assertNotEqual(id(original_hed_string._from_strings), id(copied_hed_string._from_strings)) - self.assertEqual(copied_hed_string._from_strings, original_hed_string._from_strings) - - def test_deepcopy(self): - original_hed_string = HedString('Event,Action', self.schema) - - self._verify_copied_string(original_hed_string) - complex_hed_strings = [ - HedString('Event,Action', self.schema), - HedString('Age/20,Hand', self.schema), - HedString('Item,(Leg, Nose)', self.schema), - ] - - combined_hed_string = HedString.from_hed_strings(complex_hed_strings) - - self._verify_copied_string(combined_hed_string) \ No newline at end of file +from hed.models import HedString +import unittest +from hed import load_schema_version +import copy + + +class TestHedStrings(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.schema = load_schema_version("8.3.0") + + def validator_scalar(self, test_strings, expected_results, test_function): + for test_key in test_strings: + test_result = test_function(test_strings[test_key]) + expected_result = expected_results[test_key] + self.assertEqual(test_result, expected_result, test_strings[test_key]) + + def validator_list(self, test_strings, expected_results, test_function): + for test_key in test_strings: + test_result = test_function(test_strings[test_key]) + expected_result = expected_results[test_key] + self.assertCountEqual(test_result, expected_result, test_strings[test_key]) + + +class TestHedString(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.schema = load_schema_version("8.0.0") + pass + + def test_constructor(self): + test_strings = { + 'normal': "Tag1,Tag2", + 'normalParen': "(Tag1,Tag2)", + 'normalDoubleParen': "(Tag1,Tag2,(Tag3,Tag4))", + 'extraOpeningParen': "((Tag1,Tag2,(Tag3,Tag4))", + 'extra2OpeningParen': "(((Tag1,Tag2,(Tag3,Tag4))", + 'extraClosingParen': "(Tag1,Tag2,(Tag3,Tag4)))", + 'extra2ClosingParen': "(Tag1,Tag2,(Tag3,Tag4))))" + } + expected_result = { + 'normal': True, + 'normalParen': True, + 'normalDoubleParen': True, + 'extraOpeningParen': False, + 'extra2OpeningParen': False, + 'extraClosingParen': False, + 'extra2ClosingParen': False + } + + # Just make sure it doesn't crash while parsing super invalid strings. + for name, string in test_strings.items(): + hed_string = HedString(string, self.schema) + + self.assertEqual(bool(hed_string), expected_result[name]) + if bool(hed_string): + _ = hed_string.get_all_groups() + _ = hed_string.get_all_tags() + + +class HedTagLists(TestHedStrings): + def test_type(self): + hed_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' + result = HedString.split_into_groups(hed_string, self.schema) + self.assertIsInstance(result, list) + + def test_top_level_tags(self): + hed_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' + result = HedString.split_into_groups(hed_string, self.schema) + tags_as_strings = [str(tag) for tag in result] + self.assertCountEqual(tags_as_strings, ['Event/Category/Experimental stimulus', 'Item/Object/Vehicle/Train', + 'Attribute/Visual/Color/Purple']) + + def test_group_tags(self): + hed_string = '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' \ + '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px ' + string_obj = HedString(hed_string, self.schema) + tags_as_strings = [str(tag) for tag in string_obj.children] + self.assertCountEqual(tags_as_strings, + ['/Action/Reach/To touch', + '(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm)', + '/Attribute/Location/Screen/Top/70 px', '/Attribute/Location/Screen/Left/23 px']) + + def test_square_brackets_in_string(self): + # just verifying this parses, square brackets do not validate + hed_string = '[test_ref], Event/Sensory-event, Participant, ([test_ref2], Event)' + string_obj = HedString(hed_string, self.schema) + tags_as_strings = [str(tag) for tag in string_obj.children] + self.assertCountEqual(tags_as_strings, + ['[test_ref]', 'Sensory-event', 'Participant', '([test_ref2],Event)']) + + # Potentially restore some similar behavior later if desired. + # We no longer automatically remove things like quotes. + # def test_double_quotes(self): + # double_quote_string = 'Event/Category/Experimental stimulus,"Item/Object/Vehicle/Train",' \ + # 'Attribute/Visual/Color/Purple ' + # normal_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' + # double_quote_result = HedString.split_into_groups(double_quote_string) + # normal_result = HedString.split_into_groups(normal_string) + # self.assertEqual(double_quote_result, normal_result) + + def test_blanks(self): + test_strings = { + 'doubleTilde': + '/Item/Object/Vehicle/Car~~/Attribute/Object control/Perturb', + 'doubleComma': + '/Item/Object/Vehicle/Car,,/Attribute/Object control/Perturb', + 'doubleInvalidCharacter': + '/Item/Object/Vehicle/Car[]/Attribute/Object control/Perturb', + 'trailingBlank': + '/Item/Object/Vehicle/Car,/Attribute/Object control/Perturb,', + } + expected_list = [ + '/Item/Object/Vehicle/Car', + '/Attribute/Object control/Perturb', + ] + expected_results = { + 'doubleTilde': [ + '/Item/Object/Vehicle/Car~~/Attribute/Object control/Perturb', + ], + 'doubleComma': expected_list, + 'doubleInvalidCharacter': ['/Item/Object/Vehicle/Car[]/Attribute/Object control/Perturb'], + 'trailingBlank': expected_list, + } + + def test_function(string): + return [str(child) for child in HedString.split_into_groups(string, self.schema)] + + self.validator_list(test_strings, expected_results, test_function) + + +class ProcessedHedTags(TestHedStrings): + def test_parsed_tags(self): + hed_string = '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' \ + '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px ' + parsed_string = HedString(hed_string, self.schema) + self.assertCountEqual([str(tag) for tag in parsed_string.get_all_tags()], [ + '/Action/Reach/To touch', + '/Attribute/Object side/Left', + '/Participant/Effect/Body part/Arm', + '/Attribute/Location/Screen/Top/70 px', + '/Attribute/Location/Screen/Left/23 px', + ]) + self.assertCountEqual([str(group) for group in parsed_string.get_all_groups()], + ['/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' + '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + '(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm)']) + + +class TestHedStringUtil(unittest.TestCase): + def compare_split_results(self, test_strings, expected_results): + for test_key in test_strings: + test_string = test_strings[test_key] + expected_result = expected_results[test_key] + actual_results = HedString.split_hed_string(test_string) + decoded_results = [test_string[start:end] for (is_tag, (start, end)) in actual_results] + self.assertEqual(decoded_results, expected_result) + + def test_split_hed_string(self): + test_strings = { + 'single': 'Event', + 'double': 'Event, Event/Extension', + 'singleAndGroup': 'Event/Extension, (Event/Extension2, Event/Extension3)', + 'singleAndGroupWithBlank': 'Event/Extension, (Event, ,Event/Extension3)', + 'manyParens': 'Event/Extension,(((Event/Extension2, )(Event)', + 'manyParensEndingSpace': 'Event/Extension,(((Event/Extension2, )(Event) ', + 'manyParensOpeningSpace': ' Event/Extension,(((Event/Extension2, )(Event)', + 'manyParensBothSpace': ' Event/Extension,(((Event/Extension2, )(Event ', + 'manyClosingParens': 'Event/Extension, (Event/Extension2, ))(Event)', + } + expected_results = { + 'single': ['Event'], + 'double': ['Event', ', ', 'Event/Extension'], + 'singleAndGroup': ['Event/Extension', ', ', '(', 'Event/Extension2', ', ', 'Event/Extension3', ')'], + 'singleAndGroupWithBlank': ['Event/Extension', ', ', '(', 'Event', ', ', ',', 'Event/Extension3', ')'], + 'manyParens': ['Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ')'], + 'manyParensEndingSpace': + ['Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ') '], + 'manyParensOpeningSpace': + [' ', 'Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ')'], + 'manyParensBothSpace': + [' ', 'Event/Extension', ',', '(', '(', '(', 'Event/Extension2', ', ', ')', '(', 'Event', ' '], + 'manyClosingParens': ['Event/Extension', ', ', '(', 'Event/Extension2', ', ', ')', ')', '(', 'Event', ')'] + } + + self.compare_split_results(test_strings, expected_results) + + +class TestHedStringShrinkDefs(unittest.TestCase): + hed_schema = load_schema_version("8.0.0") + + def test_shrink_defs(self): + test_strings = { + 1: "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event", + 2: "Event, ((Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event)", + # this one shouldn't change as it doesn't have a parent + 3: "Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", + # This one is an obviously invalid def, but still shrinks + 4: "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2), ThisDefIsInvalid),Event", + } + + expected_results = { + 1: "Def/TestDefPlaceholder/2471,Event", + 2: "Event,(Def/TestDefPlaceholder/2471,Event)", + 3: "Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", + 4: "Def/TestDefPlaceholder/2471,Event", + } + + for key, test_string in test_strings.items(): + hed_string = HedString(test_string, hed_schema=self.hed_schema) + hed_string.shrink_defs() + self.assertEqual(str(hed_string), expected_results[key]) + + +class TestFromHedStrings(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version("8.1.0") + self.hed_strings = [ + HedString('Event', self.schema), + HedString('Action', self.schema), + HedString('Age/20', self.schema), + HedString('Item', self.schema), + ] + + def test_from_hed_strings(self): + combined_hed_string = HedString.from_hed_strings(self.hed_strings) + + # Test that the combined HED string is as expected + self.assertEqual(combined_hed_string._hed_string, 'Event,Action,Age/20,Item') + + # Test that the schema of the combined HED string is the same as the first HED string + self.assertEqual(combined_hed_string._schema, self.schema) + + # Test that the contents of the combined HED string is the concatenation of the contents of all HED strings + expected_contents = [child for hed_string in self.hed_strings for child in hed_string.children] + self.assertEqual(combined_hed_string.children, expected_contents) + + # Test that the _from_strings attribute of the combined HED string is the list of original HED strings + self.assertEqual(combined_hed_string._from_strings, self.hed_strings) + + def test_empty_hed_strings_list(self): + with self.assertRaises(TypeError): + HedString.from_hed_strings([]) + + def test_none_hed_strings_list(self): + with self.assertRaises(TypeError): + HedString.from_hed_strings(None) + + def test_complex_hed_strings(self): + complex_hed_strings = [ + HedString('Event,Action', self.schema), + HedString('Age/20,Hand', self.schema), + HedString('Item,(Leg, Nose)', self.schema), + ] + + combined_hed_string = HedString.from_hed_strings(complex_hed_strings) + + # Test that the combined HED string is as expected + self.assertEqual(combined_hed_string._hed_string, 'Event,Action,Age/20,Hand,Item,(Leg, Nose)') + + # Test that the schema of the combined HED string is the same as the first HED string + self.assertEqual(combined_hed_string._schema, self.schema) + + # Test that the contents of the combined HED string is the concatenation of the contents of all HED strings + expected_contents = [child for hed_string in complex_hed_strings for child in hed_string.children] + self.assertEqual(combined_hed_string.children, expected_contents) + + # Test that the _from_strings attribute of the combined HED string is the list of original HED strings + self.assertEqual(combined_hed_string._from_strings, complex_hed_strings) + + def _verify_copied_string(self, original_hed_string): + # Make a deepcopy of the original HedString + copied_hed_string = copy.deepcopy(original_hed_string) + + # The copied HedString should not be the same object as the original + self.assertNotEqual(id(original_hed_string), id(copied_hed_string)) + + # The copied HedString should have the same _hed_string as the original + self.assertEqual(copied_hed_string._hed_string, original_hed_string._hed_string) + + # The _children attribute of copied HedString should not be the same object as the original + self.assertNotEqual(id(original_hed_string.children), id(copied_hed_string.children)) + + # The _children attribute of copied HedString should have the same contents as the original + self.assertEqual(copied_hed_string.children, original_hed_string.children) + + # The parent of each child in copied_hed_string._children should point to copied_hed_string + for child in copied_hed_string.children: + self.assertEqual(child._parent, copied_hed_string) + + # The _original_children and _from_strings attributes should also be deep copied + self.assertNotEqual(id(original_hed_string._original_children), id(copied_hed_string._original_children)) + self.assertEqual(copied_hed_string._original_children, original_hed_string._original_children) + if original_hed_string._from_strings: + self.assertNotEqual(id(original_hed_string._from_strings), id(copied_hed_string._from_strings)) + self.assertEqual(copied_hed_string._from_strings, original_hed_string._from_strings) + + def test_deepcopy(self): + original_hed_string = HedString('Event,Action', self.schema) + + self._verify_copied_string(original_hed_string) + complex_hed_strings = [ + HedString('Event,Action', self.schema), + HedString('Age/20,Hand', self.schema), + HedString('Item,(Leg, Nose)', self.schema), + ] + + combined_hed_string = HedString.from_hed_strings(complex_hed_strings) + + self._verify_copied_string(combined_hed_string) diff --git a/tests/models/test_hed_tag.py b/tests/models/test_hed_tag.py index a61380b9..e1b62ac5 100644 --- a/tests/models/test_hed_tag.py +++ b/tests/models/test_hed_tag.py @@ -1,188 +1,192 @@ -from hed.models.hed_tag import HedTag -from tests.validator.test_tag_validator_base import TestHedBase -from hed.schema import HedKey -from hed import load_schema_version - -from tests.schema import util_create_schemas - - -class TestValidatorUtilityFunctions(TestHedBase): - - @classmethod - def setUpClass(cls): - cls.hed_schema = load_schema_version("8.3.0") - - def test_if_tag_exists(self): - valid_tag1 = HedTag('Left-handed', hed_schema=self.hed_schema) - hash1 = hash(valid_tag1) - hash2 = hash(valid_tag1) - self.assertEqual(hash1, hash2) - valid_tag2 = HedTag('Geometric-object', hed_schema=self.hed_schema) - valid_tag3 = HedTag('duration/#', hed_schema=self.hed_schema) - invalid_tag1 = HedTag('something', hed_schema=self.hed_schema) - invalid_tag2 = HedTag('Participant/nothing', hed_schema=self.hed_schema) - invalid_tag3 = HedTag('participant/#', hed_schema=self.hed_schema) - valid_tag1_results = valid_tag1.tag_exists_in_schema() - valid_tag2_results = valid_tag2.tag_exists_in_schema() - valid_tag3_results = valid_tag3.tag_exists_in_schema() - invalid_tag1_results = invalid_tag1.tag_exists_in_schema() - invalid_tag2_results = invalid_tag2.tag_exists_in_schema() - invalid_tag3_results = invalid_tag3.tag_exists_in_schema() - # valid_tag1_results = self.semantic_tag_validator.check_tag_exists_in_schema(valid_tag1) - # valid_tag2_results = self.semantic_tag_validator.check_tag_exists_in_schema(valid_tag2) - # valid_tag3_results = self.semantic_tag_validator.check_tag_exists_in_schema(valid_tag3) - # invalid_tag1_results = self.semantic_tag_validator.check_tag_exists_in_schema(invalid_tag1) - # invalid_tag2_results = self.semantic_tag_validator.check_tag_exists_in_schema(invalid_tag2) - # invalid_tag3_results = self.semantic_tag_validator.check_tag_exists_in_schema(invalid_tag3) - self.assertEqual(valid_tag1_results, True) - self.assertEqual(valid_tag2_results, True) - self.assertEqual(valid_tag3_results, True) - self.assertEqual(invalid_tag1_results, False) - self.assertEqual(invalid_tag2_results, False) - self.assertEqual(invalid_tag3_results, False) - - -class TestSchemaUtilityFunctions(TestHedBase): - @classmethod - def setUpClass(cls): - cls.hed_schema = load_schema_version("8.3.0") - - def test_correctly_determine_tag_takes_value(self): - value_tag1 = HedTag('Distance/35 px', hed_schema=self.hed_schema) - value_tag2 = HedTag('id/35', hed_schema=self.hed_schema) - value_tag3 = HedTag('duration/#', hed_schema=self.hed_schema) - no_value_tag1 = HedTag('something', hed_schema=self.hed_schema) - no_value_tag2 = HedTag('attribute/color/black', hed_schema=self.hed_schema) - no_value_tag3 = HedTag('participant/#', hed_schema=self.hed_schema) - value_tag1_result = value_tag1.is_takes_value_tag() - value_tag2_result = value_tag2.is_takes_value_tag() - value_tag3_result = value_tag3.is_takes_value_tag() - no_value_tag1_result = no_value_tag1.is_takes_value_tag() - no_value_tag2_result = no_value_tag2.is_takes_value_tag() - no_value_tag3_result = no_value_tag3.is_takes_value_tag() - self.assertEqual(value_tag1_result, True) - self.assertEqual(value_tag2_result, True) - self.assertEqual(value_tag3_result, True) - self.assertEqual(no_value_tag1_result, False) - self.assertEqual(no_value_tag2_result, False) - self.assertEqual(no_value_tag3_result, False) - - def test_should_determine_default_unit(self): - unit_class_tag1 = HedTag('duration/35 ms', hed_schema=self.hed_schema) - # unit_class_tag2 = HedTag('participant/effect/cognitive/reward/11 dollars', - # schema=self.schema) - no_unit_class_tag = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) - no_value_tag = HedTag('Black', hed_schema=self.hed_schema) - unit_class_tag1_result = unit_class_tag1.default_unit - # unit_class_tag2_result = unit_class_tag2.default_unit - no_unit_class_tag_result = no_unit_class_tag.default_unit - no_value_tag_result = no_value_tag.default_unit - self.assertEqual(unit_class_tag1_result.name, 's') - # self.assertEqual(unit_class_tag2_result, '$') - self.assertEqual(no_unit_class_tag_result, None) - self.assertEqual(no_value_tag_result, None) - - def test_correctly_determine_tag_unit_classes(self): - unit_class_tag1 = HedTag('distance/35 px', hed_schema=self.hed_schema) - # Todo: Make a schema with a currency unit to test this - # unit_class_tag2 = HedTag('reward/$10.55', schema=self.schema) - unit_class_tag3 = HedTag('duration/#', hed_schema=self.hed_schema) - no_unit_class_tag = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) - unit_class_tag1_result = list(unit_class_tag1.unit_classes.keys()) - # unit_class_tag2_result = list(unit_class_tag2.get_tag_unit_class_units()) - unit_class_tag3_result = list(unit_class_tag3.unit_classes.keys()) - no_unit_class_tag_result = list(no_unit_class_tag.unit_classes.keys()) - self.assertCountEqual(unit_class_tag1_result, ['physicalLengthUnits']) - # self.assertCountEqual(unit_class_tag2_result, ['currency']) - self.assertCountEqual(unit_class_tag3_result, ['timeUnits']) - self.assertEqual(no_unit_class_tag_result, []) - - def test_determine_tags_legal_units(self): - unit_class_tag1 = HedTag('distance/35 px', hed_schema=self.hed_schema) - # todo: add this back in when we have a currency unit or make a test for one. - # unit_class_tag2 = HedTag('reward/$10.55', schema=self.schema) - no_unit_class_tag = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) - unit_class_tag1_result = unit_class_tag1.get_tag_unit_class_units() - # unit_class_tag2_result = unit_class_tag2.get_tag_unit_class_units() - no_unit_class_tag_result = no_unit_class_tag.get_tag_unit_class_units() - self.assertCountEqual(sorted(unit_class_tag1_result), sorted([ - 'inch', - 'm', - 'foot', - 'metre', - 'meter', - 'mile', - ])) - # self.assertCountEqual(unit_class_tag2_result, [ - # 'dollar', - # '$', - # 'point', - # 'fraction', - # ]) - self.assertEqual(no_unit_class_tag_result, []) - - def test_strip_off_units_from_value(self): - # todo: add this back in when we have a currency unit or make a test for one. - # dollars_string_no_space = HedTag('Participant/Effect/Cognitive/Reward/$25.99', schema=self.schema) - # dollars_string = HedTag('Participant/Effect/Cognitive/Reward/$ 25.99', schema=self.schema) - # dollars_string_invalid = HedTag('Participant/Effect/Cognitive/Reward/25.99$', schema=self.schema) - volume_string_no_space = HedTag('Volume/100m^3', hed_schema=self.hed_schema) - volume_string = HedTag('Volume/100 m^3', hed_schema=self.hed_schema) - prefixed_volume_string = HedTag('Volume/100 cm^3', hed_schema=self.hed_schema) - invalid_volume_string = HedTag('Volume/200 cm', hed_schema=self.hed_schema) - invalid_distance_string = HedTag('Distance/200 M', hed_schema=self.hed_schema) - # currency_units = { - # 'currency':self.schema.unit_classes['currency'] - # } - volume_units = { - 'volume': self.hed_schema.unit_classes['volumeUnits'] - } - distance_units = { - 'distance': self.hed_schema.unit_classes['physicalLengthUnits'] - } - # stripped_dollars_string_no_space = dollars_string_no_space._get_tag_units_portion(currency_units) - # stripped_dollars_string = dollars_string._get_tag_units_portion(currency_units) - # stripped_dollars_string_invalid = dollars_string_invalid._get_tag_units_portion(currency_units) - stripped_volume_string, _, _ = HedTag._get_tag_units_portion(volume_string.extension, volume_units) - stripped_volume_string_no_space, _, _ = HedTag._get_tag_units_portion(volume_string_no_space.extension, volume_units) - stripped_prefixed_volume_string, _, _ = HedTag._get_tag_units_portion(prefixed_volume_string.extension, volume_units) - stripped_invalid_volume_string, _, _ = HedTag._get_tag_units_portion(invalid_volume_string.extension, volume_units) - stripped_invalid_distance_string, _, _ = HedTag._get_tag_units_portion(invalid_distance_string.extension, distance_units) - # self.assertEqual(stripped_dollars_string_no_space, None) - # self.assertEqual(stripped_dollars_string, '25.99') - # self.assertEqual(stripped_dollars_string_invalid, None) - self.assertEqual(stripped_volume_string, '100') - self.assertEqual(stripped_volume_string_no_space, None) - self.assertEqual(stripped_prefixed_volume_string, '100') - self.assertEqual(stripped_invalid_volume_string, None) - self.assertEqual(stripped_invalid_distance_string, None) - - def test_determine_allows_extensions(self): - extension_tag1 = HedTag('boat', hed_schema=self.hed_schema) - no_extension_tag1 = HedTag('duration/22 s', hed_schema=self.hed_schema) - no_extension_tag2 = HedTag('id/45', hed_schema=self.hed_schema) - no_extension_tag3 = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) - extension_tag1_result = extension_tag1.has_attribute(HedKey.ExtensionAllowed) - no_extension_tag1_result = no_extension_tag1.has_attribute(HedKey.ExtensionAllowed) - no_extension_tag2_result = no_extension_tag2.has_attribute(HedKey.ExtensionAllowed) - no_extension_tag3_result = no_extension_tag3.has_attribute(HedKey.ExtensionAllowed) - self.assertEqual(extension_tag1_result, True) - self.assertEqual(no_extension_tag1_result, False) - self.assertEqual(no_extension_tag2_result, False) - self.assertEqual(no_extension_tag3_result, False) - - def test_get_as_default_units(self): - tag = HedTag("Duration/300 ms", hed_schema=self.hed_schema) - self.assertAlmostEqual(0.3, tag.value_as_default_unit()) - - tag2 = HedTag("Duration/300", hed_schema=self.hed_schema) - self.assertAlmostEqual(300, tag2.value_as_default_unit()) - - tag3 = HedTag("Duration/300 m", hed_schema=self.hed_schema) - self.assertEqual(None, tag3.value_as_default_unit()) - - tag4 = HedTag("IntensityTakesValue/300", hed_schema=util_create_schemas.load_schema_intensity()) - self.assertEqual(300, tag4.value_as_default_unit()) - - tag5 = HedTag("IntensityTakesValue/300 cd", hed_schema=util_create_schemas.load_schema_intensity()) - self.assertEqual(None, tag5.value_as_default_unit()) +from hed.models.hed_tag import HedTag +from tests.validator.test_tag_validator_base import TestHedBase +from hed.schema import HedKey +from hed import load_schema_version + +from tests.schema import util_create_schemas + + +class TestValidatorUtilityFunctions(TestHedBase): + + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.3.0") + + def test_if_tag_exists(self): + valid_tag1 = HedTag('Left-handed', hed_schema=self.hed_schema) + hash1 = hash(valid_tag1) + hash2 = hash(valid_tag1) + self.assertEqual(hash1, hash2) + valid_tag2 = HedTag('Geometric-object', hed_schema=self.hed_schema) + valid_tag3 = HedTag('duration/#', hed_schema=self.hed_schema) + invalid_tag1 = HedTag('something', hed_schema=self.hed_schema) + invalid_tag2 = HedTag('Participant/nothing', hed_schema=self.hed_schema) + invalid_tag3 = HedTag('participant/#', hed_schema=self.hed_schema) + valid_tag1_results = valid_tag1.tag_exists_in_schema() + valid_tag2_results = valid_tag2.tag_exists_in_schema() + valid_tag3_results = valid_tag3.tag_exists_in_schema() + invalid_tag1_results = invalid_tag1.tag_exists_in_schema() + invalid_tag2_results = invalid_tag2.tag_exists_in_schema() + invalid_tag3_results = invalid_tag3.tag_exists_in_schema() + # valid_tag1_results = self.semantic_tag_validator.check_tag_exists_in_schema(valid_tag1) + # valid_tag2_results = self.semantic_tag_validator.check_tag_exists_in_schema(valid_tag2) + # valid_tag3_results = self.semantic_tag_validator.check_tag_exists_in_schema(valid_tag3) + # invalid_tag1_results = self.semantic_tag_validator.check_tag_exists_in_schema(invalid_tag1) + # invalid_tag2_results = self.semantic_tag_validator.check_tag_exists_in_schema(invalid_tag2) + # invalid_tag3_results = self.semantic_tag_validator.check_tag_exists_in_schema(invalid_tag3) + self.assertEqual(valid_tag1_results, True) + self.assertEqual(valid_tag2_results, True) + self.assertEqual(valid_tag3_results, True) + self.assertEqual(invalid_tag1_results, False) + self.assertEqual(invalid_tag2_results, False) + self.assertEqual(invalid_tag3_results, False) + + +class TestSchemaUtilityFunctions(TestHedBase): + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.3.0") + + def test_correctly_determine_tag_takes_value(self): + value_tag1 = HedTag('Distance/35 px', hed_schema=self.hed_schema) + value_tag2 = HedTag('id/35', hed_schema=self.hed_schema) + value_tag3 = HedTag('duration/#', hed_schema=self.hed_schema) + no_value_tag1 = HedTag('something', hed_schema=self.hed_schema) + no_value_tag2 = HedTag('attribute/color/black', hed_schema=self.hed_schema) + no_value_tag3 = HedTag('participant/#', hed_schema=self.hed_schema) + value_tag1_result = value_tag1.is_takes_value_tag() + value_tag2_result = value_tag2.is_takes_value_tag() + value_tag3_result = value_tag3.is_takes_value_tag() + no_value_tag1_result = no_value_tag1.is_takes_value_tag() + no_value_tag2_result = no_value_tag2.is_takes_value_tag() + no_value_tag3_result = no_value_tag3.is_takes_value_tag() + self.assertEqual(value_tag1_result, True) + self.assertEqual(value_tag2_result, True) + self.assertEqual(value_tag3_result, True) + self.assertEqual(no_value_tag1_result, False) + self.assertEqual(no_value_tag2_result, False) + self.assertEqual(no_value_tag3_result, False) + + def test_should_determine_default_unit(self): + unit_class_tag1 = HedTag('duration/35 ms', hed_schema=self.hed_schema) + # unit_class_tag2 = HedTag('participant/effect/cognitive/reward/11 dollars', + # schema=self.schema) + no_unit_class_tag = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) + no_value_tag = HedTag('Black', hed_schema=self.hed_schema) + unit_class_tag1_result = unit_class_tag1.default_unit + # unit_class_tag2_result = unit_class_tag2.default_unit + no_unit_class_tag_result = no_unit_class_tag.default_unit + no_value_tag_result = no_value_tag.default_unit + self.assertEqual(unit_class_tag1_result.name, 's') + # self.assertEqual(unit_class_tag2_result, '$') + self.assertEqual(no_unit_class_tag_result, None) + self.assertEqual(no_value_tag_result, None) + + def test_correctly_determine_tag_unit_classes(self): + unit_class_tag1 = HedTag('distance/35 px', hed_schema=self.hed_schema) + # Todo: Make a schema with a currency unit to test this + # unit_class_tag2 = HedTag('reward/$10.55', schema=self.schema) + unit_class_tag3 = HedTag('duration/#', hed_schema=self.hed_schema) + no_unit_class_tag = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) + unit_class_tag1_result = list(unit_class_tag1.unit_classes.keys()) + # unit_class_tag2_result = list(unit_class_tag2.get_tag_unit_class_units()) + unit_class_tag3_result = list(unit_class_tag3.unit_classes.keys()) + no_unit_class_tag_result = list(no_unit_class_tag.unit_classes.keys()) + self.assertCountEqual(unit_class_tag1_result, ['physicalLengthUnits']) + # self.assertCountEqual(unit_class_tag2_result, ['currency']) + self.assertCountEqual(unit_class_tag3_result, ['timeUnits']) + self.assertEqual(no_unit_class_tag_result, []) + + def test_determine_tags_legal_units(self): + unit_class_tag1 = HedTag('distance/35 px', hed_schema=self.hed_schema) + # todo: add this back in when we have a currency unit or make a test for one. + # unit_class_tag2 = HedTag('reward/$10.55', schema=self.schema) + no_unit_class_tag = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) + unit_class_tag1_result = unit_class_tag1.get_tag_unit_class_units() + # unit_class_tag2_result = unit_class_tag2.get_tag_unit_class_units() + no_unit_class_tag_result = no_unit_class_tag.get_tag_unit_class_units() + self.assertCountEqual(sorted(unit_class_tag1_result), sorted([ + 'inch', + 'm', + 'foot', + 'metre', + 'meter', + 'mile', + ])) + # self.assertCountEqual(unit_class_tag2_result, [ + # 'dollar', + # '$', + # 'point', + # 'fraction', + # ]) + self.assertEqual(no_unit_class_tag_result, []) + + def test_strip_off_units_from_value(self): + # todo: add this back in when we have a currency unit or make a test for one. + # dollars_string_no_space = HedTag('Participant/Effect/Cognitive/Reward/$25.99', schema=self.schema) + # dollars_string = HedTag('Participant/Effect/Cognitive/Reward/$ 25.99', schema=self.schema) + # dollars_string_invalid = HedTag('Participant/Effect/Cognitive/Reward/25.99$', schema=self.schema) + volume_string_no_space = HedTag('Volume/100m^3', hed_schema=self.hed_schema) + volume_string = HedTag('Volume/100 m^3', hed_schema=self.hed_schema) + prefixed_volume_string = HedTag('Volume/100 cm^3', hed_schema=self.hed_schema) + invalid_volume_string = HedTag('Volume/200 cm', hed_schema=self.hed_schema) + invalid_distance_string = HedTag('Distance/200 M', hed_schema=self.hed_schema) + # currency_units = { + # 'currency':self.schema.unit_classes['currency'] + # } + volume_units = { + 'volume': self.hed_schema.unit_classes['volumeUnits'] + } + distance_units = { + 'distance': self.hed_schema.unit_classes['physicalLengthUnits'] + } + # stripped_dollars_string_no_space = dollars_string_no_space._get_tag_units_portion(currency_units) + # stripped_dollars_string = dollars_string._get_tag_units_portion(currency_units) + # stripped_dollars_string_invalid = dollars_string_invalid._get_tag_units_portion(currency_units) + stripped_volume_string, _, _ = HedTag._get_tag_units_portion(volume_string.extension, volume_units) + stripped_volume_string_no_space, _, _ = HedTag._get_tag_units_portion(volume_string_no_space.extension, + volume_units) + stripped_prefixed_volume_string, _, _ = HedTag._get_tag_units_portion(prefixed_volume_string.extension, + volume_units) + stripped_invalid_volume_string, _, _ = HedTag._get_tag_units_portion(invalid_volume_string.extension, + volume_units) + stripped_invalid_distance_string, _, _ = HedTag._get_tag_units_portion(invalid_distance_string.extension, + distance_units) + # self.assertEqual(stripped_dollars_string_no_space, None) + # self.assertEqual(stripped_dollars_string, '25.99') + # self.assertEqual(stripped_dollars_string_invalid, None) + self.assertEqual(stripped_volume_string, '100') + self.assertEqual(stripped_volume_string_no_space, None) + self.assertEqual(stripped_prefixed_volume_string, '100') + self.assertEqual(stripped_invalid_volume_string, None) + self.assertEqual(stripped_invalid_distance_string, None) + + def test_determine_allows_extensions(self): + extension_tag1 = HedTag('boat', hed_schema=self.hed_schema) + no_extension_tag1 = HedTag('duration/22 s', hed_schema=self.hed_schema) + no_extension_tag2 = HedTag('id/45', hed_schema=self.hed_schema) + no_extension_tag3 = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) + extension_tag1_result = extension_tag1.has_attribute(HedKey.ExtensionAllowed) + no_extension_tag1_result = no_extension_tag1.has_attribute(HedKey.ExtensionAllowed) + no_extension_tag2_result = no_extension_tag2.has_attribute(HedKey.ExtensionAllowed) + no_extension_tag3_result = no_extension_tag3.has_attribute(HedKey.ExtensionAllowed) + self.assertEqual(extension_tag1_result, True) + self.assertEqual(no_extension_tag1_result, False) + self.assertEqual(no_extension_tag2_result, False) + self.assertEqual(no_extension_tag3_result, False) + + def test_get_as_default_units(self): + tag = HedTag("Duration/300 ms", hed_schema=self.hed_schema) + self.assertAlmostEqual(0.3, tag.value_as_default_unit()) + + tag2 = HedTag("Duration/300", hed_schema=self.hed_schema) + self.assertAlmostEqual(300, tag2.value_as_default_unit()) + + tag3 = HedTag("Duration/300 m", hed_schema=self.hed_schema) + self.assertEqual(None, tag3.value_as_default_unit()) + + tag4 = HedTag("IntensityTakesValue/300", hed_schema=util_create_schemas.load_schema_intensity()) + self.assertEqual(300, tag4.value_as_default_unit()) + + tag5 = HedTag("IntensityTakesValue/300 cd", hed_schema=util_create_schemas.load_schema_intensity()) + self.assertEqual(None, tag5.value_as_default_unit()) diff --git a/tests/models/test_query_handler.py b/tests/models/test_query_handler.py index 7989b301..707fd4fb 100644 --- a/tests/models/test_query_handler.py +++ b/tests/models/test_query_handler.py @@ -1,774 +1,775 @@ -import unittest -from hed.models.hed_string import HedString -from hed.models.query_handler import QueryHandler -import os -from hed import schema -from hed import HedTag - - -# Override the tag terms function for testing purposes when we don't have a schema -def new_init(self, *args, **kwargs): - old_tag_init(self, *args, **kwargs) - if not self.tag_terms: - self.tag_terms = (str(self).lower(),) - -old_tag_init = HedTag.__init__ -HedTag.__init__ = new_init - - -class TestParser(unittest.TestCase): - @classmethod - def setUpClass(cls): - base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) - cls.base_data_dir = base_data_dir - hed_xml_file = os.path.join(base_data_dir, "schema_tests/HED8.0.0t.xml") - cls.hed_schema = schema.load_schema(hed_xml_file) - - def base_test(self, parse_expr, search_strings): - expression = QueryHandler(parse_expr) - - # print(f"Search Pattern: {expression._org_string} - {str(expression.tree)}") - for string, expected_result in search_strings.items(): - hed_string = HedString(string, self.hed_schema) - result2 = expression.search(hed_string) - # print(f"\tSearching string '{str(hed_string)}'") - # if result2: - # print(f"\t\tFound as group(s) {str([str(r) for r in result2])}") - self.assertEqual(bool(result2), expected_result) - - def test_broken_search_strings(self): - test_search_strings = [ - "A &&", - "(A && B", - "&& B", - "A, ", - ", A", - "A)" - ] - for string in test_search_strings: - with self.assertRaises(ValueError) as context: - QueryHandler(string) - self.assertTrue(context.exception.args[0]) - - def test_finding_tags(self): - test_strings = { - "Item, (Clear-throat)": True, - "(Item, (Clear-throat))": True, - "Item, Clear-throat": True, - "Agent, Clear-throat": True, - "Agent, Event": False, - } - self.base_test("(Item || Agent) && Action", test_strings) - - def test_finding_tags_wildcards(self): - test_strings = { - "Def/Def1": True, - "Def/Def2": True, - "Def/Def1/Value": True, - } - self.base_test("Def", test_strings) - test_strings = { - "Def/Def1": True, - "Def/Def2": True, - "Def/Def1/Value": True, - } - self.base_test("Def/Def*", test_strings) - test_strings = { - "Def/Def1": True, - "Def/Def2": False, - "Def/Def1/Value": False, - } - self.base_test("Def/Def1", test_strings) - test_strings = { - "Def/Def1": True, - "Def/Def2": False, - "Def/Def1/Value": True, - } - self.base_test("Def/Def1*", test_strings) - test_strings = { - "Def/Def1": False, - "Def/Def2": False, - "Def/Def1/Value": True, - } - self.base_test("Def/Def1/*", test_strings) - - def test_exact_term(self): - test_strings = { - "Event": True, - "Sensory-event": False, - "Event/ext": False - } - self.base_test('"Event"', test_strings) - - def test_actual_wildcard(self): - test_strings = { - "A, B, C": True, - "A, B": True, - "A, B, (C)": True, - } - self.base_test("A, B", test_strings) - - def test_finding_tags2(self): - test_strings = { - "Item, (Clear-throat)": True, - "(Item, (Clear-throat))": True, - "Item, Clear-throat": False, - "Agent, Clear-throat": False, - "Agent, Event": False, - "Agent, (Event)": True, - "(Item), (Event)": True - } - self.base_test("(Item || Agent) && {Action || Event}", test_strings) - - def test_exact_group(self): - test_strings = { - "A, B": False, - "(A, B)": True, - "(A, (B))": False, - "(A, (B, C))": False, - "(A), (A, B)": True, - "(A, B), (A)": True, - "(A, B, (C, D))": True, - "(A, B, C)": True - } - self.base_test("{a, b}", test_strings) - - def test_exact_group_simple_complex(self): - test_strings = { - "(A, C)": False, - "(A, (C))": True, - "((A, C))": False, - "A, B, C, D": False, - "(A, B, C, D)": False, - "(A, B, (C, D))": True, - "(A, B, ((C, D)))": False, - "(E, F, (A, B, (C, D)))": True, - "(A, B, (E, F, (C, D)))": False, # TODO: Should this be True? [[c]] isn't directly inside an a group. - } - self.base_test("{a, {c} }", test_strings) - - def test_exact_group_complex(self): - test_strings = { - "A, B, C, D": False, - "(A, B, C, D)": False, - "(A, B, (C, D))": True, - "(A, B, ((C, D)))": False, - "(E, F, (A, B, (C, D)))": True, - } - self.base_test("{a, b, {c, d} }", test_strings) - - def test_duplicate_search(self): - test_strings = { - "(Event)": False, - "(Event, Agent-action)": True, - - } - self.base_test("Event && Event", test_strings) - - def test_duplicate_search_or(self): - test_strings = { - "(Event)": True, - "(Event, Agent-action)": True, - - } - self.base_test("Event || Event", test_strings) - - def test_exact_group_complex_split(self): - test_strings = { - "A, B, C, D": False, - "(A, B, C, D)": False, - "((A, B, C, D))": False, - "(A, B, (C, D))": False, - "(A, B, ((C, D)))": False, - "(E, F, (A, B, (C, D)))": False, - "((A, B), (C, D))": True, - } - self.base_test("{ {a, b}, {c, d} }", test_strings) - - def test_mixed_group_split(self): - test_strings = { - "(Event, Clear-throat)": False, - "((Event), (Clear-throat))": True, - "((Event), ((Clear-throat)))": True, - "((Event, Clear-throat))": False, - } - self.base_test("{ [Event], [Action] }", test_strings) - - def test_exact_group_split(self): - test_strings = { - "(Event, Clear-throat)": False, - "((Event), (Clear-throat))": True, - "((Event), ((Clear-throat)))": False, - "((Event, Clear-throat))": False, - } - self.base_test("{ {Event}, {Action} }", test_strings) - - def test_exact_group_split_or(self): - test_strings = { - "(A, D)": False, - "((A), (D))": True, - "((A), ((D)))": True, - "((A, D))": True, - } - self.base_test("{ {a} || {d} }", test_strings) - - def test_exact_group_split_or_negation(self): - test_strings = { - # "(Event, Clear-throat)": False, - "((Event), (Clear-throat))": True, - "((Event))": False, - "((Event), ((Clear-throat)))": True, - "((Event, Clear-throat))": False, - } - # Need to think this through more. How do you exact match a negative tag? - self.base_test("{ {~Event} }", test_strings) - - def test_exact_group_split_or_negation_dual(self): - test_strings = { - "(A, B)": False, - "((A), (B))": False, - "((A))": False, - "((A), ((B)))": True, # TODO: must all result groups have tags? True because of ((B)) group with no tags. - "((A, B))": False, - "((A), (C))": True, - "((A), (B, C))": False, - "((A), ((B), C))": True, - } - self.base_test("{ {~a && ~b} }", test_strings) - - def test_exact_group_split_or_negation_dual2(self): - test_strings = { - "(A, B)": False, - "((A), (B))": False, - "((A))": False, - "((A), ((B)))": True, # TODO: must all result groups have tags? True because of ((B)) group with no tags. - "((A, B))": False, - "((A), (C))": True, - "((A), (B, C))": False, - "((A), ((B), C))": True, - } - self.base_test("{ {~(a || b)} }", test_strings) - - def test_exact_group_split_or_negation_complex(self): - test_strings = { - "(A, B), (D)": False, - "((A), (B)), (D)": False, - "((A)), (D)": False, - "((A), ((B))), (D)": True, - "((A), ((B))), (H)": True, - "((A, B)), (D)": False, - "((A), (C)), (D)": True, - "((A), (B, C)), (D)": False, - "((A), (B, C)), (H)": False, - } - self.base_test("{ {~(a || b)} } && {D || ~F}", test_strings) - - # TODO: Should this work, and what should it mean? - # Right now this is always true, since there is at least one group without ", (a)" in every string. - def test_exact_group_negation(self): - test_strings = { - "(A, D)": True, - "((A), (D))": True, - "((A))": True, - "((A), ((D)))": True, - "((A, D))": True, - } - self.base_test("{ ~{a} }", test_strings) - - def test_exact_group_negation2(self): - test_strings = { - "(A, D, B)": True, - "((A), (D), B)": False, - "((A))": False, - "((A), ((D, B)))": True, - "((A, D))": False, - "(B, (D))": True, - "(B)": True, - "((A), B)": False - } - self.base_test("{ ~{a}, b}", test_strings) - - def test_exact_group_negation3(self): - test_strings = { - "(A, D, B)": False, - "((A), (D), B)": True, - "((A))": False, - "((A), ((D, B)))": True, - "((A, D))": False, - "(B, (D))": True, - "(B)": True, - "((A), B)": True - } - self.base_test("{ ~a && b}", test_strings) - - def test_exact_group_negation4(self): - test_strings = { - "(A, D, B)": False, - "((A), (D), B)": False, - "((A))": False, - "((A), ((D, B)))": False, - "((A, D))": False, - "(B)": True, - "(B, (D))": True, - "((A), B)": False - } - self.base_test("{ @c && @a && b: ???}", test_strings) - - def test_exact_group_negation5(self): - test_string = "{ ~a && b:}" - with self.assertRaises(ValueError) as context: - QueryHandler(test_string) - self.assertTrue(context.exception.args[0]) - - def test_mixed_group_complex_split(self): - test_strings = { - "A, B, C, D": False, - "(A, B), (C, D)": False, - "(A, B, C, D)": False, - "(A, B, (C, D))": False, - "(A, B, ((C, D)))": False, - "(E, F, (A, B, (C, D)))": False, - "((A, B), (C, D))": True, - "((A, B, C, D))": False, - } - self.base_test("{ [a, b], [c, d] }", test_strings) - - def test_exact_group_complex2(self): - test_strings = { - "A, B, C": False, - "(A, B, C)": False, - "(A, B, (C)), (A, B)": True, - "(A, B), (A, B, (C))": True, - "(A, B), (B, (C))": False, - "(B, (C)), (A, B, (C))": True, - "(A, B, (A, (C)))": False - } - self.base_test("{a, b, {c} }", test_strings) - - def test_containing_group_complex2(self): - test_strings = { - "A, B, C": False, - "(A, B, C)": False, - "(A, B, (C)), (A, B)": True, - "(A, B), (A, B, (C))": True, - "(A, B), (B, (C))": False, - "(B, (C)), (A, B, (C))": True - } - self.base_test("[a, b, [c] ]", test_strings) - - def test_containing_group(self): - test_strings = { - "A, B": False, - "(A, B)": True, - "(A, B), (A, B)": True, - "(A, (B))": True, - "(A, (B, C))": True, - "(A), (B)": False - } - self.base_test("[a, b]", test_strings) - - def test_containing_group_simple_complex(self): - test_strings = { - "A, B, C, D": False, - "(A, C)": False, - "(A, B, (C, D))": True, - "(A, (B))": False, - "(A, (C))": True, - "(A, (B, C))": True, - "(A), (B)": False, - "(C, (A))": False, - "(A, ((C)))": True - } - self.base_test("[a, [c] ]", test_strings) - - def test_containing_group_complex(self): - test_strings = { - "A, B, C, D": False, - "(A, B, C, D)": False, - "(A, B, (C, D))": True, - "(A, (B))": False, - "(A, (B, C))": False, - "(A), (B)": False - } - self.base_test("[a, b, [c, d] ]", test_strings) - - def test_mixed_groups(self): - test_strings = { - "(A, B), (C, D, (E, F))": True - } - self.base_test("{a}, { {e, f} }", test_strings) - - test_strings = { - "(A, B), (C, D, (E, F))": False - } - # This example works because it finds the group containing (c, d, (e, f)), rather than the ef group - self.base_test("{a}, [e, {f} ]", test_strings) - - def test_and(self): - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": True, - "A, C": False, - "B, C": False - } - self.base_test("a && b", test_strings) - - def test_or(self): - test_strings = { - "A": True, - "B": True, - "C": False, - "A, B": True, - "A, C": True, - "B, C": True - } - self.base_test("a || b", test_strings) - - def test_and_wildcard(self): - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": False, - "B, C": False, - "A, B, C": True, - "D, A, B": True, - "A, B, (C)": True - } - self.base_test("a && b && ?", test_strings) - - def test_and_wildcard_nothing_else(self): - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": False, - "B, C": False, - "A, B, C": False, - "D, A, B": False, - "A, B, (C)": False, - "(A, B), C": True, - "(A, B, C)": True, - } - self.base_test("{a && b}", test_strings) - - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": False, - "B, C": False, - "A, B, C": False, - "D, A, B": False, - "A, B, (C)": False, - "(A, B), C": True, - "(A, B, C)": False, - } - self.base_test("{a && b:}", test_strings) - - def test_and_logical_wildcard(self): - test_strings = { - "A": False, - "A, B": False, - "A, B, (C)": True, - "A, B, C": True, - } - self.base_test("(A, B) && ?", test_strings) - self.base_test("A, B && ?", test_strings) - - test_strings = { - "A": True, - "A, C": True, - "A, B, C": True, - "B, C": False, - "B, C, D, E": True - } - self.base_test("(a || (b && c) && ?)", test_strings) - - self.base_test("(a || (b && c && ?) && ?)", test_strings) - - def test_double_wildcard(self): - test_strings = { - "A": False, - "A, B, (C)": True, - "A, B, C": True, - "A, (B), (C)": False, - } - self.base_test("A && ? && ??", test_strings) - - def test_or_wildcard(self): - test_strings = { - "A": True, - "B": False, - "C": False, - "A, B": True, - "A, C": True, - "B, C": True, - "A, B, C": True, - "D, A, B": True, - "A, B, (C)": True - } - self.base_test("a || b, ?", test_strings) - - def test_and_wildcard_tags(self): - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": False, - "B, C": False, - "A, B, C": True, - "D, A, B": True, - "A, B, (C)": False - } - self.base_test("a && b, ??", test_strings) - - def test_and_wildcard_groups(self): - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": False, - "B, C": False, - "A, B, C": False, - "D, A, B": False, - "A, B, (C)": True - } - self.base_test("a && b, ???", test_strings) - - def test_complex_wildcard_groups(self): - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": False, - "B, C": False, - "A, B, C": False, - "D, A, B": False, - "A, B, (C)": False, - "(A, B, (C))": False, - "(A, B, (C)), D": True, - "(A, B, (C)), (D)": True, - "((A, B), (C)), E": False, # todo: should discuss this case. Is this correct to be False? - "((A, B), C), E": False, - } - self.base_test("[a && b, ???], ?", test_strings) - - def test_wildcard_new(self): - # todo: does it make sense this behavior varies? I think so - test_strings = { - "((A, B), C)": False, - } - self.base_test("[a && b, ???]", test_strings) - - test_strings = { - "((A, B), C)": False, - } - self.base_test("[a && b && c]", test_strings) - - def test_complex_wildcard_groups2(self): - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": False, - "B, C": False, - "A, B, C": False, - "D, A, B": False, - "A, B, (C)": False, - "(A, B, (C))": False, - "(A, B, (C)), D": False, - "(A, B, (C)), (D), E": True, - } - self.base_test("[a && b, ???], E, ?", test_strings) - - def test_and_or(self): - test_strings = { - "A": False, - "B": False, - "C": True, - "A, B": True, - "A, C": True, - "B, C": True - } - self.base_test("a && b || c", test_strings) - - test_strings = { - "A": False, - "B": False, - "C": True, - "A, B": True, - "A, C": True, - "B, C": True - } - self.base_test("(a && b) || c", test_strings) - - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": True, - "A, C": True, - "B, C": False - } - self.base_test("a && (b || c)", test_strings) - - test_strings = { - "A": True, - "B": False, - "C": False, - "A, B": True, - "A, C": True, - "B, C": True - } - self.base_test("a || b && c", test_strings) - - test_strings = { - "A": True, - "B": False, - "C": False, - "A, B": True, - "A, C": True, - "B, C": True - } - self.base_test("a || (b && c)", test_strings) - - test_strings = { - "A": False, - "B": False, - "C": False, - "A, B": False, - "A, C": True, - "B, C": True - } - self.base_test("(a || b) && c", test_strings) - - def test_logical_negation(self): - expression = QueryHandler("~a") - hed_string = HedString("A", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), False) - hed_string = HedString("B", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), True) - - expression = QueryHandler("~a && b") - hed_string = HedString("A", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), False) - hed_string = HedString("B", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("A, B", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), False) - - expression = QueryHandler("~( (a || b) && c)") - hed_string = HedString("A", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("B", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("C", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("A, B", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("A, C", self.hed_schema) - self.assertEqual(bool(expression.search(hed_string)), False) - - def test_not_in_line(self): - test_strings = { - "A": True, - "B": False, - "C": True, - "A, B": False, - "A, C": True, - "B, C": False, - "A, B, C": False, - "D, A, B": False, - "A, B, (C)": False, - "(A, B, (C))": False, - "(A, B, (C)), D": False, - "(A, B, (C)), (D), E": False, - } - self.base_test("@B", test_strings) - - def test_not_in_line2(self): - test_strings = { - "A": False, - "B": False, - "C": True, - "A, B": False, - "A, C": True, - "B, C": False, - "A, B, C": False, - "D, A, B": False, - "A, B, (C)": False, - "(A, B, (C))": False, - "(A, B, (C)), D": False, - "(A, B, (C)), (D), E": False, - } - self.base_test("@B && C", test_strings) - - def test_not_in_line3(self): - test_strings = { - "A": True, - "B": True, - "C": False, - "A, B": True, - "A, C": False, - "B, C": True, - "A, B, C": True, - "D, A, B": True, - "A, B, (C)": True, - "(A, B, (C))": True, - "(A, B, (C)), D": True, - "(A, B, (C)), (D), E": True, - } - self.base_test("@C || B", test_strings) - - def test_optional_exact_group(self): - test_strings = { - "(A, C)": True, - } - self.base_test("{a && (b || c)}", test_strings) - - test_strings = { - "(A, B, C, D)": True, - } - self.base_test("{a && b: c && d}", test_strings) - - test_strings = { - "(A, B, C)": True, - "(A, B, C, D)": False, - } - self.base_test("{a && b: c || d}", test_strings) - - test_strings = { - "(A, C)": True, - "(A, D)": True, - "(A, B, C)": False, - "(A, B, C, D)": False, - } - self.base_test("{a || b: c || d}", test_strings) - - test_strings = { - "(Onset, (Def-expand/taco))": True, - "(Onset, Def-expand/taco)": False, - "(Onset, Def/taco, (Def-expand/taco))": True, # this one validates - "(Onset, (Def/taco))": False, - "(Onset, (Def-expand/taco, (Label/DefContents)))": True, - "(Onset, (Def-expand/taco), (Label/OnsetContents))": True, - "(Onset, (Def-expand/taco), (Label/OnsetContents, Description/MoreContents))": True, - "Onset, (Def-expand/taco), (Label/OnsetContents)": False, - "(Onset, (Def-expand/taco), Label/OnsetContents)": False, - } - self.base_test("{(Onset || Offset), (Def || {Def-expand}): ???}", test_strings) - test_strings = { - "(A, B)": True, - "(A, B, C)": True - } - self.base_test("{a || b}", test_strings) \ No newline at end of file +import unittest +from hed.models.hed_string import HedString +from hed.models.query_handler import QueryHandler +import os +from hed import schema +from hed import HedTag + + +# Override the tag terms function for testing purposes when we don't have a schema +def new_init(self, *args, **kwargs): + old_tag_init(self, *args, **kwargs) + if not self.tag_terms: + self.tag_terms = (str(self).lower(),) + + +old_tag_init = HedTag.__init__ +HedTag.__init__ = new_init + + +class TestParser(unittest.TestCase): + @classmethod + def setUpClass(cls): + base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) + cls.base_data_dir = base_data_dir + hed_xml_file = os.path.join(base_data_dir, "schema_tests/HED8.0.0t.xml") + cls.hed_schema = schema.load_schema(hed_xml_file) + + def base_test(self, parse_expr, search_strings): + expression = QueryHandler(parse_expr) + + # print(f"Search Pattern: {expression._org_string} - {str(expression.tree)}") + for string, expected_result in search_strings.items(): + hed_string = HedString(string, self.hed_schema) + result2 = expression.search(hed_string) + # print(f"\tSearching string '{str(hed_string)}'") + # if result2: + # print(f"\t\tFound as group(s) {str([str(r) for r in result2])}") + self.assertEqual(bool(result2), expected_result) + + def test_broken_search_strings(self): + test_search_strings = [ + "A &&", + "(A && B", + "&& B", + "A, ", + ", A", + "A)" + ] + for string in test_search_strings: + with self.assertRaises(ValueError) as context: + QueryHandler(string) + self.assertTrue(context.exception.args[0]) + + def test_finding_tags(self): + test_strings = { + "Item, (Clear-throat)": True, + "(Item, (Clear-throat))": True, + "Item, Clear-throat": True, + "Agent, Clear-throat": True, + "Agent, Event": False, + } + self.base_test("(Item || Agent) && Action", test_strings) + + def test_finding_tags_wildcards(self): + test_strings = { + "Def/Def1": True, + "Def/Def2": True, + "Def/Def1/Value": True, + } + self.base_test("Def", test_strings) + test_strings = { + "Def/Def1": True, + "Def/Def2": True, + "Def/Def1/Value": True, + } + self.base_test("Def/Def*", test_strings) + test_strings = { + "Def/Def1": True, + "Def/Def2": False, + "Def/Def1/Value": False, + } + self.base_test("Def/Def1", test_strings) + test_strings = { + "Def/Def1": True, + "Def/Def2": False, + "Def/Def1/Value": True, + } + self.base_test("Def/Def1*", test_strings) + test_strings = { + "Def/Def1": False, + "Def/Def2": False, + "Def/Def1/Value": True, + } + self.base_test("Def/Def1/*", test_strings) + + def test_exact_term(self): + test_strings = { + "Event": True, + "Sensory-event": False, + "Event/ext": False + } + self.base_test('"Event"', test_strings) + + def test_actual_wildcard(self): + test_strings = { + "A, B, C": True, + "A, B": True, + "A, B, (C)": True, + } + self.base_test("A, B", test_strings) + + def test_finding_tags2(self): + test_strings = { + "Item, (Clear-throat)": True, + "(Item, (Clear-throat))": True, + "Item, Clear-throat": False, + "Agent, Clear-throat": False, + "Agent, Event": False, + "Agent, (Event)": True, + "(Item), (Event)": True + } + self.base_test("(Item || Agent) && {Action || Event}", test_strings) + + def test_exact_group(self): + test_strings = { + "A, B": False, + "(A, B)": True, + "(A, (B))": False, + "(A, (B, C))": False, + "(A), (A, B)": True, + "(A, B), (A)": True, + "(A, B, (C, D))": True, + "(A, B, C)": True + } + self.base_test("{a, b}", test_strings) + + def test_exact_group_simple_complex(self): + test_strings = { + "(A, C)": False, + "(A, (C))": True, + "((A, C))": False, + "A, B, C, D": False, + "(A, B, C, D)": False, + "(A, B, (C, D))": True, + "(A, B, ((C, D)))": False, + "(E, F, (A, B, (C, D)))": True, + "(A, B, (E, F, (C, D)))": False, # TODO: Should this be True? [[c]] isn't directly inside an a group. + } + self.base_test("{a, {c} }", test_strings) + + def test_exact_group_complex(self): + test_strings = { + "A, B, C, D": False, + "(A, B, C, D)": False, + "(A, B, (C, D))": True, + "(A, B, ((C, D)))": False, + "(E, F, (A, B, (C, D)))": True, + } + self.base_test("{a, b, {c, d} }", test_strings) + + def test_duplicate_search(self): + test_strings = { + "(Event)": False, + "(Event, Agent-action)": True, + + } + self.base_test("Event && Event", test_strings) + + def test_duplicate_search_or(self): + test_strings = { + "(Event)": True, + "(Event, Agent-action)": True, + + } + self.base_test("Event || Event", test_strings) + + def test_exact_group_complex_split(self): + test_strings = { + "A, B, C, D": False, + "(A, B, C, D)": False, + "((A, B, C, D))": False, + "(A, B, (C, D))": False, + "(A, B, ((C, D)))": False, + "(E, F, (A, B, (C, D)))": False, + "((A, B), (C, D))": True, + } + self.base_test("{ {a, b}, {c, d} }", test_strings) + + def test_mixed_group_split(self): + test_strings = { + "(Event, Clear-throat)": False, + "((Event), (Clear-throat))": True, + "((Event), ((Clear-throat)))": True, + "((Event, Clear-throat))": False, + } + self.base_test("{ [Event], [Action] }", test_strings) + + def test_exact_group_split(self): + test_strings = { + "(Event, Clear-throat)": False, + "((Event), (Clear-throat))": True, + "((Event), ((Clear-throat)))": False, + "((Event, Clear-throat))": False, + } + self.base_test("{ {Event}, {Action} }", test_strings) + + def test_exact_group_split_or(self): + test_strings = { + "(A, D)": False, + "((A), (D))": True, + "((A), ((D)))": True, + "((A, D))": True, + } + self.base_test("{ {a} || {d} }", test_strings) + + def test_exact_group_split_or_negation(self): + test_strings = { + # "(Event, Clear-throat)": False, + "((Event), (Clear-throat))": True, + "((Event))": False, + "((Event), ((Clear-throat)))": True, + "((Event, Clear-throat))": False, + } + # Need to think this through more. How do you exact match a negative tag? + self.base_test("{ {~Event} }", test_strings) + + def test_exact_group_split_or_negation_dual(self): + test_strings = { + "(A, B)": False, + "((A), (B))": False, + "((A))": False, + "((A), ((B)))": True, # TODO: must all result groups have tags? True because of ((B)) group with no tags. + "((A, B))": False, + "((A), (C))": True, + "((A), (B, C))": False, + "((A), ((B), C))": True, + } + self.base_test("{ {~a && ~b} }", test_strings) + + def test_exact_group_split_or_negation_dual2(self): + test_strings = { + "(A, B)": False, + "((A), (B))": False, + "((A))": False, + "((A), ((B)))": True, # TODO: must all result groups have tags? True because of ((B)) group with no tags. + "((A, B))": False, + "((A), (C))": True, + "((A), (B, C))": False, + "((A), ((B), C))": True, + } + self.base_test("{ {~(a || b)} }", test_strings) + + def test_exact_group_split_or_negation_complex(self): + test_strings = { + "(A, B), (D)": False, + "((A), (B)), (D)": False, + "((A)), (D)": False, + "((A), ((B))), (D)": True, + "((A), ((B))), (H)": True, + "((A, B)), (D)": False, + "((A), (C)), (D)": True, + "((A), (B, C)), (D)": False, + "((A), (B, C)), (H)": False, + } + self.base_test("{ {~(a || b)} } && {D || ~F}", test_strings) + + # TODO: Should this work, and what should it mean? + # Right now this is always true, since there is at least one group without ", (a)" in every string. + def test_exact_group_negation(self): + test_strings = { + "(A, D)": True, + "((A), (D))": True, + "((A))": True, + "((A), ((D)))": True, + "((A, D))": True, + } + self.base_test("{ ~{a} }", test_strings) + + def test_exact_group_negation2(self): + test_strings = { + "(A, D, B)": True, + "((A), (D), B)": False, + "((A))": False, + "((A), ((D, B)))": True, + "((A, D))": False, + "(B, (D))": True, + "(B)": True, + "((A), B)": False + } + self.base_test("{ ~{a}, b}", test_strings) + + def test_exact_group_negation3(self): + test_strings = { + "(A, D, B)": False, + "((A), (D), B)": True, + "((A))": False, + "((A), ((D, B)))": True, + "((A, D))": False, + "(B, (D))": True, + "(B)": True, + "((A), B)": True + } + self.base_test("{ ~a && b}", test_strings) + + def test_exact_group_negation4(self): + test_strings = { + "(A, D, B)": False, + "((A), (D), B)": False, + "((A))": False, + "((A), ((D, B)))": False, + "((A, D))": False, + "(B)": True, + "(B, (D))": True, + "((A), B)": False + } + self.base_test("{ @c && @a && b: ???}", test_strings) + + def test_exact_group_negation5(self): + test_string = "{ ~a && b:}" + with self.assertRaises(ValueError) as context: + QueryHandler(test_string) + self.assertTrue(context.exception.args[0]) + + def test_mixed_group_complex_split(self): + test_strings = { + "A, B, C, D": False, + "(A, B), (C, D)": False, + "(A, B, C, D)": False, + "(A, B, (C, D))": False, + "(A, B, ((C, D)))": False, + "(E, F, (A, B, (C, D)))": False, + "((A, B), (C, D))": True, + "((A, B, C, D))": False, + } + self.base_test("{ [a, b], [c, d] }", test_strings) + + def test_exact_group_complex2(self): + test_strings = { + "A, B, C": False, + "(A, B, C)": False, + "(A, B, (C)), (A, B)": True, + "(A, B), (A, B, (C))": True, + "(A, B), (B, (C))": False, + "(B, (C)), (A, B, (C))": True, + "(A, B, (A, (C)))": False + } + self.base_test("{a, b, {c} }", test_strings) + + def test_containing_group_complex2(self): + test_strings = { + "A, B, C": False, + "(A, B, C)": False, + "(A, B, (C)), (A, B)": True, + "(A, B), (A, B, (C))": True, + "(A, B), (B, (C))": False, + "(B, (C)), (A, B, (C))": True + } + self.base_test("[a, b, [c] ]", test_strings) + + def test_containing_group(self): + test_strings = { + "A, B": False, + "(A, B)": True, + "(A, B), (A, B)": True, + "(A, (B))": True, + "(A, (B, C))": True, + "(A), (B)": False + } + self.base_test("[a, b]", test_strings) + + def test_containing_group_simple_complex(self): + test_strings = { + "A, B, C, D": False, + "(A, C)": False, + "(A, B, (C, D))": True, + "(A, (B))": False, + "(A, (C))": True, + "(A, (B, C))": True, + "(A), (B)": False, + "(C, (A))": False, + "(A, ((C)))": True + } + self.base_test("[a, [c] ]", test_strings) + + def test_containing_group_complex(self): + test_strings = { + "A, B, C, D": False, + "(A, B, C, D)": False, + "(A, B, (C, D))": True, + "(A, (B))": False, + "(A, (B, C))": False, + "(A), (B)": False + } + self.base_test("[a, b, [c, d] ]", test_strings) + + def test_mixed_groups(self): + test_strings = { + "(A, B), (C, D, (E, F))": True + } + self.base_test("{a}, { {e, f} }", test_strings) + + test_strings = { + "(A, B), (C, D, (E, F))": False + } + # This example works because it finds the group containing (c, d, (e, f)), rather than the ef group + self.base_test("{a}, [e, {f} ]", test_strings) + + def test_and(self): + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": True, + "A, C": False, + "B, C": False + } + self.base_test("a && b", test_strings) + + def test_or(self): + test_strings = { + "A": True, + "B": True, + "C": False, + "A, B": True, + "A, C": True, + "B, C": True + } + self.base_test("a || b", test_strings) + + def test_and_wildcard(self): + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": False, + "B, C": False, + "A, B, C": True, + "D, A, B": True, + "A, B, (C)": True + } + self.base_test("a && b && ?", test_strings) + + def test_and_wildcard_nothing_else(self): + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": False, + "B, C": False, + "A, B, C": False, + "D, A, B": False, + "A, B, (C)": False, + "(A, B), C": True, + "(A, B, C)": True, + } + self.base_test("{a && b}", test_strings) + + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": False, + "B, C": False, + "A, B, C": False, + "D, A, B": False, + "A, B, (C)": False, + "(A, B), C": True, + "(A, B, C)": False, + } + self.base_test("{a && b:}", test_strings) + + def test_and_logical_wildcard(self): + test_strings = { + "A": False, + "A, B": False, + "A, B, (C)": True, + "A, B, C": True, + } + self.base_test("(A, B) && ?", test_strings) + self.base_test("A, B && ?", test_strings) + + test_strings = { + "A": True, + "A, C": True, + "A, B, C": True, + "B, C": False, + "B, C, D, E": True + } + self.base_test("(a || (b && c) && ?)", test_strings) + + self.base_test("(a || (b && c && ?) && ?)", test_strings) + + def test_double_wildcard(self): + test_strings = { + "A": False, + "A, B, (C)": True, + "A, B, C": True, + "A, (B), (C)": False, + } + self.base_test("A && ? && ??", test_strings) + + def test_or_wildcard(self): + test_strings = { + "A": True, + "B": False, + "C": False, + "A, B": True, + "A, C": True, + "B, C": True, + "A, B, C": True, + "D, A, B": True, + "A, B, (C)": True + } + self.base_test("a || b, ?", test_strings) + + def test_and_wildcard_tags(self): + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": False, + "B, C": False, + "A, B, C": True, + "D, A, B": True, + "A, B, (C)": False + } + self.base_test("a && b, ??", test_strings) + + def test_and_wildcard_groups(self): + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": False, + "B, C": False, + "A, B, C": False, + "D, A, B": False, + "A, B, (C)": True + } + self.base_test("a && b, ???", test_strings) + + def test_complex_wildcard_groups(self): + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": False, + "B, C": False, + "A, B, C": False, + "D, A, B": False, + "A, B, (C)": False, + "(A, B, (C))": False, + "(A, B, (C)), D": True, + "(A, B, (C)), (D)": True, + "((A, B), (C)), E": False, # todo: should discuss this case. Is this correct to be False? + "((A, B), C), E": False, + } + self.base_test("[a && b, ???], ?", test_strings) + + def test_wildcard_new(self): + # todo: does it make sense this behavior varies? I think so + test_strings = { + "((A, B), C)": False, + } + self.base_test("[a && b, ???]", test_strings) + + test_strings = { + "((A, B), C)": False, + } + self.base_test("[a && b && c]", test_strings) + + def test_complex_wildcard_groups2(self): + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": False, + "B, C": False, + "A, B, C": False, + "D, A, B": False, + "A, B, (C)": False, + "(A, B, (C))": False, + "(A, B, (C)), D": False, + "(A, B, (C)), (D), E": True, + } + self.base_test("[a && b, ???], E, ?", test_strings) + + def test_and_or(self): + test_strings = { + "A": False, + "B": False, + "C": True, + "A, B": True, + "A, C": True, + "B, C": True + } + self.base_test("a && b || c", test_strings) + + test_strings = { + "A": False, + "B": False, + "C": True, + "A, B": True, + "A, C": True, + "B, C": True + } + self.base_test("(a && b) || c", test_strings) + + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": True, + "A, C": True, + "B, C": False + } + self.base_test("a && (b || c)", test_strings) + + test_strings = { + "A": True, + "B": False, + "C": False, + "A, B": True, + "A, C": True, + "B, C": True + } + self.base_test("a || b && c", test_strings) + + test_strings = { + "A": True, + "B": False, + "C": False, + "A, B": True, + "A, C": True, + "B, C": True + } + self.base_test("a || (b && c)", test_strings) + + test_strings = { + "A": False, + "B": False, + "C": False, + "A, B": False, + "A, C": True, + "B, C": True + } + self.base_test("(a || b) && c", test_strings) + + def test_logical_negation(self): + expression = QueryHandler("~a") + hed_string = HedString("A", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), False) + hed_string = HedString("B", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), True) + + expression = QueryHandler("~a && b") + hed_string = HedString("A", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), False) + hed_string = HedString("B", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), True) + hed_string = HedString("A, B", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), False) + + expression = QueryHandler("~( (a || b) && c)") + hed_string = HedString("A", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), True) + hed_string = HedString("B", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), True) + hed_string = HedString("C", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), True) + hed_string = HedString("A, B", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), True) + hed_string = HedString("A, C", self.hed_schema) + self.assertEqual(bool(expression.search(hed_string)), False) + + def test_not_in_line(self): + test_strings = { + "A": True, + "B": False, + "C": True, + "A, B": False, + "A, C": True, + "B, C": False, + "A, B, C": False, + "D, A, B": False, + "A, B, (C)": False, + "(A, B, (C))": False, + "(A, B, (C)), D": False, + "(A, B, (C)), (D), E": False, + } + self.base_test("@B", test_strings) + + def test_not_in_line2(self): + test_strings = { + "A": False, + "B": False, + "C": True, + "A, B": False, + "A, C": True, + "B, C": False, + "A, B, C": False, + "D, A, B": False, + "A, B, (C)": False, + "(A, B, (C))": False, + "(A, B, (C)), D": False, + "(A, B, (C)), (D), E": False, + } + self.base_test("@B && C", test_strings) + + def test_not_in_line3(self): + test_strings = { + "A": True, + "B": True, + "C": False, + "A, B": True, + "A, C": False, + "B, C": True, + "A, B, C": True, + "D, A, B": True, + "A, B, (C)": True, + "(A, B, (C))": True, + "(A, B, (C)), D": True, + "(A, B, (C)), (D), E": True, + } + self.base_test("@C || B", test_strings) + + def test_optional_exact_group(self): + test_strings = { + "(A, C)": True, + } + self.base_test("{a && (b || c)}", test_strings) + + test_strings = { + "(A, B, C, D)": True, + } + self.base_test("{a && b: c && d}", test_strings) + + test_strings = { + "(A, B, C)": True, + "(A, B, C, D)": False, + } + self.base_test("{a && b: c || d}", test_strings) + + test_strings = { + "(A, C)": True, + "(A, D)": True, + "(A, B, C)": False, + "(A, B, C, D)": False, + } + self.base_test("{a || b: c || d}", test_strings) + + test_strings = { + "(Onset, (Def-expand/taco))": True, + "(Onset, Def-expand/taco)": False, + "(Onset, Def/taco, (Def-expand/taco))": True, # this one validates + "(Onset, (Def/taco))": False, + "(Onset, (Def-expand/taco, (Label/DefContents)))": True, + "(Onset, (Def-expand/taco), (Label/OnsetContents))": True, + "(Onset, (Def-expand/taco), (Label/OnsetContents, Description/MoreContents))": True, + "Onset, (Def-expand/taco), (Label/OnsetContents)": False, + "(Onset, (Def-expand/taco), Label/OnsetContents)": False, + } + self.base_test("{(Onset || Offset), (Def || {Def-expand}): ???}", test_strings) + test_strings = { + "(A, B)": True, + "(A, B, C)": True + } + self.base_test("{a || b}", test_strings) diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index c8e8e5da..a2dc928b 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -1,172 +1,173 @@ -import unittest -import os -import io -import shutil - -from hed.errors import HedFileError, ValidationErrors -from hed.models import ColumnMetadata, HedString, Sidecar -from hed import schema -from hed.models import DefinitionDict -from hed.errors import ErrorHandler - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') - cls.base_data_dir = base_data_dir - hed_xml_file = os.path.join(base_data_dir, "schema_tests/HED8.0.0t.xml") - cls.hed_schema = schema.load_schema(hed_xml_file) - json_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events.json") - cls.json_filename = json_filename - json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") - cls.json_def_filename = json_def_filename - json_without_definitions_filename = \ - os.path.join(base_data_dir, "sidecar_tests/both_types_events_without_definitions.json") - json_errors_filename = os.path.join(base_data_dir, "sidecar_tests/json_errors.json") - json_errors_filename_minor = os.path.join(base_data_dir, "sidecar_tests/json_errors_minor.json") - cls.default_sidecar = Sidecar(json_filename) - cls.json_def_sidecar = Sidecar(json_def_filename) - cls.errors_sidecar = Sidecar(json_errors_filename) - cls.errors_sidecar_minor = Sidecar(json_errors_filename_minor) - cls.json_without_definitions_sidecar = Sidecar(json_without_definitions_filename) - - base_output_folder = os.path.realpath(os.path.join(os.path.dirname(__file__), "../data/tests_output/")) - cls.base_output_folder = base_output_folder - os.makedirs(base_output_folder, exist_ok=True) - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_file_not_found(self): - with self.assertRaises(HedFileError): - Sidecar('nonexistent_file.json') - - def test_invalid_input_type_int(self): - with self.assertRaises(HedFileError): - Sidecar(123) - - def test_invalid_input_type_dict(self): - with self.assertRaises(HedFileError): - Sidecar({'key': 'value'}) - - def test_invalid_filenames(self): - # Handle missing or invalid files. - invalid_json = "invalidxmlfile.json" - self.assertRaises(HedFileError, Sidecar, invalid_json) - - json_dict = None - try: - json_dict = Sidecar(None) - except HedFileError: - pass - self.assertTrue(len(json_dict.loaded_dict) == 0) - - json_dict = None - try: - json_dict = Sidecar("") - except HedFileError: - pass - self.assertTrue(len(json_dict.loaded_dict) == 0) - - def test_name(self): - invalid_json = "invalidxmlfile.json" - with self.assertRaises(HedFileError) as context: - Sidecar(invalid_json) - self.assertEqual(context.exception.args[0], 'fileNotFound') - - def test_add_json_string(self): - with open(self.json_filename) as file: - file_as_string = io.StringIO(file.read()) - json_file = Sidecar(file_as_string) - self.assertTrue(json_file) - - def test__iter__(self): - columns_target = 3 - columns_count = 0 - for column_data in self.default_sidecar: - self.assertIsInstance(column_data, ColumnMetadata) - columns_count += 1 - - self.assertEqual(columns_target, columns_count) - - def test_validate_column_group(self): - validation_issues = self.errors_sidecar.validate(self.hed_schema) - self.assertEqual(len(validation_issues), 4) - - validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema) - self.assertEqual(len(validation_issues2), 1) - - validation_issues = self.json_without_definitions_sidecar.validate(self.hed_schema) - self.assertEqual(len(validation_issues), 7) - - hed_string = HedString("(Definition/JsonFileDef/#, (Item/JsonDef1/#,Item/JsonDef1))", self.hed_schema) - extra_def_dict = DefinitionDict() - extra_def_dict.check_for_definitions(hed_string) - - validation_issues2 = self.json_without_definitions_sidecar.validate(self.hed_schema, extra_def_dicts=extra_def_dict) - # this removes one undef matched error - self.assertEqual(len(validation_issues2), 7) - - def test_duplicate_def(self): - sidecar = self.json_def_sidecar - - duplicate_dict = sidecar.extract_definitions(hed_schema=self.hed_schema) - issues = sidecar.validate(self.hed_schema, extra_def_dicts=duplicate_dict, error_handler=ErrorHandler(False)) - self.assertEqual(len(issues), 5) - self.assertTrue(issues[0]['code'], ValidationErrors.DEFINITION_INVALID) - - def test_save_load(self): - sidecar = Sidecar(self.json_def_filename) - save_filename = os.path.join(self.base_output_folder, "test_sidecar_save.json") - sidecar.save_as_json(save_filename) - - reloaded_sidecar = Sidecar(save_filename) - - for data1, data2 in zip(sidecar, reloaded_sidecar): - self.assertEqual(data1.source_dict, data2.source_dict) - - def test_save_load2(self): - sidecar = Sidecar(self.json_def_filename) - json_string = sidecar.get_as_json_string() - - reloaded_sidecar = Sidecar(io.StringIO(json_string)) - - for data1, data2 in zip(sidecar, reloaded_sidecar): - self.assertEqual(data1.source_dict, data2.source_dict) - - def test_merged_sidecar(self): - base_folder = self.base_data_dir + "sidecar_tests/" - combined_sidecar_json = base_folder + "test_merged_merged.json" - sidecar_json1 = base_folder + "test_merged1.json" - sidecar_json2 = base_folder + "test_merged2.json" - - sidecar = Sidecar([sidecar_json1, sidecar_json2]) - sidecar2 = Sidecar(combined_sidecar_json) - - self.assertEqual(sidecar.loaded_dict, sidecar2.loaded_dict) - - def test_set_hed_strings(self): - from hed.models import df_util - sidecar = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/short_tag_test.json")) - - for column_data in sidecar: - hed_strings = column_data.get_hed_strings() - df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag") - column_data.set_hed_strings(hed_strings) - sidecar_long = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/long_tag_test.json")) - self.assertEqual(sidecar.loaded_dict, sidecar_long.loaded_dict) - - sidecar = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/long_tag_test.json")) - - for column_data in sidecar: - hed_strings = column_data.get_hed_strings() - df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag") - column_data.set_hed_strings(hed_strings) - sidecar_short = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/short_tag_test.json")) - self.assertEqual(sidecar.loaded_dict, sidecar_short.loaded_dict) - - -if __name__ == '__main__': - unittest.main() +import unittest +import os +import io +import shutil + +from hed.errors import HedFileError, ValidationErrors +from hed.models import ColumnMetadata, HedString, Sidecar +from hed import schema +from hed.models import DefinitionDict +from hed.errors import ErrorHandler + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + cls.base_data_dir = base_data_dir + hed_xml_file = os.path.join(base_data_dir, "schema_tests/HED8.0.0t.xml") + cls.hed_schema = schema.load_schema(hed_xml_file) + json_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events.json") + cls.json_filename = json_filename + json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") + cls.json_def_filename = json_def_filename + json_without_definitions_filename = \ + os.path.join(base_data_dir, "sidecar_tests/both_types_events_without_definitions.json") + json_errors_filename = os.path.join(base_data_dir, "sidecar_tests/json_errors.json") + json_errors_filename_minor = os.path.join(base_data_dir, "sidecar_tests/json_errors_minor.json") + cls.default_sidecar = Sidecar(json_filename) + cls.json_def_sidecar = Sidecar(json_def_filename) + cls.errors_sidecar = Sidecar(json_errors_filename) + cls.errors_sidecar_minor = Sidecar(json_errors_filename_minor) + cls.json_without_definitions_sidecar = Sidecar(json_without_definitions_filename) + + base_output_folder = os.path.realpath(os.path.join(os.path.dirname(__file__), "../data/tests_output/")) + cls.base_output_folder = base_output_folder + os.makedirs(base_output_folder, exist_ok=True) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_file_not_found(self): + with self.assertRaises(HedFileError): + Sidecar('nonexistent_file.json') + + def test_invalid_input_type_int(self): + with self.assertRaises(HedFileError): + Sidecar(123) + + def test_invalid_input_type_dict(self): + with self.assertRaises(HedFileError): + Sidecar({'key': 'value'}) + + def test_invalid_filenames(self): + # Handle missing or invalid files. + invalid_json = "invalidxmlfile.json" + self.assertRaises(HedFileError, Sidecar, invalid_json) + + json_dict = None + try: + json_dict = Sidecar(None) + except HedFileError: + pass + self.assertTrue(len(json_dict.loaded_dict) == 0) + + json_dict = None + try: + json_dict = Sidecar("") + except HedFileError: + pass + self.assertTrue(len(json_dict.loaded_dict) == 0) + + def test_name(self): + invalid_json = "invalidxmlfile.json" + with self.assertRaises(HedFileError) as context: + Sidecar(invalid_json) + self.assertEqual(context.exception.args[0], 'fileNotFound') + + def test_add_json_string(self): + with open(self.json_filename) as file: + file_as_string = io.StringIO(file.read()) + json_file = Sidecar(file_as_string) + self.assertTrue(json_file) + + def test__iter__(self): + columns_target = 3 + columns_count = 0 + for column_data in self.default_sidecar: + self.assertIsInstance(column_data, ColumnMetadata) + columns_count += 1 + + self.assertEqual(columns_target, columns_count) + + def test_validate_column_group(self): + validation_issues = self.errors_sidecar.validate(self.hed_schema) + self.assertEqual(len(validation_issues), 4) + + validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema) + self.assertEqual(len(validation_issues2), 1) + + validation_issues = self.json_without_definitions_sidecar.validate(self.hed_schema) + self.assertEqual(len(validation_issues), 7) + + hed_string = HedString("(Definition/JsonFileDef/#, (Item/JsonDef1/#,Item/JsonDef1))", self.hed_schema) + extra_def_dict = DefinitionDict() + extra_def_dict.check_for_definitions(hed_string) + + validation_issues2 = self.json_without_definitions_sidecar.validate(self.hed_schema, + extra_def_dicts=extra_def_dict) + # this removes one undef matched error + self.assertEqual(len(validation_issues2), 7) + + def test_duplicate_def(self): + sidecar = self.json_def_sidecar + + duplicate_dict = sidecar.extract_definitions(hed_schema=self.hed_schema) + issues = sidecar.validate(self.hed_schema, extra_def_dicts=duplicate_dict, error_handler=ErrorHandler(False)) + self.assertEqual(len(issues), 5) + self.assertTrue(issues[0]['code'], ValidationErrors.DEFINITION_INVALID) + + def test_save_load(self): + sidecar = Sidecar(self.json_def_filename) + save_filename = os.path.join(self.base_output_folder, "test_sidecar_save.json") + sidecar.save_as_json(save_filename) + + reloaded_sidecar = Sidecar(save_filename) + + for data1, data2 in zip(sidecar, reloaded_sidecar): + self.assertEqual(data1.source_dict, data2.source_dict) + + def test_save_load2(self): + sidecar = Sidecar(self.json_def_filename) + json_string = sidecar.get_as_json_string() + + reloaded_sidecar = Sidecar(io.StringIO(json_string)) + + for data1, data2 in zip(sidecar, reloaded_sidecar): + self.assertEqual(data1.source_dict, data2.source_dict) + + def test_merged_sidecar(self): + base_folder = self.base_data_dir + "sidecar_tests/" + combined_sidecar_json = base_folder + "test_merged_merged.json" + sidecar_json1 = base_folder + "test_merged1.json" + sidecar_json2 = base_folder + "test_merged2.json" + + sidecar = Sidecar([sidecar_json1, sidecar_json2]) + sidecar2 = Sidecar(combined_sidecar_json) + + self.assertEqual(sidecar.loaded_dict, sidecar2.loaded_dict) + + def test_set_hed_strings(self): + from hed.models import df_util + sidecar = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/short_tag_test.json")) + + for column_data in sidecar: + hed_strings = column_data.get_hed_strings() + df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag") + column_data.set_hed_strings(hed_strings) + sidecar_long = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/long_tag_test.json")) + self.assertEqual(sidecar.loaded_dict, sidecar_long.loaded_dict) + + sidecar = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/long_tag_test.json")) + + for column_data in sidecar: + hed_strings = column_data.get_hed_strings() + df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag") + column_data.set_hed_strings(hed_strings) + sidecar_short = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/short_tag_test.json")) + self.assertEqual(sidecar.loaded_dict, sidecar_short.loaded_dict) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_spreadsheet_input.py b/tests/models/test_spreadsheet_input.py index eeee6bc8..7ab141e5 100644 --- a/tests/models/test_spreadsheet_input.py +++ b/tests/models/test_spreadsheet_input.py @@ -1,230 +1,229 @@ -import unittest -import os -import io - -from hed.errors import HedFileError -from hed.models import TabularInput, SpreadsheetInput, Sidecar -import shutil -from hed import schema -import pandas as pd - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - base = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') - cls.base_data_dir = base - hed_xml_file = os.path.join(base, "schema_tests/HED8.0.0t.xml") - cls.hed_schema = schema.load_schema(hed_xml_file) - default = os.path.join(os.path.dirname(os.path.realpath(__file__)), - "../data/spreadsheet_validator_tests/ExcelMultipleSheets.xlsx") - cls.default_test_file_name = default - cls.generic_file_input = SpreadsheetInput(default) - base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - cls.base_output_folder = base_output - os.makedirs(base_output, exist_ok=True) - - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_all(self): - hed_input = self.default_test_file_name - has_column_names = True - column_prefix_dictionary = {1: 'Label/', 2: 'Description'} - tag_columns = [4] - worksheet_name = 'LKT Events' - - file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name, - tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary) - - self.assertTrue(isinstance(file_input.dataframe_a, pd.DataFrame)) - self.assertTrue(isinstance(file_input.series_a, pd.Series)) - self.assertTrue(file_input.dataframe_a.size) - self.assertEqual(len(file_input._mapper.get_column_mapping_issues()), 0) - - def test_all2(self): - # This should work, but raise an issue as Short label and column 1 overlap. - hed_input = self.default_test_file_name - has_column_names = True - column_prefix_dictionary = {1: 'Label/', "Short label": 'Description'} - tag_columns = [4] - worksheet_name = 'LKT Events' - - file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name, - tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary) - - self.assertTrue(isinstance(file_input.dataframe_a, pd.DataFrame)) - self.assertTrue(isinstance(file_input.series_a, pd.Series)) - self.assertTrue(file_input.dataframe_a.size) - self.assertEqual(len(file_input._mapper.get_column_mapping_issues()), 1) - - def test_file_as_string(self): - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/validator_tests/bids_events_no_index.tsv') - - json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - "../data/validator_tests/bids_events.json") - sidecar = Sidecar(json_path) - self.assertEqual(len(sidecar.validate(self.hed_schema)), 0) - input_file = TabularInput(events_path, sidecar=sidecar) - - with open(events_path) as file: - events_file_as_string = io.StringIO(file.read()) - input_file_from_string = TabularInput(file=events_file_as_string, sidecar=sidecar) - - self.assertTrue(input_file._dataframe.equals(input_file_from_string._dataframe)) - - def test_bad_file_inputs(self): - self.assertRaises(HedFileError, TabularInput, None) - - def test_loading_binary(self): - with open(self.default_test_file_name, "rb") as f: - self.assertRaises(HedFileError, SpreadsheetInput, f) - - with open(self.default_test_file_name, "rb") as f: - opened_binary_file = SpreadsheetInput(f, file_type=".xlsx") - self.assertIsInstance(opened_binary_file, SpreadsheetInput, "SpreadsheetInput creates a correct object.") - self.assertTrue(True) - - def test_to_excel(self): - test_input_file = self.generic_file_input - test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave.xlsx" - test_input_file.to_excel(test_output_name) - - test_input_file = self.generic_file_input - test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_formatting.xlsx" - test_input_file.to_excel(test_output_name) - - # Test to a file stream - test_input_file = self.generic_file_input - test_output_name = self.base_output_folder + "ExcelMultipleSheets_fileio.xlsx" - with open(test_output_name, "wb") as f: - test_input_file.to_excel(f) - - def test_to_excel_should_work(self): - spreadsheet = SpreadsheetInput(file=self.default_test_file_name, file_type='.xlsx', - tag_columns=[3], has_column_names=True, - column_prefix_dictionary={1: 'Label/', 2: 'Description/'}, - name='ExcelOneSheet.xlsx') - buffer = io.BytesIO() - spreadsheet.to_excel(buffer) - buffer.seek(0) - v = buffer.getvalue() - self.assertGreater(len(v), 0, "It should have a length greater than 0") - - def test_to_csv(self): - test_input_file = self.generic_file_input - test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave.csv" - test_input_file.to_csv(test_output_name) - - test_input_file = self.generic_file_input - file_as_csv = test_input_file.to_csv(None) - self.assertIsInstance(file_as_csv, str) - - def test_loading_and_reloading(self): - test_input_file = self.generic_file_input - test_output_name = self.base_output_folder + "ExcelMultipleSheets_test_save.xlsx" - - test_input_file.to_excel(test_output_name) - - reloaded_input = SpreadsheetInput(test_output_name) - - self.assertTrue(test_input_file._dataframe.equals(reloaded_input._dataframe)) - - def test_loading_and_reset_mapper(self): - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/validator_tests/bids_events_no_index.tsv') - json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - "../data/validator_tests/bids_events.json") - sidecar = Sidecar(json_path) - self.assertEqual(len(sidecar.validate(self.hed_schema)), 0) - input_file_1 = TabularInput(events_path, sidecar=sidecar) - input_file_2 = TabularInput(events_path, sidecar=sidecar) - - input_file_2.reset_column_mapper() - - self.assertTrue(input_file_1.dataframe.equals(input_file_2.dataframe)) - - def test_no_column_header_and_convert(self): - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) - hed_input.convert_to_long(self.hed_schema) - - events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_long.tsv') - hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[0, 1]) - self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) - - def test_convert_short_long_with_definitions(self): - # Verify behavior works as expected even if definitions are present - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) - hed_input.convert_to_long(self.hed_schema) - - events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition_long.tsv') - hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[0, 1]) - self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) - - def test_definitions_identified(self): - # Todo: this test is no longer relevant - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) - - def test_loading_dataframe_directly(self): - ds_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition.tsv') - ds = pd.read_csv(ds_path, delimiter="\t", header=None) - hed_input = SpreadsheetInput(ds, has_column_names=False, tag_columns=[0, 1]) - - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition.tsv') - hed_input2 = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) - self.assertTrue(hed_input._dataframe.equals(hed_input2._dataframe)) - - def test_ignoring_na_column(self): - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/na_tag_column.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) - self.assertTrue(hed_input.dataframe_a.loc[1, 1] == 'n/a') - - def test_ignoring_na_value_column(self): - from hed import TabularInput - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/na_value_column.tsv') - sidecar_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/na_value_column.json') - hed_input = TabularInput(events_path, sidecar=sidecar_path) - self.assertTrue(hed_input.dataframe_a.loc[1, 'Value'] == 'n/a') - - def test_to_excel_workbook(self): - excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3", - tag_columns=["HED tags"]) - test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled.xlsx" - excel_book.convert_to_long(self.hed_schema) - excel_book.to_excel(test_output_name) - reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3") - - self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe)) - - def test_to_excel_workbook_no_col_names(self): - excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3", - tag_columns=[4], has_column_names=False) - test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_no_col_names.xlsx" - excel_book.convert_to_long(self.hed_schema) - excel_book.to_excel(test_output_name) - reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[4], - has_column_names=False) - self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe)) - - -if __name__ == '__main__': - unittest.main() +import unittest +import os +import io + +from hed.errors import HedFileError +from hed.models import TabularInput, SpreadsheetInput, Sidecar +import shutil +from hed import schema +import pandas as pd + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + base = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + cls.base_data_dir = base + hed_xml_file = os.path.join(base, "schema_tests/HED8.0.0t.xml") + cls.hed_schema = schema.load_schema(hed_xml_file) + default = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "../data/spreadsheet_validator_tests/ExcelMultipleSheets.xlsx") + cls.default_test_file_name = default + cls.generic_file_input = SpreadsheetInput(default) + base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + cls.base_output_folder = base_output + os.makedirs(base_output, exist_ok=True) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_all(self): + hed_input = self.default_test_file_name + has_column_names = True + column_prefix_dictionary = {1: 'Label/', 2: 'Description'} + tag_columns = [4] + worksheet_name = 'LKT Events' + + file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name, + tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary) + + self.assertTrue(isinstance(file_input.dataframe_a, pd.DataFrame)) + self.assertTrue(isinstance(file_input.series_a, pd.Series)) + self.assertTrue(file_input.dataframe_a.size) + self.assertEqual(len(file_input._mapper.get_column_mapping_issues()), 0) + + def test_all2(self): + # This should work, but raise an issue as Short label and column 1 overlap. + hed_input = self.default_test_file_name + has_column_names = True + column_prefix_dictionary = {1: 'Label/', "Short label": 'Description'} + tag_columns = [4] + worksheet_name = 'LKT Events' + + file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name, + tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary) + + self.assertTrue(isinstance(file_input.dataframe_a, pd.DataFrame)) + self.assertTrue(isinstance(file_input.series_a, pd.Series)) + self.assertTrue(file_input.dataframe_a.size) + self.assertEqual(len(file_input._mapper.get_column_mapping_issues()), 1) + + def test_file_as_string(self): + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/validator_tests/bids_events_no_index.tsv') + + json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "../data/validator_tests/bids_events.json") + sidecar = Sidecar(json_path) + self.assertEqual(len(sidecar.validate(self.hed_schema)), 0) + input_file = TabularInput(events_path, sidecar=sidecar) + + with open(events_path) as file: + events_file_as_string = io.StringIO(file.read()) + input_file_from_string = TabularInput(file=events_file_as_string, sidecar=sidecar) + + self.assertTrue(input_file._dataframe.equals(input_file_from_string._dataframe)) + + def test_bad_file_inputs(self): + self.assertRaises(HedFileError, TabularInput, None) + + def test_loading_binary(self): + with open(self.default_test_file_name, "rb") as f: + self.assertRaises(HedFileError, SpreadsheetInput, f) + + with open(self.default_test_file_name, "rb") as f: + opened_binary_file = SpreadsheetInput(f, file_type=".xlsx") + self.assertIsInstance(opened_binary_file, SpreadsheetInput, "SpreadsheetInput creates a correct object.") + self.assertTrue(True) + + def test_to_excel(self): + test_input_file = self.generic_file_input + test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave.xlsx" + test_input_file.to_excel(test_output_name) + + test_input_file = self.generic_file_input + test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_formatting.xlsx" + test_input_file.to_excel(test_output_name) + + # Test to a file stream + test_input_file = self.generic_file_input + test_output_name = self.base_output_folder + "ExcelMultipleSheets_fileio.xlsx" + with open(test_output_name, "wb") as f: + test_input_file.to_excel(f) + + def test_to_excel_should_work(self): + spreadsheet = SpreadsheetInput(file=self.default_test_file_name, file_type='.xlsx', + tag_columns=[3], has_column_names=True, + column_prefix_dictionary={1: 'Label/', 2: 'Description/'}, + name='ExcelOneSheet.xlsx') + buffer = io.BytesIO() + spreadsheet.to_excel(buffer) + buffer.seek(0) + v = buffer.getvalue() + self.assertGreater(len(v), 0, "It should have a length greater than 0") + + def test_to_csv(self): + test_input_file = self.generic_file_input + test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave.csv" + test_input_file.to_csv(test_output_name) + + test_input_file = self.generic_file_input + file_as_csv = test_input_file.to_csv(None) + self.assertIsInstance(file_as_csv, str) + + def test_loading_and_reloading(self): + test_input_file = self.generic_file_input + test_output_name = self.base_output_folder + "ExcelMultipleSheets_test_save.xlsx" + + test_input_file.to_excel(test_output_name) + + reloaded_input = SpreadsheetInput(test_output_name) + + self.assertTrue(test_input_file._dataframe.equals(reloaded_input._dataframe)) + + def test_loading_and_reset_mapper(self): + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/validator_tests/bids_events_no_index.tsv') + json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "../data/validator_tests/bids_events.json") + sidecar = Sidecar(json_path) + self.assertEqual(len(sidecar.validate(self.hed_schema)), 0) + input_file_1 = TabularInput(events_path, sidecar=sidecar) + input_file_2 = TabularInput(events_path, sidecar=sidecar) + + input_file_2.reset_column_mapper() + + self.assertTrue(input_file_1.dataframe.equals(input_file_2.dataframe)) + + def test_no_column_header_and_convert(self): + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header.tsv') + hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) + hed_input.convert_to_long(self.hed_schema) + + events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header_long.tsv') + hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[0, 1]) + self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) + + def test_convert_short_long_with_definitions(self): + # Verify behavior works as expected even if definitions are present + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header_definition.tsv') + hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) + hed_input.convert_to_long(self.hed_schema) + + events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header_definition_long.tsv') + hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[0, 1]) + self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) + + def test_definitions_identified(self): + # Todo: this test is no longer relevant + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header_definition.tsv') + hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header_definition.tsv') + hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) + + def test_loading_dataframe_directly(self): + ds_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header_definition.tsv') + ds = pd.read_csv(ds_path, delimiter="\t", header=None) + hed_input = SpreadsheetInput(ds, has_column_names=False, tag_columns=[0, 1]) + + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/no_column_header_definition.tsv') + hed_input2 = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) + self.assertTrue(hed_input._dataframe.equals(hed_input2._dataframe)) + + def test_ignoring_na_column(self): + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/na_tag_column.tsv') + hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) + self.assertTrue(hed_input.dataframe_a.loc[1, 1] == 'n/a') + + def test_ignoring_na_value_column(self): + from hed import TabularInput + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/na_value_column.tsv') + sidecar_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/na_value_column.json') + hed_input = TabularInput(events_path, sidecar=sidecar_path) + self.assertTrue(hed_input.dataframe_a.loc[1, 'Value'] == 'n/a') + + def test_to_excel_workbook(self): + excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3", + tag_columns=["HED tags"]) + test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled.xlsx" + excel_book.convert_to_long(self.hed_schema) + excel_book.to_excel(test_output_name) + reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3") + + self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe)) + + def test_to_excel_workbook_no_col_names(self): + excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3", + tag_columns=[4], has_column_names=False) + test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_no_col_names.xlsx" + excel_book.convert_to_long(self.hed_schema) + excel_book.to_excel(test_output_name) + reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[4], + has_column_names=False) + self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_string_util.py b/tests/models/test_string_util.py index 5ccce661..5df65137 100644 --- a/tests/models/test_string_util.py +++ b/tests/models/test_string_util.py @@ -1,190 +1,192 @@ -import unittest -from hed import HedString, load_schema_version -from hed.models.string_util import split_base_tags, split_def_tags, gather_descriptions -import copy - - -class TestHedStringSplit(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.schema = load_schema_version("8.3.0") - - def check_split_base_tags(self, hed_string, base_tags, expected_string, expected_string2): - # Test case 1: remove_group=False - hed_string_copy = copy.deepcopy(hed_string) - remaining_hed, found_hed = split_base_tags(hed_string_copy, base_tags, remove_group=False) - - self.assertIsInstance(remaining_hed, HedString) - self.assertIsInstance(found_hed, HedString) - self.assertEqual(str(remaining_hed), expected_string) - - self.assertTrue(all(tag in [str(t) for t in found_hed.get_all_tags()] for tag in base_tags)) - self.assertTrue(all(tag not in [str(t) for t in remaining_hed.get_all_tags()] for tag in base_tags)) - - # Test case 2: remove_group=True - hed_string_copy = copy.deepcopy(hed_string) - remaining_hed, found_hed = split_base_tags(hed_string_copy, base_tags, remove_group=True) - - self.assertIsInstance(remaining_hed, HedString) - self.assertIsInstance(found_hed, HedString) - self.assertEqual(str(remaining_hed), expected_string2) - - self.assertTrue(all(tag in [str(t) for t in found_hed.get_all_tags()] for tag in base_tags)) - self.assertTrue(all(tag not in [str(t) for t in remaining_hed.get_all_tags()] for tag in base_tags)) - - def test_case_1(self): - hed_string = HedString('Memorize,Action,Area', self.schema) - base_tags = ['Area', 'Action'] - expected_string = 'Memorize' - expected_string2 = 'Memorize' - self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) - - def test_case_2(self): - hed_string = HedString('Area,LightBlue,Handedness', self.schema) - base_tags = ['Area', 'LightBlue'] - expected_string = 'Handedness' - expected_string2 = 'Handedness' - self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) - - def test_case_3(self): - hed_string = HedString('(Wink,Communicate),Face,HotPink', self.schema) - base_tags = ['Wink', 'Face'] - expected_string = '(Communicate),HotPink' - expected_string2 = "HotPink" - self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) - - def test_case_4(self): - hed_string = HedString('(Area,(LightBlue,Handedness,(Wink,Communicate))),Face,HotPink', self.schema) - base_tags = ['Area', 'LightBlue'] - expected_string = '((Handedness,(Wink,Communicate))),Face,HotPink' - expected_string2 = 'Face,HotPink' - self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) - - def test_case_5(self): - hed_string = HedString('(Memorize,(Action,(Area,LightBlue),Handedness),Wink)', self.schema) - base_tags = ['Area', 'LightBlue'] - expected_string = '(Memorize,(Action,Handedness),Wink)' - expected_string2 = '(Memorize,(Action,Handedness),Wink)' - self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) - -class TestHedStringSplitDef(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.schema = load_schema_version("8.3.0") - - def check_split_def_tags(self, hed_string, def_names, expected_string, expected_string2): - # Test case 1: remove_group=False - hed_string_copy1 = copy.deepcopy(hed_string) - remaining_hed1, found_hed1 = split_def_tags(hed_string_copy1, def_names, remove_group=False) - - self.assertIsInstance(remaining_hed1, HedString) - self.assertIsInstance(found_hed1, HedString) - self.assertEqual(str(remaining_hed1), expected_string) - - self.assertTrue(all(tag.short_base_tag == "Def" for tag in found_hed1.get_all_tags())) - self.assertTrue(all(tag.short_base_tag != "Def" for tag in remaining_hed1.get_all_tags())) - - # Test case 2: remove_group=True - hed_string_copy2 = copy.deepcopy(hed_string) - remaining_hed2, found_hed2 = split_def_tags(hed_string_copy2, def_names, remove_group=True) - - self.assertIsInstance(remaining_hed2, HedString) - self.assertIsInstance(found_hed2, HedString) - self.assertEqual(str(remaining_hed2), expected_string2) - - #self.assertTrue(all(tag.short_base_tag == "Def" for tag in found_hed.get_all_tags())) - self.assertTrue(all(tag.short_base_tag != "Def" for tag in remaining_hed2.get_all_tags())) - - def test_case_1(self): - hed_string = HedString('Memorize,Action,def/CustomTag1', self.schema) - def_names = ['CustomTag1'] - expected_string = 'Memorize,Action' - expected_string2 = 'Memorize,Action' - self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) - - def test_case_2(self): - hed_string = HedString('def/CustomTag1,LightBlue,def/CustomTag2/123', self.schema) - def_names = ['CustomTag1', 'CustomTag2'] - expected_string = 'LightBlue' - expected_string2 = 'LightBlue' - self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) - - def test_case_3(self): - hed_string = HedString('(def/CustomTag1,Communicate),Face,def/CustomTag3/abc', self.schema) - def_names = ['CustomTag1', 'CustomTag3'] - expected_string = '(Communicate),Face' - expected_string2 = 'Face' - self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) - - def test_case_4(self): - hed_string = HedString('(def/CustomTag1,(LightBlue,def/CustomTag2/123,(Wink,Communicate))),Face,def/CustomTag3/abc', self.schema) - def_names = ['CustomTag1', 'CustomTag2', 'CustomTag3'] - expected_string = '((LightBlue,(Wink,Communicate))),Face' - expected_string2 = 'Face' - self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) - - def test_case_5(self): - hed_string = HedString('(Memorize,(Action,(def/CustomTag1,LightBlue),def/CustomTag2/123),Wink)', self.schema) - def_names = ['CustomTag1', 'CustomTag2'] - expected_string = '(Memorize,(Action,(LightBlue)),Wink)' - expected_string2 = '(Memorize,Wink)' - self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) - - -class TestGatherDescriptions(unittest.TestCase): - def setUp(self): - self.schema = load_schema_version("8.3.0") - - def test_gather_single_description(self): - input_str = "Sensory-event, Description/This is a test." - hed_string = HedString(input_str, hed_schema=self.schema) - - result = gather_descriptions(hed_string) - expected_result = "This is a test." - - self.assertEqual(result, expected_result) - self.assertNotIn("Description", str(result)) - - def test_gather_multiple_descriptions(self): - input_str = "Sensory-event, Description/First description, Second-tag, Description/Second description." - hed_string = HedString(input_str, hed_schema=self.schema) - - result = gather_descriptions(hed_string) - expected_result = "First description. Second description." - - self.assertEqual(result, expected_result) - self.assertNotIn("Description", str(result)) - - def test_gather_no_descriptions(self): - input_str = "Sensory-event, No-description-here, Another-tag" - hed_string = HedString(input_str, hed_schema=self.schema) - - result = gather_descriptions(hed_string) - expected_result = "" - - self.assertEqual(result, expected_result) - self.assertNotIn("Description", str(result)) - - def test_gather_descriptions_mixed_order(self): - input_str = "Sensory-event, Description/First., Another-tag, Description/Second, Third-tag, Description/Third." - hed_string = HedString(input_str, hed_schema=self.schema) - - result = gather_descriptions(hed_string) - expected_result = "First. Second. Third." - - self.assertEqual(result, expected_result) - self.assertNotIn("Description", str(result)) - - def test_gather_descriptions_missing_period(self): - input_str = "Sensory-event, Description/First, Description/Second" - hed_string = HedString(input_str, hed_schema=self.schema) - - result = gather_descriptions(hed_string) - expected_result = "First. Second." - - self.assertEqual(result, expected_result) - self.assertNotIn("Description", str(result)) - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file +import unittest +from hed import HedString, load_schema_version +from hed.models.string_util import split_base_tags, split_def_tags, gather_descriptions +import copy + + +class TestHedStringSplit(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.schema = load_schema_version("8.3.0") + + def check_split_base_tags(self, hed_string, base_tags, expected_string, expected_string2): + # Test case 1: remove_group=False + hed_string_copy = copy.deepcopy(hed_string) + remaining_hed, found_hed = split_base_tags(hed_string_copy, base_tags, remove_group=False) + + self.assertIsInstance(remaining_hed, HedString) + self.assertIsInstance(found_hed, HedString) + self.assertEqual(str(remaining_hed), expected_string) + + self.assertTrue(all(tag in [str(t) for t in found_hed.get_all_tags()] for tag in base_tags)) + self.assertTrue(all(tag not in [str(t) for t in remaining_hed.get_all_tags()] for tag in base_tags)) + + # Test case 2: remove_group=True + hed_string_copy = copy.deepcopy(hed_string) + remaining_hed, found_hed = split_base_tags(hed_string_copy, base_tags, remove_group=True) + + self.assertIsInstance(remaining_hed, HedString) + self.assertIsInstance(found_hed, HedString) + self.assertEqual(str(remaining_hed), expected_string2) + + self.assertTrue(all(tag in [str(t) for t in found_hed.get_all_tags()] for tag in base_tags)) + self.assertTrue(all(tag not in [str(t) for t in remaining_hed.get_all_tags()] for tag in base_tags)) + + def test_case_1(self): + hed_string = HedString('Memorize,Action,Area', self.schema) + base_tags = ['Area', 'Action'] + expected_string = 'Memorize' + expected_string2 = 'Memorize' + self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) + + def test_case_2(self): + hed_string = HedString('Area,LightBlue,Handedness', self.schema) + base_tags = ['Area', 'LightBlue'] + expected_string = 'Handedness' + expected_string2 = 'Handedness' + self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) + + def test_case_3(self): + hed_string = HedString('(Wink,Communicate),Face,HotPink', self.schema) + base_tags = ['Wink', 'Face'] + expected_string = '(Communicate),HotPink' + expected_string2 = "HotPink" + self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) + + def test_case_4(self): + hed_string = HedString('(Area,(LightBlue,Handedness,(Wink,Communicate))),Face,HotPink', self.schema) + base_tags = ['Area', 'LightBlue'] + expected_string = '((Handedness,(Wink,Communicate))),Face,HotPink' + expected_string2 = 'Face,HotPink' + self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) + + def test_case_5(self): + hed_string = HedString('(Memorize,(Action,(Area,LightBlue),Handedness),Wink)', self.schema) + base_tags = ['Area', 'LightBlue'] + expected_string = '(Memorize,(Action,Handedness),Wink)' + expected_string2 = '(Memorize,(Action,Handedness),Wink)' + self.check_split_base_tags(hed_string, base_tags, expected_string, expected_string2) + + +class TestHedStringSplitDef(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.schema = load_schema_version("8.3.0") + + def check_split_def_tags(self, hed_string, def_names, expected_string, expected_string2): + # Test case 1: remove_group=False + hed_string_copy1 = copy.deepcopy(hed_string) + remaining_hed1, found_hed1 = split_def_tags(hed_string_copy1, def_names, remove_group=False) + + self.assertIsInstance(remaining_hed1, HedString) + self.assertIsInstance(found_hed1, HedString) + self.assertEqual(str(remaining_hed1), expected_string) + + self.assertTrue(all(tag.short_base_tag == "Def" for tag in found_hed1.get_all_tags())) + self.assertTrue(all(tag.short_base_tag != "Def" for tag in remaining_hed1.get_all_tags())) + + # Test case 2: remove_group=True + hed_string_copy2 = copy.deepcopy(hed_string) + remaining_hed2, found_hed2 = split_def_tags(hed_string_copy2, def_names, remove_group=True) + + self.assertIsInstance(remaining_hed2, HedString) + self.assertIsInstance(found_hed2, HedString) + self.assertEqual(str(remaining_hed2), expected_string2) + + # self.assertTrue(all(tag.short_base_tag == "Def" for tag in found_hed.get_all_tags())) + self.assertTrue(all(tag.short_base_tag != "Def" for tag in remaining_hed2.get_all_tags())) + + def test_case_1(self): + hed_string = HedString('Memorize,Action,def/CustomTag1', self.schema) + def_names = ['CustomTag1'] + expected_string = 'Memorize,Action' + expected_string2 = 'Memorize,Action' + self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) + + def test_case_2(self): + hed_string = HedString('def/CustomTag1,LightBlue,def/CustomTag2/123', self.schema) + def_names = ['CustomTag1', 'CustomTag2'] + expected_string = 'LightBlue' + expected_string2 = 'LightBlue' + self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) + + def test_case_3(self): + hed_string = HedString('(def/CustomTag1,Communicate),Face,def/CustomTag3/abc', self.schema) + def_names = ['CustomTag1', 'CustomTag3'] + expected_string = '(Communicate),Face' + expected_string2 = 'Face' + self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) + + def test_case_4(self): + hed_string = HedString('(def/CustomTag1,(LightBlue,def/CustomTag2/123,(Wink,Communicate))),' + + 'Face,def/CustomTag3/abc', self.schema) + def_names = ['CustomTag1', 'CustomTag2', 'CustomTag3'] + expected_string = '((LightBlue,(Wink,Communicate))),Face' + expected_string2 = 'Face' + self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) + + def test_case_5(self): + hed_string = HedString('(Memorize,(Action,(def/CustomTag1,LightBlue),def/CustomTag2/123),Wink)', self.schema) + def_names = ['CustomTag1', 'CustomTag2'] + expected_string = '(Memorize,(Action,(LightBlue)),Wink)' + expected_string2 = '(Memorize,Wink)' + self.check_split_def_tags(hed_string, def_names, expected_string, expected_string2) + + +class TestGatherDescriptions(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version("8.3.0") + + def test_gather_single_description(self): + input_str = "Sensory-event, Description/This is a test." + hed_string = HedString(input_str, hed_schema=self.schema) + + result = gather_descriptions(hed_string) + expected_result = "This is a test." + + self.assertEqual(result, expected_result) + self.assertNotIn("Description", str(result)) + + def test_gather_multiple_descriptions(self): + input_str = "Sensory-event, Description/First description, Second-tag, Description/Second description." + hed_string = HedString(input_str, hed_schema=self.schema) + + result = gather_descriptions(hed_string) + expected_result = "First description. Second description." + + self.assertEqual(result, expected_result) + self.assertNotIn("Description", str(result)) + + def test_gather_no_descriptions(self): + input_str = "Sensory-event, No-description-here, Another-tag" + hed_string = HedString(input_str, hed_schema=self.schema) + + result = gather_descriptions(hed_string) + expected_result = "" + + self.assertEqual(result, expected_result) + self.assertNotIn("Description", str(result)) + + def test_gather_descriptions_mixed_order(self): + input_str = "Sensory-event, Description/First., Another-tag, Description/Second, Third-tag, Description/Third." + hed_string = HedString(input_str, hed_schema=self.schema) + + result = gather_descriptions(hed_string) + expected_result = "First. Second. Third." + + self.assertEqual(result, expected_result) + self.assertNotIn("Description", str(result)) + + def test_gather_descriptions_missing_period(self): + input_str = "Sensory-event, Description/First, Description/Second" + hed_string = HedString(input_str, hed_schema=self.schema) + + result = gather_descriptions(hed_string) + expected_result = "First. Second." + + self.assertEqual(result, expected_result) + self.assertNotIn("Description", str(result)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py index e1c3bc7f..9020ed72 100644 --- a/tests/models/test_tabular_input.py +++ b/tests/models/test_tabular_input.py @@ -1,102 +1,102 @@ -import unittest -import os -import shutil - -from hed.models import DefinitionEntry, Sidecar, TabularInput -from hed import schema -from hed.errors import HedFileError -from hed.errors import ErrorHandler, ErrorContext - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - base_output_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - os.makedirs(base_output_folder, exist_ok=True) - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.2.0.xml')) - sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - cls.events_path = os.path.realpath( - os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - sidecar2_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/remodel_tests/task-FacePerceptionSmall_events.json')) - cls.hed_schema = schema.load_schema(schema_path) - cls.sidecar1 = Sidecar(sidecar1_path, name='face_sub1_json') - cls.sidecar2 = Sidecar(sidecar2_path, name='face_small_json') - cls.base_output_folder = base_output_folder - - cls.invalid_inputs = [123, {'key': 'value'}, 'nonexistent_file.tsv'] - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_missing_column_name_issue(self): - events_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/validator_tests/bids_events_bad_column_name.tsv')) - json_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - "../data/validator_tests/bids_events.json")) - sidecar = Sidecar(json_path) - issues = sidecar.validate(self.hed_schema) - self.assertEqual(len(issues), 0) - input_file = TabularInput(events_path, sidecar=sidecar) - - validation_issues = input_file.validate(hed_schema=self.hed_schema) - self.assertEqual(len(validation_issues), 1) - - def test_expand_column_issues(self): - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/validator_tests/bids_events_bad_category_key.tsv') - json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - "../data/validator_tests/bids_events.json") - sidecar = Sidecar(json_path) - issues = sidecar.validate(hed_schema=self.hed_schema) - self.assertEqual(len(issues), 0) - input_file = TabularInput(events_path, sidecar=sidecar) - - validation_issues = input_file.validate(hed_schema=self.hed_schema) - self.assertEqual(validation_issues[0][ErrorContext.ROW], 2) - self.assertEqual(len(validation_issues), 1) - - def test_blank_and_duplicate_columns(self): - filepath = os.path.join(os.path.dirname(os.path.realpath(__file__)), - "../data/model_tests/blank_column_name.tsv") - - with self.assertRaises(HedFileError): - _ = TabularInput(filepath) - - # todo: add back in when we do this check - # filepath = os.path.join(os.path.dirname(os.path.realpath(__file__)), - # "../data/model_tests/duplicate_column_name.tsv") - # - # with self.assertRaises(HedFileError): - # _ = TabularInput(filepath) - - def test_validate_file_warnings(self): - issues1 = self.sidecar1.validate(hed_schema=self.hed_schema) - input_file1 = TabularInput(self.events_path, sidecar=self.sidecar1) - issues1a = input_file1.validate(hed_schema=self.hed_schema) - - issues2 = self.sidecar1.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) - input_file2 = TabularInput(self.events_path, sidecar=self.sidecar2) - issues2a = input_file2.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) - - def test_invalid_file(self): - for invalid_input in self.invalid_inputs: - with self.subTest(input=invalid_input): - with self.assertRaises(HedFileError): - TabularInput(file=invalid_input) - - def test_invalid_sidecar(self): - for invalid_input in self.invalid_inputs: - with self.subTest(input=invalid_input): - with self.assertRaises(HedFileError): - # Replace 'valid_path.tsv' with a path to an existing .tsv file - TabularInput(file=self.events_path, sidecar=invalid_input) - - -if __name__ == '__main__': - unittest.main() +import unittest +import os +import shutil + +from hed.models import Sidecar, TabularInput +from hed import schema +from hed.errors import HedFileError +from hed.errors import ErrorHandler, ErrorContext + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + base_output_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + os.makedirs(base_output_folder, exist_ok=True) + + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.2.0.xml')) + sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + cls.events_path = os.path.realpath( + os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + sidecar2_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/remodel_tests/task-FacePerceptionSmall_events.json')) + cls.hed_schema = schema.load_schema(schema_path) + cls.sidecar1 = Sidecar(sidecar1_path, name='face_sub1_json') + cls.sidecar2 = Sidecar(sidecar2_path, name='face_small_json') + cls.base_output_folder = base_output_folder + + cls.invalid_inputs = [123, {'key': 'value'}, 'nonexistent_file.tsv'] + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_missing_column_name_issue(self): + events_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/validator_tests/bids_events_bad_column_name.tsv')) + json_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + "../data/validator_tests/bids_events.json")) + sidecar = Sidecar(json_path) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), 0) + input_file = TabularInput(events_path, sidecar=sidecar) + + validation_issues = input_file.validate(hed_schema=self.hed_schema) + self.assertEqual(len(validation_issues), 1) + + def test_expand_column_issues(self): + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/validator_tests/bids_events_bad_category_key.tsv') + json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "../data/validator_tests/bids_events.json") + sidecar = Sidecar(json_path) + issues = sidecar.validate(hed_schema=self.hed_schema) + self.assertEqual(len(issues), 0) + input_file = TabularInput(events_path, sidecar=sidecar) + + validation_issues = input_file.validate(hed_schema=self.hed_schema) + self.assertEqual(validation_issues[0][ErrorContext.ROW], 2) + self.assertEqual(len(validation_issues), 1) + + def test_blank_and_duplicate_columns(self): + filepath = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "../data/model_tests/blank_column_name.tsv") + + with self.assertRaises(HedFileError): + _ = TabularInput(filepath) + + # todo: add back in when we do this check + # filepath = os.path.join(os.path.dirname(os.path.realpath(__file__)), + # "../data/model_tests/duplicate_column_name.tsv") + # + # with self.assertRaises(HedFileError): + # _ = TabularInput(filepath) + + def test_validate_file_warnings(self): + issues1 = self.sidecar1.validate(hed_schema=self.hed_schema) + input_file1 = TabularInput(self.events_path, sidecar=self.sidecar1) + issues1a = input_file1.validate(hed_schema=self.hed_schema) + + issues2 = self.sidecar1.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) + input_file2 = TabularInput(self.events_path, sidecar=self.sidecar2) + issues2a = input_file2.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) + + def test_invalid_file(self): + for invalid_input in self.invalid_inputs: + with self.subTest(input=invalid_input): + with self.assertRaises(HedFileError): + TabularInput(file=invalid_input) + + def test_invalid_sidecar(self): + for invalid_input in self.invalid_inputs: + with self.subTest(input=invalid_input): + with self.assertRaises(HedFileError): + # Replace 'valid_path.tsv' with a path to an existing .tsv file + TabularInput(file=self.events_path, sidecar=invalid_input) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/schema/test_hed_schema.py b/tests/schema/test_hed_schema.py index 6b429e03..5f87476e 100644 --- a/tests/schema/test_hed_schema.py +++ b/tests/schema/test_hed_schema.py @@ -1,139 +1,138 @@ -import unittest -import os - -from hed.errors import HedFileError, get_printable_issue_string -from hed.models import HedString, HedTag -from hed.schema import HedKey, HedSectionKey, get_hed_xml_version, load_schema, HedSchemaGroup, load_schema_version, HedSchema - - -class TestHedSchema(unittest.TestCase): - schema_file_3g_xml = '../data/schema_tests/HED8.0.0t.xml' - schema_file_3g = '../data/schema_tests/HED8.2.0.mediawiki' - - @classmethod - def setUpClass(cls): - cls.hed_xml_3g = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.schema_file_3g_xml) - cls.hed_wiki_3g = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.schema_file_3g) - cls.hed_schema_3g_wiki = load_schema(cls.hed_wiki_3g) - cls.hed_schema_3g = load_schema(cls.hed_xml_3g) - - schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' - hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) - hed_schema1 = load_schema(hed_xml) - hed_schema2 = load_schema(hed_xml, schema_namespace="tl:") - cls.hed_schema_group = HedSchemaGroup([hed_schema1, hed_schema2]) - - def test_name(self): - invalid_xml_file = "invalidxmlfile.xml" - try: - load_schema(invalid_xml_file) - # We should have an error before we reach here. - self.assertTrue(False) - except HedFileError as e: - self.assertTrue(invalid_xml_file in e.filename) - - def test_tag_attribute(self): - test_strings = { - 'value': 'Weight/#', - 'valueParent': 'Label', - 'allowedExtension': 'Experiment-structure', - } - expected_results = { - 'value': { - 'defaultUnits': False, - 'extensionAllowed': False, - 'position': False, - 'predicateType': False, - 'recommended': False, - 'required': False, - 'requireChild': False, - 'takesValue': True, - 'unique': False, - 'unitClass': True, - }, - 'valueParent': { - 'defaultUnits': False, - 'extensionAllowed': False, - 'position': False, - 'predicateType': False, - 'recommended': False, - 'required': False, - 'requireChild': True, - 'takesValue': False, - 'unique': False, - 'unitClass': False, - }, - 'allowedExtension': { - 'defaultUnits': False, - 'extensionAllowed': False, - 'position': False, - 'predicateType': False, - 'recommended': False, - 'required': False, - 'requireChild': False, - 'takesValue': False, - 'unique': False, - 'unitClass': False, - }, - } - for key, test_string in test_strings.items(): - expected_dict = expected_results[key] - tag = HedTag(test_string, hed_schema=self.hed_schema_3g) - for attribute, expected_value in expected_dict.items(): - self.assertEqual(tag.has_attribute(attribute), expected_value, - 'Test string: %s. Attribute: %s.' % (test_string, attribute)) - - def test_get_all_tag_attributes(self): - tag_props = self.hed_schema_3g._get_tag_entry("Jerk-rate/#").attributes - expected_props = { - "takesValue": "true", - "valueClass": "numericClass", - "unitClass": 'jerkUnits' - } - self.assertCountEqual(tag_props, expected_props) - - tag_props = self.hed_schema_3g._get_tag_entry("Statistical-value").attributes - expected_props = { - HedKey.ExtensionAllowed: "true", - } - self.assertCountEqual(tag_props, expected_props) - # also test long form. - tag_props = self.hed_schema_3g._get_tag_entry("Property/Data-property/Data-value/Statistical-value").attributes - self.assertCountEqual(tag_props, expected_props) - - def test_get_hed_xml_version(self): - self.assertEqual(get_hed_xml_version(self.hed_xml_3g), "8.0.0") - - def test_has_duplicate_tags(self): - self.assertFalse(self.hed_schema_3g.has_duplicates()) - - def test_short_tag_mapping(self): - self.assertEqual(len(self.hed_schema_3g.tags.keys()), 1110) - - def test_schema_compliance(self): - warnings = self.hed_schema_group.check_compliance(True) - self.assertEqual(len(warnings), 18) - - def test_bad_prefixes(self): - schema = load_schema_version(xml_version="8.3.0") - - self.assertTrue(schema.get_tag_entry("Event")) - self.assertFalse(schema.get_tag_entry("sc:Event")) - self.assertFalse(schema.get_tag_entry("unknown:Event")) - self.assertFalse(schema.get_tag_entry(":Event")) - self.assertFalse(schema.get_tag_entry("Event", schema_namespace=None)) - self.assertTrue(schema.get_tag_entry("Event", schema_namespace='')) - self.assertFalse(schema.get_tag_entry("Event", schema_namespace='unknown')) - - def test_bad_prefixes_library(self): - schema = load_schema_version(xml_version="tl:8.3.0") - - self.assertTrue(schema.get_tag_entry("tl:Event", schema_namespace="tl:")) - self.assertFalse(schema.get_tag_entry("sc:Event", schema_namespace="tl:")) - self.assertTrue(schema.get_tag_entry("Event", schema_namespace="tl:")) - self.assertFalse(schema.get_tag_entry("unknown:Event", schema_namespace="tl:")) - self.assertFalse(schema.get_tag_entry(":Event", schema_namespace="tl:")) - self.assertFalse(schema.get_tag_entry("Event", schema_namespace=None)) - self.assertFalse(schema.get_tag_entry("Event", schema_namespace='')) - self.assertFalse(schema.get_tag_entry("Event", schema_namespace='unknown')) - +import unittest +import os + +from hed.errors import HedFileError +from hed.models import HedTag +from hed.schema import HedKey, get_hed_xml_version, load_schema, HedSchemaGroup, load_schema_version + + +class TestHedSchema(unittest.TestCase): + schema_file_3g_xml = '../data/schema_tests/HED8.0.0t.xml' + schema_file_3g = '../data/schema_tests/HED8.2.0.mediawiki' + + @classmethod + def setUpClass(cls): + cls.hed_xml_3g = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.schema_file_3g_xml) + cls.hed_wiki_3g = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.schema_file_3g) + cls.hed_schema_3g_wiki = load_schema(cls.hed_wiki_3g) + cls.hed_schema_3g = load_schema(cls.hed_xml_3g) + + schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' + hed_xml = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_file) + hed_schema1 = load_schema(hed_xml) + hed_schema2 = load_schema(hed_xml, schema_namespace="tl:") + cls.hed_schema_group = HedSchemaGroup([hed_schema1, hed_schema2]) + + def test_name(self): + invalid_xml_file = "invalidxmlfile.xml" + try: + load_schema(invalid_xml_file) + # We should have an error before we reach here. + self.assertTrue(False) + except HedFileError as e: + self.assertTrue(invalid_xml_file in e.filename) + + def test_tag_attribute(self): + test_strings = { + 'value': 'Weight/#', + 'valueParent': 'Label', + 'allowedExtension': 'Experiment-structure', + } + expected_results = { + 'value': { + 'defaultUnits': False, + 'extensionAllowed': False, + 'position': False, + 'predicateType': False, + 'recommended': False, + 'required': False, + 'requireChild': False, + 'takesValue': True, + 'unique': False, + 'unitClass': True, + }, + 'valueParent': { + 'defaultUnits': False, + 'extensionAllowed': False, + 'position': False, + 'predicateType': False, + 'recommended': False, + 'required': False, + 'requireChild': True, + 'takesValue': False, + 'unique': False, + 'unitClass': False, + }, + 'allowedExtension': { + 'defaultUnits': False, + 'extensionAllowed': False, + 'position': False, + 'predicateType': False, + 'recommended': False, + 'required': False, + 'requireChild': False, + 'takesValue': False, + 'unique': False, + 'unitClass': False, + }, + } + for key, test_string in test_strings.items(): + expected_dict = expected_results[key] + tag = HedTag(test_string, hed_schema=self.hed_schema_3g) + for attribute, expected_value in expected_dict.items(): + self.assertEqual(tag.has_attribute(attribute), expected_value, + 'Test string: %s. Attribute: %s.' % (test_string, attribute)) + + def test_get_all_tag_attributes(self): + tag_props = self.hed_schema_3g._get_tag_entry("Jerk-rate/#").attributes + expected_props = { + "takesValue": "true", + "valueClass": "numericClass", + "unitClass": 'jerkUnits' + } + self.assertCountEqual(tag_props, expected_props) + + tag_props = self.hed_schema_3g._get_tag_entry("Statistical-value").attributes + expected_props = { + HedKey.ExtensionAllowed: "true", + } + self.assertCountEqual(tag_props, expected_props) + # also test long form. + tag_props = self.hed_schema_3g._get_tag_entry("Property/Data-property/Data-value/Statistical-value").attributes + self.assertCountEqual(tag_props, expected_props) + + def test_get_hed_xml_version(self): + self.assertEqual(get_hed_xml_version(self.hed_xml_3g), "8.0.0") + + def test_has_duplicate_tags(self): + self.assertFalse(self.hed_schema_3g.has_duplicates()) + + def test_short_tag_mapping(self): + self.assertEqual(len(self.hed_schema_3g.tags.keys()), 1110) + + def test_schema_compliance(self): + warnings = self.hed_schema_group.check_compliance(True) + self.assertEqual(len(warnings), 18) + + def test_bad_prefixes(self): + schema = load_schema_version(xml_version="8.3.0") + + self.assertTrue(schema.get_tag_entry("Event")) + self.assertFalse(schema.get_tag_entry("sc:Event")) + self.assertFalse(schema.get_tag_entry("unknown:Event")) + self.assertFalse(schema.get_tag_entry(":Event")) + self.assertFalse(schema.get_tag_entry("Event", schema_namespace=None)) + self.assertTrue(schema.get_tag_entry("Event", schema_namespace='')) + self.assertFalse(schema.get_tag_entry("Event", schema_namespace='unknown')) + + def test_bad_prefixes_library(self): + schema = load_schema_version(xml_version="tl:8.3.0") + + self.assertTrue(schema.get_tag_entry("tl:Event", schema_namespace="tl:")) + self.assertFalse(schema.get_tag_entry("sc:Event", schema_namespace="tl:")) + self.assertTrue(schema.get_tag_entry("Event", schema_namespace="tl:")) + self.assertFalse(schema.get_tag_entry("unknown:Event", schema_namespace="tl:")) + self.assertFalse(schema.get_tag_entry(":Event", schema_namespace="tl:")) + self.assertFalse(schema.get_tag_entry("Event", schema_namespace=None)) + self.assertFalse(schema.get_tag_entry("Event", schema_namespace='')) + self.assertFalse(schema.get_tag_entry("Event", schema_namespace='unknown')) diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index 0e08858c..52eecee8 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -70,13 +70,13 @@ def test_load_schema_name(self): schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/schema_tests/HED8.2.0.mediawiki') - schema = load_schema(schema_path, schema_namespace="testspace", name="Test Name") - self.assertEqual(schema.schema_namespace, "testspace:") - self.assertEqual(schema.name, "Test Name") + hed_schema = load_schema(schema_path, schema_namespace="testspace", name="Test Name") + self.assertEqual(hed_schema.schema_namespace, "testspace:") + self.assertEqual(hed_schema.name, "Test Name") - schema = load_schema(schema_path, schema_namespace="testspace") - self.assertEqual(schema.schema_namespace, "testspace:") - self.assertEqual(schema.name, schema_path) + hed_schema = load_schema(schema_path, schema_namespace="testspace") + self.assertEqual(hed_schema.schema_namespace, "testspace:") + self.assertEqual(hed_schema.name, schema_path) def test_load_schema_version(self): ver1 = "8.0.0" @@ -218,7 +218,8 @@ class TestHedSchemaUnmerged(unittest.TestCase): # Verify the hed cache can handle loading unmerged with_standard schemas in case they are ever used @classmethod def setUpClass(cls): - hed_cache_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../schema_cache_test_local_unmerged/') + hed_cache_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../schema_cache_test_local_unmerged/') if os.path.exists(hed_cache_dir) and os.path.isdir(hed_cache_dir): shutil.rmtree(hed_cache_dir) _load_schema_version.cache_clear() @@ -240,7 +241,6 @@ def setUpClass(cls): new_filename = f"HED_{cls.dupe_library_name}.xml" loaded_schema.save_as_xml(os.path.join(cls.hed_cache_dir, new_filename), save_merged=False) - @classmethod def tearDownClass(cls): shutil.rmtree(cls.hed_cache_dir) @@ -586,9 +586,10 @@ def test_multiple_libraries_without_and_with_prefix(self): self.assertEqual(parse_version_list(["test:score", "test:testlib"]), {"test": "test:score,testlib"}) def test_single_and_multiple_libraries_with_different_prefixes(self): - """Test that a single library with a prefix and multiple libraries with different prefixes are handled correctly.""" + """Test a single library with a prefix and multiple libraries with different prefixes are handled correctly.""" self.assertEqual(parse_version_list(["ol:otherlib"]), {"ol": "ol:otherlib"}) - self.assertEqual(parse_version_list(["score", "ol:otherlib", "ul:anotherlib"]), {"": "score", "ol": "ol:otherlib", "ul": "ul:anotherlib"}) + self.assertEqual(parse_version_list(["score", "ol:otherlib", "ul:anotherlib"]), + {"": "score", "ol": "ol:otherlib", "ul": "ul:anotherlib"}) def test_duplicate_library_raises_error(self): """Test that duplicate libraries raise the correct error.""" diff --git a/tests/schema/test_hed_schema_io_df.py b/tests/schema/test_hed_schema_io_df.py index 4c5cef70..0014eb98 100644 --- a/tests/schema/test_hed_schema_io_df.py +++ b/tests/schema/test_hed_schema_io_df.py @@ -90,10 +90,12 @@ def test_save_load_location2(self): def _create_structure_df(self): data = {"hedId": ["HED_0060010"], "rdfs:label": ["LangHeader"], - "Attributes": ['version="1.0.0", library="lang", withStandard="8.3.0", unmerged="True"'], - "omn:SubClassOf": ["HedHeader"], + "Attributes": ['version="1.0.0", library="lang", withStandard="8.3.0", unmerged="True"'], + "omn:SubClassOf": ["HedHeader"], "dc:description": [""], - "omn:EquivalentTo": ['HedHeader and (inHedSchema some LangSchema) and (version value "1.0.0") and (library value "lang") and (withStandard value "8.3.0") and (unmerged value "True")']} + "omn:EquivalentTo": ['HedHeader and (inHedSchema some LangSchema) and (version value "1.0.0")' + + 'and (library value "lang") and (withStandard value "8.3.0")' + + 'and (unmerged value "True")']} df = pd.DataFrame(data) return df @@ -119,7 +121,7 @@ def test_loading_out_of_order(self): loaded_schema = from_dataframes(dataframes) issues = loaded_schema.check_compliance(check_for_warnings=False) self.assertEqual(len(issues), 0) - breakHere = 3 + break_here = 3 self.assertEqual(loaded_schema.tags['MadeUpLongTagNameChild'].name, "MadeUpLongTagNameParent/MadeUpLongTagNameChild") @@ -171,4 +173,4 @@ def test_loading_circular(self): with self.assertRaises(HedFileError) as error: _ = from_dataframes(dataframes) - self.assertEqual(error.exception.args[0], HedExceptions.SCHEMA_TAG_TSV_BAD_PARENT) \ No newline at end of file + self.assertEqual(error.exception.args[0], HedExceptions.SCHEMA_TAG_TSV_BAD_PARENT) diff --git a/tests/schema/test_ontology_util.py b/tests/schema/test_ontology_util.py index 5ced2ad7..aa5bd468 100644 --- a/tests/schema/test_ontology_util.py +++ b/tests/schema/test_ontology_util.py @@ -153,7 +153,7 @@ def test_update_dataframes_from_schema(self): updated_dataframes = update_dataframes_from_schema(schema_dataframes_new, schema) except HedFileError as e: self.assertEqual(len(e.issues), 115) - breakHere = 3 + break_here = 3 class TestConvertOmn(unittest.TestCase): diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py index 95bdd550..8a04732a 100644 --- a/tests/schema/test_schema_attribute_validators.py +++ b/tests/schema/test_schema_attribute_validators.py @@ -1,165 +1,193 @@ -import unittest -import copy - -from hed.schema import schema_attribute_validators, HedSectionKey -from hed import load_schema_version - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.hed_schema = load_schema_version("8.2.0") - - def test_util_placeholder(self): - tag_entry = self.hed_schema.tags["Event"] - attribute_name = "unitClass" - self.assertTrue(schema_attribute_validators.tag_is_placeholder_check(self.hed_schema, tag_entry, attribute_name)) - attribute_name = "unitClass" - tag_entry = self.hed_schema.tags["Age/#"] - self.assertFalse(schema_attribute_validators.tag_is_placeholder_check(self.hed_schema, tag_entry, attribute_name)) - - def test_util_suggested(self): - tag_entry = self.hed_schema.tags["Event/Sensory-event"] - attribute_name = "suggestedTag" - self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.Tags)) - tag_entry = self.hed_schema.tags["Property"] - self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.Tags)) - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes["suggestedTag"] = "InvalidSuggestedTag" - self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.Tags)) - - def test_util_rooted(self): - tag_entry = self.hed_schema.tags["Event"] - attribute_name = "rooted" - self.assertFalse(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, tag_entry, attribute_name)) - tag_entry = self.hed_schema.tags["Property"] - self.assertFalse(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, tag_entry, attribute_name)) - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes["rooted"] = "Event" - self.assertFalse(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, tag_entry, attribute_name)) - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes["rooted"] = "NotRealTag" - self.assertTrue(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, tag_entry, attribute_name)) - - def test_unit_class_exists(self): - tag_entry = self.hed_schema.tags["Weight/#"] - attribute_name = "unitClass" - self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.UnitClasses)) - - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes["unitClass"] = "fakeClass" - self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.UnitClasses)) - - def test_value_class_exists(self): - tag_entry = self.hed_schema.tags["Weight/#"] - attribute_name = "valueClass" - self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.ValueClasses)) - - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes["valueClass"] = "fakeClass" - self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.ValueClasses)) - - def test_unit_exists(self): - tag_entry = self.hed_schema.unit_classes["accelerationUnits"] - attribute_name = "defaultUnits" - self.assertFalse(schema_attribute_validators.unit_exists(self.hed_schema, tag_entry, attribute_name)) - - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes["defaultUnits"] = "bad_unit" - self.assertTrue(schema_attribute_validators.unit_exists(self.hed_schema, tag_entry, attribute_name)) - - def test_deprecatedFrom(self): - tag_entry = self.hed_schema.tags["Event/Measurement-event"] - attribute_name = "deprecatedFrom" - self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) - - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes["deprecatedFrom"] = "200.3.0" - self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes["deprecatedFrom"] = "invalid" - self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes["deprecatedFrom"] = "1" - self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes["deprecatedFrom"] = "8.0.0" - self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes["deprecatedFrom"] = "8.2.0" - self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name)) - del tag_entry.attributes["deprecatedFrom"] - - unit_class_entry = copy.deepcopy(self.hed_schema.unit_classes["temperatureUnits"]) - # This should raise an issue because it assumes the attribute is set - self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) - unit_class_entry.attributes["deprecatedFrom"] = "8.1.0" - unit_class_entry.units['degree Celsius'].attributes["deprecatedFrom"] = "8.1.0" - # Still a warning for oC - self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) - unit_class_entry.units['oC'].attributes["deprecatedFrom"] = "8.1.0" - self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) - # this is still fine, as we are validating the child has deprecated from, not it's value - unit_class_entry.units['oC'].attributes["deprecatedFrom"] = "8.2.0" - self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, attribute_name)) - - self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry.units['oC'], attribute_name)) - - def test_conversionFactor(self): - tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units["m-per-s^2"] - attribute_name = "conversionFactor" - self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes[attribute_name] = "-1.0" - self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes[attribute_name] = "10^3" - self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes[attribute_name] = None - self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - def test_conversionFactor_modifier(self): - tag_entry = self.hed_schema.unit_classes["magneticFieldUnits"].units["tesla"] - attribute_name = "conversionFactor" - self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes[attribute_name] = "-1.0" - self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes[attribute_name] = "10^3" - self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - tag_entry.attributes[attribute_name] = None - self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) - - def test_allowed_characters_check(self): - tag_entry = self.hed_schema.value_classes["dateTimeClass"] - attribute_name = "allowedCharacter" - valid_attributes = {"letters", "blank", "digits", "alphanumeric", ":", "$", "a"} - self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) - - tag_entry = copy.deepcopy(tag_entry) - for attribute in valid_attributes: - tag_entry.attributes[attribute_name] = attribute - self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) - - invalid_attributes = {"lettersdd", "notaword", ":a"} - for attribute in invalid_attributes: - tag_entry.attributes[attribute_name] = attribute - self.assertTrue(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) - - def test_in_library_check(self): - score = load_schema_version("score_1.1.0") - tag_entry = score.tags["Modulator"] - attribute_name = "inLibrary" - self.assertFalse(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) - - tag_entry = copy.deepcopy(tag_entry) - tag_entry.attributes[attribute_name] = "invalid" - self.assertTrue(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) - - tag_entry.attributes[attribute_name] = "" - self.assertTrue(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) \ No newline at end of file +import unittest +import copy + +from hed.schema import schema_attribute_validators, HedSectionKey +from hed import load_schema_version + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.2.0") + + def test_util_placeholder(self): + tag_entry = self.hed_schema.tags["Event"] + attribute_name = "unitClass" + self.assertTrue(schema_attribute_validators.tag_is_placeholder_check(self.hed_schema, + tag_entry, attribute_name)) + attribute_name = "unitClass" + tag_entry = self.hed_schema.tags["Age/#"] + self.assertFalse(schema_attribute_validators.tag_is_placeholder_check(self.hed_schema, + tag_entry, attribute_name)) + + def test_util_suggested(self): + tag_entry = self.hed_schema.tags["Event/Sensory-event"] + attribute_name = "suggestedTag" + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, + attribute_name, HedSectionKey.Tags)) + tag_entry = self.hed_schema.tags["Property"] + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, + attribute_name, HedSectionKey.Tags)) + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["suggestedTag"] = "InvalidSuggestedTag" + self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, + attribute_name, HedSectionKey.Tags)) + + def test_util_rooted(self): + tag_entry = self.hed_schema.tags["Event"] + attribute_name = "rooted" + self.assertFalse(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, + tag_entry, attribute_name)) + tag_entry = self.hed_schema.tags["Property"] + self.assertFalse(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, + tag_entry, attribute_name)) + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["rooted"] = "Event" + self.assertFalse(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, + tag_entry, attribute_name)) + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["rooted"] = "NotRealTag" + self.assertTrue(schema_attribute_validators.tag_exists_base_schema_check(self.hed_schema, + tag_entry, attribute_name)) + + def test_unit_class_exists(self): + tag_entry = self.hed_schema.tags["Weight/#"] + attribute_name = "unitClass" + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, + HedSectionKey.UnitClasses)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["unitClass"] = "fakeClass" + self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, + HedSectionKey.UnitClasses)) + + def test_value_class_exists(self): + tag_entry = self.hed_schema.tags["Weight/#"] + attribute_name = "valueClass" + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, + HedSectionKey.ValueClasses)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["valueClass"] = "fakeClass" + self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, + HedSectionKey.ValueClasses)) + + def test_unit_exists(self): + tag_entry = self.hed_schema.unit_classes["accelerationUnits"] + attribute_name = "defaultUnits" + self.assertFalse(schema_attribute_validators.unit_exists(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["defaultUnits"] = "bad_unit" + self.assertTrue(schema_attribute_validators.unit_exists(self.hed_schema, tag_entry, attribute_name)) + + def test_deprecatedFrom(self): + tag_entry = self.hed_schema.tags["Event/Measurement-event"] + attribute_name = "deprecatedFrom" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, + attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes["deprecatedFrom"] = "200.3.0" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, + attribute_name)) + + tag_entry.attributes["deprecatedFrom"] = "invalid" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, + attribute_name)) + + tag_entry.attributes["deprecatedFrom"] = "1" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, + attribute_name)) + + tag_entry.attributes["deprecatedFrom"] = "8.0.0" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, + attribute_name)) + + tag_entry.attributes["deprecatedFrom"] = "8.2.0" + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, + attribute_name)) + del tag_entry.attributes["deprecatedFrom"] + + unit_class_entry = copy.deepcopy(self.hed_schema.unit_classes["temperatureUnits"]) + # This should raise an issue because it assumes the attribute is set + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, + attribute_name)) + unit_class_entry.attributes["deprecatedFrom"] = "8.1.0" + unit_class_entry.units['degree Celsius'].attributes["deprecatedFrom"] = "8.1.0" + # Still a warning for oC + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, + attribute_name)) + unit_class_entry.units['oC'].attributes["deprecatedFrom"] = "8.1.0" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, + attribute_name)) + # this is still fine, as we are validating the child has deprecated from, not it's value + unit_class_entry.units['oC'].attributes["deprecatedFrom"] = "8.2.0" + self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, unit_class_entry, + attribute_name)) + + self.assertTrue(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, + unit_class_entry.units['oC'], + attribute_name)) + + def test_conversionFactor(self): + tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units["m-per-s^2"] + attribute_name = "conversionFactor" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes[attribute_name] = "-1.0" + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = "10^3" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = None + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + def test_conversionFactor_modifier(self): + tag_entry = self.hed_schema.unit_classes["magneticFieldUnits"].units["tesla"] + attribute_name = "conversionFactor" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes[attribute_name] = "-1.0" + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = "10^3" + self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = None + self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name)) + + def test_allowed_characters_check(self): + tag_entry = self.hed_schema.value_classes["dateTimeClass"] + attribute_name = "allowedCharacter" + valid_attributes = {"letters", "blank", "digits", "alphanumeric", ":", "$", "a"} + self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, + attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + for attribute in valid_attributes: + tag_entry.attributes[attribute_name] = attribute + self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, + attribute_name)) + + invalid_attributes = {"lettersdd", "notaword", ":a"} + for attribute in invalid_attributes: + tag_entry.attributes[attribute_name] = attribute + self.assertTrue(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, + attribute_name)) + + def test_in_library_check(self): + score = load_schema_version("score_1.1.0") + tag_entry = score.tags["Modulator"] + attribute_name = "inLibrary" + self.assertFalse(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) + + tag_entry = copy.deepcopy(tag_entry) + tag_entry.attributes[attribute_name] = "invalid" + self.assertTrue(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) + + tag_entry.attributes[attribute_name] = "" + self.assertTrue(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) diff --git a/tests/schema/test_schema_converters.py b/tests/schema/test_schema_converters.py index 9073e50e..8d69dd5e 100644 --- a/tests/schema/test_schema_converters.py +++ b/tests/schema/test_schema_converters.py @@ -1,209 +1,209 @@ -import copy -import unittest -import os - -from hed import schema -import tempfile -import functools - - -def get_temp_filename(extension): - with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp_file: - filename = temp_file.name - return filename - -# Function wrapper to create and clean up a single schema for testing -def with_temp_file(extension): - def decorator(test_func): - @functools.wraps(test_func) - def wrapper(*args, **kwargs): - # Create a temporary file with the given extension - filename = get_temp_filename(extension) - try: - # Call the test function with the filename - return test_func(*args, filename=filename, **kwargs) - finally: - # Clean up: Remove the temporary file - os.remove(filename) - return wrapper - return decorator - - -class TestConverterBase(unittest.TestCase): - xml_file = '../data/schema_tests/HED8.2.0.xml' - wiki_file = '../data/schema_tests/HED8.2.0.mediawiki' - can_compare = True - - @classmethod - def setUpClass(cls): - cls.xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file) - cls.hed_schema_xml = schema.load_schema(cls.xml_file) - cls.wiki_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.wiki_file) - cls.hed_schema_wiki = schema.load_schema(cls.wiki_file) - - # !BFK! - Delete default units as they aren't in the XML file. - if "HED8.2.0" in cls.wiki_file: - del cls.hed_schema_wiki.unit_classes["temperatureUnits"].attributes["defaultUnits"] - - @with_temp_file(".xml") - def test_schema2xml(self, filename): - self.hed_schema_xml.save_as_xml(filename) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.hed_schema_xml) - - @with_temp_file(".mediawiki") - def test_schema2wiki(self, filename): - self.hed_schema_xml.save_as_mediawiki(filename) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.hed_schema_xml) - - def test_schema_as_string_xml(self): - with open(self.xml_file) as file: - hed_schema_as_string = "".join([line for line in file]) - - string_schema = schema.from_string(hed_schema_as_string) - - self.assertEqual(string_schema, self.hed_schema_xml) - - def test_schema_as_string_wiki(self): - with open(self.wiki_file) as file: - hed_schema_as_string = "".join([line for line in file]) - - string_schema = schema.from_string(hed_schema_as_string, schema_format=".mediawiki") - #!BFK! - Same as before, 8.2.0 has a difference - if "HED8.2.0" in self.wiki_file: - del string_schema.unit_classes["temperatureUnits"].attributes["defaultUnits"] - - self.assertEqual(string_schema, self.hed_schema_wiki) - - @with_temp_file(".xml") - def test_wikischema2xml(self, filename): - self.hed_schema_wiki.save_as_xml(filename) - loaded_schema = schema.load_schema(filename) - - wiki_schema_copy = copy.deepcopy(self.hed_schema_wiki) - - self.assertEqual(loaded_schema, wiki_schema_copy) - - @with_temp_file(".mediawiki") - def test_wikischema2wiki(self, filename): - self.hed_schema_wiki.save_as_mediawiki(filename) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.hed_schema_wiki) - - def test_compare_readers(self): - if self.can_compare: - self.assertEqual(self.hed_schema_wiki, self.hed_schema_xml) - - -class TestComplianceBase(unittest.TestCase): - xml_file_old = '../data/schema_tests/HED8.0.0t.xml' - xml_file = '../data/schema_tests/HED8.2.0.xml' - wiki_file = '../data/schema_tests/HED8.2.0.mediawiki' - can_compare = True - expected_issues = 0 - - @classmethod - def setUpClass(cls): - cls.xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file) - cls.hed_schema_xml = schema.load_schema(cls.xml_file) - cls.wiki_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.wiki_file) - cls.hed_schema_wiki = schema.load_schema(cls.wiki_file) - if "HED8.2.0" in cls.wiki_file: - del cls.hed_schema_wiki.unit_classes["temperatureUnits"].attributes["defaultUnits"] - cls.xml_file_old = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file_old) - cls.hed_schema_xml_old = schema.load_schema(cls.xml_file_old) - - def test_compliance(self): - issues = self.hed_schema_wiki.check_compliance() - self.assertEqual(len(issues), self.expected_issues) - issues_old = self.hed_schema_xml_old.check_compliance() - self.assertGreater(len(issues_old), 0) - - def test_compare_readers(self): - self.assertNotEqual(self.hed_schema_xml, self.hed_schema_xml_old) - if self.can_compare: - self.assertEqual(self.hed_schema_wiki, self.hed_schema_xml) - - -class TestPropertyAdded(TestConverterBase): - xml_file = '../data/schema_tests/added_prop.xml' - wiki_file = '../data/schema_tests/added_prop.mediawiki' - can_compare = True - - -class TestPropertyAddedUsage(TestConverterBase): - xml_file = '../data/schema_tests/added_prop_with_usage.xml' - wiki_file = '../data/schema_tests/added_prop_with_usage.mediawiki' - can_compare = True - - -class TestHedUnknownAttr(TestConverterBase): - xml_file = '../data/schema_tests/unknown_attribute.xml' - wiki_file = '../data/schema_tests/unknown_attribute.mediawiki' - can_compare = True - - -class TestHedMultiValueClass(TestConverterBase): - xml_file = '../data/schema_tests/HED8.0.0_2_value_classes.xml' - wiki_file = '../data/schema_tests/HED8.0.0_2_value_classes.mediawiki' - can_compare = True - - -class TestPrologueIssues1(TestConverterBase): - xml_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_end.xml' - wiki_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_end.mediawiki' - can_compare = True - - -class TestPrologueIssues2(TestConverterBase): - xml_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_middle.xml' - wiki_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_middle.mediawiki' - can_compare = True - - -class TestPrologueIssues3(TestConverterBase): - xml_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_start.xml' - wiki_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_start.mediawiki' - can_compare = True - - -class TestPrologueIssues4(TestConverterBase): - xml_file = '../data/schema_tests/prologue_tests/test_no_blank_line.xml' - wiki_file = '../data/schema_tests/prologue_tests/test_no_blank_line.mediawiki' - can_compare = True - - -class TestDuplicateUnitCompliance(TestComplianceBase): - xml_file = '../data/schema_tests/duplicate_unit.xml' - wiki_file = '../data/schema_tests/duplicate_unit.mediawiki' - can_compare = True - expected_issues = 1 - - -class TestDuplicateUnitClass(TestComplianceBase): - xml_file = '../data/schema_tests/duplicate_unit_class.xml' - wiki_file = '../data/schema_tests/duplicate_unit_class.mediawiki' - can_compare = True - expected_issues = 1 - - - -class TestConverterSavingPrefix(unittest.TestCase): - xml_file = '../data/schema_tests/HED8.0.0t.xml' - - @classmethod - def setUpClass(cls): - cls.xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file) - cls.hed_schema_xml = schema.load_schema(cls.xml_file) - cls.hed_schema_xml_prefix = schema.load_schema(cls.xml_file, schema_namespace="tl:") - - @with_temp_file(".xml") - def test_saving_prefix(self, filename): - self.hed_schema_xml_prefix.save_as_xml(filename) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.hed_schema_xml) +import copy +import unittest +import os + +from hed import schema +import tempfile +import functools + + +def get_temp_filename(extension): + with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp_file: + filename = temp_file.name + return filename + + +# Function wrapper to create and clean up a single schema for testing +def with_temp_file(extension): + def decorator(test_func): + @functools.wraps(test_func) + def wrapper(*args, **kwargs): + # Create a temporary file with the given extension + filename = get_temp_filename(extension) + try: + # Call the test function with the filename + return test_func(*args, filename=filename, **kwargs) + finally: + # Clean up: Remove the temporary file + os.remove(filename) + return wrapper + return decorator + + +class TestConverterBase(unittest.TestCase): + xml_file = '../data/schema_tests/HED8.2.0.xml' + wiki_file = '../data/schema_tests/HED8.2.0.mediawiki' + can_compare = True + + @classmethod + def setUpClass(cls): + cls.xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file) + cls.hed_schema_xml = schema.load_schema(cls.xml_file) + cls.wiki_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.wiki_file) + cls.hed_schema_wiki = schema.load_schema(cls.wiki_file) + + # !BFK! - Delete default units as they aren't in the XML file. + if "HED8.2.0" in cls.wiki_file: + del cls.hed_schema_wiki.unit_classes["temperatureUnits"].attributes["defaultUnits"] + + @with_temp_file(".xml") + def test_schema2xml(self, filename): + self.hed_schema_xml.save_as_xml(filename) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.hed_schema_xml) + + @with_temp_file(".mediawiki") + def test_schema2wiki(self, filename): + self.hed_schema_xml.save_as_mediawiki(filename) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.hed_schema_xml) + + def test_schema_as_string_xml(self): + with open(self.xml_file) as file: + hed_schema_as_string = "".join([line for line in file]) + + string_schema = schema.from_string(hed_schema_as_string) + + self.assertEqual(string_schema, self.hed_schema_xml) + + def test_schema_as_string_wiki(self): + with open(self.wiki_file) as file: + hed_schema_as_string = "".join([line for line in file]) + + string_schema = schema.from_string(hed_schema_as_string, schema_format=".mediawiki") + # !BFK! - Same as before, 8.2.0 has a difference + if "HED8.2.0" in self.wiki_file: + del string_schema.unit_classes["temperatureUnits"].attributes["defaultUnits"] + + self.assertEqual(string_schema, self.hed_schema_wiki) + + @with_temp_file(".xml") + def test_wikischema2xml(self, filename): + self.hed_schema_wiki.save_as_xml(filename) + loaded_schema = schema.load_schema(filename) + + wiki_schema_copy = copy.deepcopy(self.hed_schema_wiki) + + self.assertEqual(loaded_schema, wiki_schema_copy) + + @with_temp_file(".mediawiki") + def test_wikischema2wiki(self, filename): + self.hed_schema_wiki.save_as_mediawiki(filename) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.hed_schema_wiki) + + def test_compare_readers(self): + if self.can_compare: + self.assertEqual(self.hed_schema_wiki, self.hed_schema_xml) + + +class TestComplianceBase(unittest.TestCase): + xml_file_old = '../data/schema_tests/HED8.0.0t.xml' + xml_file = '../data/schema_tests/HED8.2.0.xml' + wiki_file = '../data/schema_tests/HED8.2.0.mediawiki' + can_compare = True + expected_issues = 0 + + @classmethod + def setUpClass(cls): + cls.xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file) + cls.hed_schema_xml = schema.load_schema(cls.xml_file) + cls.wiki_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.wiki_file) + cls.hed_schema_wiki = schema.load_schema(cls.wiki_file) + if "HED8.2.0" in cls.wiki_file: + del cls.hed_schema_wiki.unit_classes["temperatureUnits"].attributes["defaultUnits"] + cls.xml_file_old = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file_old) + cls.hed_schema_xml_old = schema.load_schema(cls.xml_file_old) + + def test_compliance(self): + issues = self.hed_schema_wiki.check_compliance() + self.assertEqual(len(issues), self.expected_issues) + issues_old = self.hed_schema_xml_old.check_compliance() + self.assertGreater(len(issues_old), 0) + + def test_compare_readers(self): + self.assertNotEqual(self.hed_schema_xml, self.hed_schema_xml_old) + if self.can_compare: + self.assertEqual(self.hed_schema_wiki, self.hed_schema_xml) + + +class TestPropertyAdded(TestConverterBase): + xml_file = '../data/schema_tests/added_prop.xml' + wiki_file = '../data/schema_tests/added_prop.mediawiki' + can_compare = True + + +class TestPropertyAddedUsage(TestConverterBase): + xml_file = '../data/schema_tests/added_prop_with_usage.xml' + wiki_file = '../data/schema_tests/added_prop_with_usage.mediawiki' + can_compare = True + + +class TestHedUnknownAttr(TestConverterBase): + xml_file = '../data/schema_tests/unknown_attribute.xml' + wiki_file = '../data/schema_tests/unknown_attribute.mediawiki' + can_compare = True + + +class TestHedMultiValueClass(TestConverterBase): + xml_file = '../data/schema_tests/HED8.0.0_2_value_classes.xml' + wiki_file = '../data/schema_tests/HED8.0.0_2_value_classes.mediawiki' + can_compare = True + + +class TestPrologueIssues1(TestConverterBase): + xml_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_end.xml' + wiki_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_end.mediawiki' + can_compare = True + + +class TestPrologueIssues2(TestConverterBase): + xml_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_middle.xml' + wiki_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_middle.mediawiki' + can_compare = True + + +class TestPrologueIssues3(TestConverterBase): + xml_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_start.xml' + wiki_file = '../data/schema_tests/prologue_tests/test_extra_blank_line_start.mediawiki' + can_compare = True + + +class TestPrologueIssues4(TestConverterBase): + xml_file = '../data/schema_tests/prologue_tests/test_no_blank_line.xml' + wiki_file = '../data/schema_tests/prologue_tests/test_no_blank_line.mediawiki' + can_compare = True + + +class TestDuplicateUnitCompliance(TestComplianceBase): + xml_file = '../data/schema_tests/duplicate_unit.xml' + wiki_file = '../data/schema_tests/duplicate_unit.mediawiki' + can_compare = True + expected_issues = 1 + + +class TestDuplicateUnitClass(TestComplianceBase): + xml_file = '../data/schema_tests/duplicate_unit_class.xml' + wiki_file = '../data/schema_tests/duplicate_unit_class.mediawiki' + can_compare = True + expected_issues = 1 + + +class TestConverterSavingPrefix(unittest.TestCase): + xml_file = '../data/schema_tests/HED8.0.0t.xml' + + @classmethod + def setUpClass(cls): + cls.xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file) + cls.hed_schema_xml = schema.load_schema(cls.xml_file) + cls.hed_schema_xml_prefix = schema.load_schema(cls.xml_file, schema_namespace="tl:") + + @with_temp_file(".xml") + def test_saving_prefix(self, filename): + self.hed_schema_xml_prefix.save_as_xml(filename) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.hed_schema_xml) diff --git a/tests/schema/test_schema_entry.py b/tests/schema/test_schema_entry.py index 4cc23c10..7d8e58c3 100644 --- a/tests/schema/test_schema_entry.py +++ b/tests/schema/test_schema_entry.py @@ -1,45 +1,47 @@ -import unittest -from hed.schema.hed_schema_entry import HedTagEntry -class MockEntry: - def __init__(self, attributes, parent=None): - self.attributes = attributes - self.takes_value_child_entry = False - self._parent_tag = parent - - _check_inherited_attribute = HedTagEntry._check_inherited_attribute - _check_inherited_attribute_internal = HedTagEntry._check_inherited_attribute_internal - - -class TestMockEntry(unittest.TestCase): - - def setUp(self): - # Test setup - self.root_entry = MockEntry({'color': 'blue', 'size': 'large', 'is_round': False}) - self.child_entry1 = MockEntry({'color': 'green', 'shape': 'circle', 'is_round': True}, parent=self.root_entry) - self.child_entry2 = MockEntry({'size': 'medium', 'material': 'wood', 'number': 5}, parent=self.child_entry1) - - def test_check_inherited_attribute(self): - self.assertEqual(self.child_entry2._check_inherited_attribute('material', return_value=True), 'wood') - - # Test attribute present in the parent but not in the current entry, treat_as_string=True - self.assertEqual(self.child_entry2._check_inherited_attribute('color', return_value=True), 'green,blue') - - # Test attribute present in the current entry and in parents, treat_as_string=True - self.assertEqual(self.child_entry2._check_inherited_attribute('size', return_value=True), 'medium,large') - - # Test attribute not present anywhere - self.assertIsNone(self.child_entry2._check_inherited_attribute('weight', return_value=True)) - - # Test attribute present in the current entry but not in parents, no return value - self.assertTrue(self.child_entry2._check_inherited_attribute('material', return_value=False)) - - # Test attribute not present anywhere, no return value - self.assertFalse(self.child_entry2._check_inherited_attribute('weight', return_value=False)) - - def test_check_inherited_attribute_bool(self): - # Test boolean attribute present in the current entry but not in parents - self.assertTrue(self.child_entry2._check_inherited_attribute('is_round', return_value=True)) - - def test_check_inherited_attribute_numeric(self): - # Test numeric attribute present only in the current entry - self.assertEqual(self.child_entry2._check_inherited_attribute('number', return_value=True), 5) +import unittest +from hed.schema.hed_schema_entry import HedTagEntry + + +class MockEntry: + def __init__(self, attributes, parent=None): + self.attributes = attributes + self.takes_value_child_entry = False + self._parent_tag = parent + + _check_inherited_attribute = HedTagEntry._check_inherited_attribute + _check_inherited_attribute_internal = HedTagEntry._check_inherited_attribute_internal + + +class TestMockEntry(unittest.TestCase): + + def setUp(self): + # Test setup + self.root_entry = MockEntry({'color': 'blue', 'size': 'large', 'is_round': False}) + self.child_entry1 = MockEntry({'color': 'green', 'shape': 'circle', 'is_round': True}, parent=self.root_entry) + self.child_entry2 = MockEntry({'size': 'medium', 'material': 'wood', 'number': 5}, parent=self.child_entry1) + + def test_check_inherited_attribute(self): + self.assertEqual(self.child_entry2._check_inherited_attribute('material', return_value=True), 'wood') + + # Test attribute present in the parent but not in the current entry, treat_as_string=True + self.assertEqual(self.child_entry2._check_inherited_attribute('color', return_value=True), 'green,blue') + + # Test attribute present in the current entry and in parents, treat_as_string=True + self.assertEqual(self.child_entry2._check_inherited_attribute('size', return_value=True), 'medium,large') + + # Test attribute not present anywhere + self.assertIsNone(self.child_entry2._check_inherited_attribute('weight', return_value=True)) + + # Test attribute present in the current entry but not in parents, no return value + self.assertTrue(self.child_entry2._check_inherited_attribute('material', return_value=False)) + + # Test attribute not present anywhere, no return value + self.assertFalse(self.child_entry2._check_inherited_attribute('weight', return_value=False)) + + def test_check_inherited_attribute_bool(self): + # Test boolean attribute present in the current entry but not in parents + self.assertTrue(self.child_entry2._check_inherited_attribute('is_round', return_value=True)) + + def test_check_inherited_attribute_numeric(self): + # Test numeric attribute present only in the current entry + self.assertEqual(self.child_entry2._check_inherited_attribute('number', return_value=True), 5) diff --git a/tests/schema/test_schema_util.py b/tests/schema/test_schema_util.py index 0d349846..3ac4866d 100644 --- a/tests/schema/test_schema_util.py +++ b/tests/schema/test_schema_util.py @@ -1,42 +1,43 @@ -import unittest -import os - -from hed.schema.schema_io import schema_util -from hed.schema import HedSchemaGroup -from hed import load_schema_version -from hed import load_schema - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.default_test_url = \ - """https://raw.githubusercontent.com/hed-standard/hed-schemas/master/standard_schema/hedxml/HED8.0.0.xml""" - cls.hed_xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../schema_tests/HED8.0.0t.xml') - - def test_url_to_file(self): - downloaded_file = schema_util.url_to_file(self.default_test_url) - self.assertTrue(downloaded_file) - os.remove(downloaded_file) - - def test_schema_version_greater_equal(self): - schema1 = load_schema_version("8.0.0") - self.assertFalse(schema_util.schema_version_greater_equal(schema1, "8.3.0")) - - schema2 = load_schema_version("v:8.2.0") - self.assertFalse(schema_util.schema_version_greater_equal(schema2, "8.3.0")) - - schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/schema_utf8.mediawiki') - schema3 = load_schema(schema_path, schema_namespace="tl:") - self.assertTrue(schema_util.schema_version_greater_equal(schema3, "8.3.0")) - - schema_group = HedSchemaGroup([schema1, schema2]) - self.assertFalse(schema_util.schema_version_greater_equal(schema_group, "8.3.0")) - - schema_group = HedSchemaGroup([schema2, schema3]) - self.assertTrue(schema_util.schema_version_greater_equal(schema_group, "8.3.0")) - -if __name__ == '__main__': - unittest.main() +import unittest +import os + +from hed.schema.schema_io import schema_util +from hed.schema import HedSchemaGroup +from hed import load_schema_version +from hed import load_schema + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.default_test_url = \ + """https://raw.githubusercontent.com/hed-standard/hed-schemas/master/standard_schema/hedxml/HED8.0.0.xml""" + cls.hed_xml_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../schema_tests/HED8.0.0t.xml') + + def test_url_to_file(self): + downloaded_file = schema_util.url_to_file(self.default_test_url) + self.assertTrue(downloaded_file) + os.remove(downloaded_file) + + def test_schema_version_greater_equal(self): + schema1 = load_schema_version("8.0.0") + self.assertFalse(schema_util.schema_version_greater_equal(schema1, "8.3.0")) + + schema2 = load_schema_version("v:8.2.0") + self.assertFalse(schema_util.schema_version_greater_equal(schema2, "8.3.0")) + + schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/schema_utf8.mediawiki') + schema3 = load_schema(schema_path, schema_namespace="tl:") + self.assertTrue(schema_util.schema_version_greater_equal(schema3, "8.3.0")) + + schema_group = HedSchemaGroup([schema1, schema2]) + self.assertFalse(schema_util.schema_version_greater_equal(schema_group, "8.3.0")) + + schema_group = HedSchemaGroup([schema2, schema3]) + self.assertTrue(schema_util.schema_version_greater_equal(schema_group, "8.3.0")) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/schema/test_schema_validation_util.py b/tests/schema/test_schema_validation_util.py index b8fd9e99..5cecc1b0 100644 --- a/tests/schema/test_schema_validation_util.py +++ b/tests/schema/test_schema_validation_util.py @@ -1,88 +1,87 @@ -import os -import unittest -import hed.schema.schema_validation_util as util -from hed.errors import ErrorHandler, SchemaWarnings -from hed import load_schema_version, load_schema, HedSchemaGroup -from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.hed_schema = load_schema_version("8.1.0") - - def validate_term_base(self, input_text, expected_issues): - for text, issues in zip(input_text, expected_issues): - entry = HedTagEntry(name=text, section=None) - entry.short_tag_name = text - test_issues = util.validate_schema_tag_new(entry) - self.assertCountEqual(issues, test_issues) - - def validate_desc_base(self, input_descriptions, expected_issues): - for description, issues in zip(input_descriptions, expected_issues): - entry = HedSchemaEntry(name="dummy", section=None) - entry.description = description - test_issues = util.validate_schema_description_new(entry) - self.assertCountEqual(issues, test_issues) - - def test_validate_schema_term(self): - test_terms = [ - "invalidcaps", - "Validcaps", - "3numberisvalid", - "Invalidchar#", - "@invalidcharatstart", - ] - expected_issues = [ - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[0], char_index=0, - problem_char="i"), - [], - [], - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[3], char_index=11, - problem_char="#"), - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[4], char_index=0, - problem_char="@") - + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[4], char_index=0, - problem_char="@"), - ] - self.validate_term_base(test_terms, expected_issues) - - def test_validate_schema_description(self): - test_descs = [ - "This is a tag description with no invalid characters.", - "This is (also) a tag description with no invalid characters. -_:;./()+ ^", - "This description has no invalid characters, as commas are allowed", - "This description has multiple invalid characters at the end {}[]" - ] - expected_issues = [ - [], - [], - [], - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=60, problem_char="{") - + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=61, problem_char="}") - + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=62, problem_char="[") - + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=63, problem_char="]") - - ] - self.validate_desc_base(test_descs, expected_issues) - - def test_schema_version_for_library(self): - schema1 = load_schema_version("8.0.0") - self.assertEqual(util.schema_version_for_library(schema1, ""), "8.0.0") - self.assertEqual(util.schema_version_for_library(schema1, None), "8.0.0") - - schema2 = load_schema_version("8.3.0") - self.assertEqual(util.schema_version_for_library(schema2, ""), "8.3.0") - self.assertEqual(util.schema_version_for_library(schema2, None), "8.3.0") - - schema3 = load_schema_version(["testlib_2.0.0", "score_1.1.0"]) - self.assertEqual(util.schema_version_for_library(schema3, ""), "8.2.0") - self.assertEqual(util.schema_version_for_library(schema3, None), "8.2.0") - self.assertEqual(util.schema_version_for_library(schema3, "score"), "1.1.0") - self.assertEqual(util.schema_version_for_library(schema3, "testlib"), "2.0.0") - - self.assertEqual(util.schema_version_for_library(schema3, "badlib"), None) +import unittest +import hed.schema.schema_validation_util as util +from hed.errors import ErrorHandler, SchemaWarnings +from hed import load_schema_version +from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.1.0") + + def validate_term_base(self, input_text, expected_issues): + for text, issues in zip(input_text, expected_issues): + entry = HedTagEntry(name=text, section=None) + entry.short_tag_name = text + test_issues = util.validate_schema_tag_new(entry) + self.assertCountEqual(issues, test_issues) + + def validate_desc_base(self, input_descriptions, expected_issues): + for description, issues in zip(input_descriptions, expected_issues): + entry = HedSchemaEntry(name="dummy", section=None) + entry.description = description + test_issues = util.validate_schema_description_new(entry) + self.assertCountEqual(issues, test_issues) + + def test_validate_schema_term(self): + test_terms = [ + "invalidcaps", + "Validcaps", + "3numberisvalid", + "Invalidchar#", + "@invalidcharatstart", + ] + expected_issues = [ + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[0], char_index=0, + problem_char="i"), + [], + [], + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[3], char_index=11, + problem_char="#"), + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[4], char_index=0, + problem_char="@") + + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[4], char_index=0, + problem_char="@"), + ] + self.validate_term_base(test_terms, expected_issues) + + def test_validate_schema_description(self): + test_descs = [ + "This is a tag description with no invalid characters.", + "This is (also) a tag description with no invalid characters. -_:;./()+ ^", + "This description has no invalid characters, as commas are allowed", + "This description has multiple invalid characters at the end {}[]" + ] + expected_issues = [ + [], + [], + [], + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=60, problem_char="{") + + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=61, problem_char="}") + + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=62, problem_char="[") + + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=63, problem_char="]") + + ] + self.validate_desc_base(test_descs, expected_issues) + + def test_schema_version_for_library(self): + schema1 = load_schema_version("8.0.0") + self.assertEqual(util.schema_version_for_library(schema1, ""), "8.0.0") + self.assertEqual(util.schema_version_for_library(schema1, None), "8.0.0") + + schema2 = load_schema_version("8.3.0") + self.assertEqual(util.schema_version_for_library(schema2, ""), "8.3.0") + self.assertEqual(util.schema_version_for_library(schema2, None), "8.3.0") + + schema3 = load_schema_version(["testlib_2.0.0", "score_1.1.0"]) + self.assertEqual(util.schema_version_for_library(schema3, ""), "8.2.0") + self.assertEqual(util.schema_version_for_library(schema3, None), "8.2.0") + self.assertEqual(util.schema_version_for_library(schema3, "score"), "1.1.0") + self.assertEqual(util.schema_version_for_library(schema3, "testlib"), "2.0.0") + + self.assertEqual(util.schema_version_for_library(schema3, "badlib"), None) diff --git a/tests/schema/test_schema_validation_util_deprecated.py b/tests/schema/test_schema_validation_util_deprecated.py index 5da596b3..441687d8 100644 --- a/tests/schema/test_schema_validation_util_deprecated.py +++ b/tests/schema/test_schema_validation_util_deprecated.py @@ -1,69 +1,68 @@ -import os -import unittest -import hed.schema.schema_validation_util_deprecated as util -from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry -from hed.errors import ErrorHandler, SchemaWarnings -from hed import load_schema_version - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.hed_schema = load_schema_version("8.1.0") - - def validate_term_base(self, input_text, expected_issues): - for text, issues in zip(input_text, expected_issues): - entry = HedTagEntry(name=text, section=None) - entry.short_tag_name = text - test_issues = util.validate_schema_tag(entry) - self.assertCountEqual(issues, test_issues) - - def validate_desc_base(self, input_descriptions, expected_issues): - for description, issues in zip(input_descriptions, expected_issues): - entry = HedSchemaEntry(name="dummy", section=None) - entry.description = description - test_issues = util.validate_schema_description(entry) - self.assertCountEqual(issues, test_issues) - - def test_validate_schema_term(self): - test_terms = [ - "invalidcaps", - "Validcaps", - "3numberisvalid", - "Invalidchar#", - "@invalidcharatstart", - ] - expected_issues = [ - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[0], char_index=0, - problem_char="i"), - [], - [], - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[3], char_index=11, - problem_char="#"), - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[4], char_index=0, - problem_char="@"), - ] - self.validate_term_base(test_terms, expected_issues) - - def test_validate_schema_description(self): - test_descs = [ - "This is a tag description with no invalid characters.", - "This is (also) a tag description with no invalid characters. -_:;./()+ ^", - "This description has no invalid characters, as commas are allowed", - "This description has multiple invalid characters at the end @$%*" - ] - expected_issues = [ - [], - [], - [], - ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=60, problem_char="@") - + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=61, problem_char="$") - + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=62, problem_char="%") - + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", - char_index=63, problem_char="*") - - ] - self.validate_desc_base(test_descs, expected_issues) \ No newline at end of file +import unittest +import hed.schema.schema_validation_util_deprecated as util +from hed.schema.hed_schema_entry import HedSchemaEntry, HedTagEntry +from hed.errors import ErrorHandler, SchemaWarnings +from hed import load_schema_version + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.1.0") + + def validate_term_base(self, input_text, expected_issues): + for text, issues in zip(input_text, expected_issues): + entry = HedTagEntry(name=text, section=None) + entry.short_tag_name = text + test_issues = util.validate_schema_tag(entry) + self.assertCountEqual(issues, test_issues) + + def validate_desc_base(self, input_descriptions, expected_issues): + for description, issues in zip(input_descriptions, expected_issues): + entry = HedSchemaEntry(name="dummy", section=None) + entry.description = description + test_issues = util.validate_schema_description(entry) + self.assertCountEqual(issues, test_issues) + + def test_validate_schema_term(self): + test_terms = [ + "invalidcaps", + "Validcaps", + "3numberisvalid", + "Invalidchar#", + "@invalidcharatstart", + ] + expected_issues = [ + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[0], char_index=0, + problem_char="i"), + [], + [], + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG, test_terms[3], char_index=11, + problem_char="#"), + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION, test_terms[4], char_index=0, + problem_char="@"), + ] + self.validate_term_base(test_terms, expected_issues) + + def test_validate_schema_description(self): + test_descs = [ + "This is a tag description with no invalid characters.", + "This is (also) a tag description with no invalid characters. -_:;./()+ ^", + "This description has no invalid characters, as commas are allowed", + "This description has multiple invalid characters at the end @$%*" + ] + expected_issues = [ + [], + [], + [], + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=60, problem_char="@") + + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=61, problem_char="$") + + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=62, problem_char="%") + + ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, test_descs[3], "dummy", + char_index=63, problem_char="*") + + ] + self.validate_desc_base(test_descs, expected_issues) diff --git a/tests/schema/test_schema_validator_hed_id.py b/tests/schema/test_schema_validator_hed_id.py index 224dace5..262db8be 100644 --- a/tests/schema/test_schema_validator_hed_id.py +++ b/tests/schema/test_schema_validator_hed_id.py @@ -8,7 +8,7 @@ # tests needed: -# 1. Verify hed id(HARDEST, MAY SKIP) +# 1. Verify HED id(HARDEST, MAY SKIP) # 4. Json tests class Test(unittest.TestCase): @@ -50,8 +50,8 @@ def test_verify_tag_id(self): issues = id_validator.verify_tag_id(self.hed_schema84, event_entry, HedKey.HedID) self.assertTrue("It has changed", issues[0]["message"]) self.assertTrue("between 10000", issues[0]["message"]) - breakHere = 3 + break_here = 3 event_entry = self.hed_schema84.tags["Event"] event_entry.attributes[HedKey.HedID] = "HED_XXXXXXX" - self.assertTrue("It must be an integer in the format", issues[0]["message"]) \ No newline at end of file + self.assertTrue("It must be an integer in the format", issues[0]["message"]) diff --git a/tests/schema/test_schema_wiki_fatal_errors.py b/tests/schema/test_schema_wiki_fatal_errors.py index 43348432..835b47d0 100644 --- a/tests/schema/test_schema_wiki_fatal_errors.py +++ b/tests/schema/test_schema_wiki_fatal_errors.py @@ -1,117 +1,114 @@ -import unittest -import os - -from hed import load_schema -from hed.errors import HedFileError, HedExceptions - - -class TestHedSchema(unittest.TestCase): - base_schema_dir = '../data/schema_tests/wiki_tests/' - - @classmethod - def setUpClass(cls): - cls.full_base_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.base_schema_dir) - cls.files_and_errors = { - "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, - "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, - "HED_hed_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, - "HED_separator_invalid.mediawiki": HedExceptions.WIKI_SEPARATOR_INVALID, - "HED_header_missing.mediawiki": HedExceptions.SCHEMA_HEADER_MISSING, - "HED_header_invalid.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, - "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, - "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, - "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, - "HED_header_unknown_attribute.mediawiki": HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE, - "HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME, - "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, - "empty_node.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "malformed_line.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "malformed_line2.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "malformed_line3.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "malformed_line4.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "malformed_line5.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "malformed_line6.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "malformed_line7.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, - "empty_node.xml": HedExceptions.HED_SCHEMA_NODE_NAME_INVALID - } - - cls.expected_count = { - "empty_node.mediawiki": 1, - "malformed_line.mediawiki": 1, - "malformed_line2.mediawiki": 2, - "malformed_line3.mediawiki": 2, - "malformed_line4.mediawiki": 1, - "malformed_line5.mediawiki": 1, - "malformed_line6.mediawiki": 2, - "malformed_line7.mediawiki": 2, - 'HED_schema_no_start.mediawiki': 1 - } - cls.expected_line_numbers = { - "empty_node.mediawiki": [9], - "malformed_line.mediawiki": [9], - "malformed_line2.mediawiki": [9, 9], - "malformed_line3.mediawiki": [9, 9], - "malformed_line4.mediawiki": [9], - "malformed_line5.mediawiki": [9], - "malformed_line6.mediawiki": [9, 10], - "malformed_line7.mediawiki": [9, 10], - } - - def test_invalid_schema(self): - for filename, error in self.files_and_errors.items(): - full_filename = self.full_base_folder + filename - with self.assertRaises(HedFileError) as context: - load_schema(full_filename) - # all of these should produce exceptions. - from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string - - # Verify basic properties of exception - expected_line_numbers = self.expected_line_numbers.get(filename, []) - if expected_line_numbers: - for issue, expected in zip(context.exception.issues, expected_line_numbers): - self.assertEqual(issue[ErrorContext.ROW], expected) - - - issues = context.exception.issues - - self.assertIsInstance(get_printable_issue_string(issues), str) - - self.assertTrue(context.exception.args[0] == error) - self.assertTrue(context.exception.filename == full_filename) - - - def test_merging_errors_schema(self): - for filename, error in self.files_and_errors.items(): - full_filename = self.full_base_folder + filename - with self.assertRaises(HedFileError) as context: - load_schema(full_filename) - # all of these should produce exceptions. - from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string - from hed.errors.error_types import SchemaAttributeErrors - # Verify basic properties of exception - expected_line_numbers = self.expected_line_numbers.get(filename, []) - if expected_line_numbers: - for issue, expected in zip(context.exception.issues, expected_line_numbers): - self.assertEqual(issue[ErrorContext.ROW], expected) - - error_handler = ErrorHandler() - - error_handler.push_error_context(ErrorContext.ROW, 1) - error_handler.push_error_context(ErrorContext.COLUMN, 2) - - issues = error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID, - "error_attribute", source_tag="error_tag") - error_handler.pop_error_context() - error_handler.pop_error_context() - - issues += context.exception.issues - self.assertIsInstance(get_printable_issue_string(issues), str) - - self.assertTrue(context.exception.args[0] == error) - self.assertTrue(context.exception.filename == full_filename) - - def test_attribute_invalid(self): - path = os.path.join(self.full_base_folder, "attribute_unknown1.mediawiki") - schema = load_schema(path) - issues = schema.check_compliance() - self.assertEqual(len(issues), 7) \ No newline at end of file +import unittest +import os + +from hed import load_schema +from hed.errors import HedFileError, HedExceptions + + +class TestHedSchema(unittest.TestCase): + base_schema_dir = '../data/schema_tests/wiki_tests/' + + @classmethod + def setUpClass(cls): + cls.full_base_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.base_schema_dir) + cls.files_and_errors = { + "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_hed_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_separator_invalid.mediawiki": HedExceptions.WIKI_SEPARATOR_INVALID, + "HED_header_missing.mediawiki": HedExceptions.SCHEMA_HEADER_MISSING, + "HED_header_invalid.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, + "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, + "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, + "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, + "HED_header_unknown_attribute.mediawiki": HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE, + "HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME, + "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "empty_node.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line2.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line3.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line4.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line5.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line6.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line7.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "empty_node.xml": HedExceptions.HED_SCHEMA_NODE_NAME_INVALID + } + + cls.expected_count = { + "empty_node.mediawiki": 1, + "malformed_line.mediawiki": 1, + "malformed_line2.mediawiki": 2, + "malformed_line3.mediawiki": 2, + "malformed_line4.mediawiki": 1, + "malformed_line5.mediawiki": 1, + "malformed_line6.mediawiki": 2, + "malformed_line7.mediawiki": 2, + 'HED_schema_no_start.mediawiki': 1 + } + cls.expected_line_numbers = { + "empty_node.mediawiki": [9], + "malformed_line.mediawiki": [9], + "malformed_line2.mediawiki": [9, 9], + "malformed_line3.mediawiki": [9, 9], + "malformed_line4.mediawiki": [9], + "malformed_line5.mediawiki": [9], + "malformed_line6.mediawiki": [9, 10], + "malformed_line7.mediawiki": [9, 10], + } + + def test_invalid_schema(self): + for filename, error in self.files_and_errors.items(): + full_filename = self.full_base_folder + filename + with self.assertRaises(HedFileError) as context: + load_schema(full_filename) + # all of these should produce exceptions. + from hed.errors import ErrorContext, get_printable_issue_string + + # Verify basic properties of exception + expected_line_numbers = self.expected_line_numbers.get(filename, []) + if expected_line_numbers: + for issue, expected in zip(context.exception.issues, expected_line_numbers): + self.assertEqual(issue[ErrorContext.ROW], expected) + issues = context.exception.issues + + self.assertIsInstance(get_printable_issue_string(issues), str) + + self.assertTrue(context.exception.args[0] == error) + self.assertTrue(context.exception.filename == full_filename) + + def test_merging_errors_schema(self): + for filename, error in self.files_and_errors.items(): + full_filename = self.full_base_folder + filename + with self.assertRaises(HedFileError) as context: + load_schema(full_filename) + # all of these should produce exceptions. + from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string + from hed.errors.error_types import SchemaAttributeErrors + # Verify basic properties of exception + expected_line_numbers = self.expected_line_numbers.get(filename, []) + if expected_line_numbers: + for issue, expected in zip(context.exception.issues, expected_line_numbers): + self.assertEqual(issue[ErrorContext.ROW], expected) + + error_handler = ErrorHandler() + + error_handler.push_error_context(ErrorContext.ROW, 1) + error_handler.push_error_context(ErrorContext.COLUMN, 2) + + issues = error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID, + "error_attribute", source_tag="error_tag") + error_handler.pop_error_context() + error_handler.pop_error_context() + + issues += context.exception.issues + self.assertIsInstance(get_printable_issue_string(issues), str) + + self.assertTrue(context.exception.args[0] == error) + self.assertTrue(context.exception.filename == full_filename) + + def test_attribute_invalid(self): + path = os.path.join(self.full_base_folder, "attribute_unknown1.mediawiki") + schema = load_schema(path) + issues = schema.check_compliance() + self.assertEqual(len(issues), 7) diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py index 4add2b07..d6f13c76 100644 --- a/tests/schema/util_create_schemas.py +++ b/tests/schema/util_create_schemas.py @@ -1,64 +1,66 @@ -from hed.schema import HedKey, HedSectionKey, from_string - - -library_schema_start = """HED library="testcomparison" version="1.1.0" withStandard="8.3.0" unmerged="true" - -'''Prologue''' - -!# start schema - -""" - -library_schema_end = """ - - -!# end hed - """ - -default_end_lines = """ -!# end schema -""" - -required_non_tag = [ -"'''Unit classes'''", -"'''Unit modifiers'''", -"'''Value classes'''", -"'''Schema attributes'''", -"'''Properties'''", -"'''Epilogue'''" -] -def _get_test_schema(node_lines, other_lines=(default_end_lines,)): - node_section = "\n".join(node_lines) - non_tag_section = "\n".join(other_lines) - for name in required_non_tag: - if name not in other_lines: - non_tag_section += f"\n{name}\n" - library_schema_string = library_schema_start + node_section + non_tag_section + library_schema_end - test_schema = from_string(library_schema_string, ".mediawiki") - - return test_schema - - -def load_schema1(): - test_nodes = ["'''TestNode''' [This is a simple test node]\n", - " *TestNode2", - " *TestNode3", - " *TestNode4" - ] - return _get_test_schema(test_nodes) - - -def load_schema2(): - test_nodes = ["'''TestNode''' [This is a simple test node]\n", - " *TestNode2", - " **TestNode3", - " *TestNode5" - ] - - return _get_test_schema(test_nodes) - - -def load_schema_intensity(): - test_nodes = ["'''IntensityTakesValue'''", - " * # {unitClass=intensityUnits}"] - return _get_test_schema(test_nodes) \ No newline at end of file +from hed.schema import from_string + + +library_schema_start = """HED library="testcomparison" version="1.1.0" withStandard="8.3.0" unmerged="true" + +'''Prologue''' + +!# start schema + +""" + +library_schema_end = """ + + +!# end hed + """ + +default_end_lines = """ +!# end schema +""" + +required_non_tag = [ + "'''Unit classes'''", + "'''Unit modifiers'''", + "'''Value classes'''", + "'''Schema attributes'''", + "'''Properties'''", + "'''Epilogue'''" +] + + +def _get_test_schema(node_lines, other_lines=(default_end_lines,)): + node_section = "\n".join(node_lines) + non_tag_section = "\n".join(other_lines) + for name in required_non_tag: + if name not in other_lines: + non_tag_section += f"\n{name}\n" + library_schema_string = library_schema_start + node_section + non_tag_section + library_schema_end + test_schema = from_string(library_schema_string, ".mediawiki") + + return test_schema + + +def load_schema1(): + test_nodes = ["'''TestNode''' [This is a simple test node]\n", + " *TestNode2", + " *TestNode3", + " *TestNode4" + ] + return _get_test_schema(test_nodes) + + +def load_schema2(): + test_nodes = ["'''TestNode''' [This is a simple test node]\n", + " *TestNode2", + " **TestNode3", + " *TestNode5" + ] + + return _get_test_schema(test_nodes) + + +def load_schema_intensity(): + test_nodes = ["'''IntensityTakesValue'''", + " * # {unitClass=intensityUnits}"] + return _get_test_schema(test_nodes) diff --git a/tests/scripts/test_convert_and_update_schema.py b/tests/scripts/test_convert_and_update_schema.py index 370d997a..39597f76 100644 --- a/tests/scripts/test_convert_and_update_schema.py +++ b/tests/scripts/test_convert_and_update_schema.py @@ -1,95 +1,94 @@ -import unittest -import shutil -import copy -import os -from hed import load_schema, load_schema_version -from hed.schema import HedSectionKey, HedKey -from hed.scripts.script_util import add_extension -from hed.scripts.convert_and_update_schema import convert_and_update -import contextlib - - -class TestConvertAndUpdate(unittest.TestCase): - @classmethod - def setUpClass(cls): - # Create a temporary directory for schema files - cls.base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'schemas_update', 'prerelease') - if not os.path.exists(cls.base_path): - os.makedirs(cls.base_path) - - def test_schema_conversion_and_update(self): - # Load a known schema, modify it if necessary, and save it - schema = load_schema_version("8.3.0") - original_name = os.path.join(self.base_path, "test_schema.mediawiki") - schema.save_as_mediawiki(original_name) - - # Assume filenames updated includes just the original schema file for simplicity - filenames = [original_name] - with contextlib.redirect_stdout(None): - result = convert_and_update(filenames, set_ids=False) - - # Verify no error from convert_and_update and the correct schema version was saved - self.assertEqual(result, 0) - - tsv_filename = add_extension(os.path.join(self.base_path, "test_schema"), ".tsv") - schema_reload1 = load_schema(tsv_filename) - schema_reload2 = load_schema(os.path.join(self.base_path, "test_schema.xml")) - - self.assertEqual(schema, schema_reload1) - self.assertEqual(schema, schema_reload2) - - # Now verify after doing this again with a new schema, they're still the same. - schema = load_schema_version("8.3.0") - schema.save_as_dataframes(tsv_filename) - - filenames = [os.path.join(tsv_filename, "test_schema_Tag.tsv")] - with contextlib.redirect_stdout(None): - result = convert_and_update(filenames, set_ids=False) - - # Verify no error from convert_and_update and the correct schema version was saved - self.assertEqual(result, 0) - - schema_reload1 = load_schema(os.path.join(self.base_path, "test_schema.mediawiki")) - schema_reload2 = load_schema(os.path.join(self.base_path, "test_schema.xml")) - - self.assertEqual(schema, schema_reload1) - self.assertEqual(schema, schema_reload2) - - def test_schema_adding_tag(self): - schema = load_schema_version("8.3.0") - basename = os.path.join(self.base_path, "test_schema_edited") - schema.save_as_mediawiki(add_extension(basename, ".mediawiki")) - schema.save_as_xml(add_extension(basename, ".xml")) - schema.save_as_dataframes(add_extension(basename, ".tsv")) - - schema_edited = copy.deepcopy(schema) - test_tag_name = "NewTagWithoutID" - new_entry = schema_edited._create_tag_entry(test_tag_name, HedSectionKey.Tags) - schema_edited._add_tag_to_dict(test_tag_name, new_entry, HedSectionKey.Tags) - - schema_edited.save_as_mediawiki(add_extension(basename, ".mediawiki")) - - # Assume filenames updated includes just the original schema file for simplicity - filenames = [add_extension(basename, ".mediawiki")] - with contextlib.redirect_stdout(None): - result = convert_and_update(filenames, set_ids=False) - self.assertEqual(result, 0) - - schema_reloaded = load_schema(add_extension(basename, ".xml")) - - self.assertEqual(schema_reloaded, schema_edited) - - with contextlib.redirect_stdout(None): - result = convert_and_update(filenames, set_ids=True) - self.assertEqual(result, 0) - - schema_reloaded = load_schema(add_extension(basename, ".xml")) - - reloaded_entry = schema_reloaded.tags[test_tag_name] - self.assertTrue(reloaded_entry.has_attribute(HedKey.HedID)) - - - @classmethod - def tearDownClass(cls): - # Clean up the directory created for testing - shutil.rmtree(cls.base_path) +import unittest +import shutil +import copy +import os +from hed import load_schema, load_schema_version +from hed.schema import HedSectionKey, HedKey +from hed.scripts.script_util import add_extension +from hed.scripts.convert_and_update_schema import convert_and_update +import contextlib + + +class TestConvertAndUpdate(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Create a temporary directory for schema files + cls.base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'schemas_update', 'prerelease') + if not os.path.exists(cls.base_path): + os.makedirs(cls.base_path) + + def test_schema_conversion_and_update(self): + # Load a known schema, modify it if necessary, and save it + schema = load_schema_version("8.3.0") + original_name = os.path.join(self.base_path, "test_schema.mediawiki") + schema.save_as_mediawiki(original_name) + + # Assume filenames updated includes just the original schema file for simplicity + filenames = [original_name] + with contextlib.redirect_stdout(None): + result = convert_and_update(filenames, set_ids=False) + + # Verify no error from convert_and_update and the correct schema version was saved + self.assertEqual(result, 0) + + tsv_filename = add_extension(os.path.join(self.base_path, "test_schema"), ".tsv") + schema_reload1 = load_schema(tsv_filename) + schema_reload2 = load_schema(os.path.join(self.base_path, "test_schema.xml")) + + self.assertEqual(schema, schema_reload1) + self.assertEqual(schema, schema_reload2) + + # Now verify after doing this again with a new schema, they're still the same. + schema = load_schema_version("8.3.0") + schema.save_as_dataframes(tsv_filename) + + filenames = [os.path.join(tsv_filename, "test_schema_Tag.tsv")] + with contextlib.redirect_stdout(None): + result = convert_and_update(filenames, set_ids=False) + + # Verify no error from convert_and_update and the correct schema version was saved + self.assertEqual(result, 0) + + schema_reload1 = load_schema(os.path.join(self.base_path, "test_schema.mediawiki")) + schema_reload2 = load_schema(os.path.join(self.base_path, "test_schema.xml")) + + self.assertEqual(schema, schema_reload1) + self.assertEqual(schema, schema_reload2) + + def test_schema_adding_tag(self): + schema = load_schema_version("8.3.0") + basename = os.path.join(self.base_path, "test_schema_edited") + schema.save_as_mediawiki(add_extension(basename, ".mediawiki")) + schema.save_as_xml(add_extension(basename, ".xml")) + schema.save_as_dataframes(add_extension(basename, ".tsv")) + + schema_edited = copy.deepcopy(schema) + test_tag_name = "NewTagWithoutID" + new_entry = schema_edited._create_tag_entry(test_tag_name, HedSectionKey.Tags) + schema_edited._add_tag_to_dict(test_tag_name, new_entry, HedSectionKey.Tags) + + schema_edited.save_as_mediawiki(add_extension(basename, ".mediawiki")) + + # Assume filenames updated includes just the original schema file for simplicity + filenames = [add_extension(basename, ".mediawiki")] + with contextlib.redirect_stdout(None): + result = convert_and_update(filenames, set_ids=False) + self.assertEqual(result, 0) + + schema_reloaded = load_schema(add_extension(basename, ".xml")) + + self.assertEqual(schema_reloaded, schema_edited) + + with contextlib.redirect_stdout(None): + result = convert_and_update(filenames, set_ids=True) + self.assertEqual(result, 0) + + schema_reloaded = load_schema(add_extension(basename, ".xml")) + + reloaded_entry = schema_reloaded.tags[test_tag_name] + self.assertTrue(reloaded_entry.has_attribute(HedKey.HedID)) + + @classmethod + def tearDownClass(cls): + # Clean up the directory created for testing + shutil.rmtree(cls.base_path) diff --git a/tests/scripts/test_script_util.py b/tests/scripts/test_script_util.py index 55989be1..bd7da7e8 100644 --- a/tests/scripts/test_script_util.py +++ b/tests/scripts/test_script_util.py @@ -1,177 +1,178 @@ -import unittest -import os -import shutil -from hed import load_schema_version -from hed.scripts.script_util import add_extension, sort_base_schemas, validate_all_schema_formats, validate_schema -import contextlib - - -class TestAddExtension(unittest.TestCase): - - def test_regular_extension(self): - """Test that regular extensions are added correctly.""" - self.assertEqual(add_extension("filename", ".txt"), "filename.txt") - self.assertEqual(add_extension("document", ".pdf"), "document.pdf") - - def test_tsv_extension(self): - """Test that .tsv extensions are handled differently.""" - # Assuming the function correctly handles paths with directories - self.assertEqual(add_extension(os.path.normpath("path/to/filename"), ".tsv"), os.path.normpath("path/to/hedtsv/filename")) - # Testing with a basename only - self.assertEqual(add_extension("filename", ".tsv"), os.path.normpath("hedtsv/filename")) - - def test_empty_extension(self): - """Test adding an empty extension.""" - self.assertEqual(add_extension("filename", ""), "filename") - - def test_none_extension(self): - """Test behavior with None as extension.""" - with self.assertRaises(TypeError): - add_extension("filename", None) - - -class TestSortBaseSchemas(unittest.TestCase): - TEST_DIR = 'test_directory' - - @classmethod - def setUpClass(cls): - if not os.path.exists(cls.TEST_DIR): - os.makedirs(cls.TEST_DIR) - os.chdir(cls.TEST_DIR) - cls.create_stub_files() - - @classmethod - def tearDownClass(cls): - os.chdir('..') - shutil.rmtree(cls.TEST_DIR) - - @classmethod - def create_stub_files(cls): - filenames = [ - "test_schema.mediawiki", - os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), - "other_schema.xml", - os.path.normpath("hedtsv/wrong_folder/wrong_name_Tag.tsv"), - os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), - os.path.normpath("not_hedtsv/test_schema/test_schema_Tag.tsv") - ] - for filename in filenames: - filepath = os.path.normpath(filename) - directory = os.path.dirname(filepath) - if directory: - os.makedirs(directory, exist_ok=True) - with open(filepath, 'w') as f: - f.write('') # Create an empty file - - def test_mixed_file_types(self): - filenames = [ - "test_schema.mediawiki", - os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), - "other_schema.xml" - ] - expected = { - "test_schema": {".mediawiki", ".tsv"}, - "other_schema": {".xml"} - } - with contextlib.redirect_stdout(None): - result = sort_base_schemas(filenames) - self.assertEqual(dict(result), expected) - - def test_tsv_in_correct_subfolder(self): - filenames = [ - os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), - os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), - os.path.normpath("hedtsv/wrong_folder/wrong_name_Tag.tsv") # Should be ignored - ] - expected = { - "test_schema": {".tsv"} - } - with contextlib.redirect_stdout(None): - result = sort_base_schemas(filenames) - self.assertEqual(dict(result), expected) - - def test_tsv_in_correct_subfolder2(self): - filenames = [ - os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), - os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), - os.path.normpath("prerelease/hedtsv/wrong_folder/wrong_name_Tag.tsv") # Should be ignored - ] - expected = { - os.path.normpath("prerelease/test_schema"): {".tsv"} - } - with contextlib.redirect_stdout(None): - result = sort_base_schemas(filenames) - self.assertEqual(dict(result), expected) - - def test_ignored_files(self): - filenames = [ - "test_schema.mediawiki", - os.path.normpath("not_hedtsv/test_schema/test_schema_Tag.tsv") # Should be ignored - ] - expected = { - "test_schema": {".mediawiki"} - } - with contextlib.redirect_stdout(None): - result = sort_base_schemas(filenames) - self.assertEqual(dict(result), expected) - - def test_empty_input(self): - filenames = [] - expected = {} - with contextlib.redirect_stdout(None): - result = sort_base_schemas(filenames) - self.assertEqual(dict(result), expected) - - -class TestValidateAllSchemaFormats(unittest.TestCase): - @classmethod - def setUpClass(cls): - # Determine the path to save schemas based on the location of this test file - cls.base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'schemas') - if not os.path.exists(cls.base_path): - os.makedirs(cls.base_path) - cls.basename = "test_schema" - - def test_error_no_error(self): - """Test the function with correctly saved schemas in all three formats.""" - # Load specific schema versions and save them correctly - schema = load_schema_version("8.3.0") - schema.save_as_xml(os.path.join(self.base_path, self.basename + ".xml")) - schema.save_as_dataframes(os.path.join(self.base_path, "hedtsv", self.basename)) - with contextlib.redirect_stdout(None): - issues = validate_all_schema_formats(os.path.join(self.base_path, self.basename)) - self.assertTrue(issues) - self.assertIn("Error loading schema", issues[0]) - - schema.save_as_mediawiki(os.path.join(self.base_path, self.basename + ".mediawiki")) - - with contextlib.redirect_stdout(None): - self.assertEqual(validate_all_schema_formats(os.path.join(self.base_path, self.basename)), []) - - schema_incorrect = load_schema_version("8.2.0") - schema_incorrect.save_as_dataframes(os.path.join(self.base_path, "hedtsv", self.basename)) - - # Validate and expect errors - with contextlib.redirect_stdout(None): - issues = validate_all_schema_formats(os.path.join(self.base_path, self.basename)) - self.assertTrue(issues) - self.assertIn("Multiple schemas of type", issues[0]) - - @classmethod - def tearDownClass(cls): - """Remove the entire directory created for testing to ensure a clean state.""" - shutil.rmtree(cls.base_path) # This will delete the directory and all its contents - - -class TestValidateSchema(unittest.TestCase): - def test_load_invalid_extension(self): - # Verify capital letters fail validation - with contextlib.redirect_stdout(None): - self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.MEDIAWIKI")[0]) - self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.Mediawiki")[0]) - self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.XML")[0]) - self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.Xml")[0]) - self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.TSV")[0]) - self.assertNotIn("Only fully lowercase extensions ", validate_schema("does_not_matter.tsv")[0]) - self.assertNotIn("Only fully lowercase extensions ", validate_schema("does_not_matter.xml")[0]) - self.assertNotIn("Only fully lowercase extensions ", validate_schema("does_not_matter.mediawiki")[0]) \ No newline at end of file +import unittest +import os +import shutil +from hed import load_schema_version +from hed.scripts.script_util import add_extension, sort_base_schemas, validate_all_schema_formats, validate_schema +import contextlib + + +class TestAddExtension(unittest.TestCase): + + def test_regular_extension(self): + """Test that regular extensions are added correctly.""" + self.assertEqual(add_extension("filename", ".txt"), "filename.txt") + self.assertEqual(add_extension("document", ".pdf"), "document.pdf") + + def test_tsv_extension(self): + """Test that .tsv extensions are handled differently.""" + # Assuming the function correctly handles paths with directories + self.assertEqual(add_extension(os.path.normpath("path/to/filename"), ".tsv"), + os.path.normpath("path/to/hedtsv/filename")) + # Testing with a basename only + self.assertEqual(add_extension("filename", ".tsv"), os.path.normpath("hedtsv/filename")) + + def test_empty_extension(self): + """Test adding an empty extension.""" + self.assertEqual(add_extension("filename", ""), "filename") + + def test_none_extension(self): + """Test behavior with None as extension.""" + with self.assertRaises(TypeError): + add_extension("filename", None) + + +class TestSortBaseSchemas(unittest.TestCase): + TEST_DIR = 'test_directory' + + @classmethod + def setUpClass(cls): + if not os.path.exists(cls.TEST_DIR): + os.makedirs(cls.TEST_DIR) + os.chdir(cls.TEST_DIR) + cls.create_stub_files() + + @classmethod + def tearDownClass(cls): + os.chdir('..') + shutil.rmtree(cls.TEST_DIR) + + @classmethod + def create_stub_files(cls): + filenames = [ + "test_schema.mediawiki", + os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), + "other_schema.xml", + os.path.normpath("hedtsv/wrong_folder/wrong_name_Tag.tsv"), + os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("not_hedtsv/test_schema/test_schema_Tag.tsv") + ] + for filename in filenames: + filepath = os.path.normpath(filename) + directory = os.path.dirname(filepath) + if directory: + os.makedirs(directory, exist_ok=True) + with open(filepath, 'w') as f: + f.write('') # Create an empty file + + def test_mixed_file_types(self): + filenames = [ + "test_schema.mediawiki", + os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), + "other_schema.xml" + ] + expected = { + "test_schema": {".mediawiki", ".tsv"}, + "other_schema": {".xml"} + } + with contextlib.redirect_stdout(None): + result = sort_base_schemas(filenames) + self.assertEqual(dict(result), expected) + + def test_tsv_in_correct_subfolder(self): + filenames = [ + os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("hedtsv/wrong_folder/wrong_name_Tag.tsv") # Should be ignored + ] + expected = { + "test_schema": {".tsv"} + } + with contextlib.redirect_stdout(None): + result = sort_base_schemas(filenames) + self.assertEqual(dict(result), expected) + + def test_tsv_in_correct_subfolder2(self): + filenames = [ + os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("prerelease/hedtsv/wrong_folder/wrong_name_Tag.tsv") # Should be ignored + ] + expected = { + os.path.normpath("prerelease/test_schema"): {".tsv"} + } + with contextlib.redirect_stdout(None): + result = sort_base_schemas(filenames) + self.assertEqual(dict(result), expected) + + def test_ignored_files(self): + filenames = [ + "test_schema.mediawiki", + os.path.normpath("not_hedtsv/test_schema/test_schema_Tag.tsv") # Should be ignored + ] + expected = { + "test_schema": {".mediawiki"} + } + with contextlib.redirect_stdout(None): + result = sort_base_schemas(filenames) + self.assertEqual(dict(result), expected) + + def test_empty_input(self): + filenames = [] + expected = {} + with contextlib.redirect_stdout(None): + result = sort_base_schemas(filenames) + self.assertEqual(dict(result), expected) + + +class TestValidateAllSchemaFormats(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Determine the path to save schemas based on the location of this test file + cls.base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'schemas') + if not os.path.exists(cls.base_path): + os.makedirs(cls.base_path) + cls.basename = "test_schema" + + def test_error_no_error(self): + """Test the function with correctly saved schemas in all three formats.""" + # Load specific schema versions and save them correctly + schema = load_schema_version("8.3.0") + schema.save_as_xml(os.path.join(self.base_path, self.basename + ".xml")) + schema.save_as_dataframes(os.path.join(self.base_path, "hedtsv", self.basename)) + with contextlib.redirect_stdout(None): + issues = validate_all_schema_formats(os.path.join(self.base_path, self.basename)) + self.assertTrue(issues) + self.assertIn("Error loading schema", issues[0]) + + schema.save_as_mediawiki(os.path.join(self.base_path, self.basename + ".mediawiki")) + + with contextlib.redirect_stdout(None): + self.assertEqual(validate_all_schema_formats(os.path.join(self.base_path, self.basename)), []) + + schema_incorrect = load_schema_version("8.2.0") + schema_incorrect.save_as_dataframes(os.path.join(self.base_path, "hedtsv", self.basename)) + + # Validate and expect errors + with contextlib.redirect_stdout(None): + issues = validate_all_schema_formats(os.path.join(self.base_path, self.basename)) + self.assertTrue(issues) + self.assertIn("Multiple schemas of type", issues[0]) + + @classmethod + def tearDownClass(cls): + """Remove the entire directory created for testing to ensure a clean state.""" + shutil.rmtree(cls.base_path) # This will delete the directory and all its contents + + +class TestValidateSchema(unittest.TestCase): + def test_load_invalid_extension(self): + # Verify capital letters fail validation + with contextlib.redirect_stdout(None): + self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.MEDIAWIKI")[0]) + self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.Mediawiki")[0]) + self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.XML")[0]) + self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.Xml")[0]) + self.assertIn("Only fully lowercase extensions ", validate_schema("does_not_matter.TSV")[0]) + self.assertNotIn("Only fully lowercase extensions ", validate_schema("does_not_matter.tsv")[0]) + self.assertNotIn("Only fully lowercase extensions ", validate_schema("does_not_matter.xml")[0]) + self.assertNotIn("Only fully lowercase extensions ", validate_schema("does_not_matter.mediawiki")[0]) diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py index eb256383..6e1a74a7 100644 --- a/tests/tools/remodeling/cli/test_run_remodel.py +++ b/tests/tools/remodeling/cli/test_run_remodel.py @@ -84,6 +84,7 @@ def test_parse_tasks(self): self.assertEqual(1, len(tasks2)) files2 = ['task-.tsv', '/base/'] tasks3 = parse_tasks(files2, "*") + self.assertFalse(tasks3) def test_main_bids(self): arg_list = [self.data_root, self.model_path, '-x', 'derivatives', 'stimuli', '-b'] diff --git a/tests/tools/remodeling/operations/test_factor_hed_tags_op.py b/tests/tools/remodeling/operations/test_factor_hed_tags_op.py index 90380f77..9ade528e 100644 --- a/tests/tools/remodeling/operations/test_factor_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_factor_hed_tags_op.py @@ -96,7 +96,7 @@ def test_no_expand_context(self): params["replace_defs"] = True params["remove_types"] = [] op = FactorHedTagsOp(params) - df_new = op.do_op(dispatch, df,'run-01', sidecar=self.json_path) + df_new = op.do_op(dispatch, df, 'run-01', sidecar=self.json_path) df_new = dispatch.post_proc_data(df_new) self.assertEqual(len(df_new), len(df)) self.assertEqual(len(df_new.columns), df_columns + 3) @@ -144,7 +144,7 @@ def test_expand_context(self): # Setup for testing remove types dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') params = json.loads(self.json_params) - params["expand_context"] =True + params["expand_context"] = True params["queries"] = ["Def/Famous-face-cond", "Def/Right-sym-cond", "Def/Initialize-recording"] df = dispatch.get_data_file(self.data_path) df = dispatch.prep_data(df) diff --git a/tests/tools/remodeling/operations/test_split_rows_op.py b/tests/tools/remodeling/operations/test_split_rows_op.py index 60714954..3d3ae35d 100644 --- a/tests/tools/remodeling/operations/test_split_rows_op.py +++ b/tests/tools/remodeling/operations/test_split_rows_op.py @@ -111,7 +111,6 @@ def test_invalid_onset_duration(self): op.do_op(self.dispatch, self.dispatch.prep_data(df2), 'run-01') self.assertEqual('MissingDurationColumn', ex.exception.args[0]) - def test_valid_new_anchor_column(self): # Test when new column is used as anchor event parms = json.loads(self.json_parms) diff --git a/tests/tools/util/test_io_util.py b/tests/tools/util/test_io_util.py index c0add9db..5ac649b3 100644 --- a/tests/tools/util/test_io_util.py +++ b/tests/tools/util/test_io_util.py @@ -6,7 +6,6 @@ parse_bids_filename, _split_entity, get_allowed, get_filtered_by_element - class Test(unittest.TestCase): @classmethod @@ -95,7 +94,7 @@ def test_get_allowed(self): self.assertEqual(value2, test_value1) def test_get_alphanumeric_path(self): - mypath1 = 'g:\\String1%_-sTring2\n//string3\\\\\string4.pnG' + mypath1 = 'g:\\String1%_-sTring2\n//string3\\\\string4.pnG' repath1 = get_alphanumeric_path(mypath1) self.assertEqual('g_String1_sTring2_string3_string4_pnG', repath1) repath2 = get_alphanumeric_path(mypath1, '$') diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py index a148baf4..2e9dd780 100644 --- a/tests/tools/visualization/test_tag_word_cloud.py +++ b/tests/tools/visualization/test_tag_word_cloud.py @@ -1,178 +1,177 @@ -import unittest -import wordcloud -from hed.tools.visualization import tag_word_cloud -from hed.tools.visualization.tag_word_cloud import load_and_resize_mask -import matplotlib.font_manager as fm - -import numpy as np -from PIL import Image, ImageDraw -import os - - -class TestWordCloudFunctions(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__), - '../../data/visualization/word_mask.png')) - - def test_create_wordcloud(self): - word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} - width = 400 - height = 200 - wc = tag_word_cloud.create_wordcloud(word_dict, width=width, height=height) - - self.assertIsInstance(wc, wordcloud.WordCloud) - self.assertEqual(wc.width, width) - self.assertEqual(wc.height, height) - - def test_create_wordcloud_font_direct(self): - word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} - width = 400 - height = 200 - - fonts = fm.findSystemFonts() - if not fonts: - return - font_path = os.path.realpath(fonts[0]) - wc = tag_word_cloud.create_wordcloud(word_dict, width=width, height=height, font_path=font_path) - - self.assertIsInstance(wc, wordcloud.WordCloud) - self.assertEqual(wc.width, width) - self.assertEqual(wc.height, height) - self.assertIn(font_path, wc.font_path) - - def test_create_wordcloud_default_params(self): - word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} - wc = tag_word_cloud.create_wordcloud(word_dict) - - self.assertIsInstance(wc, wordcloud.WordCloud) - self.assertEqual(wc.width, 400) - self.assertEqual(wc.height, 300) - - def test_mask_scaling(self): - word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} - wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=300) - - self.assertIsInstance(wc, wordcloud.WordCloud) - self.assertEqual(wc.width, 300) - self.assertEqual(wc.height, 300) - - def test_mask_scaling2(self): - word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} - wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=None) - - self.assertIsInstance(wc, wordcloud.WordCloud) - self.assertEqual(wc.width, 300) - self.assertLess(wc.height, 300) - - def test_create_wordcloud_with_empty_dict(self): - # Test creation of word cloud with an empty dictionary - word_dict = {} - with self.assertRaises(ValueError): - tag_word_cloud.create_wordcloud(word_dict) - - def test_create_wordcloud_with_single_word(self): - # Test creation of word cloud with a single word - word_dict = {'single_word': 1} - wc = tag_word_cloud.create_wordcloud(word_dict) - self.assertIsInstance(wc, wordcloud.WordCloud) - # Check that the single word is in the word cloud - self.assertIn('single_word', wc.words_) - - def test_valid_word_cloud(self): - word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} - wc = tag_word_cloud.create_wordcloud(word_dict, mask_path=self.mask_path, width=400, height=None) - svg_output = tag_word_cloud.word_cloud_to_svg(wc) - self.assertTrue(svg_output.startswith('