diff --git a/.codespellrc b/.codespellrc deleted file mode 100644 index 18082524..00000000 --- a/.codespellrc +++ /dev/null @@ -1,3 +0,0 @@ -[codespell] -skip = .git,*.pdf,*.svg,deprecated,*.xml,*.mediawiki,*.omn,*.toml -ignore-words-list = covert,hed,assertIn,parms diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5654b31d..bed2e1c9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -133,7 +133,7 @@ jobs: with: coverageCommand: coverage xml debug: true - uses: paambaati/codeclimate-action@v6.0.0 + uses: paambaati/codeclimate-action@v8.0.0 env: CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml new file mode 100644 index 00000000..278271b9 --- /dev/null +++ b/.github/workflows/ci_windows.yaml @@ -0,0 +1,42 @@ +name: CI + +on: + push: + branches: ["main", "master"] + pull_request: + branches: ["main", "master"] + +jobs: + build: + strategy: + matrix: + platform: [windows-latest] + python-version: ["3.10"] + + runs-on: ${{ matrix.platform }} + + steps: + - uses: actions/checkout@v4 + with: + submodules: false + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/cache@v4 + with: + path: ${{ env.pythonLocation }} + key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }} + + - name: Install dependencies + run: | + python -m pip install --upgrade --upgrade-strategy eager pip + pip install -r requirements.txt + + - name: Test with unittest + env: + HED_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + python -m unittest diff --git a/hed/models/basic_search_util.py b/hed/models/basic_search_util.py new file mode 100644 index 00000000..7e3224a5 --- /dev/null +++ b/hed/models/basic_search_util.py @@ -0,0 +1,38 @@ +""" +Utilities to support HED searches based on strings. +""" +from hed.models.hed_string import HedString +from hed.models.hed_tag import HedTag + + +def convert_query(search_query, schema): + """Converts the given basic search query into a hed_string + + Parameters: + search_query(str): The basic search query to convert. + schema(HedSchema): The schema to use to convert tags + + Returns: + long_query(str): The converted search query, in long form. + """ + input_tags = HedString.split_hed_string(search_query) + output_string = "" + skippable_prefix = ("@", "~") + skippable_suffix = ("*", ) + for is_hed_tag, (startpos, endpos) in input_tags: + input_tag = search_query[startpos:endpos] + add_suffix = "" + if is_hed_tag: + if input_tag.startswith(skippable_prefix): + output_string += input_tag[:1] + input_tag = input_tag[1:] + + if input_tag.endswith(skippable_suffix): + add_suffix = input_tag[-1:] + input_tag = input_tag[:-1] + output_string += HedTag(input_tag, schema).long_tag + output_string += add_suffix + else: + output_string += input_tag + + return output_string diff --git a/hed/schema/__init__.py b/hed/schema/__init__.py index 54f4b07a..937f6665 100644 --- a/hed/schema/__init__.py +++ b/hed/schema/__init__.py @@ -3,7 +3,7 @@ from .hed_schema_entry import HedSchemaEntry, UnitClassEntry, UnitEntry, HedTagEntry from .hed_schema_group import HedSchemaGroup from .hed_schema_section import HedSchemaSection -from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version +from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version, from_dataframes from .hed_schema_constants import HedKey, HedSectionKey from .hed_cache import cache_xml_versions, get_hed_versions, \ set_cache_directory, get_cache_directory diff --git a/hed/schema/schema_io/df2schema.py b/hed/schema/schema_io/df2schema.py index 2fd6e6cc..5b26c19f 100644 --- a/hed/schema/schema_io/df2schema.py +++ b/hed/schema/schema_io/df2schema.py @@ -137,35 +137,19 @@ def _read_schema(self, dataframe): Parameters: dataframe (pd.DataFrame): The dataframe for the main tags section """ - # note: this assumes loading is in order row by row. - # If tags are NOT sorted this won't work.(same as mediawiki) self._schema._initialize_attributes(HedSectionKey.Tags) - known_tag_levels = {"HedTag": -1} - parent_tags = [] + known_parent_tags = {"HedTag": []} level_adj = 0 for row_number, row in dataframe[constants.TAG_KEY].iterrows(): # skip blank rows, though there shouldn't be any if not any(row): continue parent_tag = row[constants.subclass_of] - # Return -1 by default for top level rooted tag support(they might not be in the dict) - raw_level = known_tag_levels.get(parent_tag, -1) + 1 - if raw_level == 0: - parent_tags = [] - level_adj = 0 - else: - level = raw_level + level_adj - if level < len(parent_tags): - parent_tags = parent_tags[:level] - elif level > len(parent_tags): - self._add_fatal_error(row_number, row, - "Invalid level reported from Level column", - HedExceptions.GENERIC_ERROR) - continue - - tag_entry, parent_tags, level_adj = self._add_tag_meta(parent_tags, row_number, row, level_adj) + org_parent_tags = known_parent_tags.get(parent_tag, []).copy() + + tag_entry, parent_tags, _ = self._add_tag_meta(org_parent_tags, row_number, row, level_adj) if tag_entry: - known_tag_levels[tag_entry.short_tag_name] = raw_level + known_parent_tags[tag_entry.short_tag_name] = parent_tags.copy() def _read_section(self, df, section_key): self._schema._initialize_attributes(section_key) diff --git a/hed/schema/schema_io/schema2df.py b/hed/schema/schema_io/schema2df.py index 9afee0a3..e2832e1a 100644 --- a/hed/schema/schema_io/schema2df.py +++ b/hed/schema/schema_io/schema2df.py @@ -95,7 +95,7 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): constants.hed_id: f"{tag_id}", constants.level: f"{level}", constants.name: - tag_entry.short_tag_name if not tag_entry.has_attribute(HedKey.TakesValue) + tag_entry.short_tag_name if not tag_entry.name.endswith("#") else tag_entry.short_tag_name + "-#", constants.subclass_of: self._get_subclass_of(tag_entry), constants.attributes: self._format_tag_attributes(tag_entry.attributes), diff --git a/hed/schema/schema_io/schema2wiki.py b/hed/schema/schema_io/schema2wiki.py index 3d562290..e4a8f775 100644 --- a/hed/schema/schema_io/schema2wiki.py +++ b/hed/schema/schema_io/schema2wiki.py @@ -96,14 +96,14 @@ def _add_blank_line(self): self.output.append("") def _format_props_and_desc(self, schema_entry): - prop_string = "" - tag_props = schema_entry.attributes - if tag_props: - prop_string += f"{{{self._format_tag_attributes(tag_props)}}}" + extras_string = "" + attribute_string = self._format_tag_attributes(schema_entry.attributes) + if attribute_string: + extras_string += f"{{{attribute_string}}}" desc = schema_entry.description if desc: - if tag_props: - prop_string += " " - prop_string += f"[{desc}]" + if attribute_string: + extras_string += " " + extras_string += f"[{desc}]" - return prop_string + return extras_string diff --git a/hed/scripts/script_util.py b/hed/scripts/script_util.py index 5f54934a..842b5e76 100644 --- a/hed/scripts/script_util.py +++ b/hed/scripts/script_util.py @@ -1,7 +1,8 @@ import os.path from collections import defaultdict -from hed.schema import from_string, load_schema +from hed.schema import from_string, load_schema, from_dataframes from hed.errors import get_printable_issue_string, HedFileError, SchemaWarnings +from hed.schema.schema_compare import compare_differences all_extensions = [".tsv", ".mediawiki", ".xml"] @@ -35,18 +36,17 @@ def validate_schema(file_path): mediawiki_string = base_schema.get_as_mediawiki_string() reloaded_schema = from_string(mediawiki_string, schema_format=".mediawiki") - if reloaded_schema != base_schema: - error_text = f"Failed to reload {file_path} as mediawiki. " \ - f"There is either a problem with the source file, or the saving/loading code." - validation_issues.append(error_text) + validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "mediawiki") xml_string = base_schema.get_as_xml_string() reloaded_schema = from_string(xml_string, schema_format=".xml") - if reloaded_schema != base_schema: - error_text = f"Failed to reload {file_path} as xml. " \ - f"There is either a problem with the source file, or the saving/loading code." - validation_issues.append(error_text) + validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "xml") + + tsv_dataframes = base_schema.get_as_dataframes() + reloaded_schema = from_dataframes(tsv_dataframes) + + validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "tsv") except HedFileError as e: print(f"Saving/loading error: {file_path} {e.message}") error_text = e.message @@ -209,3 +209,15 @@ def get_prerelease_path(repo_path, schema_name, schema_version): schema_filename = get_schema_filename(schema_name, schema_version) return os.path.join(base_path, "hedtsv", schema_filename) + + +def _get_schema_comparison(schema, schema_reload, file_path, file_format): + if schema_reload != schema: + error_text = f"Failed to reload {file_path} as {file_format}. " \ + f"There is either a problem with the source file, or the saving/loading code." + title_prompt = ("If the problem is in the schema file, " + "the following comparison should indicate the approximate source of the issues:") + error_text += "\n" + compare_differences(schema, schema_reload, title=title_prompt) + return [error_text] + + return [] diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py index 015c2336..3b91efe4 100644 --- a/hed/tools/analysis/annotation_util.py +++ b/hed/tools/analysis/annotation_util.py @@ -199,7 +199,7 @@ def series_to_factor(series): series (Series) - Series to be converted to a list. Returns: - list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0. + list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0. """ replaced = series.replace('n/a', False) filled = replaced.fillna(False) @@ -273,7 +273,7 @@ def to_factor(data, column=None): column (str): Optional column name if DataFrame (otherwise column 0). Returns: - list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0. + list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0. """ if isinstance(data, Series): series = data diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index cd6b81c3..04e6d809 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -219,7 +219,7 @@ def post_proc_data(df): df (DataFrame): The DataFrame to be processed. Returns: - DataFrame: DataFrame with the 'np.NAN replaced by 'n/a'. + DataFrame: DataFrame with the 'np.nan replaced by 'n/a'. """ dtypes = df.dtypes.to_dict() diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index ea98d03c..d9d84215 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -134,7 +134,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ df1 = df.copy() df1[self.source_columns] = df1[self.source_columns].replace( - np.NaN, 'n/a') + np.nan, 'n/a') for column in self.integer_sources: int_mask = df1[column] != 'n/a' df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int) diff --git a/pyproject.toml b/pyproject.toml index b7e341ae..67f8bc46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,5 +77,5 @@ namespaces = false hed = ["schema/schema_data/*.xml", "resources/*.png"] [tool.codespell] -skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests' +skip = '*.git,*.pdf,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests,,*.xml,*.mediawiki,*.omn,*.toml' ignore-words-list = 'te,parms,assertIn' diff --git a/spec_tests/hed-examples b/spec_tests/hed-examples index 5e9f9eba..88246912 160000 --- a/spec_tests/hed-examples +++ b/spec_tests/hed-examples @@ -1 +1 @@ -Subproject commit 5e9f9eba404e0b23f0818f5adf1056fbdf60269c +Subproject commit 88246912339625df9c88554071465c14eaa68396 diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index 0d8303e6..54659951 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit 0d8303e62193ca71600ff62e85936de335ad5d7f +Subproject commit 54659951ca1a2f5e481ef87d98d9de30036f9f56 diff --git a/tests/models/test_basic_search_util.py b/tests/models/test_basic_search_util.py new file mode 100644 index 00000000..9701ca65 --- /dev/null +++ b/tests/models/test_basic_search_util.py @@ -0,0 +1,20 @@ +import unittest +from hed import load_schema_version +from hed.models.basic_search_util import convert_query + + +class TestConvertQueryToForm(unittest.TestCase): + schema = load_schema_version("8.3.0") + + def test_basic_convert(self): + input = "@Event, Head-part*, Time-interval/1" + expected_output = "@Event, Item/Biological-item/Anatomical-item/Body-part/Head-part*, Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1" + + actual_output = convert_query(input, self.schema) + self.assertEqual(expected_output, actual_output) + + input = "@Head-part*, Event, Time-interval/1" + expected_output = "@Item/Biological-item/Anatomical-item/Body-part/Head-part*, Event, Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1" + + actual_output = convert_query(input, self.schema) + self.assertEqual(expected_output, actual_output) \ No newline at end of file diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index 384419d3..dfbea0f8 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -300,13 +300,13 @@ def test_to_factor(self): factor1 = annotation_util.to_factor(series1) self.assertEqual(len(series1), len(factor1)) self.assertEqual(sum(factor1), len(factor1)) - series2 = Series(['a', '', None, np.NAN, 'n/a']) + series2 = Series(['a', '', None, np.nan, 'n/a']) factor2 = annotation_util.to_factor(series2) self.assertEqual(len(series2), len(factor2)) self.assertEqual(sum(factor2), 1) data = { 'Name': ['Alice', '', 'n/a', 1.0], # Contains a space - 'Age': [25, np.NaN, 35, 0] + 'Age': [25, np.nan, 35, 0] } df = DataFrame(data) factor3 = annotation_util.to_factor(df, column='Name') @@ -321,7 +321,7 @@ def test_series_to_factor(self): factor1 = annotation_util.series_to_factor(series1) self.assertEqual(len(series1), len(factor1)) self.assertEqual(sum(factor1), len(factor1)) - series2 = Series(['a', '', None, np.NAN, 'n/a']) + series2 = Series(['a', '', None, np.nan, 'n/a']) factor2 = annotation_util.series_to_factor(series2) self.assertEqual(len(series2), len(factor2)) self.assertEqual(sum(factor2), 1) @@ -465,13 +465,13 @@ def test_to_factor(self): factor1 = annotation_util.to_factor(series1) self.assertEqual(len(series1), len(factor1)) self.assertEqual(sum(factor1), len(factor1)) - series2 = Series(['a', '', None, np.NAN, 'n/a']) + series2 = Series(['a', '', None, np.nan, 'n/a']) factor2 = annotation_util.to_factor(series2) self.assertEqual(len(series2), len(factor2)) self.assertEqual(sum(factor2), 1) data = { 'Name': ['Alice', '', 'n/a', 1.0], # Contains a space - 'Age': [25, np.NaN, 35, 0] + 'Age': [25, np.nan, 35, 0] } df = DataFrame(data) factor3 = annotation_util.to_factor(df, column='Name')