Merge pull request #979 from hed-standard/develop

Making sure NaN fixes Windows test failures
hed-standard · Jun 29, 2024 · 6780b08 · 6780b08
2 parents 940e75d + c1fa53d
commit 6780b08
Show file tree

Hide file tree

Showing 17 changed files with 149 additions and 56 deletions.
diff --git a/.codespellrc b/.codespellrc
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -133,7 +133,7 @@ jobs:
         with:
           coverageCommand: coverage xml
           debug: true
-        uses: paambaati/codeclimate-action@v6.0.0
+        uses: paambaati/codeclimate-action@v8.0.0
         env:
           CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
 
diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml
@@ -0,0 +1,42 @@
+# Runs the unittest suite on a Windows runner for pushes and pull requests to main/master.
+name: CI
+
+on:
+  push:
+    branches: ["main", "master"]
+  pull_request:
+    branches: ["main", "master"]
+
+jobs:
+  build:
+    strategy:
+      # Single-entry matrix: one platform, one Python version.
+      matrix:
+        platform: [windows-latest]
+        python-version: ["3.10"]
+
+    runs-on: ${{ matrix.platform }}
+
+    steps:
+      # Check out the repository without fetching its submodules.
+      - uses: actions/checkout@v4
+        with:
+          submodules: false
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # Cache the Python installation directory between runs.
+      # NOTE(review): the key hashes setup.py and docs/requirements.txt, while the
+      # install step below reads requirements.txt -- confirm this is the intended file set.
+      - uses: actions/cache@v4
+        with:
+          path: ${{ env.pythonLocation }}
+          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade --upgrade-strategy eager pip
+          pip install -r requirements.txt
+
+      # The workflow token is exposed to the test process as HED_GITHUB_TOKEN
+      # (presumably read by tests that access GitHub -- verify against the test code).
+      - name: Test with unittest
+        env:
+          HED_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          python -m unittest
diff --git a/hed/models/basic_search_util.py b/hed/models/basic_search_util.py
@@ -0,0 +1,38 @@
+"""
+Utilities to support HED searches based on strings.
+"""
+from hed.models.hed_string import HedString
+from hed.models.hed_tag import HedTag
+
+
+def convert_query(search_query, schema):
+    """Expand every tag in a basic search query to its long form.
+
+    Parameters:
+        search_query(str): The basic search query to convert.
+        schema(HedSchema): The schema used to look up each tag.
+
+    Returns:
+        str: The search query with each tag replaced by its long form.
+    """
+    leading_markers = ("@", "~")
+    trailing_markers = ("*", )
+    pieces = []
+    for is_hed_tag, (startpos, endpos) in HedString.split_hed_string(search_query):
+        segment = search_query[startpos:endpos]
+        if not is_hed_tag:
+            # Non-tag segments are copied through unchanged.
+            pieces.append(segment)
+            continue
+
+        # One leading marker character is emitted as-is and removed before the lookup.
+        if segment.startswith(leading_markers):
+            pieces.append(segment[:1])
+            segment = segment[1:]
+
+        # One trailing marker character is held back and re-attached after the long form.
+        trailing = ""
+        if segment.endswith(trailing_markers):
+            trailing = segment[-1:]
+            segment = segment[:-1]
+
+        pieces.append(HedTag(segment, schema).long_tag)
+        pieces.append(trailing)
+
+    return "".join(pieces)
diff --git a/hed/schema/__init__.py b/hed/schema/__init__.py
@@ -3,7 +3,7 @@
 from .hed_schema_entry import HedSchemaEntry, UnitClassEntry, UnitEntry, HedTagEntry
 from .hed_schema_group import HedSchemaGroup
 from .hed_schema_section import HedSchemaSection
-from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version
+from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version, from_dataframes
 from .hed_schema_constants import HedKey, HedSectionKey
 from .hed_cache import cache_xml_versions, get_hed_versions, \
     set_cache_directory, get_cache_directory
diff --git a/hed/schema/schema_io/df2schema.py b/hed/schema/schema_io/df2schema.py
@@ -137,35 +137,19 @@ def _read_schema(self, dataframe):
         Parameters:
             dataframe (pd.DataFrame): The dataframe for the main tags section
         """
-        # note: this assumes loading is in order row by row.
-        # If tags are NOT sorted this won't work.(same as mediawiki)
         self._schema._initialize_attributes(HedSectionKey.Tags)
-        known_tag_levels = {"HedTag": -1}
-        parent_tags = []
+        known_parent_tags =  {"HedTag": []}
         level_adj = 0
         for row_number, row in dataframe[constants.TAG_KEY].iterrows():
             # skip blank rows, though there shouldn't be any
             if not any(row):
                 continue
             parent_tag = row[constants.subclass_of]
-            # Return -1 by default for top level rooted tag support(they might not be in the dict)
-            raw_level = known_tag_levels.get(parent_tag, -1) + 1
-            if raw_level == 0:
-                parent_tags = []
-                level_adj = 0
-            else:
-                level = raw_level + level_adj
-                if level < len(parent_tags):
-                    parent_tags = parent_tags[:level]
-                elif level > len(parent_tags):
-                    self._add_fatal_error(row_number, row,
-                                          "Invalid level reported from Level column",
-                                          HedExceptions.GENERIC_ERROR)
-                    continue
-
-            tag_entry, parent_tags, level_adj = self._add_tag_meta(parent_tags, row_number, row, level_adj)
+            org_parent_tags = known_parent_tags.get(parent_tag, []).copy()
+
+            tag_entry, parent_tags, _ = self._add_tag_meta(org_parent_tags, row_number, row, level_adj)
             if tag_entry:
-                known_tag_levels[tag_entry.short_tag_name] = raw_level
+                known_parent_tags[tag_entry.short_tag_name] = parent_tags.copy()
 
     def _read_section(self, df, section_key):
         self._schema._initialize_attributes(section_key)

diff --git a/hed/schema/schema_io/schema2df.py b/hed/schema/schema_io/schema2df.py
@@ -95,7 +95,7 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0):
             constants.hed_id: f"{tag_id}",
             constants.level: f"{level}",
             constants.name:
-                tag_entry.short_tag_name if not tag_entry.has_attribute(HedKey.TakesValue)
+                tag_entry.short_tag_name if not tag_entry.name.endswith("#")
                 else tag_entry.short_tag_name + "-#",
             constants.subclass_of: self._get_subclass_of(tag_entry),
             constants.attributes: self._format_tag_attributes(tag_entry.attributes),

diff --git a/hed/schema/schema_io/schema2wiki.py b/hed/schema/schema_io/schema2wiki.py
@@ -96,14 +96,14 @@ def _add_blank_line(self):
         self.output.append("")
 
     def _format_props_and_desc(self, schema_entry):
-        prop_string = ""
-        tag_props = schema_entry.attributes
-        if tag_props:
-            prop_string += f"{{{self._format_tag_attributes(tag_props)}}}"
+        extras_string = ""
+        attribute_string = self._format_tag_attributes(schema_entry.attributes)
+        if attribute_string:
+            extras_string += f"{{{attribute_string}}}"
         desc = schema_entry.description
         if desc:
-            if tag_props:
-                prop_string += " "
-            prop_string += f"[{desc}]"
+            if attribute_string:
+                extras_string += " "
+            extras_string += f"[{desc}]"
 
-        return prop_string
+        return extras_string
diff --git a/hed/scripts/script_util.py b/hed/scripts/script_util.py
@@ -1,7 +1,8 @@
 import os.path
 from collections import defaultdict
-from hed.schema import from_string, load_schema
+from hed.schema import from_string, load_schema, from_dataframes
 from hed.errors import get_printable_issue_string, HedFileError, SchemaWarnings
+from hed.schema.schema_compare import compare_differences
 
 all_extensions = [".tsv", ".mediawiki", ".xml"]
 
@@ -35,18 +36,17 @@ def validate_schema(file_path):
         mediawiki_string = base_schema.get_as_mediawiki_string()
         reloaded_schema = from_string(mediawiki_string, schema_format=".mediawiki")
 
-        if reloaded_schema != base_schema:
-            error_text = f"Failed to reload {file_path} as mediawiki.  " \
-                         f"There is either a problem with the source file, or the saving/loading code."
-            validation_issues.append(error_text)
+        validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "mediawiki")
 
         xml_string = base_schema.get_as_xml_string()
         reloaded_schema = from_string(xml_string, schema_format=".xml")
 
-        if reloaded_schema != base_schema:
-            error_text = f"Failed to reload {file_path} as xml.  " \
-                         f"There is either a problem with the source file, or the saving/loading code."
-            validation_issues.append(error_text)
+        validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "xml")
+
+        tsv_dataframes = base_schema.get_as_dataframes()
+        reloaded_schema = from_dataframes(tsv_dataframes)
+
+        validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "tsv")
     except HedFileError as e:
         print(f"Saving/loading error: {file_path} {e.message}")
         error_text = e.message
@@ -209,3 +209,15 @@ def get_prerelease_path(repo_path, schema_name, schema_version):
     schema_filename = get_schema_filename(schema_name, schema_version)
 
     return os.path.join(base_path, "hedtsv", schema_filename)
+
+
+def _get_schema_comparison(schema, schema_reload, file_path, file_format):
+    """Build the error report for a schema that changed when saved and reloaded.
+
+    Parameters:
+        schema: The schema that was saved.
+        schema_reload: The schema obtained by loading the saved form back in.
+        file_path: Path of the source schema file; used only in the message.
+        file_format: Name of the format that was round-tripped; used only in the message.
+
+    Returns:
+        list: Empty if the reloaded schema does not differ from the original,
+              otherwise a single error string that includes the comparison text.
+    """
+    if not (schema_reload != schema):
+        return []
+
+    failure = (f"Failed to reload {file_path} as {file_format}.  "
+               f"There is either a problem with the source file, or the saving/loading code.")
+    title_prompt = "If the problem is in the schema file, " \
+                   "the following comparison should indicate the approximate source of the issues:"
+    comparison = compare_differences(schema, schema_reload, title=title_prompt)
+    return [failure + "\n" + comparison]
diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py
@@ -199,7 +199,7 @@ def series_to_factor(series):
         series (Series) - Series to be converted to a list.
 
     Returns:
-        list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0.
+        list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0.
     """
     replaced = series.replace('n/a', False)
     filled = replaced.fillna(False)
@@ -273,7 +273,7 @@ def to_factor(data, column=None):
         column (str): Optional column name if DataFrame (otherwise column 0).
 
     Returns:
-        list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0.
+        list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0.
     """
     if isinstance(data, Series):
         series = data

diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py
@@ -219,7 +219,7 @@ def post_proc_data(df):
             df (DataFrame): The DataFrame to be processed.
 
         Returns:
-            DataFrame: DataFrame with the 'np.NAN replaced by 'n/a'.
+            DataFrame: DataFrame with np.nan replaced by 'n/a'.
         """
 
         dtypes = df.dtypes.to_dict()

diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py
@@ -134,7 +134,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
         """
         df1 = df.copy()
         df1[self.source_columns] = df1[self.source_columns].replace(
-            np.NaN, 'n/a')
+            np.nan, 'n/a')
         for column in self.integer_sources:
             int_mask = df1[column] != 'n/a'
             df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int)

diff --git a/pyproject.toml b/pyproject.toml
@@ -77,5 +77,5 @@ namespaces = false
 hed = ["schema/schema_data/*.xml", "resources/*.png"]
 
 [tool.codespell]
-skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests'
+# Comma-separated glob patterns that codespell should not check.
+skip = '*.git,*.pdf,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests,*.xml,*.mediawiki,*.omn,*.toml'
 ignore-words-list = 'te,parms,assertIn'
diff --git a/spec_tests/hed-examples b/spec_tests/hed-examples
diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification
diff --git a/tests/models/test_basic_search_util.py b/tests/models/test_basic_search_util.py
@@ -0,0 +1,20 @@
+import unittest
+from hed import load_schema_version
+from hed.models.basic_search_util import convert_query
+
+
+class TestConvertQueryToForm(unittest.TestCase):
+    """Tests for convert_query, run against schema version 8.3.0."""
+    schema = load_schema_version("8.3.0")
+
+    def test_basic_convert(self):
+        """Tags are expanded to long form; the @ prefix and * suffix are preserved."""
+        # Named `query` rather than `input` so the builtin is not shadowed.
+        query = "@Event, Head-part*, Time-interval/1"
+        expected_output = "@Event, Item/Biological-item/Anatomical-item/Body-part/Head-part*, Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1"
+
+        actual_output = convert_query(query, self.schema)
+        self.assertEqual(expected_output, actual_output)
+
+        # Same tags, with the prefix and suffix both on the first tag.
+        query = "@Head-part*, Event, Time-interval/1"
+        expected_output = "@Item/Biological-item/Anatomical-item/Body-part/Head-part*, Event, Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1"
+
+        actual_output = convert_query(query, self.schema)
+        self.assertEqual(expected_output, actual_output)
diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py
@@ -300,13 +300,13 @@ def test_to_factor(self):
             factor1 = annotation_util.to_factor(series1)
             self.assertEqual(len(series1), len(factor1))
             self.assertEqual(sum(factor1), len(factor1))
-            series2 = Series(['a', '', None, np.NAN, 'n/a'])
+            series2 = Series(['a', '', None, np.nan, 'n/a'])
             factor2 = annotation_util.to_factor(series2)
             self.assertEqual(len(series2), len(factor2))
             self.assertEqual(sum(factor2), 1)
             data = {
                 'Name': ['Alice', '', 'n/a', 1.0],  # Contains an empty string
-                'Age': [25, np.NaN, 35, 0]
+                'Age': [25, np.nan, 35, 0]
             }
             df = DataFrame(data)
             factor3 = annotation_util.to_factor(df, column='Name')
@@ -321,7 +321,7 @@ def test_series_to_factor(self):
         factor1 = annotation_util.series_to_factor(series1)
         self.assertEqual(len(series1), len(factor1))
         self.assertEqual(sum(factor1), len(factor1))
-        series2 = Series(['a', '', None, np.NAN, 'n/a'])
+        series2 = Series(['a', '', None, np.nan, 'n/a'])
         factor2 = annotation_util.series_to_factor(series2)
         self.assertEqual(len(series2), len(factor2))
         self.assertEqual(sum(factor2), 1)
@@ -465,13 +465,13 @@ def test_to_factor(self):
         factor1 = annotation_util.to_factor(series1)
         self.assertEqual(len(series1), len(factor1))
         self.assertEqual(sum(factor1), len(factor1))
-        series2 = Series(['a', '', None, np.NAN, 'n/a'])
+        series2 = Series(['a', '', None, np.nan, 'n/a'])
         factor2 = annotation_util.to_factor(series2)
         self.assertEqual(len(series2), len(factor2))
         self.assertEqual(sum(factor2), 1)
         data = {
             'Name': ['Alice', '', 'n/a', 1.0],  # Contains an empty string
-            'Age': [25, np.NaN, 35, 0]
+            'Age': [25, np.nan, 35, 0]
         }
         df = DataFrame(data)
         factor3 = annotation_util.to_factor(df, column='Name')
+1 −1		.github/workflows/codespell.yaml
+24 −0		.github/workflows/links.yaml
+6 −0		.lycheeignore
+1 −1		CODE_OF_CONDUCT.md
+1 −1		README.md
+3 −5		docs/source/BidsAnnotationQuickstart.md
+4 −5		docs/source/FileRemodelingTools.md
+20 −25		docs/source/HedMatlabTools.md
+3 −3		docs/source/HedOnlineTools.md
+5 −5		docs/source/HedPythonTools.md
+11 −18		docs/source/HedSchemas.md
+77 −68		docs/source/HedSearchGuide.md
+3 −3		docs/source/HedValidationGuide.md
+1 −1		docs/source/HowCanYouUseHed.md
+5 −0		docs/source/WhatsNew.md
+2 −0		docs/source/_templates/layout.html
+2 −2		docs/source/index.rst
+1 −1		src/README.md
+1 −1		src/jupyter_notebooks/README.md
+16 −21		src/jupyter_notebooks/bids/README.md
+32 −32		src/jupyter_notebooks/bids/extract_json_template.ipynb
+40 −50		src/jupyter_notebooks/bids/find_event_combinations.ipynb
+32 −32		src/jupyter_notebooks/bids/merge_spreadsheet_into_sidecar.ipynb
+55 −15		src/jupyter_notebooks/bids/sidecar_to_spreadsheet.ipynb
+37 −38		src/jupyter_notebooks/bids/summarize_events.ipynb
+15 −15		src/jupyter_notebooks/bids/validate_bids_dataset.ipynb
+17 −51		src/jupyter_notebooks/bids/validate_bids_dataset_with_libraries.ipynb
+46 −46		src/jupyter_notebooks/bids/validate_bids_datasets.ipynb
+1 −1		src/jupyter_notebooks/remodeling/README.md
+18 −18		src/jupyter_notebooks/remodeling/run_remodel.ipynb
+0 −35		src/matlab_scripts/README.md
+0 −15		src/matlab_scripts/data_cleaning/getChannelMap.m
+0 −39		src/matlab_scripts/data_cleaning/getEventTable.m
+0 −79		src/matlab_scripts/data_cleaning/getFileList.m
+0 −25		src/matlab_scripts/data_cleaning/renameChannels.m
+0 −40		src/matlab_scripts/data_cleaning/runEeglabChannelsToJson.m
+0 −52		src/matlab_scripts/data_cleaning/runEeglabEventsToFiles.m
+0 −87		src/matlab_scripts/data_cleaning/runEeglabFixChannels.m
+0 −47		src/matlab_scripts/data_cleaning/runEeglabImportEvents.m
+0 −88		src/matlab_scripts/data_cleaning/runEeglabImportEventsOld.m
+0 −52		src/matlab_scripts/data_cleaning/runEeglabJsonToChannels.m
+0 −50		src/matlab_scripts/data_cleaning/runEeglabRenameBCITChannels.m
+0 −33		src/matlab_scripts/data_cleaning/runEeglabRenameTask.m
+0 −31		src/matlab_scripts/data_cleaning/setChanTypes.m
+0 −22		src/matlab_scripts/data_cleaning/setChannelTypes.m
+0 −36		src/matlab_scripts/data_cleaning/writeElectrodeFile.m
+0 −9		src/matlab_scripts/hedtools_wrappers/runRemodel.m
+0 −8		src/matlab_scripts/hedtools_wrappers/runRemodelBackup.m
+0 −8		src/matlab_scripts/hedtools_wrappers/runRemodelRestore.m
+0 −9		src/matlab_scripts/hedtools_wrappers/testBidsValidation.m
+0 −17		src/matlab_scripts/hedtools_wrappers/testRemodel.m
+0 −14		src/matlab_scripts/hedtools_wrappers/validateHedInBids.m
+0 −79		src/matlab_scripts/utility_scripts/getFileList.m
+0 −12		src/matlab_scripts/web_services/exampleGenerateSidecar.m
+0 −12		src/matlab_scripts/web_services/getHostOptions.m
+0 −19		src/matlab_scripts/web_services/getSessionInfo.m
+0 −41		src/matlab_scripts/web_services/getTestData.m
+0 −56		src/matlab_scripts/web_services/outputReport.m
+0 −29		src/matlab_scripts/web_services/runAllTests.m
+0 −26		src/matlab_scripts/web_services/runAssembleTest.m
+0 −9		src/matlab_scripts/web_services/runTest.m
+0 −55		src/matlab_scripts/web_services/testEventRemodelingServices.m
+0 −43		src/matlab_scripts/web_services/testEventSearchServices.m
+0 −118		src/matlab_scripts/web_services/testEventServices.m
+0 −22		src/matlab_scripts/web_services/testGetServices.m
+0 −61		src/matlab_scripts/web_services/testLibraryServices.m
+0 −102		src/matlab_scripts/web_services/testSidecarServices.m
+0 −77		src/matlab_scripts/web_services/testSpreadsheetServices.m
+0 −77		src/matlab_scripts/web_services/testStringServices.m