Skip to content

Commit

Permalink
Merge pull request #979 from hed-standard/develop
Browse files Browse the repository at this point in the history
Making sure NaN fixes Windows test failures
  • Loading branch information
VisLab authored Jun 29, 2024
2 parents 940e75d + c1fa53d commit 6780b08
Show file tree
Hide file tree
Showing 17 changed files with 149 additions and 56 deletions.
3 changes: 0 additions & 3 deletions .codespellrc

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ jobs:
with:
coverageCommand: coverage xml
debug: true
uses: paambaati/codeclimate-action@v6.0.0
uses: paambaati/codeclimate-action@v8.0.0
env:
CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}

42 changes: 42 additions & 0 deletions .github/workflows/ci_windows.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Runs the unittest suite on Windows for pushes and pull requests that
# target the main/master branches.
name: CI

on:
  push:
    branches: ["main", "master"]
  pull_request:
    branches: ["main", "master"]

jobs:
  build:
    strategy:
      matrix:
        platform: [windows-latest]
        python-version: ["3.10"]

    runs-on: ${{ matrix.platform }}

    steps:
      - uses: actions/checkout@v4
        with:
          # Submodules are not checked out for this job.
          submodules: false

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Cache the whole Python installation. The key includes the files that
      # actually drive the install step below (requirements.txt and
      # pyproject.toml) so a dependency change invalidates the cache.
      - uses: actions/cache@v4
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}-${{ hashFiles('requirements.txt', 'pyproject.toml') }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade --upgrade-strategy eager pip
          pip install -r requirements.txt

      - name: Test with unittest
        env:
          # Made available to the test process as HED_GITHUB_TOKEN.
          HED_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python -m unittest
38 changes: 38 additions & 0 deletions hed/models/basic_search_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Utilities to support HED searches based on strings.
"""
from hed.models.hed_string import HedString
from hed.models.hed_tag import HedTag


def convert_query(search_query, schema):
    """Convert a basic search query into its long-form equivalent.

    The query is split with HedString.split_hed_string. Every span reported
    as a tag is replaced by the long form of that tag; every other span is
    copied to the output unchanged.

    A single leading "@" or "~" and a single trailing "*" are treated as
    search modifiers rather than as part of the tag: they are removed before
    the tag is converted and put back around the converted tag.

    Parameters:
        search_query(str): The basic search query to convert.
        schema(HedSchema): The schema to use to convert tags

    Returns:
        long_query(str): The converted search query, in long form.
    """
    skippable_prefix = ("@", "~")
    skippable_suffix = ("*", )

    # Collect the pieces and join once instead of growing a string in the loop.
    pieces = []
    for is_hed_tag, (startpos, endpos) in HedString.split_hed_string(search_query):
        text = search_query[startpos:endpos]
        if not is_hed_tag:
            # Non-tag text between tags passes through untouched.
            pieces.append(text)
            continue

        if text.startswith(skippable_prefix):
            # Emit the modifier first so it stays in front of the long tag.
            pieces.append(text[:1])
            text = text[1:]

        suffix = ""
        if text.endswith(skippable_suffix):
            suffix = text[-1:]
            text = text[:-1]

        pieces.append(HedTag(text, schema).long_tag)
        pieces.append(suffix)

    return "".join(pieces)
2 changes: 1 addition & 1 deletion hed/schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .hed_schema_entry import HedSchemaEntry, UnitClassEntry, UnitEntry, HedTagEntry
from .hed_schema_group import HedSchemaGroup
from .hed_schema_section import HedSchemaSection
from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version
from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version, from_dataframes
from .hed_schema_constants import HedKey, HedSectionKey
from .hed_cache import cache_xml_versions, get_hed_versions, \
set_cache_directory, get_cache_directory
26 changes: 5 additions & 21 deletions hed/schema/schema_io/df2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,35 +137,19 @@ def _read_schema(self, dataframe):
Parameters:
dataframe (pd.DataFrame): The dataframe for the main tags section
"""
# note: this assumes loading is in order row by row.
# If tags are NOT sorted this won't work.(same as mediawiki)
self._schema._initialize_attributes(HedSectionKey.Tags)
known_tag_levels = {"HedTag": -1}
parent_tags = []
known_parent_tags = {"HedTag": []}
level_adj = 0
for row_number, row in dataframe[constants.TAG_KEY].iterrows():
# skip blank rows, though there shouldn't be any
if not any(row):
continue
parent_tag = row[constants.subclass_of]
# Return -1 by default for top level rooted tag support(they might not be in the dict)
raw_level = known_tag_levels.get(parent_tag, -1) + 1
if raw_level == 0:
parent_tags = []
level_adj = 0
else:
level = raw_level + level_adj
if level < len(parent_tags):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(row_number, row,
"Invalid level reported from Level column",
HedExceptions.GENERIC_ERROR)
continue

tag_entry, parent_tags, level_adj = self._add_tag_meta(parent_tags, row_number, row, level_adj)
org_parent_tags = known_parent_tags.get(parent_tag, []).copy()

tag_entry, parent_tags, _ = self._add_tag_meta(org_parent_tags, row_number, row, level_adj)
if tag_entry:
known_tag_levels[tag_entry.short_tag_name] = raw_level
known_parent_tags[tag_entry.short_tag_name] = parent_tags.copy()

def _read_section(self, df, section_key):
self._schema._initialize_attributes(section_key)
Expand Down
2 changes: 1 addition & 1 deletion hed/schema/schema_io/schema2df.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0):
constants.hed_id: f"{tag_id}",
constants.level: f"{level}",
constants.name:
tag_entry.short_tag_name if not tag_entry.has_attribute(HedKey.TakesValue)
tag_entry.short_tag_name if not tag_entry.name.endswith("#")
else tag_entry.short_tag_name + "-#",
constants.subclass_of: self._get_subclass_of(tag_entry),
constants.attributes: self._format_tag_attributes(tag_entry.attributes),
Expand Down
16 changes: 8 additions & 8 deletions hed/schema/schema_io/schema2wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,14 @@ def _add_blank_line(self):
self.output.append("")

def _format_props_and_desc(self, schema_entry):
prop_string = ""
tag_props = schema_entry.attributes
if tag_props:
prop_string += f"{{{self._format_tag_attributes(tag_props)}}}"
extras_string = ""
attribute_string = self._format_tag_attributes(schema_entry.attributes)
if attribute_string:
extras_string += f"{{{attribute_string}}}"
desc = schema_entry.description
if desc:
if tag_props:
prop_string += " "
prop_string += f"[{desc}]"
if attribute_string:
extras_string += " "
extras_string += f"[{desc}]"

return prop_string
return extras_string
30 changes: 21 additions & 9 deletions hed/scripts/script_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os.path
from collections import defaultdict
from hed.schema import from_string, load_schema
from hed.schema import from_string, load_schema, from_dataframes
from hed.errors import get_printable_issue_string, HedFileError, SchemaWarnings
from hed.schema.schema_compare import compare_differences

all_extensions = [".tsv", ".mediawiki", ".xml"]

Expand Down Expand Up @@ -35,18 +36,17 @@ def validate_schema(file_path):
mediawiki_string = base_schema.get_as_mediawiki_string()
reloaded_schema = from_string(mediawiki_string, schema_format=".mediawiki")

if reloaded_schema != base_schema:
error_text = f"Failed to reload {file_path} as mediawiki. " \
f"There is either a problem with the source file, or the saving/loading code."
validation_issues.append(error_text)
validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "mediawiki")

xml_string = base_schema.get_as_xml_string()
reloaded_schema = from_string(xml_string, schema_format=".xml")

if reloaded_schema != base_schema:
error_text = f"Failed to reload {file_path} as xml. " \
f"There is either a problem with the source file, or the saving/loading code."
validation_issues.append(error_text)
validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "xml")

tsv_dataframes = base_schema.get_as_dataframes()
reloaded_schema = from_dataframes(tsv_dataframes)

validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "tsv")
except HedFileError as e:
print(f"Saving/loading error: {file_path} {e.message}")
error_text = e.message
Expand Down Expand Up @@ -209,3 +209,15 @@ def get_prerelease_path(repo_path, schema_name, schema_version):
schema_filename = get_schema_filename(schema_name, schema_version)

return os.path.join(base_path, "hedtsv", schema_filename)


def _get_schema_comparison(schema, schema_reload, file_path, file_format):
if schema_reload != schema:
error_text = f"Failed to reload {file_path} as {file_format}. " \
f"There is either a problem with the source file, or the saving/loading code."
title_prompt = ("If the problem is in the schema file, "
"the following comparison should indicate the approximate source of the issues:")
error_text += "\n" + compare_differences(schema, schema_reload, title=title_prompt)
return [error_text]

return []
4 changes: 2 additions & 2 deletions hed/tools/analysis/annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def series_to_factor(series):
series (Series) - Series to be converted to a list.
Returns:
list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0.
list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0.
"""
replaced = series.replace('n/a', False)
filled = replaced.fillna(False)
Expand Down Expand Up @@ -273,7 +273,7 @@ def to_factor(data, column=None):
column (str): Optional column name if DataFrame (otherwise column 0).
Returns:
list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0.
list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0.
"""
if isinstance(data, Series):
series = data
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def post_proc_data(df):
df (DataFrame): The DataFrame to be processed.
Returns:
DataFrame: DataFrame with the 'np.NAN replaced by 'n/a'.
DataFrame: DataFrame with np.nan replaced by 'n/a'.
"""

dtypes = df.dtypes.to_dict()
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/remap_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
"""
df1 = df.copy()
df1[self.source_columns] = df1[self.source_columns].replace(
np.NaN, 'n/a')
np.nan, 'n/a')
for column in self.integer_sources:
int_mask = df1[column] != 'n/a'
df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,5 @@ namespaces = false
hed = ["schema/schema_data/*.xml", "resources/*.png"]

[tool.codespell]
skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests'
skip = '*.git,*.pdf,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests,,*.xml,*.mediawiki,*.omn,*.toml'
ignore-words-list = 'te,parms,assertIn'
2 changes: 1 addition & 1 deletion spec_tests/hed-examples
Submodule hed-examples updated 69 files
+1 −1 .github/workflows/codespell.yaml
+24 −0 .github/workflows/links.yaml
+6 −0 .lycheeignore
+1 −1 CODE_OF_CONDUCT.md
+1 −1 README.md
+3 −5 docs/source/BidsAnnotationQuickstart.md
+4 −5 docs/source/FileRemodelingTools.md
+20 −25 docs/source/HedMatlabTools.md
+3 −3 docs/source/HedOnlineTools.md
+5 −5 docs/source/HedPythonTools.md
+11 −18 docs/source/HedSchemas.md
+77 −68 docs/source/HedSearchGuide.md
+3 −3 docs/source/HedValidationGuide.md
+1 −1 docs/source/HowCanYouUseHed.md
+5 −0 docs/source/WhatsNew.md
+2 −0 docs/source/_templates/layout.html
+2 −2 docs/source/index.rst
+1 −1 src/README.md
+1 −1 src/jupyter_notebooks/README.md
+16 −21 src/jupyter_notebooks/bids/README.md
+32 −32 src/jupyter_notebooks/bids/extract_json_template.ipynb
+40 −50 src/jupyter_notebooks/bids/find_event_combinations.ipynb
+32 −32 src/jupyter_notebooks/bids/merge_spreadsheet_into_sidecar.ipynb
+55 −15 src/jupyter_notebooks/bids/sidecar_to_spreadsheet.ipynb
+37 −38 src/jupyter_notebooks/bids/summarize_events.ipynb
+15 −15 src/jupyter_notebooks/bids/validate_bids_dataset.ipynb
+17 −51 src/jupyter_notebooks/bids/validate_bids_dataset_with_libraries.ipynb
+46 −46 src/jupyter_notebooks/bids/validate_bids_datasets.ipynb
+1 −1 src/jupyter_notebooks/remodeling/README.md
+18 −18 src/jupyter_notebooks/remodeling/run_remodel.ipynb
+0 −35 src/matlab_scripts/README.md
+0 −15 src/matlab_scripts/data_cleaning/getChannelMap.m
+0 −39 src/matlab_scripts/data_cleaning/getEventTable.m
+0 −79 src/matlab_scripts/data_cleaning/getFileList.m
+0 −25 src/matlab_scripts/data_cleaning/renameChannels.m
+0 −40 src/matlab_scripts/data_cleaning/runEeglabChannelsToJson.m
+0 −52 src/matlab_scripts/data_cleaning/runEeglabEventsToFiles.m
+0 −87 src/matlab_scripts/data_cleaning/runEeglabFixChannels.m
+0 −47 src/matlab_scripts/data_cleaning/runEeglabImportEvents.m
+0 −88 src/matlab_scripts/data_cleaning/runEeglabImportEventsOld.m
+0 −52 src/matlab_scripts/data_cleaning/runEeglabJsonToChannels.m
+0 −50 src/matlab_scripts/data_cleaning/runEeglabRenameBCITChannels.m
+0 −33 src/matlab_scripts/data_cleaning/runEeglabRenameTask.m
+0 −31 src/matlab_scripts/data_cleaning/setChanTypes.m
+0 −22 src/matlab_scripts/data_cleaning/setChannelTypes.m
+0 −36 src/matlab_scripts/data_cleaning/writeElectrodeFile.m
+0 −9 src/matlab_scripts/hedtools_wrappers/runRemodel.m
+0 −8 src/matlab_scripts/hedtools_wrappers/runRemodelBackup.m
+0 −8 src/matlab_scripts/hedtools_wrappers/runRemodelRestore.m
+0 −9 src/matlab_scripts/hedtools_wrappers/testBidsValidation.m
+0 −17 src/matlab_scripts/hedtools_wrappers/testRemodel.m
+0 −14 src/matlab_scripts/hedtools_wrappers/validateHedInBids.m
+0 −79 src/matlab_scripts/utility_scripts/getFileList.m
+0 −12 src/matlab_scripts/web_services/exampleGenerateSidecar.m
+0 −12 src/matlab_scripts/web_services/getHostOptions.m
+0 −19 src/matlab_scripts/web_services/getSessionInfo.m
+0 −41 src/matlab_scripts/web_services/getTestData.m
+0 −56 src/matlab_scripts/web_services/outputReport.m
+0 −29 src/matlab_scripts/web_services/runAllTests.m
+0 −26 src/matlab_scripts/web_services/runAssembleTest.m
+0 −9 src/matlab_scripts/web_services/runTest.m
+0 −55 src/matlab_scripts/web_services/testEventRemodelingServices.m
+0 −43 src/matlab_scripts/web_services/testEventSearchServices.m
+0 −118 src/matlab_scripts/web_services/testEventServices.m
+0 −22 src/matlab_scripts/web_services/testGetServices.m
+0 −61 src/matlab_scripts/web_services/testLibraryServices.m
+0 −102 src/matlab_scripts/web_services/testSidecarServices.m
+0 −77 src/matlab_scripts/web_services/testSpreadsheetServices.m
+0 −77 src/matlab_scripts/web_services/testStringServices.m
20 changes: 20 additions & 0 deletions tests/models/test_basic_search_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import unittest
from hed import load_schema_version
from hed.models.basic_search_util import convert_query


class TestConvertQueryToForm(unittest.TestCase):
    """Checks that convert_query expands short-form search queries to long form."""

    @classmethod
    def setUpClass(cls):
        # Load the schema when the tests run rather than when the module is imported.
        cls.schema = load_schema_version("8.3.0")

    def test_basic_convert(self):
        head_part = "Item/Biological-item/Anatomical-item/Body-part/Head-part"
        time_interval = ("Property/Data-property/Data-value/Spatiotemporal-value/"
                         "Temporal-value/Time-interval/1")
        # (query, expected long form); the "@" and "*" modifiers must survive conversion.
        cases = [
            ("@Event, Head-part*, Time-interval/1",
             f"@Event, {head_part}*, {time_interval}"),
            ("@Head-part*, Event, Time-interval/1",
             f"@{head_part}*, Event, {time_interval}"),
        ]
        for query, expected_output in cases:
            with self.subTest(query=query):
                actual_output = convert_query(query, self.schema)
                self.assertEqual(expected_output, actual_output)
10 changes: 5 additions & 5 deletions tests/tools/analysis/test_annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,13 +300,13 @@ def test_to_factor(self):
factor1 = annotation_util.to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.NAN, 'n/a'])
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
data = {
'Name': ['Alice', '', 'n/a', 1.0], # Contains a space
'Age': [25, np.NaN, 35, 0]
'Age': [25, np.nan, 35, 0]
}
df = DataFrame(data)
factor3 = annotation_util.to_factor(df, column='Name')
Expand All @@ -321,7 +321,7 @@ def test_series_to_factor(self):
factor1 = annotation_util.series_to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.NAN, 'n/a'])
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.series_to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
Expand Down Expand Up @@ -465,13 +465,13 @@ def test_to_factor(self):
factor1 = annotation_util.to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.NAN, 'n/a'])
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
data = {
'Name': ['Alice', '', 'n/a', 1.0], # Contains a space
'Age': [25, np.NaN, 35, 0]
'Age': [25, np.nan, 35, 0]
}
df = DataFrame(data)
factor3 = annotation_util.to_factor(df, column='Name')
Expand Down

0 comments on commit 6780b08

Please sign in to comment.