Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making sure NaN fixes Windows test failures #979

Merged
merged 29 commits into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
118d7ba
Merge pull request #964 from hed-standard/dev_try_to_sync_develop
VisLab Jun 14, 2024
ce08217
Bump spec_tests/hed-examples from `5e9f9eb` to `5c3544a`
dependabot[bot] Jun 17, 2024
bb3c8ba
Bump paambaati/codeclimate-action from 6.0.0 to 8.0.0
dependabot[bot] Jun 17, 2024
c8122d5
Merge pull request #965 from hed-standard/dependabot/submodules/devel…
VisLab Jun 17, 2024
0f57eae
Merge pull request #966 from hed-standard/dependabot/github_actions/d…
VisLab Jun 17, 2024
cdec86d
Add a search util function to convert to long
IanCa Jun 17, 2024
4d94071
Rename convert_query
IanCa Jun 17, 2024
0343a3e
Merge pull request #968 from IanCa/develop
VisLab Jun 18, 2024
fe17f13
Bump spec_tests/hed-examples from `5c3544a` to `82a6278`
dependabot[bot] Jun 19, 2024
50564ff
Merge pull request #969 from hed-standard/dependabot/submodules/devel…
VisLab Jun 19, 2024
d0c3306
Allow unordered df schema loading
IanCa Jun 20, 2024
2f9385c
Delete codepsellrc, add to toml
IanCa Jun 21, 2024
b9c3159
Merge pull request #970 from IanCa/develop
VisLab Jun 21, 2024
972b10c
Bump spec_tests/hed-specification from `0d8303e` to `0e8a9b1`
dependabot[bot] Jun 21, 2024
2969177
Merge pull request #971 from hed-standard/dependabot/submodules/devel…
VisLab Jun 21, 2024
217ec8e
Bump spec_tests/hed-specification from `0e8a9b1` to `052bd4a`
dependabot[bot] Jun 24, 2024
553ca55
Bump spec_tests/hed-examples from `82a6278` to `9e5691f`
dependabot[bot] Jun 24, 2024
bb4b6f4
Make validation script check tsv, fix saving # in tsv
IanCa Jun 24, 2024
179ea78
Improve output when schema validation fails to reload the file
IanCa Jun 24, 2024
1a2a3ad
Merge pull request #974 from IanCa/develop
VisLab Jun 24, 2024
40f3078
Merge pull request #973 from hed-standard/dependabot/submodules/devel…
VisLab Jun 24, 2024
69df303
Merge pull request #972 from hed-standard/dependabot/submodules/devel…
VisLab Jun 24, 2024
88b2ee4
Don't output empty {} in mediawiki schemas
IanCa Jun 24, 2024
a30b3c0
Bump spec_tests/hed-specification from `052bd4a` to `5465995`
dependabot[bot] Jun 25, 2024
3f14ba6
Bump spec_tests/hed-examples from `9e5691f` to `8824691`
dependabot[bot] Jun 25, 2024
771bf1c
Merge pull request #977 from hed-standard/dependabot/submodules/devel…
VisLab Jun 26, 2024
0ef01a1
Merge pull request #976 from hed-standard/dependabot/submodules/devel…
VisLab Jun 26, 2024
e2097e9
Fix np.nan, add windows CI workflow
IanCa Jun 26, 2024
c1fa53d
Merge pull request #978 from IanCa/develop
VisLab Jun 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .codespellrc

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ jobs:
with:
coverageCommand: coverage xml
debug: true
uses: paambaati/codeclimate-action@v6.0.0
uses: paambaati/codeclimate-action@v8.0.0
env:
CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}

42 changes: 42 additions & 0 deletions .github/workflows/ci_windows.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Runs the unit tests on Windows for pushes and pull requests targeting main/master.
name: CI

on:
  push:
    branches: ["main", "master"]
  pull_request:
    branches: ["main", "master"]

jobs:
  build:
    strategy:
      matrix:
        platform: [windows-latest]
        python-version: ["3.10"]

    runs-on: ${{ matrix.platform }}

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: false

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Cache the installed Python environment. The key hashes requirements.txt because
      # that is the file the install step below actually consumes; a change to it must
      # invalidate the cached environment.
      - uses: actions/cache@v4
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade --upgrade-strategy eager pip
          pip install -r requirements.txt

      - name: Test with unittest
        env:
          HED_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python -m unittest
38 changes: 38 additions & 0 deletions hed/models/basic_search_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Utilities to support HED searches based on strings.
"""
from hed.models.hed_string import HedString
from hed.models.hed_tag import HedTag


def convert_query(search_query, schema):
    """Convert a basic search query so that every tag in it is in long form.

    Parameters:
        search_query(str): The basic search query to convert.
        schema(HedSchema): The schema used to look up the long form of each tag.

    Returns:
        long_query(str): The converted search query, in long form.
    """
    prefix_markers = ("@", "~")
    suffix_markers = ("*", )
    pieces = []
    for is_hed_tag, (start, end) in HedString.split_hed_string(search_query):
        text = search_query[start:end]
        if not is_hed_tag:
            # Non-tag spans are copied through unchanged.
            pieces.append(text)
            continue

        # Peel off at most one leading marker and one trailing marker so that
        # only the bare tag is handed to HedTag; both are re-attached afterwards.
        lead = text[:1] if text.startswith(prefix_markers) else ""
        core = text[len(lead):]
        trail = ""
        if core.endswith(suffix_markers):
            trail = core[-1:]
            core = core[:-1]
        pieces.append(lead + HedTag(core, schema).long_tag + trail)

    return "".join(pieces)
2 changes: 1 addition & 1 deletion hed/schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .hed_schema_entry import HedSchemaEntry, UnitClassEntry, UnitEntry, HedTagEntry
from .hed_schema_group import HedSchemaGroup
from .hed_schema_section import HedSchemaSection
from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version
from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version, from_dataframes
from .hed_schema_constants import HedKey, HedSectionKey
from .hed_cache import cache_xml_versions, get_hed_versions, \
set_cache_directory, get_cache_directory
26 changes: 5 additions & 21 deletions hed/schema/schema_io/df2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,35 +137,19 @@ def _read_schema(self, dataframe):
Parameters:
dataframe (pd.DataFrame): The dataframe for the main tags section
"""
# note: this assumes loading is in order row by row.
# If tags are NOT sorted this won't work.(same as mediawiki)
self._schema._initialize_attributes(HedSectionKey.Tags)
known_tag_levels = {"HedTag": -1}
parent_tags = []
known_parent_tags = {"HedTag": []}
level_adj = 0
for row_number, row in dataframe[constants.TAG_KEY].iterrows():
# skip blank rows, though there shouldn't be any
if not any(row):
continue
parent_tag = row[constants.subclass_of]
# Return -1 by default for top level rooted tag support(they might not be in the dict)
raw_level = known_tag_levels.get(parent_tag, -1) + 1
if raw_level == 0:
parent_tags = []
level_adj = 0
else:
level = raw_level + level_adj
if level < len(parent_tags):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(row_number, row,
"Invalid level reported from Level column",
HedExceptions.GENERIC_ERROR)
continue

tag_entry, parent_tags, level_adj = self._add_tag_meta(parent_tags, row_number, row, level_adj)
org_parent_tags = known_parent_tags.get(parent_tag, []).copy()

tag_entry, parent_tags, _ = self._add_tag_meta(org_parent_tags, row_number, row, level_adj)
if tag_entry:
known_tag_levels[tag_entry.short_tag_name] = raw_level
known_parent_tags[tag_entry.short_tag_name] = parent_tags.copy()

def _read_section(self, df, section_key):
self._schema._initialize_attributes(section_key)
Expand Down
2 changes: 1 addition & 1 deletion hed/schema/schema_io/schema2df.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0):
constants.hed_id: f"{tag_id}",
constants.level: f"{level}",
constants.name:
tag_entry.short_tag_name if not tag_entry.has_attribute(HedKey.TakesValue)
tag_entry.short_tag_name if not tag_entry.name.endswith("#")
else tag_entry.short_tag_name + "-#",
constants.subclass_of: self._get_subclass_of(tag_entry),
constants.attributes: self._format_tag_attributes(tag_entry.attributes),
Expand Down
16 changes: 8 additions & 8 deletions hed/schema/schema_io/schema2wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,14 @@ def _add_blank_line(self):
self.output.append("")

def _format_props_and_desc(self, schema_entry):
prop_string = ""
tag_props = schema_entry.attributes
if tag_props:
prop_string += f"{{{self._format_tag_attributes(tag_props)}}}"
extras_string = ""
attribute_string = self._format_tag_attributes(schema_entry.attributes)
if attribute_string:
extras_string += f"{{{attribute_string}}}"
desc = schema_entry.description
if desc:
if tag_props:
prop_string += " "
prop_string += f"[{desc}]"
if attribute_string:
extras_string += " "
extras_string += f"[{desc}]"

return prop_string
return extras_string
30 changes: 21 additions & 9 deletions hed/scripts/script_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os.path
from collections import defaultdict
from hed.schema import from_string, load_schema
from hed.schema import from_string, load_schema, from_dataframes
from hed.errors import get_printable_issue_string, HedFileError, SchemaWarnings
from hed.schema.schema_compare import compare_differences

all_extensions = [".tsv", ".mediawiki", ".xml"]

Expand Down Expand Up @@ -35,18 +36,17 @@ def validate_schema(file_path):
mediawiki_string = base_schema.get_as_mediawiki_string()
reloaded_schema = from_string(mediawiki_string, schema_format=".mediawiki")

if reloaded_schema != base_schema:
error_text = f"Failed to reload {file_path} as mediawiki. " \
f"There is either a problem with the source file, or the saving/loading code."
validation_issues.append(error_text)
validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "mediawiki")

xml_string = base_schema.get_as_xml_string()
reloaded_schema = from_string(xml_string, schema_format=".xml")

if reloaded_schema != base_schema:
error_text = f"Failed to reload {file_path} as xml. " \
f"There is either a problem with the source file, or the saving/loading code."
validation_issues.append(error_text)
validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "xml")

tsv_dataframes = base_schema.get_as_dataframes()
reloaded_schema = from_dataframes(tsv_dataframes)

validation_issues += _get_schema_comparison(base_schema, reloaded_schema, file_path, "tsv")
except HedFileError as e:
print(f"Saving/loading error: {file_path} {e.message}")
error_text = e.message
Expand Down Expand Up @@ -209,3 +209,15 @@ def get_prerelease_path(repo_path, schema_name, schema_version):
schema_filename = get_schema_filename(schema_name, schema_version)

return os.path.join(base_path, "hedtsv", schema_filename)


def _get_schema_comparison(schema, schema_reload, file_path, file_format):
if schema_reload != schema:
error_text = f"Failed to reload {file_path} as {file_format}. " \
f"There is either a problem with the source file, or the saving/loading code."
title_prompt = ("If the problem is in the schema file, "
"the following comparison should indicate the approximate source of the issues:")
error_text += "\n" + compare_differences(schema, schema_reload, title=title_prompt)
return [error_text]

return []
4 changes: 2 additions & 2 deletions hed/tools/analysis/annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def series_to_factor(series):
series (Series) - Series to be converted to a list.

Returns:
list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0.
list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0.
"""
replaced = series.replace('n/a', False)
filled = replaced.fillna(False)
Expand Down Expand Up @@ -273,7 +273,7 @@ def to_factor(data, column=None):
column (str): Optional column name if DataFrame (otherwise column 0).

Returns:
list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0.
list - contains 0's and 1's, empty, 'n/a' and np.nan are converted to 0.
"""
if isinstance(data, Series):
series = data
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def post_proc_data(df):
df (DataFrame): The DataFrame to be processed.

Returns:
DataFrame: DataFrame with the 'np.NAN replaced by 'n/a'.
DataFrame: DataFrame with np.nan replaced by 'n/a'.
"""

dtypes = df.dtypes.to_dict()
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/remap_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
"""
df1 = df.copy()
df1[self.source_columns] = df1[self.source_columns].replace(
np.NaN, 'n/a')
np.nan, 'n/a')
for column in self.integer_sources:
int_mask = df1[column] != 'n/a'
df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,5 @@ namespaces = false
hed = ["schema/schema_data/*.xml", "resources/*.png"]

[tool.codespell]
skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests'
skip = '*.git,*.pdf,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests,*.xml,*.mediawiki,*.omn,*.toml'
ignore-words-list = 'te,parms,assertIn'
2 changes: 1 addition & 1 deletion spec_tests/hed-examples
Submodule hed-examples updated 69 files
+1 −1 .github/workflows/codespell.yaml
+24 −0 .github/workflows/links.yaml
+6 −0 .lycheeignore
+1 −1 CODE_OF_CONDUCT.md
+1 −1 README.md
+3 −5 docs/source/BidsAnnotationQuickstart.md
+4 −5 docs/source/FileRemodelingTools.md
+20 −25 docs/source/HedMatlabTools.md
+3 −3 docs/source/HedOnlineTools.md
+5 −5 docs/source/HedPythonTools.md
+11 −18 docs/source/HedSchemas.md
+77 −68 docs/source/HedSearchGuide.md
+3 −3 docs/source/HedValidationGuide.md
+1 −1 docs/source/HowCanYouUseHed.md
+5 −0 docs/source/WhatsNew.md
+2 −0 docs/source/_templates/layout.html
+2 −2 docs/source/index.rst
+1 −1 src/README.md
+1 −1 src/jupyter_notebooks/README.md
+16 −21 src/jupyter_notebooks/bids/README.md
+32 −32 src/jupyter_notebooks/bids/extract_json_template.ipynb
+40 −50 src/jupyter_notebooks/bids/find_event_combinations.ipynb
+32 −32 src/jupyter_notebooks/bids/merge_spreadsheet_into_sidecar.ipynb
+55 −15 src/jupyter_notebooks/bids/sidecar_to_spreadsheet.ipynb
+37 −38 src/jupyter_notebooks/bids/summarize_events.ipynb
+15 −15 src/jupyter_notebooks/bids/validate_bids_dataset.ipynb
+17 −51 src/jupyter_notebooks/bids/validate_bids_dataset_with_libraries.ipynb
+46 −46 src/jupyter_notebooks/bids/validate_bids_datasets.ipynb
+1 −1 src/jupyter_notebooks/remodeling/README.md
+18 −18 src/jupyter_notebooks/remodeling/run_remodel.ipynb
+0 −35 src/matlab_scripts/README.md
+0 −15 src/matlab_scripts/data_cleaning/getChannelMap.m
+0 −39 src/matlab_scripts/data_cleaning/getEventTable.m
+0 −79 src/matlab_scripts/data_cleaning/getFileList.m
+0 −25 src/matlab_scripts/data_cleaning/renameChannels.m
+0 −40 src/matlab_scripts/data_cleaning/runEeglabChannelsToJson.m
+0 −52 src/matlab_scripts/data_cleaning/runEeglabEventsToFiles.m
+0 −87 src/matlab_scripts/data_cleaning/runEeglabFixChannels.m
+0 −47 src/matlab_scripts/data_cleaning/runEeglabImportEvents.m
+0 −88 src/matlab_scripts/data_cleaning/runEeglabImportEventsOld.m
+0 −52 src/matlab_scripts/data_cleaning/runEeglabJsonToChannels.m
+0 −50 src/matlab_scripts/data_cleaning/runEeglabRenameBCITChannels.m
+0 −33 src/matlab_scripts/data_cleaning/runEeglabRenameTask.m
+0 −31 src/matlab_scripts/data_cleaning/setChanTypes.m
+0 −22 src/matlab_scripts/data_cleaning/setChannelTypes.m
+0 −36 src/matlab_scripts/data_cleaning/writeElectrodeFile.m
+0 −9 src/matlab_scripts/hedtools_wrappers/runRemodel.m
+0 −8 src/matlab_scripts/hedtools_wrappers/runRemodelBackup.m
+0 −8 src/matlab_scripts/hedtools_wrappers/runRemodelRestore.m
+0 −9 src/matlab_scripts/hedtools_wrappers/testBidsValidation.m
+0 −17 src/matlab_scripts/hedtools_wrappers/testRemodel.m
+0 −14 src/matlab_scripts/hedtools_wrappers/validateHedInBids.m
+0 −79 src/matlab_scripts/utility_scripts/getFileList.m
+0 −12 src/matlab_scripts/web_services/exampleGenerateSidecar.m
+0 −12 src/matlab_scripts/web_services/getHostOptions.m
+0 −19 src/matlab_scripts/web_services/getSessionInfo.m
+0 −41 src/matlab_scripts/web_services/getTestData.m
+0 −56 src/matlab_scripts/web_services/outputReport.m
+0 −29 src/matlab_scripts/web_services/runAllTests.m
+0 −26 src/matlab_scripts/web_services/runAssembleTest.m
+0 −9 src/matlab_scripts/web_services/runTest.m
+0 −55 src/matlab_scripts/web_services/testEventRemodelingServices.m
+0 −43 src/matlab_scripts/web_services/testEventSearchServices.m
+0 −118 src/matlab_scripts/web_services/testEventServices.m
+0 −22 src/matlab_scripts/web_services/testGetServices.m
+0 −61 src/matlab_scripts/web_services/testLibraryServices.m
+0 −102 src/matlab_scripts/web_services/testSidecarServices.m
+0 −77 src/matlab_scripts/web_services/testSpreadsheetServices.m
+0 −77 src/matlab_scripts/web_services/testStringServices.m
20 changes: 20 additions & 0 deletions tests/models/test_basic_search_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import unittest
from hed import load_schema_version
from hed.models.basic_search_util import convert_query


class TestConvertQueryToForm(unittest.TestCase):
    """Tests for convert_query, which rewrites the tags of a basic search query in long form."""

    @classmethod
    def setUpClass(cls):
        # Load the schema when the tests run rather than as a side effect of importing the module.
        cls.schema = load_schema_version("8.3.0")

    def test_basic_convert(self):
        """Leading '@' and trailing '*' markers are kept while each tag is converted to long form."""
        head_part = "Item/Biological-item/Anatomical-item/Body-part/Head-part"
        time_interval = "Property/Data-property/Data-value/Spatiotemporal-value/Temporal-value/Time-interval/1"
        cases = [
            ("@Event, Head-part*, Time-interval/1", f"@Event, {head_part}*, {time_interval}"),
            ("@Head-part*, Event, Time-interval/1", f"@{head_part}*, Event, {time_interval}"),
        ]
        for query, expected_output in cases:
            # subTest reports each query separately, so one failure does not hide the other case.
            with self.subTest(query=query):
                actual_output = convert_query(query, self.schema)
                self.assertEqual(expected_output, actual_output)
10 changes: 5 additions & 5 deletions tests/tools/analysis/test_annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,13 +300,13 @@ def test_to_factor(self):
factor1 = annotation_util.to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.NAN, 'n/a'])
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
data = {
'Name': ['Alice', '', 'n/a', 1.0], # Contains a space
'Age': [25, np.NaN, 35, 0]
'Age': [25, np.nan, 35, 0]
}
df = DataFrame(data)
factor3 = annotation_util.to_factor(df, column='Name')
Expand All @@ -321,7 +321,7 @@ def test_series_to_factor(self):
factor1 = annotation_util.series_to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.NAN, 'n/a'])
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.series_to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
Expand Down Expand Up @@ -465,13 +465,13 @@ def test_to_factor(self):
factor1 = annotation_util.to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.NAN, 'n/a'])
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
data = {
'Name': ['Alice', '', 'n/a', 1.0], # Contains a space
'Age': [25, np.NaN, 35, 0]
'Age': [25, np.nan, 35, 0]
}
df = DataFrame(data)
factor3 = annotation_util.to_factor(df, column='Name')
Expand Down