Skip to content

Commit

Permalink
Merge pull request #855 from IanCa/develop
Browse files Browse the repository at this point in the history
Allow validation of files with out of order onsets.
  • Loading branch information
VisLab authored Feb 7, 2024
2 parents f1e4749 + 3df2dab commit d6f3b73
Show file tree
Hide file tree
Showing 21 changed files with 687 additions and 992 deletions.
6 changes: 6 additions & 0 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ def val_error_CURLY_BRACE_UNSUPPORTED_HERE(tag, problem_tag):
return (f"Curly braces are only permitted in sidecars, fully wrapping text in place of a tag. "
f"Invalid character '{problem_tag}' in tag '{tag}'")


@hed_error(ValidationErrors.ONSETS_OUT_OF_ORDER, default_severity=ErrorSeverity.WARNING)
def val_error_ONSETS_OUT_OF_ORDER():
    """ Build the message for the ONSETS_OUT_OF_ORDER validation issue.

    The handler is registered through hed_error with a default severity of WARNING.

    Returns:
        str: The fixed, human-readable message text.
    """
    message = "Onsets need to be temporally increasing for most downstream tools to work."
    return message


@hed_error(ValidationErrors.COMMA_MISSING)
def val_error_comma_missing(tag):
    """ Build the message for the COMMA_MISSING validation issue.

    Parameters:
        tag: The tag after which a comma was expected; it is interpolated into the message.

    Returns:
        str: The message text naming the offending tag.
    """
    message = f"Comma missing after - '{tag}'"
    return message
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class ValidationErrors:
INVALID_TAG_CHARACTER = 'invalidTagCharacter'

CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE"

ONSETS_OUT_OF_ORDER = "ONSETS_OUT_OF_ORDER"


class SidecarErrors:
Expand Down
2 changes: 1 addition & 1 deletion hed/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .column_metadata import ColumnMetadata, ColumnType
from .definition_dict import DefinitionDict
from .definition_entry import DefinitionEntry
from .expression_parser import QueryParser
from .query_handler import QueryHandler
from .hed_group import HedGroup
from .spreadsheet_input import SpreadsheetInput
from .hed_string import HedString
Expand Down
8 changes: 8 additions & 0 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,14 @@ def onsets(self):
if "onset" in self.columns:
return self._dataframe["onset"]

@property
def needs_sorting(self):
    """ Return True if this has an onset column and that column needs sorting.

    The onset values are compared as floats, so string onsets such as "2.5" and "10"
    are ordered numerically rather than lexically.

    Returns:
        bool: True if an onset column exists and its values are not monotonically
            increasing (equal neighbours are allowed). False if the column is already
            ordered, or if self.onsets is None.

    Notes:
        Values that cannot be converted to float make astype(float) raise; that error is
        propagated unchanged.
    """
    onsets = self.onsets
    if onsets is None:
        # Nothing to sort. Return an explicit bool rather than falling through to None.
        return False
    return not onsets.astype(float).is_monotonic_increasing

@property
def name(self):
""" Name of the data. """
Expand Down
41 changes: 25 additions & 16 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from hed.models.definition_dict import DefinitionDict


def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_columns=True,
shrink_defs=False, expand_defs=True):
def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, shrink_defs=False, expand_defs=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file with.
Args:
Expand All @@ -20,8 +19,6 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
If str, will attempt to load as a version if it doesn't have a valid extension.
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
join_columns: bool
If True, join all HED columns into one.
shrink_defs: bool
Shrink any def-expand tags found
expand_defs: bool
Expand All @@ -41,19 +38,12 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
if sidecar:
def_dict = sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

if join_columns:
if expand_defs:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
elif shrink_defs:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict
if expand_defs:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
elif shrink_defs:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
else:
return [[HedString(x, hed_schema, def_dict).expand_defs() if expand_defs
else HedString(x, hed_schema, def_dict).shrink_defs() if shrink_defs
else HedString(x, hed_schema, def_dict)
for x in text_file_row] for text_file_row in tabular_file.dataframe_a.itertuples(index=False)], \
def_dict
return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
Expand Down Expand Up @@ -151,3 +141,22 @@ def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs
from hed.models.def_expand_gather import DefExpandGatherer
def_gatherer = DefExpandGatherer(hed_schema, known_defs, ambiguous_defs)
return def_gatherer.process_def_expands(hed_strings)


def sort_dataframe_by_onsets(df):
    """ Return the dataframe with its rows ordered by increasing onset.

    The onset column is compared numerically (as floats), but the returned dataframe keeps
    the original onset values (e.g. their string form) and the original index labels.
    Rows with equal onsets keep their original relative order.

    Parameters:
        df(pd.Dataframe): Dataframe to sort

    Returns:
        A sorted copy of the dataframe, or the original dataframe object itself if it
        didn't have an onset column.

    Notes:
        Onset values that cannot be converted to float make astype(float) raise; that
        error is propagated unchanged.
    """
    if "onset" not in df.columns:
        return df

    # Sort on a detached float view of the onsets so no scratch column is ever added to
    # (or could collide with) the caller's columns.  A stable sort keeps simultaneous
    # events in their original file order; NaN onsets end up last.
    row_order = df["onset"].astype(float).to_numpy().argsort(kind="stable")

    # Positional indexing with an integer array produces a new dataframe, leaving df untouched.
    return df.iloc[row_order]
Loading

0 comments on commit d6f3b73

Please sign in to comment.