
Commit

Merge pull request #959 from hed-standard/develop
Merging in preparation for release
VisLab authored Jun 14, 2024
2 parents a6b1df3 + 233d9ef commit d5deaf8
Showing 101 changed files with 14,394 additions and 730 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
@@ -1,3 +1,10 @@
Release 0.5.0
- Added JSON schema specification of remodeling commands.
- Added support for schema that are specified by .tsv files.
- Added support for embedding schema in an ontology.
- Added WordCloud visualizations.
- Added handling of event context and events of temporal extent.

Release 0.4.0 October 27, 2023
- Refactored the model classes to be based on DataFrame.
- Added additional command line options for remodeling tools.
@@ -8,7 +15,7 @@ Release 0.4.0 October 27, 2023
- Improvements to API-Docs.

Release 0.3.1 July 3, 2023
- Pinned the version of the pydantic and inflect libraries due to inflict.
- Pinned the version of the pydantic and inflect libraries due to conflict.
- Reorganized JSON output of remodeling summaries so that all of consistent form.
- Fixed summarize_hed_tags_op so that tags were correctly categorized for output.
- Minor refactoring to reduce code complexity.
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -5,7 +5,7 @@ numpy>=1.21.6
openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.7.0
semantic_version>=2.10.0
semantic-version>=2.10.0
myst-parser>=1.0.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
20 changes: 0 additions & 20 deletions hed/__init__.py
@@ -15,26 +15,6 @@
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.hed_schema_io import load_schema, load_schema_version

from hed.tools.bids.bids_dataset import BidsDataset
from hed.tools.analysis.event_manager import EventManager
from hed.tools.analysis.file_dictionary import FileDictionary
from hed.tools.analysis.hed_tag_manager import HedTagManager
from hed.tools.analysis.hed_type_defs import HedTypeDefs
from hed.tools.analysis.hed_type_factors import HedTypeFactors
from hed.tools.analysis.hed_type import HedType
from hed.tools.analysis.hed_type_manager import HedTypeManager
from hed.tools.analysis.hed_type_counts import HedTypeCount
from hed.tools.analysis.key_map import KeyMap
from hed.tools.analysis.tabular_summary import TabularSummary
from hed.tools.analysis.temporal_event import TemporalEvent
from hed.tools.analysis.hed_tag_manager import HedTagManager
from hed.tools.analysis.annotation_util import (check_df_columns, extract_tags, generate_sidecar_entry,
get_bids_dataset, hed_to_df, df_to_hed, merge_hed_dict, str_to_tabular, strs_to_sidecar, to_strlist)

from hed.tools.util.hed_logger import HedLogger
from hed.tools.util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns
from hed.tools.util.io_util import check_filename, clean_filename, extract_suffix_path, get_file_list, make_path
from hed.tools.util.io_util import get_dir_dictionary, get_file_list, get_path_components, parse_bids_filename

from . import _version
__version__ = _version.get_versions()['version']
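
Note: the removed lines above drop a number of convenience re-exports from the top-level hed package. Code that previously imported these names from hed presumably needs to import them from their defining submodules instead; a minimal sketch, with the module paths taken verbatim from the removed import lines:

    # Before (relied on the re-exports removed above):
    #     from hed import TabularSummary, BidsDataset
    # After (import directly from the defining submodules):
    from hed.tools.analysis.tabular_summary import TabularSummary
    from hed.tools.bids.bids_dataset import BidsDataset
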
3 changes: 2 additions & 1 deletion hed/errors/error_types.py
@@ -111,8 +111,9 @@ class SidecarErrors:

class SchemaErrors:
SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'

SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
SCHEMA_INVALID_SIBLING = 'SCHEMA_INVALID_SIBLING'
SCHEMA_INVALID_CHILD = 'SCHEMA_INVALID_CHILD'


class SchemaWarnings:
14 changes: 14 additions & 0 deletions hed/errors/schema_error_messages.py
@@ -17,6 +17,20 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_SIBLING, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_SIBLING(tag, sibling_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has siblings. Placeholder tags must be an only child. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in sibling_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_INVALID_CHILD, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list):
tag_join_delimiter = ", "
return f"Placeholder tag '{str(tag)}' has children. Placeholder tags must have no children. Extra tags:" + \
f"{tag_join_delimiter}{tag_join_delimiter.join(str(n) for n in child_tag_list)}"


@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
2 changes: 1 addition & 1 deletion hed/models/base_input.py
@@ -458,7 +458,7 @@ def _open_dataframe_file(self, file, has_column_names, input_type):
elif input_type in self.TEXT_EXTENSION:
try:
self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header,
dtype=str, keep_default_na=True, na_values=("", "null"))
dtype=str, keep_default_na=True, na_values=("", "null"))
except Exception as e:
raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e
# Convert nan values to a known value
8 changes: 4 additions & 4 deletions hed/models/def_expand_gather.py
@@ -162,8 +162,8 @@ def _handle_known_definition(self, def_tag, def_expand_group, def_group):
if not has_extension:
group_tag = def_expand_group.get_first_group()
self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
takes_value=False,
source_context=[])
takes_value=False,
source_context=[])
return True

# this is needed for the cases where we have a definition with errors, but it's not a known definition.
@@ -188,8 +188,8 @@ def _handle_ambiguous_definition(self, def_tag, def_expand_group):
if these_defs.validate():
new_contents = these_defs.get_group()
self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=new_contents,
takes_value=True,
source_context=[])
takes_value=True,
source_context=[])
del self.ambiguous_defs[def_tag_name.casefold()]
except ValueError:
for ambiguous_def in these_defs.placeholder_defs:
4 changes: 2 additions & 2 deletions hed/models/definition_dict.py
@@ -145,8 +145,8 @@ def check_for_definitions(self, hed_string_obj, error_handler=None):
continue

self.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
takes_value=def_takes_value,
source_context=context)
takes_value=def_takes_value,
source_context=context)

return def_issues

2 changes: 2 additions & 0 deletions hed/models/model_constants.py
@@ -1,4 +1,6 @@
""" Defined constants for definitions, def labels, and expanded labels. """


class DefTagNames:
""" Source names for definitions, def labels, and expanded labels. """

16 changes: 8 additions & 8 deletions hed/models/query_handler.py
@@ -16,27 +16,27 @@ def __init__(self, expression_string):
'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event.
'Event and Action' - Find any strings with Event and Action, including descendant tags.
'Event && Action' - Find any strings with Event and Action, including descendant tags.
'Event or Action' - Same as above, but it has either.
'Event || Action' - Same as above, but it has either.
'"Event"' - Finds the Event tag, but not any descendent tags.
`Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder.
'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event.
'[Event and Action]' - Find a group that contains both Event and Action(at any level).
'[Event && Action]' - Find a group that contains both Event and Action(at any level).
'{Event and Action}' - Find a group with Event And Action at the same level.
'{Event && Action}' - Find a group with Event And Action at the same level.
'{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else.
'{Event && Action:}' - Find a group with Event And Action at the same level, and nothing else.
'{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag.
'{Event && Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag.
Practical Complex Example:
{(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag,
{(Onset || Offset), (Def || {Def-expand}): ???} - A group with an onset tag,
a def tag or def-expand group, and an optional wildcard group
Parameters:
@@ -96,7 +96,7 @@ def _tokenize(expression_string):
"""Tokenize the expression string into a list"""
grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
paren_re = r"\)|\(|~"
word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
word_re = r"\?+|\&\&|\|\||,|[\"_\-a-zA-Z0-9/.^#\*@]+"
re_string = fr"({grouping_re}|{paren_re}|{word_re})"
token_re = re.compile(re_string)

4 changes: 2 additions & 2 deletions hed/models/query_util.py
@@ -59,8 +59,8 @@ class Token:
def __init__(self, text):
tokens = {
",": Token.And,
"and": Token.And,
"or": Token.Or,
"&&": Token.And,
"||": Token.Or,
"[": Token.DescendantGroup,
"]": Token.DescendantGroupEnd,
"(": Token.LogicalGroup,
2 changes: 1 addition & 1 deletion hed/models/tabular_input.py
@@ -85,4 +85,4 @@ def get_column_refs(self):

def get_sidecar(self):
"""Return the sidecar associated with this TabularInput."""
return self._sidecar
return self._sidecar
14 changes: 9 additions & 5 deletions hed/schema/hed_cache.py
@@ -154,12 +154,13 @@ def cache_local_versions(cache_folder):
return -1


def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_LIBRARY_URL_LIST, skip_folders=DEFAULT_SKIP_FOLDERS, cache_folder=None):
def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_LIBRARY_URL_LIST,
skip_folders=DEFAULT_SKIP_FOLDERS, cache_folder=None):
""" Cache all schemas at the given URLs.
Parameters:
hed_base_urls (str or list): Path or list of paths. These should point to a single folder.
hed_library_urls (str or list): Path or list of paths. These should point to a folder containing library folders.
hed_library_urls (str or list): Path or list of paths. These should point to folder containing library folders.
skip_folders (list): A list of subfolders to skip over when downloading.
cache_folder (str): The folder holding the cache.
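
Note: the hunk above only rewraps the cache_xml_versions signature and docstring onto shorter lines. A minimal call sketch, assuming the default URL lists are wanted; the cache folder shown here is a hypothetical path (passing cache_folder=None uses the default cache location):

    from hed.schema.hed_cache import cache_xml_versions

    # Download released standard and library schema XML files into a local cache folder.
    cache_xml_versions(cache_folder="/tmp/hed_schema_cache")  # hypothetical folder path
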
@@ -196,7 +197,8 @@ def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_
new_hed_versions = _get_hed_xml_versions_one_library(hed_base_url)
_merge_in_versions(all_hed_versions, new_hed_versions)
for hed_library_url in hed_library_urls:
new_hed_versions = _get_hed_xml_versions_from_url_all_libraries(hed_library_url, skip_folders=skip_folders)
new_hed_versions = _get_hed_xml_versions_from_url_all_libraries(hed_library_url,
skip_folders=skip_folders)
_merge_in_versions(all_hed_versions, new_hed_versions)

for library_name, hed_versions in all_hed_versions.items():
@@ -299,7 +301,8 @@ def _get_hed_xml_versions_one_folder(hed_folder_url):
found_library_name = expression_match.group(2)
if found_library_name not in all_hed_versions:
all_hed_versions[found_library_name] = {}
all_hed_versions[found_library_name][version] = file_entry["sha"], file_entry["download_url"], hed_folder_url.endswith(prerelease_suffix)
all_hed_versions[found_library_name][version] = (
file_entry["sha"], file_entry["download_url"], hed_folder_url.endswith(prerelease_suffix))

return all_hed_versions

@@ -330,7 +333,8 @@ def _get_hed_xml_versions_one_library(hed_one_library_url):
return ordered_versions


def _get_hed_xml_versions_from_url_all_libraries(hed_base_library_url, library_name=None, skip_folders=DEFAULT_SKIP_FOLDERS):
def _get_hed_xml_versions_from_url_all_libraries(hed_base_library_url, library_name=None,
skip_folders=DEFAULT_SKIP_FOLDERS):
""" Get all available schemas and their hash values
Parameters:
6 changes: 4 additions & 2 deletions hed/schema/hed_schema.py
@@ -587,7 +587,7 @@ def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_sl
word_start_index += len(name) + 1

def has_duplicates(self):
"""Returns the first duplicate tag/unit/etc if any section has a duplicate name"""
"""Returns the first duplicate tag/unit/etc. if any section has a duplicate name"""
for section in self._sections.values():
has_duplicates = bool(section.duplicate_names)
if has_duplicates:
@@ -601,6 +601,8 @@ def has_duplicates(self):
# ===============================================
def finalize_dictionaries(self):
""" Call to finish loading. """
# Kludge - Reset this here so it recalculates while having all properties
self._schema83 = None
self._update_all_entries()

def _update_all_entries(self):
@@ -728,7 +730,7 @@ def _get_attributes_for_section(self, key_class):
attributes = {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items()
if entry.has_attribute(attrib_class) or entry.has_attribute(element_prop_key)}
return attributes

# ===============================================
# Semi private function used to create a schema in memory(usually from a source file)
# ===============================================
5 changes: 4 additions & 1 deletion hed/schema/hed_schema_base.py
@@ -1,7 +1,7 @@
"""
Abstract base class for HedSchema and HedSchemaGroup, showing the common functionality
"""
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema.hed_schema_constants import HedSectionKey, HedKey
from abc import ABC, abstractmethod
from hed.schema.schema_io import schema_util

@@ -37,6 +37,9 @@ def schema_83_props(self):
return self._schema83

self._schema83 = schema_util.schema_version_greater_equal(self, "8.3.0")
if self.get_tag_entry(HedKey.ElementDomain, HedSectionKey.Properties):
self._schema83 = True
return self._schema83

@abstractmethod
def get_schema_versions(self):
3 changes: 2 additions & 1 deletion hed/schema/hed_schema_constants.py
@@ -154,5 +154,6 @@ class HedKeyOld:
character_types["text"] = character_types["printable"].copy()
character_types["text"].add("nonascii")
character_types["text"] -= banned_delimiters
character_types["name"] = character_types["alphanumeric"] | character_types["hyphen"] | character_types["period"] | character_types["underscore"]
character_types["name"] = (character_types["alphanumeric"] | character_types["hyphen"] |
character_types["period"] | character_types["underscore"])
character_types["name"].add("nonascii")
14 changes: 11 additions & 3 deletions hed/schema/hed_schema_df_constants.py
@@ -1,4 +1,5 @@
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema import hed_schema_constants

# Known tsv format suffixes

@@ -17,8 +18,8 @@

PROPERTY_KEYS = [ANNOTATION_KEY, DATA_KEY, OBJECT_KEY]
DF_SUFFIXES = {TAG_KEY, STRUCT_KEY, VALUE_CLASS_KEY,
UNIT_CLASS_KEY, UNIT_KEY, UNIT_MODIFIER_KEY,
*PROPERTY_KEYS, ATTRIBUTE_PROPERTY_KEY}
UNIT_CLASS_KEY, UNIT_KEY, UNIT_MODIFIER_KEY,
*PROPERTY_KEYS, ATTRIBUTE_PROPERTY_KEY}

section_mapping = {
STRUCT_KEY: None,
@@ -43,7 +44,7 @@
equivalent_to = "omn:EquivalentTo"
has_unit_class = "hasUnitClass"

struct_columns = [hed_id, name, attributes, subclass_of, description]
struct_columns = [hed_id, name, attributes, subclass_of, description, equivalent_to]
tag_columns = [hed_id, name, level, subclass_of, attributes, description, equivalent_to]
unit_columns = [hed_id, name, subclass_of, has_unit_class, attributes, description, equivalent_to]

@@ -76,3 +77,10 @@
"HedEpilogue": 12
}

# todo: this should be retrieved directly from the appropriate spreadsheet
valid_omn_attributes = {
hed_schema_constants.VERSION_ATTRIBUTE: "HED_0000300",
hed_schema_constants.LIBRARY_ATTRIBUTE: "HED_0000301",
hed_schema_constants.WITH_STANDARD_ATTRIBUTE: "HED_0000302",
hed_schema_constants.UNMERGED_ATTRIBUTE: "HED_0000303"
}
6 changes: 4 additions & 2 deletions hed/schema/hed_schema_entry.py
@@ -128,8 +128,10 @@ def __str__(self):
@staticmethod
def _compare_attributes_no_order(left, right):
if left != right:
left = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in left.items()}
right = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in right.items()}
left = {name: (set(value.split(",")) if isinstance(value, str) else value)
for (name, value) in left.items()}
right = {name: (set(value.split(",")) if isinstance(value, str) else value)
for (name, value) in right.items()}

return left == right

2 changes: 1 addition & 1 deletion hed/schema/hed_schema_group.py
@@ -14,7 +14,7 @@ class HedSchemaGroup(HedSchemaBase):
Notes:
- The container class is useful when library schema are included.
- You cannot save/load/etc the combined schema object directly.
- You cannot save/load/etc. the combined schema object directly.
"""
def __init__(self, schema_list, name=""):