diff --git a/hed/schema/hed_schema_df_constants.py b/hed/schema/hed_schema_df_constants.py index a1f3e418..a6824c10 100644 --- a/hed/schema/hed_schema_df_constants.py +++ b/hed/schema/hed_schema_df_constants.py @@ -1,4 +1,5 @@ from hed.schema.hed_schema_constants import HedSectionKey +from hed.schema import hed_schema_constants # Known tsv format suffixes @@ -43,7 +44,7 @@ equivalent_to = "omn:EquivalentTo" has_unit_class = "hasUnitClass" -struct_columns = [hed_id, name, attributes, subclass_of, description] +struct_columns = [hed_id, name, attributes, subclass_of, description, equivalent_to] tag_columns = [hed_id, name, level, subclass_of, attributes, description, equivalent_to] unit_columns = [hed_id, name, subclass_of, has_unit_class, attributes, description, equivalent_to] @@ -76,3 +77,10 @@ "HedEpilogue": 12 } +# todo: this should be retrieved directly from the appropriate spreadsheet +valid_omn_attributes = { + hed_schema_constants.VERSION_ATTRIBUTE: "HED_0000300", + hed_schema_constants.LIBRARY_ATTRIBUTE: "HED_0000301", + hed_schema_constants.WITH_STANDARD_ATTRIBUTE: "HED_0000302", + hed_schema_constants.UNMERGED_ATTRIBUTE: "HED_0000303" +} diff --git a/hed/schema/schema_io/ontology_util.py b/hed/schema/schema_io/ontology_util.py index 1db3e8b8..663f2749 100644 --- a/hed/schema/schema_io/ontology_util.py +++ b/hed/schema/schema_io/ontology_util.py @@ -7,7 +7,7 @@ from hed.errors.exceptions import HedFileError from hed.schema import hed_schema_df_constants as constants from hed.schema.hed_schema_constants import HedKey -from hed.schema.schema_io.text_util import parse_attribute_string +from hed.schema.schema_io.text_util import parse_attribute_string, _parse_header_attributes_line library_index_ranges = { "": (10000, 40000), @@ -274,8 +274,6 @@ def convert_df_to_omn(dataframes): full_text = "" omn_data = {} for suffix, dataframe in dataframes.items(): - if suffix == constants.STRUCT_KEY: # not handled here yet - continue output_text = _convert_df_to_omn(dataframes[suffix], annotation_properties=annotation_props) omn_data[suffix] = output_text full_text += output_text + "\n" @@ -398,6 +396,10 @@ def get_attributes_from_row(row): attr_string = row[constants.attributes] else: attr_string = "" + + if constants.subclass_of in row.index and row[constants.subclass_of] == "HedHeader": + header_attributes, _ = _parse_header_attributes_line(attr_string) + return header_attributes return parse_attribute_string(attr_string) diff --git a/hed/schema/schema_io/schema2df.py b/hed/schema/schema_io/schema2df.py index 7ff09354..c451997f 100644 --- a/hed/schema/schema_io/schema2df.py +++ b/hed/schema/schema_io/schema2df.py @@ -3,6 +3,7 @@ from hed.schema.hed_schema_constants import HedSectionKey, HedKey from hed.schema.schema_io.ontology_util import get_library_name_and_id, remove_prefix, create_empty_dataframes from hed.schema.schema_io.schema2base import Schema2Base +from hed.schema.schema_io import text_util import pandas as pd import hed.schema.hed_schema_df_constants as constants from hed.schema.hed_schema_entry import HedTagEntry @@ -67,6 +68,7 @@ def _create_and_add_object_row(self, base_object, attributes="", description="") constants.attributes: attributes, constants.subclass_of: base_object, constants.description: description.replace("\n", "\\n"), + constants.equivalent_to: self._get_header_equivalent_to(attributes, base_object) } self.output[constants.STRUCT_KEY].loc[len(self.output[constants.STRUCT_KEY])] = new_row @@ -215,6 +217,35 @@ def _attribute_disallowed(self, attribute): # strip out hedID in dataframe format return attribute in [HedKey.HedID, HedKey.AnnotationProperty] + def _get_header_equivalent_to(self, attributes_string, subclass_of): + attribute_strings = [] + + attributes, _ = text_util._parse_header_attributes_line(attributes_string) + schema_name, schema_id = self._get_object_name_and_id("HedSchema", include_prefix=True) + + if self._get_as_ids: + attribute_strings.append(f"(hed:HED_0000102 some {schema_id})") + else: + attribute_strings.append(f"(inHedSchema some {schema_name})") + + for attribute, value in attributes.items(): + if attribute not in constants.valid_omn_attributes: + continue + + if self._get_as_ids: + attribute = f"hed:{constants.valid_omn_attributes[attribute]}" + attribute_strings.append(f'({attribute} value "{value}")') + + if self._get_as_ids: + # we just want the ID for normal hed objects, not schema specific + subclass_of = self._get_object_id(subclass_of, base_id=0, include_prefix=True) + + # If they match, we want to leave equivalent_to blank + final_out = " and ".join([subclass_of] + attribute_strings) + if final_out == subclass_of: + return "" + return final_out + def _get_tag_equivalent_to(self, tag_entry): subclass = self._get_subclass_of(tag_entry) diff --git a/hed/scripts/create_ontology.py b/hed/scripts/create_ontology.py index 24878d33..df94c49c 100644 --- a/hed/scripts/create_ontology.py +++ b/hed/scripts/create_ontology.py @@ -14,7 +14,7 @@ def create_ontology(repo_path, schema_name, schema_version, dest): _, omn_dict = convert_df_to_omn(dataframes) base = get_schema_filename(schema_name, schema_version) - output_dest = os.path.join(dest, base) + output_dest = os.path.join(dest, base, "generated_omn") os.makedirs(output_dest, exist_ok=True) for suffix, omn_text in omn_dict.items(): filename = os.path.join(output_dest, f"{base}_{suffix}.omn")