Skip to content

Commit

Permalink
Allow unordered df schema loading
Browse files Browse the repository at this point in the history
  • Loading branch information
IanCa committed Jun 20, 2024
1 parent 50564ff commit d0c3306
Showing 1 changed file with 5 additions and 21 deletions.
26 changes: 5 additions & 21 deletions hed/schema/schema_io/df2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,35 +137,19 @@ def _read_schema(self, dataframe):
Parameters:
dataframe (pd.DataFrame): The dataframe for the main tags section
"""
# note: this assumes loading is in order row by row.
# If tags are NOT sorted this won't work (same as mediawiki).
self._schema._initialize_attributes(HedSectionKey.Tags)
known_tag_levels = {"HedTag": -1}
parent_tags = []
known_parent_tags = {"HedTag": []}
level_adj = 0
for row_number, row in dataframe[constants.TAG_KEY].iterrows():
# skip blank rows, though there shouldn't be any
if not any(row):
continue
parent_tag = row[constants.subclass_of]
# Return -1 by default for top level rooted tag support (they might not be in the dict)
raw_level = known_tag_levels.get(parent_tag, -1) + 1
if raw_level == 0:
parent_tags = []
level_adj = 0
else:
level = raw_level + level_adj
if level < len(parent_tags):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(row_number, row,
"Invalid level reported from Level column",
HedExceptions.GENERIC_ERROR)
continue

tag_entry, parent_tags, level_adj = self._add_tag_meta(parent_tags, row_number, row, level_adj)
org_parent_tags = known_parent_tags.get(parent_tag, []).copy()

tag_entry, parent_tags, _ = self._add_tag_meta(org_parent_tags, row_number, row, level_adj)
if tag_entry:
known_tag_levels[tag_entry.short_tag_name] = raw_level
known_parent_tags[tag_entry.short_tag_name] = parent_tags.copy()

def _read_section(self, df, section_key):
self._schema._initialize_attributes(section_key)
Expand Down

0 comments on commit d0c3306

Please sign in to comment.