diff --git a/.codespellrc b/.codespellrc deleted file mode 100644 index 18082524..00000000 --- a/.codespellrc +++ /dev/null @@ -1,3 +0,0 @@ -[codespell] -skip = .git,*.pdf,*.svg,deprecated,*.xml,*.mediawiki,*.omn,*.toml -ignore-words-list = covert,hed,assertIn,parms diff --git a/hed/schema/schema_io/df2schema.py b/hed/schema/schema_io/df2schema.py index 2fd6e6cc..5b26c19f 100644 --- a/hed/schema/schema_io/df2schema.py +++ b/hed/schema/schema_io/df2schema.py @@ -137,35 +137,19 @@ def _read_schema(self, dataframe): Parameters: dataframe (pd.DataFrame): The dataframe for the main tags section """ - # note: this assumes loading is in order row by row. - # If tags are NOT sorted this won't work.(same as mediawiki) self._schema._initialize_attributes(HedSectionKey.Tags) - known_tag_levels = {"HedTag": -1} - parent_tags = [] + known_parent_tags = {"HedTag": []} level_adj = 0 for row_number, row in dataframe[constants.TAG_KEY].iterrows(): # skip blank rows, though there shouldn't be any if not any(row): continue parent_tag = row[constants.subclass_of] - # Return -1 by default for top level rooted tag support(they might not be in the dict) - raw_level = known_tag_levels.get(parent_tag, -1) + 1 - if raw_level == 0: - parent_tags = [] - level_adj = 0 - else: - level = raw_level + level_adj - if level < len(parent_tags): - parent_tags = parent_tags[:level] - elif level > len(parent_tags): - self._add_fatal_error(row_number, row, - "Invalid level reported from Level column", - HedExceptions.GENERIC_ERROR) - continue - - tag_entry, parent_tags, level_adj = self._add_tag_meta(parent_tags, row_number, row, level_adj) + org_parent_tags = known_parent_tags.get(parent_tag, []).copy() + + tag_entry, parent_tags, _ = self._add_tag_meta(org_parent_tags, row_number, row, level_adj) if tag_entry: - known_tag_levels[tag_entry.short_tag_name] = raw_level + known_parent_tags[tag_entry.short_tag_name] = parent_tags.copy() def _read_section(self, df, section_key): 
self._schema._initialize_attributes(section_key) diff --git a/pyproject.toml b/pyproject.toml index b7e341ae..67f8bc46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,5 +77,5 @@ namespaces = false hed = ["schema/schema_data/*.xml", "resources/*.png"] [tool.codespell] -skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests' +skip = '*.git,*.pdf,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests,*.xml,*.mediawiki,*.omn,*.toml' ignore-words-list = 'te,parms,assertIn'