From d8f4a941691e429f74b73eecdd98948412db67e8 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 15 Feb 2024 10:17:43 -0600 Subject: [PATCH] Documentation cleanup --- hed/schema/schema_io/__init__.py | 1 - hed/schema/schema_io/base2schema.py | 52 +++++++++++------------ hed/schema/schema_io/owl2schema.py | 47 +++++++++++--------- hed/schema/schema_io/owl_constants.py | 3 +- hed/schema/schema_io/schema2base.py | 28 ++++++------ hed/schema/schema_io/schema2owl.py | 43 +++++++++++-------- hed/schema/schema_io/schema2wiki.py | 36 ++++++++++------ hed/schema/schema_io/schema2xml.py | 52 ++++++++++++++--------- hed/tools/analysis/annotation_util.py | 4 +- hed/tools/analysis/column_name_summary.py | 23 +++++++++- hed/tools/analysis/event_manager.py | 10 ++--- hed/tools/analysis/file_dictionary.py | 4 +- hed/tools/analysis/hed_tag_counts.py | 46 ++++++++++++++++---- hed/tools/analysis/hed_tag_manager.py | 20 +++++---- hed/tools/analysis/hed_type.py | 2 +- hed/tools/analysis/hed_type_counts.py | 34 ++++++++++++--- hed/tools/analysis/hed_type_defs.py | 24 ++++++----- hed/tools/analysis/hed_type_factors.py | 22 ++++++++++ hed/tools/analysis/hed_type_manager.py | 32 +++++++++++++- hed/tools/analysis/key_map.py | 21 ++++++--- 20 files changed, 339 insertions(+), 165 deletions(-) diff --git a/hed/schema/schema_io/__init__.py b/hed/schema/schema_io/__init__.py index 737947c49..e69de29bb 100644 --- a/hed/schema/schema_io/__init__.py +++ b/hed/schema/schema_io/__init__.py @@ -1 +0,0 @@ -""" XML, OWL, and MediaWiki IO routines. """ \ No newline at end of file diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py index eab065003..bc193eafc 100644 --- a/hed/schema/schema_io/base2schema.py +++ b/hed/schema/schema_io/base2schema.py @@ -8,22 +8,22 @@ class SchemaLoader(ABC): - """ Baseclass for schema loading, to handle basic errors and partnered schemas. 
+ """ Baseclass for schema loading, to handle basic errors and partnered schemas - Expected usage is SchemaLoaderXML.load(filename). + Expected usage is SchemaLoaderXML.load(filename) - SchemaLoaderXML(filename) will load just the header_attributes. + SchemaLoaderXML(filename) will load just the header_attributes """ def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): - """ Load the given schema from one of the two parameters. + """Loads the given schema from one of the two parameters. Parameters: - filename(str or None): A valid filepath or None. - schema_as_string(str or None): A full schema as text or None. - schema(HedSchema or None): A HED schema to merge this new file into. + filename(str or None): A valid filepath or None + schema_as_string(str or None): A full schema as text or None + schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. file_format(str or None): The format of this file if needed(only for owl currently) - name(str or None): Optional user supplied identifier, by default uses filename. 
+ name(str or None): Optional user supplied identifier, by default uses filename """ if schema_as_string and filename: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.", @@ -61,8 +61,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self.name) elif withStandard != self._schema.with_standard: raise HedFileError(HedExceptions.BAD_WITH_STANDARD_VERSION, - "When merging two schemas without a schema namespace, " + - "you they must have the same withStandard value.", self.name) + "When merging two schemas without a schema namespace, you they must have the same withStandard value.", self.name) hed_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = self._schema.version_number + f",{version_number}" hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}" if name: @@ -71,35 +70,35 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self._schema.header_attributes = hed_attributes self._loading_merged = False + @property def schema(self): - """ The partially loaded schema if you are after just header attributes..""" + """ The partially loaded schema if you are after just header attributes.""" return self._schema @classmethod def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name=""): - """ Load and return the schema, including partnered schema if applicable. + """ Loads and returns the schema, including partnered schema if applicable. Parameters: - filename(str or None): A valid filepath or None. - schema_as_string(str or None): A full schema as text or None. - schema(HedSchema or None): A HED schema to merge this new file into. + filename(str or None): A valid filepath or None + schema_as_string(str or None): A full schema as text or None + schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. 
file_format(str or None): If this is an owl file being loaded, this is the format. - Allowed values include: turtle, json-ld, and owl(xml). - name(str or None): Optional user supplied identifier, by default uses filename. - + Allowed values include: turtle, json-ld, and owl(xml) + name(str or None): Optional user supplied identifier, by default uses filename Returns: - schema(HedSchema): The new schema. + schema(HedSchema): The new schema """ loader = cls(filename, schema_as_string, schema, file_format, name) return loader._load() def _load(self): - """ Parse the previously loaded data, including loading a partnered schema if needed. + """ Parses the previously loaded data, including loading a partnered schema if needed. Returns: - schema(HedSchema): The new schema. + schema(HedSchema): The new schema """ self._loading_merged = True # Do a full load of the standard schema if this is a partnered schema @@ -126,27 +125,26 @@ def _load(self): @abstractmethod def _open_file(self): - """ Overloaded versions should retrieve the input from filename/schema_as_string. """ + """Overloaded versions should retrieve the input from filename/schema_as_string""" pass @abstractmethod def _get_header_attributes(self, input_data): - """ Overloaded versions should return the header attributes from the input data..""" + """Overloaded versions should return the header attributes from the input data.""" pass @abstractmethod def _parse_data(self): - """ Put the input data into the new schema. 
""" + """Puts the input data into the new schema""" pass def _add_to_dict_base(self, entry, key_class): if not entry.has_attribute(HedKey.InLibrary) and self.appending_to_schema and self._schema.merged: return None - if self.library and (not self._schema.with_standard or - (not self._schema.merged and self._schema.with_standard)): + if self.library and (not self._schema.with_standard or (not self._schema.merged and self._schema.with_standard)): # only add it if not already present - This is a rare case if not entry.has_attribute(HedKey.InLibrary): entry._set_attribute_value(HedKey.InLibrary, self.library) - return self._schema._add_tag_to_dict(entry.name, entry, key_class) + return self._schema._add_tag_to_dict(entry.name, entry, key_class) \ No newline at end of file diff --git a/hed/schema/schema_io/owl2schema.py b/hed/schema/schema_io/owl2schema.py index 6cdc55a5e..561fa8212 100644 --- a/hed/schema/schema_io/owl2schema.py +++ b/hed/schema/schema_io/owl2schema.py @@ -1,5 +1,5 @@ """ -Create a HedSchema object from an OWL file or graph. +This module is used to create a HedSchema object from an OWL file or graph. """ @@ -9,18 +9,18 @@ from .base2schema import SchemaLoader import rdflib from rdflib.exceptions import ParserError -from rdflib import RDF, RDFS, URIRef, OWL +from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD from collections import defaultdict from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM class SchemaLoaderOWL(SchemaLoader): - """ Load XML schemas from filenames or strings. + """ Loads XML schemas from filenames or strings. - Expected usage is SchemaLoaderXML.load(filename). + Expected usage is SchemaLoaderXML.load(filename) - SchemaLoaderXML(filename) will load just the header_attributes. 
+ SchemaLoaderXML(filename) will load just the header_attributes """ def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): if schema_as_string and not file_format: @@ -35,7 +35,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self._rooted_cache = {} def _open_file(self): - """ Parse a Turtle/owl/etc. file and returns the RDF graph. """ + """Parses a Turtle/owl/etc file and returns the RDF graph.""" graph = rdflib.Graph() try: @@ -51,17 +51,17 @@ def _open_file(self): return graph def _read_prologue(self): - """ Read the Prologue section from the ontology. """ + """Reads the Prologue section from the ontology.""" prologue = self.graph.value(subject=HED.Prologue, predicate=HED.elementValue, any=False) return str(prologue) if prologue else "" def _read_epilogue(self): - """ Read the Epilogue section from the ontology. """ + """Reads the Epilogue section from the ontology.""" epilogue = self.graph.value(subject=HED.Epilogue, predicate=HED.elementValue, any=False) return str(epilogue) if epilogue else "" def _get_header_attributes(self, graph): - """ Parse header attributes from an RDF graph into a dictionary. """ + """Parses header attributes from an RDF graph into a dictionary.""" header_attributes = {} for s, _, _ in graph.triples((None, RDF.type, HED.HeaderMember)): label = graph.value(s, RDFS.label) @@ -77,6 +77,7 @@ def _parse_data(self): self.graph.bind("hedu", HEDU) self.graph.bind("hedum", HEDUM) + self._schema.epilogue = self._read_epilogue() self._schema.prologue = self._read_prologue() self._get_header_attributes(self.graph) @@ -90,7 +91,9 @@ def _parse_data(self): breakHere = 3 def get_local_names_from_uris(parent_chain, tag_uri): - """ Extract local names from URIs using RDFlib's n3() method. """ + """ + Extracts local names from URIs using RDFlib's n3() method. 
+ """ full_names = [] for uri in parent_chain + [tag_uri]: # Serialize the URI into N3 format and extract the local name @@ -100,18 +103,18 @@ def get_local_names_from_uris(parent_chain, tag_uri): return full_names def sort_classes_by_hierarchy(self, classes): - """ Sort all tags based on assembled full name. + """ + Sorts all tags based on assembled full name Returns: list of tuples. - Left Tag URI, right side is parent labels(not including self). + Left Tag URI, right side is parent labels(not including self) """ parent_chains = [] full_tag_names = [] for tag_uri in classes: parent_chain = self._get_parent_chain(tag_uri) - parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] - for uri in parent_chain + [tag_uri]] + parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] for uri in parent_chain + [tag_uri]] # parent_chain = [self.graph.value(p, RDFS.label) or p for p in parent_chain + [tag_uri]] full_tag_names.append("/".join(parent_chain)) parent_chains.append((tag_uri, parent_chain[:-1])) @@ -122,7 +125,7 @@ def sort_classes_by_hierarchy(self, classes): return parent_chains def _get_parent_chain(self, cls): - """ Recursively build the parent chain for a given class. """ + """ Recursively builds the parent chain for a given class. """ parent = self.graph.value(subject=cls, predicate=HED.hasHedParent) if parent is None: return [] @@ -168,7 +171,7 @@ def _parse_uri(self, uri, key_class, name=None): return tag_entry def _get_classes_with_subproperty(self, subproperty_uri, base_type): - """ Iterate over all classes that have a specified rdfs:subPropertyOf. 
""" + """Iterates over all classes that have a specified rdfs:subPropertyOf.""" classes = set() for s in self.graph.subjects(RDF.type, base_type): if (s, RDFS.subPropertyOf, subproperty_uri) in self.graph: @@ -176,7 +179,9 @@ def _get_classes_with_subproperty(self, subproperty_uri, base_type): return classes def _get_all_subclasses(self, base_type): - """ Recursively find all subclasses of the given base_type. """ + """ + Recursively finds all subclasses of the given base_type. + """ subclasses = set() for subclass in self.graph.subjects(RDFS.subClassOf, base_type): subclasses.add(subclass) @@ -184,7 +189,9 @@ def _get_all_subclasses(self, base_type): return subclasses def _get_classes(self, base_type): - """ Retrieve all instances of the given base_type, including instances of its subclasses. """ + """ + Retrieves all instances of the given base_type, including instances of its subclasses. + """ classes = set() # Add instances of the base type for s in self.graph.subjects(RDF.type, base_type): @@ -231,6 +238,8 @@ def _read_units(self): self._add_to_dict(new_entry, key_class) unit_classes[uri] = new_entry + + key_class = HedSectionKey.Units units = self._get_classes(HED.HedUnit) for uri in units: @@ -265,7 +274,7 @@ def _add_tag_internal(self, uri, parent_tags): self._add_to_dict(tag_entry, HedSectionKey.Tags) def _read_tags(self): - """ Populate a dictionary of dictionaries associated with tags and their attributes. """ + """Populates a dictionary of dictionaries associated with tags and their attributes.""" classes = self._get_classes(HED.HedTag) classes.update(self._get_classes(HED.HedPlaceholder)) sorted_classes = self.sort_classes_by_hierarchy(classes) diff --git a/hed/schema/schema_io/owl_constants.py b/hed/schema/schema_io/owl_constants.py index 088f8e2f6..8d450d901 100644 --- a/hed/schema/schema_io/owl_constants.py +++ b/hed/schema/schema_io/owl_constants.py @@ -1,5 +1,5 @@ -""" OWL constants used to define namespaces. 
""" from rdflib import Namespace + from hed.schema.hed_schema_constants import HedSectionKey @@ -48,3 +48,4 @@ HedSectionKey.UnitModifiers: "HedUnitModifier", HedSectionKey.ValueClasses: "HedValueClass", } + diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py index 0737c9f85..c54e9b977 100644 --- a/hed/schema/schema_io/schema2base.py +++ b/hed/schema/schema_io/schema2base.py @@ -1,12 +1,11 @@ -""" Baseclass for mediawiki/xml writers. """ +"""Baseclass for mediawiki/xml writers""" from hed.schema.hed_schema_constants import HedSectionKey, HedKey from hed.errors.exceptions import HedFileError, HedExceptions class Schema2Base: - """ Baseclass for mediawiki/xml writers. """ def __init__(self): - # Placeholder output variable. + # Placeholder output variable self.output = None self._save_lib = False self._save_base = False @@ -15,15 +14,20 @@ def __init__(self): @classmethod def process_schema(cls, hed_schema, save_merged=False): - """ Take a HedSchema object and return a list of strings representing its .mediawiki version. - - Parameters: - hed_schema (HedSchema): The schema to be processed. - save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema. - If it is not a "withStandard" schema, this setting has no effect. - - Returns: - (Any): Varies based on inherited class. + """ + Takes a HedSchema object and returns a list of strings representing its .mediawiki version. + + Parameters + ---------- + hed_schema : HedSchema + save_merged: bool + If True, this will save the schema as a merged schema if it is a "withStandard" schema. + If it is not a "withStandard" schema, this setting has no effect. 
+ + Returns + ------- + converted_output: Any + Varies based on inherited class """ if not hed_schema.can_save(): diff --git a/hed/schema/schema_io/schema2owl.py b/hed/schema/schema_io/schema2owl.py index c4a6480f4..0b683942e 100644 --- a/hed/schema/schema_io/schema2owl.py +++ b/hed/schema/schema_io/schema2owl.py @@ -1,4 +1,4 @@ -""" Output of HedSchema objects as .xml format. """ +"""Allows output of HedSchema objects as .xml format""" from hed.schema.hed_schema_constants import HedSectionKey, HedKey from hed.schema.schema_io import owl_constants @@ -62,7 +62,6 @@ class Schema2Owl(Schema2Base): - """ Output of HedSchema objects as .xml format. """ def __init__(self): super().__init__() self.owl_graph = Graph() @@ -76,7 +75,7 @@ def __init__(self): # Required baseclass function # ========================================= def _output_header(self, attributes, prologue): - # Create a dictionary mapping label names to property URIs. + # Create a dictionary mapping label names to property URIs property_uris = { "library": HED.Library, "unmerged": HED.Unmerged, @@ -216,15 +215,22 @@ def _add_attribute(self, base_uri, name, label, comment, entry): return hed_tag_uri def _write_tag_entry(self, tag_entry, parent_node=None, level=0): - """ Create a tag node and adds it to the parent. - - Parameters: - tag_entry (HedTagEntry): The entry for that tag we want to write out. - parent_node (Any): Unused. - level (Any): Unused: - - Returns: - ? + """ + Creates a tag node and adds it to the parent. + + Parameters + ---------- + tag_entry: HedTagEntry + The entry for that tag we want to write out + parent_node: Any + Unused + level: Any + Unused + + Returns + ------- + SubElement + The added node """ tag_name = tag_entry.short_tag_name parent = tag_entry.parent @@ -241,14 +247,15 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): ) def _write_entry(self, entry, parent_node=None, include_props=True): - """ Create an entry node and adds it to the parent. 
+ """ + Creates an entry node and adds it to the parent. Parameters: - entry(HedSchemaEntry): The entry for that tag we want to write out. - parent_node(str): URI for unit class owner, if this is a unit. + entry(HedSchemaEntry): The entry for that tag we want to write out + parent_node(str): URI for unit class owner, if this is a unit include_props(bool): Add the description and attributes to new node. Returns: - str: The added URI. + str: The added URI """ key_class = entry.section_key prefix = HED_URIS[key_class] @@ -292,10 +299,10 @@ def _write_entry(self, entry, parent_node=None, include_props=True): def sanitize_for_turtle(name): - """ Sanitize a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. + """ Sanitizes a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. Excludes: `control characters, space, <, >, double quote, {, }, |, ^, backtick, and backslash.` - Replacing them with underscores. + Replacing them with underscores Parameters: name (str): The string to sanitize. diff --git a/hed/schema/schema_io/schema2wiki.py b/hed/schema/schema_io/schema2wiki.py index 9d468563d..2a8a315b4 100644 --- a/hed/schema/schema_io/schema2wiki.py +++ b/hed/schema/schema_io/schema2wiki.py @@ -1,4 +1,4 @@ -""" Output of HedSchema objects as .mediawiki format. """ +"""Allows output of HedSchema objects as .mediawiki format""" from hed.schema.hed_schema_constants import HedSectionKey from hed.schema.schema_io import wiki_constants @@ -6,7 +6,6 @@ class Schema2Wiki(Schema2Base): - """ Output of HedSchema objects as .mediawiki format. """ def __init__(self): super().__init__() self.current_tag_string = "" @@ -107,26 +106,35 @@ def _format_props_and_desc(self, schema_entry): @staticmethod def _get_attribs_string_from_schema(header_attributes): - """ Get the schema attributes and converts it to a string. + """ + Gets the schema attributes and converts it to a string. - Parameters: - header_attributes (dict): Attributes to format attributes from. 
+ Parameters + ---------- + header_attributes : dict + Attributes to format attributes from - Returns: - str: A string of the attributes that can be written to a .mediawiki formatted file. + Returns + ------- + str: + A string of the attributes that can be written to a .mediawiki formatted file """ attrib_values = [f"{attr}=\"{value}\"" for attr, value in header_attributes.items()] final_attrib_string = " ".join(attrib_values) return final_attrib_string def _format_tag_attributes(self, attributes): - """ Take a dictionary of tag attributes and return a string with the .mediawiki representation. - - Parameters: - attributes (dict): Dictionary of form {attribute_name : attribute_value}. - - Returns: - str: The formatted string that should be output to the file. + """ + Takes a dictionary of tag attributes and returns a string with the .mediawiki representation + + Parameters + ---------- + attributes : {str:str} + {attribute_name : attribute_value} + Returns + ------- + str: + The formatted string that should be output to the file. """ prop_string = "" final_props = [] diff --git a/hed/schema/schema_io/schema2xml.py b/hed/schema/schema_io/schema2xml.py index a63334ddc..d18456459 100644 --- a/hed/schema/schema_io/schema2xml.py +++ b/hed/schema/schema_io/schema2xml.py @@ -1,4 +1,4 @@ -""" Output of HedSchema objects as .xml format. """ +"""Allows output of HedSchema objects as .xml format""" from xml.etree.ElementTree import Element, SubElement from hed.schema.hed_schema_constants import HedSectionKey @@ -7,7 +7,6 @@ class Schema2XML(Schema2Base): - """ Output of HedSchema objects as .xml format. """ def __init__(self): super().__init__() self.hed_node = Element('HED') @@ -37,16 +36,21 @@ def _end_tag_section(self): pass def _write_tag_entry(self, tag_entry, parent_node=None, level=0): - """ Create a tag node and add it to the parent. - - Parameters: - tag_entry (HedTagEntry): The entry for that tag we want to write out. 
- parent_node (SubElement): The parent node if any of this tag. - level (int): The level of this tag, 0 being a root tag. - - Returns: - SubElement:The added node. + """ + Creates a tag node and adds it to the parent. + Parameters + ---------- + tag_entry: HedTagEntry + The entry for that tag we want to write out + parent_node: SubElement + The parent node if any of this tag. + level: int + The level of this tag, 0 being a root tag. + Returns + ------- + SubElement + The added node """ key_class = HedSectionKey.Tags tag_element = xml_constants.ELEMENT_NAMES[key_class] @@ -66,15 +70,21 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): return tag_node def _write_entry(self, entry, parent_node=None, include_props=True): - """ Create an entry node and add it to the parent. - - Parameters: - entry (HedSchemaEntry): The entry for that tag we want to write out. - parent_node (SubElement): The parent node of this tag, if any. - include_props (bool): If True, add the description and attributes to new node. + """ + Creates an entry node and adds it to the parent. - Returns: - SubElement: The added node. + Parameters + ---------- + entry: HedSchemaEntry + The entry for that tag we want to write out + parent_node: SubElement + The parent node of this tag, if any + include_props: bool + Add the description and attributes to new node. + Returns + ------- + SubElement + The added node """ key_class = entry.section_key element = xml_constants.ELEMENT_NAMES[key_class] @@ -98,9 +108,9 @@ def _write_entry(self, entry, parent_node=None, include_props=True): # Output helper functions to create nodes # ========================================= def _add_tag_node_attributes(self, tag_node, tag_attributes, attribute_node_name=xml_constants.ATTRIBUTE_ELEMENT): - """Add the attributes to a tag. + """Adds the attributes to a tag. - Parameters: + Parameters ---------- tag_node: Element A tag element. 
diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py index 361328898..aafb2a8d0 100644 --- a/hed/tools/analysis/annotation_util.py +++ b/hed/tools/analysis/annotation_util.py @@ -9,7 +9,7 @@ def check_df_columns(df, required_cols=('column_name', 'column_value', 'descript """ Return a list of the specified columns that are missing from a dataframe. Parameters: - df (DataFrame): Spreadsheet to check the columns of. + df (DataFrame): Spreadsheet to check the columns of. required_cols (tuple): List of column names that must be present. Returns: @@ -122,7 +122,7 @@ def hed_to_df(sidecar_dict, col_names=None): Parameters: sidecar_dict (dict): A dictionary conforming to BIDS JSON events sidecar format. - col_names (list, None): A list of the cols to include in the flattened side car. + col_names (list, None): A list of the cols to include in the flattened sidecar. Returns: DataFrame: Four-column spreadsheet representing HED portion of sidecar. diff --git a/hed/tools/analysis/column_name_summary.py b/hed/tools/analysis/column_name_summary.py index 79d114465..7f7e8ef32 100644 --- a/hed/tools/analysis/column_name_summary.py +++ b/hed/tools/analysis/column_name_summary.py @@ -1,10 +1,10 @@ -""" Summarizes the unique column names in a dataset. """ +""" Summarize the unique column names in a dataset. """ import json class ColumnNameSummary: - """ Summarizes the unique column names in a dataset. """ + """ Summarize the unique column names in a dataset. """ def __init__(self, name=''): self.name = name @@ -12,6 +12,13 @@ def __init__(self, name=''): self.unique_headers = [] def update(self, name, columns): + """ Update the summary based on columns associated with a file. + + Parameters: + name (str): File name associated with the columns. + columns (list): List of file names. 
+ + """ position = self.update_headers(columns) if name not in self.file_dict: self.file_dict[name] = position @@ -21,6 +28,12 @@ def update(self, name, columns): f"Current: {str(columns)} Previous: {str(self.unique_headers[self.file_dict[name]])}") def update_headers(self, column_names): + """ Update the unique combinations of column names. + + Parameters: + column_names (list): List of column names to update. + + """ for index, item in enumerate(self.unique_headers): if item == column_names: return index @@ -28,6 +41,12 @@ def update_headers(self, column_names): return len(self.unique_headers) - 1 def get_summary(self, as_json=False): + """ Return summary as an object or in JSON. + + Parameters: + as_json (bool): If False (the default), return the underlying summary object, otherwise transform to JSON. + + """ patterns = [list() for _ in self.unique_headers] for key, value in self.file_dict.items(): patterns[value].append(key) diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 4e3c152e0..959398e68 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -65,7 +65,7 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): onset_dict (dict): Running dict that keeps track of temporal events that haven't yet ended. Note: - This removes the events of temporal extent from hed. + This removes the events of temporal extent from HED. """ if not hed: @@ -87,14 +87,14 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): hed.remove(to_remove) def unfold_context(self, remove_types=[]): - """ Unfold the event information into hed, base, and contexts input either as arrays of str or of HedString. + """ Unfold the event information into a tuple based on context. Parameters: remove_types (list): List of types to remove. 
Returns: - list of str or HedString representing the information without the events of temporal extent - list of str or HedString representing the onsets of the events of temporal extent + list of str or HedString representing the information without the events of temporal extent. + list of str or HedString representing the onsets of the events of temporal extent. list of str or HedString representing the ongoing context information. """ @@ -117,7 +117,7 @@ def _expand_context(self): """ Expand the onset and the ongoing context for additional processing. Returns: - tuple of lists: (base list of str, context list of str) + tuple of lists: (base list of str, context list of str). Notes: For each event, the Onset goes in the base list and the remainder of the times go in the contexts list. diff --git a/hed/tools/analysis/file_dictionary.py b/hed/tools/analysis/file_dictionary.py index 6095ce441..757c899c9 100644 --- a/hed/tools/analysis/file_dictionary.py +++ b/hed/tools/analysis/file_dictionary.py @@ -38,7 +38,7 @@ def __init__(self, collection_name, file_list, key_indices=(0, 2), separator='_' @property def name(self): - """ Name of this dictionary""" + """ Name of this dictionary. """ return self.collection_name @property @@ -92,7 +92,7 @@ def iter_files(self): yield key, file def key_diffs(self, other_dict): - """ Return symmetric key difference with other. + """ Return symmetric key difference with another dict. Parameters: other_dict (FileDictionary) A file dictionary object. diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index 712f4b075..e4b303e49 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -1,4 +1,4 @@ -""" Counts of HED tags in a file's annotations. """ +""" Classes for managing counts of HED tags for columnar files. 
""" import copy @@ -38,6 +38,14 @@ def set_value(self, hed_tag): self.value_dict[value] = 1 def get_info(self, verbose=False): + """ Return counts for this tag. + + Parameters: + verbose (bool): If False (the default) only number of files included, otherwise a list of files. + + Returns: + dict: Keys are 'tag', 'events', and 'files'. + """ if verbose: files = [name for name in self.files] else: @@ -62,7 +70,7 @@ def get_empty(self): class HedTagCounts: - """ Counts of HED tags for a columnar file. + """ Counts of HED tags for a group of columnar files. Parameters: name (str): An identifier for these counts (usually the filename of the tabular file). @@ -77,7 +85,7 @@ def __init__(self, name, total_events=0): self.total_events = total_events def update_event_counts(self, hed_string_obj, file_name): - """ Update the tag counts based on a hed string object. + """ Update the tag counts based on a HedString object. Parameters: hed_string_obj (HedString): The HED string whose tags should be counted. @@ -106,8 +114,8 @@ def organize_tags(self, tag_template): tag_template (dict): A dictionary whose keys are titles and values are lists of HED tags (str). Returns: - dict - keys are tags (strings) and values are list of HedTagCount for items fitting template. - list - of HedTagCount objects corresponding to tags that don't fit the template. + dict: Keys are tags (strings) and values are list of HedTagCount for items fitting template. + list: HedTagCount objects corresponding to tags that don't fit the template. """ template = self.create_template(tag_template) @@ -117,6 +125,12 @@ def organize_tags(self, tag_template): return template, unmatched def merge_tag_dicts(self, other_dict): + """ Merge the information from another dictionary with this object's tag dictionary. + + Parameters: + other_dict (dict): Dictionary of tag, HedTagCount to merge. 
+ + """ for tag, count in other_dict.items(): if tag not in self.tag_dict: self.tag_dict[tag] = count.get_empty() @@ -132,6 +146,11 @@ def merge_tag_dicts(self, other_dict): self.tag_dict[tag].value_dict[value] = val_count def get_summary(self): + """ Return a summary object containing the tag count information of this summary. + + Returns: + dict: Keys are 'name', 'files', 'total_events', and 'details'. + """ details = {} for tag, count in self.tag_dict.items(): details[tag] = count.get_summary() @@ -140,6 +159,17 @@ def get_summary(self): @staticmethod def create_template(tags): + """ Creates a dictionary with keys based on list of keys in tags dictionary. + + Parameters: + tags (dict): dictionary of tags and key lists. + + Returns: + dict: Dictionary with keys in key lists and values are empty lists. + + Note: This class is used to organize the results of the tags based on a template for display. + + """ template_dict = {} for key, key_list in tags.items(): for element in key_list: @@ -157,8 +187,8 @@ def _update_template(tag_count, template, unmatched): """ tag_list = reversed(list(tag_count.tag_terms)) - for tkey in tag_list: - if tkey in template.keys(): - template[tkey].append(tag_count) + for tag_key in tag_list: + if tag_key in template.keys(): + template[tag_key].append(tag_count) return unmatched.append(tag_count) diff --git a/hed/tools/analysis/hed_tag_manager.py b/hed/tools/analysis/hed_tag_manager.py index 057bd21db..1cd3041f2 100644 --- a/hed/tools/analysis/hed_tag_manager.py +++ b/hed/tools/analysis/hed_tag_manager.py @@ -1,11 +1,11 @@ -""" Manager for the HED tags in a columnar file. """ +""" Manager for HED tags from a columnar file. """ from hed.models import HedString from hed.models.string_util import split_base_tags class HedTagManager: - """ Manager for the HED tags in a columnar file. """ + """ Manager for the HED tags from a columnar file. """ def __init__(self, event_manager, remove_types=[]): """ Create a tag manager for one tabular file. 
@@ -45,16 +45,18 @@ def get_hed_objs(self, include_context=True, replace_defs=False): return hed_objs def get_hed_obj(self, hed_str, remove_types=False, remove_group=False): - """ Return a HED string object with the types removed. """ + """ Return a HED string object with the types removed. + + Parameters: + hed_str (str): Represents a HED string. + remove_types (bool): If False (the default), do not remove the types managed by this manager. + remove_group (bool): If False (the default), do not remove the group when removing a type tag, + otherwise remove its enclosing group. + + """ if not hed_str: return None hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) if remove_types: hed_obj, temp = split_base_tags(hed_obj, self.remove_types, remove_group=remove_group) return hed_obj - - # def get_hed_string_obj(self, hed_str, filter_types=False): - # hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) - # # if filter_types: - # # hed_obj = hed_obj - # return hed_obj diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py index 882218738..10059cefd 100644 --- a/hed/tools/analysis/hed_type.py +++ b/hed/tools/analysis/hed_type.py @@ -1,4 +1,4 @@ -""" Manager of a type variable and its associated context. """ +""" Manager for a type variable and its associated context. """ import pandas as pd from hed.models import HedGroup, HedTag from hed.tools.analysis.hed_type_defs import HedTypeDefs diff --git a/hed/tools/analysis/hed_type_counts.py b/hed/tools/analysis/hed_type_counts.py index 4ef5780d3..31d8bd9ca 100644 --- a/hed/tools/analysis/hed_type_counts.py +++ b/hed/tools/analysis/hed_type_counts.py @@ -1,8 +1,8 @@ -""" Manager of the counts of tags for one type tag such as Condition-variable and Task. """ +""" Classes for managing counts of tags for one type tag such as Condition-variable or Task. 
""" class HedTypeCount: - """ Manager of the counts of tags for one type tag such as Condition-variable and Task. + """ Manager of the counts of tags for one type tag such as Condition-variable or Task. Parameters: type_value (str): The value of the variable to be counted. @@ -48,12 +48,19 @@ def update(self, type_sum, file_id): self._update_levels(type_sum.get('level_counts', {})) def to_dict(self): + """ Return count information as a dictionary. """ return {'type_value': self.type_value, 'type_tag': self.type_tag, 'direct_references': self.direct_references, 'total_events': self.total_events, 'events': self.events, 'files': self.files, 'events_with_multiple_refs': self.events_with_multiple_refs, 'max_refs_per_event': self.max_refs_per_event, 'level_counts': self.level_counts} def _update_levels(self, level_dict): + """ Helper for updating counts in a level dictionary. + + Parameters: + level_dict (dict): A dictionary of level count information. + + """ for key, item in level_dict.items(): if key not in self.level_counts: self.level_counts[key] = {'files': 0, 'events': 0, 'tags': '', 'description': ''} @@ -70,6 +77,12 @@ def _update_levels(self, level_dict): level_counts['description'] = item['description'] def get_summary(self): + """ Return the summary of one value of one type tag. + + Returns: + dict: Count information for one tag of one type. + + """ summary = {'type_value': self.type_value, 'type_tag': self.type_tag, 'levels': len(self.level_counts.keys()), @@ -85,10 +98,7 @@ def get_summary(self): class HedTypeCounts: - """ Keeps a summary of tag counts for a file. - - - """ + """ Manager for summaries of tag counts for columnar files. """ def __init__(self, name, type_tag): self.name = name @@ -133,6 +143,12 @@ def add_descriptions(self, type_defs): type_count.level_counts[level]['description'] = level_dict['description'] def update(self, counts): + """ Update count information based on counts in another HedTypeCounts. 
+ + Parameters: + counts (HedTypeCounts): Information to use in the update. + + """ self.total_events = self.total_events + counts.total_events for key, count in counts.type_dict.items(): if key not in self.type_dict: @@ -143,6 +159,12 @@ def update(self, counts): self.files[file_id] = '' def get_summary(self): + """ Return the information in the manager as a dictionary. + + Returns: + dict: Dict with keys 'name', 'type_tag', 'files', 'total_events', and 'details'. + + """ details = {} for type_value, count in self.type_dict.items(): details[type_value] = count.get_summary() diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py index baa72d037..a152123d4 100644 --- a/hed/tools/analysis/hed_type_defs.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -8,11 +8,13 @@ class HedTypeDefs: """Manager for definitions associated with a type such as condition-variable. Properties: - def_map (dict): keys are definition names, values are dict {type_values, description, tags} - Example: A definition 'famous-face-cond' with contents - `(Condition-variable/Face-type,Description/A face that should be recognized by the - participants,(Image,(Face,Famous)))` - would have type_values ['face_type']. All items are strings not objects. + def_map (dict): keys are definition names, values are dict {type_values, description, tags}. + + Example: A definition 'famous-face-cond' with contents: + + '(Condition-variable/Face-type,Description/A face that should be recognized.,(Image,(Face,Famous)))' + + would have type_values ['face_type']. All items are strings not objects. """ @@ -55,7 +57,7 @@ def get_type_values(self, item): @property def type_def_names(self): - """ List of names of definition that have this type-variable. + """ Return list of names of definition that have this type-variable. Returns: list: definition names that have this type. 
@@ -65,7 +67,7 @@ def type_def_names(self): @property def type_names(self): - """ List of names of the type-variables associated with type definitions. + """ Return list of names of the type-variables associated with type definitions. Returns: list: type names associated with the type definitions @@ -149,12 +151,12 @@ def split_name(name, lowercase=True): """ Split a name/# or name/x into name, x. Parameters: - name (str): The extension or value portion of a tag - lowercase (bool): If True + name (str): The extension or value portion of a tag. + lowercase (bool): If True (default), return values are converted to lowercase. Returns: - str: name of the definition - str: value of the definition if it has one + str: name of the definition. + str: value of the definition if it has one. """ if not name: diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py index ed7755190..d9d38564c 100644 --- a/hed/tools/analysis/hed_type_factors.py +++ b/hed/tools/analysis/hed_type_factors.py @@ -64,6 +64,16 @@ def get_factors(self, factor_encoding="one-hot"): f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENCODINGS)}") def _one_hot_to_categorical(self, factors, levels): + """ Convert one-hot factors to a categorical representation. + + Parameters: + factors (DataFrame): Dataframe containing the one-hot factors. + levels (list): List of the level columns to consider. + + Returns: + DataFrame: Contains a single categorical column named for the type value. + + """ df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.type_value]) for index, row in factors.iterrows(): if self.type_value in row.index and row[self.type_value]: @@ -77,6 +87,12 @@ def _one_hot_to_categorical(self, factors, levels): return df def get_summary(self): + """ Return the summary of the type tag value as a dictionary. + + Returns: + dict: Contains the summary. 
+ + """ count_list = [0] * self.number_elements for index in list(self.direct_indices.keys()): count_list[index] = count_list[index] + 1 @@ -92,6 +108,12 @@ def get_summary(self): return summary def _get_level_counts(self): + """ Return the level counts as a dictionary. + + Returns: + dict: Dictionary with counts of level values. + + """ count_dict = {} for level, cond in self.levels.items(): count_dict[level] = len(cond.values()) diff --git a/hed/tools/analysis/hed_type_manager.py b/hed/tools/analysis/hed_type_manager.py index 1bdecea9f..2cb01111a 100644 --- a/hed/tools/analysis/hed_type_manager.py +++ b/hed/tools/analysis/hed_type_manager.py @@ -12,7 +12,7 @@ def __init__(self, event_manager): """ Create a variable manager for one tabular file for all type variables. Parameters: - event_manager (EventManager): an event manager for the tabular file. + event_manager (EventManager): An event manager for the tabular file. :raises HedFileError: - On errors such as unmatched onsets or missing definitions. @@ -24,9 +24,21 @@ def __init__(self, event_manager): @property def types(self): + """ Return a list of types managed by this manager. + + Returns: + list: Type tag names. + """ + return list(self._type_map.keys()) def add_type(self, type_name): + """ Add a type variable to be managed by this manager. + + Parameters: + type_name (str): Type tag name of the type to be added. + + """ if type_name.lower() in self._type_map: return self._type_map[type_name.lower()] = \ @@ -84,12 +96,30 @@ def get_type_tag_factor(self, type_tag, type_value): return None def get_type_def_names(self, type_var): + """ Return the definitions associated with a particular type tag. + + Parameters: + type_var (str): The name of a type tag such as Condition-variable. + + Returns: + list: Names of definitions that use this type. 
+ + """ this_map = self._type_map.get(type_var, None) if not this_map: return [] return this_map.get_type_def_names() def summarize_all(self, as_json=False): + """ Return a dictionary containing the summaries for the types managed by this manager. + + Parameters: + as_json (bool): If False (the default), return as an object otherwise return as a JSON string. + + Returns: + dict or str: Dictionary with the summary. + + """ summary = {} for type_tag, type_tag_var in self._type_map.items(): summary[type_tag] = type_tag_var.get_summary() diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py index 4221c3109..50eec864c 100644 --- a/hed/tools/analysis/key_map.py +++ b/hed/tools/analysis/key_map.py @@ -22,9 +22,9 @@ def __init__(self, key_cols, target_cols=None, name=''): """ Information for remapping columns of tabular files. Parameters: - key_cols (list): List of columns to be replaced (assumed in the DataFrame). - target_cols(list): List of replacement columns (assumed to not be in the DataFrame). - name (str): Name associated with this remap (usually a pathname of the events file). + key_cols (list): List of columns to be replaced (assumed in the DataFrame). + target_cols(list): List of replacement columns (assumed to not be in the DataFrame). + name (str): Name associated with this remap (usually a pathname of the events file). """ @@ -45,6 +45,11 @@ def __init__(self, key_cols, target_cols=None, name=''): @property def columns(self): + """ Return the column names of the columns managed by this map. + + Returns: + list: Column names of the columns managed by this map. + """ return self.key_cols + self.target_cols def __str__(self): @@ -85,6 +90,12 @@ def make_template(self, additional_cols=None, show_counts=True): return df def _get_counts(self): + """ Return counts for the key column combinations. + + Returns: + list: List which is the same length as the col_map containing the counts of the combinations. 
+ + """ counts = [0 for _ in range(len(self.col_map))] for index, row in self.col_map.iterrows(): key_hash = get_row_hash(row, self.key_cols) @@ -199,8 +210,8 @@ def _handle_update(self, row, row_list, next_pos): Parameters: row (DataSeries): Data the values in a row. - row_list (list): A list of rows to be appended to hold the unique rows - next_pos (int): Index into the + row_list (list): A list of rows to be appended to hold the unique rows. + next_pos (int): Index into the row_list of this row Returns: tuple: (key, pos_update) key is the row hash and pos_update is 1 if new row or 0 otherwise.