Skip to content

Commit

Permalink
Configuration File for TermSet validations (#1016)
Browse files Browse the repository at this point in the history
* config

* rough draft

* move

* testing

* check

* new way of thinking draft

* support multiple config files

* testing

* placeholder

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update

* Delete docs/gallery/example_config.yaml

* clean up

* clean up

* clean up

* checkpoint

* need to clean

* partial clean up

* warn

* yaml changes

* revert

* except

* clean up

* warning tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* tests

* tests

* tests

* ruff

* update

* update

* cov

* tests

* tests/clean

* coverage

* coverage

* final clean ups

* final clean ups

* Update CHANGELOG.md

* Update CHANGELOG.md

* Update CHANGELOG.md

* Update src/hdmf/container.py

* Update src/hdmf/container.py

* Update src/hdmf/term_set.py

* Update src/hdmf/term_set.py

* in progress

* Update src/hdmf/container.py

Co-authored-by: Ryan Ly <[email protected]>

* Update tests/unit/test_term_set.py

Co-authored-by: Ryan Ly <[email protected]>

* Update tests/unit/test_term_set.py

Co-authored-by: Ryan Ly <[email protected]>

* in progress

* in progress

* in progress

* in progress

* clean tests

* checkpoint of updates

* checkpoint of updates

* checkpoint of updates

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* copy

* clean up

* clean

* Update CHANGELOG.md

* clean up

* clean up

* test copy

* name

* Update CHANGELOG.md

* Update requirements-opt.txt

* Update requirements-opt.txt

* Update container.py

Co-authored-by: Ryan Ly <[email protected]>

* Update container.py

Co-authored-by: Ryan Ly <[email protected]>

* Update __init__.py

Co-authored-by: Ryan Ly <[email protected]>

* Update manager.py

Co-authored-by: Ryan Ly <[email protected]>

* clean

* namespace

* Update src/hdmf/common/__init__.py

* Update src/hdmf/common/__init__.py

Co-authored-by: Ryan Ly <[email protected]>

* Update __init__.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ryan Ly <[email protected]>
  • Loading branch information
3 people authored Mar 28, 2024
1 parent 0000202 commit 244d17a
Show file tree
Hide file tree
Showing 14 changed files with 402 additions and 78 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# HDMF Changelog

## HDMF 3.14.0 (Upcoming)

### Enhancements
- Added `TermSetConfigurator` to automatically wrap fields with `TermSetWrapper` according to a configuration file. @mavaylon1 [#1016](https://github.com/hdmf-dev/hdmf/pull/1016)

## HDMF 3.13.0 (March 20, 2024)

### Enhancements
Expand Down
6 changes: 2 additions & 4 deletions requirements-opt.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# pinned dependencies that are optional. used to reproduce an entire development environment to use HDMF
tqdm==4.66.2
zarr==2.17.1
linkml-runtime==1.7.3; python_version >= "3.9"
linkml-runtime==1.7.4; python_version >= "3.9"
schemasheets==0.2.1; python_version >= "3.9"
oaklib==0.5.31; python_version >= "3.9"
pydantic==2.6.4
pyyaml==6.0.1; python_version >= "3.9"
oaklib==0.5.32; python_version >= "3.9"
2 changes: 1 addition & 1 deletion src/hdmf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .container import Container, Data, DataRegion, HERDManager
from .region import ListSlicer
from .utils import docval, getargs
from .term_set import TermSet, TermSetWrapper
from .term_set import TermSet, TermSetWrapper, TypeConfigurator


@docval(
Expand Down
18 changes: 13 additions & 5 deletions src/hdmf/build/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, BaseBuilder
from .classgenerator import ClassGenerator, CustomClassGenerator, MCIClassGenerator
from ..container import AbstractContainer, Container, Data
from ..term_set import TypeConfigurator
from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog
from ..spec.spec import BaseStorageSpec
from ..utils import docval, getargs, ExtenderMeta, get_docval
Expand Down Expand Up @@ -391,25 +392,32 @@ def data_type(self):


class TypeMap:
''' A class to maintain the map between ObjectMappers and AbstractContainer classes
'''
"""
A class to maintain the map between ObjectMappers and AbstractContainer classes
"""

@docval({'name': 'namespaces', 'type': NamespaceCatalog, 'doc': 'the NamespaceCatalog to use', 'default': None},
{'name': 'mapper_cls', 'type': type, 'doc': 'the ObjectMapper class to use', 'default': None})
{'name': 'mapper_cls', 'type': type, 'doc': 'the ObjectMapper class to use', 'default': None},
{'name': 'type_config', 'type': TypeConfigurator, 'doc': 'The TypeConfigurator to use.',
'default': None})
def __init__(self, **kwargs):
namespaces, mapper_cls = getargs('namespaces', 'mapper_cls', kwargs)
namespaces, mapper_cls, type_config = getargs('namespaces', 'mapper_cls', 'type_config', kwargs)
if namespaces is None:
namespaces = NamespaceCatalog()
if mapper_cls is None:
from .objectmapper import ObjectMapper # avoid circular import
mapper_cls = ObjectMapper
if type_config is None:
type_config = TypeConfigurator()
self.__ns_catalog = namespaces
self.__mappers = dict() # already constructed ObjectMapper classes
self.__mapper_cls = dict() # the ObjectMapper class to use for each container type
self.__container_types = OrderedDict()
self.__data_types = dict()
self.__default_mapper_cls = mapper_cls
self.__class_generator = ClassGenerator()
self.type_config = type_config

self.register_generator(CustomClassGenerator)
self.register_generator(MCIClassGenerator)

Expand All @@ -422,7 +430,7 @@ def container_types(self):
return self.__container_types

def __copy__(self):
ret = TypeMap(copy(self.__ns_catalog), self.__default_mapper_cls)
ret = TypeMap(copy(self.__ns_catalog), self.__default_mapper_cls, self.type_config)
ret.merge(self)
return ret

Expand Down
25 changes: 25 additions & 0 deletions src/hdmf/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,31 @@
# a global type map
global __TYPE_MAP

@docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'},
        is_method=False)
def load_type_config(**kwargs):
    """
    Load a termset configuration file, either the default or the one at the given path.

    NOTE: the configuration is global and shared across all type maps.
    """
    __TYPE_MAP.type_config.load_type_config(kwargs['config_path'])

def get_loaded_type_config():
    """
    Return the entire currently loaded configuration.

    Raises a ValueError when no configuration has been loaded.
    """
    config = __TYPE_MAP.type_config.config
    if config is None:
        msg = "No configuration is loaded."
        raise ValueError(msg)
    return config

def unload_type_config():
    """
    Unload the global termset configuration file.
    """
    configurator = __TYPE_MAP.type_config
    return configurator.unload_type_config()

# a function to register a container classes with the global map
@docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the spec for'},
Expand Down
82 changes: 77 additions & 5 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Type
from uuid import uuid4
from warnings import warn
import os

import h5py
import numpy as np
Expand All @@ -13,6 +14,7 @@
from .data_utils import DataIO, append_data, extend_data
from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict

from .term_set import TermSet, TermSetWrapper

def _set_exp(cls):
"""Set a class as being experimental"""
Expand All @@ -34,7 +36,7 @@ class HERDManager:
This class manages whether to set/attach an instance of HERD to the subclass.
"""

@docval({'name': 'herd', 'type': 'hdmf.common.resources.HERD',
@docval({'name': 'herd', 'type': 'HERD',
'doc': 'The external resources to be used for the container.'},)
def link_resources(self, **kwargs):
"""
Expand Down Expand Up @@ -75,7 +77,6 @@ def _setter(cls, field):
Make a setter function for creating a :py:func:`property`
"""
name = field['name']

if not field.get('settable', True):
return None

Expand All @@ -85,10 +86,82 @@ def setter(self, val):
if name in self.fields:
msg = "can't set attribute '%s' -- already set" % name
raise AttributeError(msg)
self.fields[name] = val
self.fields[name] = self._field_config(arg_name=name, val=val)

return setter

@property
def data_type(self):
    """
    The spec data type associated with this container, i.e. the value of the
    attribute whose name is given by ``self._data_type_attr``.
    """
    attr_name = self._data_type_attr
    return getattr(self, attr_name)


def _field_config(self, arg_name, val):
    """
    This method will be called in the setter. The termset configuration will be used (if loaded)
    to check for a defined TermSet associated with the field. If found, the value of the field
    will be wrapped with a TermSetWrapper.
    Even though the path field in the configurator can be a list of paths, the config
    itself is only one file. When a user loads custom configs, the config is appended/modified.
    The modifications are not written to file, avoiding permanent modifications.

    Returns either ``val`` unchanged (when no config applies, or on any lookup
    failure, each of which emits a warning) or ``val`` wrapped in a TermSetWrapper.
    """
    # load termset configuration file from global Config
    from hdmf.common import get_type_map # circular import
    type_map = get_type_map()
    configurator = type_map.type_config

    if len(configurator.path)>0:
        # The type_map has a config always set; however, when toggled off, the config path is empty.
        # NOTE(review): termset paths are resolved relative to the FIRST loaded config file only,
        # even when multiple config files were merged — confirm this is intended.
        CUR_DIR = os.path.dirname(os.path.realpath(configurator.path[0]))
        termset_config = configurator.config
    else:
        return val
    # check to see that the namespace for the container is in the config
    if self.namespace not in type_map.container_types:
        msg = "%s not found within loaded configuration." % self.namespace
        warn(msg)
        return val
    else:
        # check to see that the container type is in the config under the namespace
        config_namespace = termset_config['namespaces'][self.namespace]
        data_type = self.data_type

        if data_type not in config_namespace['data_types']:
            msg = '%s not found within the configuration for %s' % (data_type, self.namespace)
            warn(msg)
            return val
        else:
            # NOTE(review): this loop never compares `attr` (or the mapped name) against
            # `arg_name`, so the field being set is wrapped based on whichever attributes
            # are listed in the config for this data type — verify that wrapping should
            # not be restricted to the attribute actually being assigned.
            for attr in config_namespace['data_types'][data_type]:
                obj_mapper = type_map.get_map(self)

                # get the spec according to attr name in schema
                # Note: this is the name for the field in the config
                spec = obj_mapper.get_attr_spec(attr)

                # In the case of dealing with datasets directly or not defined in the spec.
                # (Data/VectorData/DynamicTable/etc)
                if spec is None:
                    msg = "Spec not found for %s." % attr
                    warn(msg)
                    return val
                else:
                    # If the val has been manually wrapped then skip checking the config for the attr
                    if isinstance(val, TermSetWrapper):
                        msg = "Field value already wrapped with TermSetWrapper."
                        warn(msg)
                        return val
                    else:
                        # From the spec, get the mapped attribute name and build the termset
                        # path relative to the directory of the first loaded config file.
                        mapped_attr_name = obj_mapper.get_attribute(spec)
                        termset_path = os.path.join(CUR_DIR,
                            config_namespace['data_types'][data_type][mapped_attr_name]['termset'])
                        termset = TermSet(term_schema_path=termset_path)
                        val = TermSetWrapper(value=val, termset=termset)
    return val

@classmethod
def _getter(cls, field):
"""
Expand Down Expand Up @@ -389,7 +462,7 @@ def set_modified(self, **kwargs):
def children(self):
return tuple(self.__children)

@docval({'name': 'child', 'type': 'hdmf.container.Container',
@docval({'name': 'child', 'type': 'Container',
'doc': 'the child Container for this Container', 'default': None})
def add_child(self, **kwargs):
warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.'))
Expand Down Expand Up @@ -787,7 +860,6 @@ class Data(AbstractContainer):
"""
A class for representing dataset containers
"""

@docval({'name': 'name', 'type': str, 'doc': 'the name of this container'},
{'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'})
def __init__(self, **kwargs):
Expand Down
87 changes: 79 additions & 8 deletions src/hdmf/term_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import warnings
import numpy as np
from .data_utils import append_data, extend_data
from ruamel.yaml import YAML


class TermSet:
Expand Down Expand Up @@ -162,19 +163,20 @@ def __schemasheets_convert(self):
This method returns a path to the new schema to be viewed via SchemaView.
"""
try:
import yaml
from linkml_runtime.utils.schema_as_dict import schema_as_dict
from schemasheets.schemamaker import SchemaMaker
except ImportError: # pragma: no cover
msg = "Install schemasheets."
raise ValueError(msg)

schema_maker = SchemaMaker()
tsv_file_paths = glob.glob(self.schemasheets_folder + "/*.tsv")
schema = schema_maker.create_schema(tsv_file_paths)
schema_dict = schema_as_dict(schema)
schemasheet_schema_path = os.path.join(self.schemasheets_folder, f"{schema_dict['name']}.yaml")

with open(schemasheet_schema_path, "w") as f:
yaml=YAML(typ='safe')
yaml.dump(schema_dict, f)

return schemasheet_schema_path
Expand Down Expand Up @@ -262,13 +264,6 @@ def __getitem__(self, val):
"""
return self.__value[val]

# uncomment when DataChunkIterator objects can be wrapped by TermSet
# def __next__(self):
# """
# Return the next item of a wrapped iterator.
# """
# return self.__value.__next__()
#
def __len__(self):
    """Length of the wrapped value."""
    wrapped_value = self.__value
    return len(wrapped_value)

Expand Down Expand Up @@ -304,3 +299,79 @@ def extend(self, arg):
else:
msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data]))
raise ValueError(msg)

class TypeConfigurator:
    """
    This class allows users to toggle on/off a global configuration for defined data types.
    When toggled on, every instance of a configuration file supported data type will be validated
    according to the corresponding TermSet.

    Attributes:
        config: the merged contents of every loaded configuration file (None when unloaded).
        path: list of configuration file paths that contributed to ``config``.
    """
    @docval({'name': 'path', 'type': str, 'doc': 'Path to the configuration file.', 'default': None})
    def __init__(self, **kwargs):
        self.config = None
        if kwargs['path'] is None:
            self.path = []
        else:
            self.path = [kwargs['path']]
            self.load_type_config(config_path=self.path[0])

    @docval({'name': 'data_type', 'type': str,
             'doc': 'The desired data type within the configuration file.'},
            {'name': 'namespace', 'type': str,
             'doc': 'The namespace for the data type.'})
    def get_config(self, data_type, namespace):
        """
        Return the config for that data type in the given namespace.

        Raises a ValueError when the namespace or the data type is absent
        from the loaded configuration.
        """
        try:
            namespace_config = self.config['namespaces'][namespace]
        except KeyError:
            msg = 'The namespace %s was not found within the configuration.' % namespace
            raise ValueError(msg)

        try:
            return namespace_config['data_types'][data_type]
        except KeyError:
            msg = '%s was not found within the configuration for that namespace.' % data_type
            raise ValueError(msg)

    @docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'})
    def load_type_config(self, config_path):
        """
        Load the configuration file for validation on the fields defined for the objects within the file.

        The first load (or first load after an unload) sets the config as-is. Subsequent
        loads merge the new file into the existing config: new namespaces are appended,
        and within an existing namespace each data type entry is added or overridden.
        The merge is in-memory only; no file is rewritten.

        Raises a ValueError if ``config_path`` has already been loaded.
        """
        with open(config_path, 'r') as config:
            yaml = YAML(typ='safe')
            termset_config = yaml.load(config)
        if self.config is None:  # set the initial config/load after config has been unloaded
            self.config = termset_config
            if len(self.path) == 0:  # for loading after an unloaded config
                self.path.append(config_path)
        else:  # append/replace within the existing config
            if config_path in self.path:
                msg = 'This configuration file path already exists within the configurator.'
                raise ValueError(msg)
            for namespace, namespace_config in termset_config['namespaces'].items():
                if namespace not in self.config['namespaces']:
                    # append the whole namespace config if not present
                    self.config['namespaces'][namespace] = namespace_config
                else:
                    # add or override individual data type configs within the existing
                    # namespace (a single assignment covers both cases)
                    existing_types = self.config['namespaces'][namespace]['data_types']
                    for data_type, type_config in namespace_config['data_types'].items():
                        existing_types[data_type] = type_config

            # record the path that contributed to the merged config
            self.path.append(config_path)

    def unload_type_config(self):
        """
        Remove validation according to termset configuration file.
        """
        self.path = []
        self.config = None
14 changes: 13 additions & 1 deletion tests/unit/common/test_common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from hdmf import Data, Container
from hdmf.common import get_type_map
from hdmf.common import get_type_map, load_type_config, unload_type_config
from hdmf.testing import TestCase


Expand All @@ -11,3 +11,15 @@ def test_base_types(self):
self.assertIs(cls, Container)
cls = tm.get_dt_container_cls('Data', 'hdmf-common')
self.assertIs(cls, Data)

def test_copy_ts_config(self):
    """
    Loading a config should be reflected in the global type map's configurator.

    The unload is placed in a ``finally`` block so a failing assertion cannot
    leak the global configuration into other tests.
    """
    path = 'tests/unit/hdmf_config.yaml'
    load_type_config(config_path=path)
    try:
        tm = get_type_map()
        config = {'namespaces': {'hdmf-common': {'version': '3.12.2',
                  'data_types': {'VectorData': {'description': {'termset': 'example_test_term_set.yaml'}},
                                 'VectorIndex': {'data': '...'}}}}}

        self.assertEqual(tm.type_config.config, config)
        self.assertEqual(tm.type_config.path, [path])
    finally:
        unload_type_config()
Loading

0 comments on commit 244d17a

Please sign in to comment.