Merge branch 'dev' into pre-commit-ci-update-config

hdmf-dev · Jan 13, 2024 · cf9053e
2 parents ff7d60b + beb22b4
commit cf9053e
Show file tree

Hide file tree

Showing 15 changed files with 183 additions and 60 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -24,7 +24,7 @@ body:
         Please copy and paste the code you were trying to run that caused the error.
 
         Feel free to include as little or as much as you think is relevant. This section will be automatically formatted into code, so no need for backticks.
-      render: shell
+      render: python
     validations:
       required: true
   - type: textarea

diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
@@ -67,7 +67,7 @@ jobs:
 
       - name: Upload distribution as a workspace artifact
         if: ${{ matrix.upload-wheels }}
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: distributions
           path: dist

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,10 +4,9 @@
 
 ### Enhancements
 - Add Data.set_data_io(), which allows for setting a `DataIO` to a data object after-the-fact. @bendichter and @CodyCBakerPhD [#1013](https://github.com/hdmf-dev/hdmf/pull/1013)
-
-### Enhancements
 - Added `add_ref_termset`, updated helper methods for `HERD`, revised `add_ref` to support validations prior to populating the tables
   and added `add_ref_container`.  @mavaylon1 [#968](https://github.com/hdmf-dev/hdmf/pull/968)
+- Use `stacklevel` in most warnings. @rly [#1027](https://github.com/hdmf-dev/hdmf/pull/1027)
 
 ### Minor Improvements
 - Updated `__gather_columns` to ignore the order of bases when generating columns from the super class. @mavaylon1 [#991](https://github.com/hdmf-dev/hdmf/pull/991)
@@ -18,8 +17,8 @@
 ### Bug fixes
 - Fixed issue with custom class generation when a spec has a `name`. @rly [#1006](https://github.com/hdmf-dev/hdmf/pull/1006)
 - Fixed issue with usage of deprecated `ruamel.yaml.safe_load` in `src/hdmf/testing/validate_spec.py`. @rly [#1008](https://github.com/hdmf-dev/hdmf/pull/1008)
-
 - Fixed issue where `ElementIdentifiers` data could be set to non-integer values. @rly [#1009](https://github.com/hdmf-dev/hdmf/pull/1009)
+- Fixed issue where string datasets/attributes with isodatetime-formatted values failed validation against a text spec. @rly [#1026](https://github.com/hdmf-dev/hdmf/pull/1026)
 
 ## HDMF 3.11.0 (October 30, 2023)
 

diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst
@@ -19,7 +19,7 @@ inconsistencies.
 There are badges in the README_ file which shows the current condition of the dev branch.
 
 .. _GitHub Actions: https://github.com/hdmf-dev/hdmf/actions
-.. _README: https://github.com/hdmf-dev/hdmf#readme
+.. _README: https://github.com/hdmf-dev/hdmf/blob/dev/README.rst
 
 
 --------

diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py
@@ -499,7 +499,7 @@ def __init__(self, **kwargs):
         # Check for possible collision with other parameters
         if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data:
             self.__link_data = False
-            warnings.warn('link_data parameter in H5DataIO will be ignored')
+            warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2)
         # Call the super constructor and consume the data parameter
         super().__init__(**kwargs)
         # Construct the dict with the io args, ignoring all options that were set to None
@@ -523,7 +523,7 @@ def __init__(self, **kwargs):
                 self.__iosettings.pop('compression', None)
                 if 'compression_opts' in self.__iosettings:
                     warnings.warn('Compression disabled by compression=False setting. ' +
-                                  'compression_opts parameter will, therefore, be ignored.')
+                                  'compression_opts parameter will, therefore, be ignored.', stacklevel=2)
                     self.__iosettings.pop('compression_opts', None)
         # Validate the compression options used
         self._check_compression_options()
@@ -537,7 +537,8 @@ def __init__(self, **kwargs):
         # Check possible parameter collisions
         if isinstance(self.data, Dataset):
             for k in self.__iosettings.keys():
-                warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k)
+                warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k,
+                              stacklevel=2)
 
         self.__dataset = None
 
@@ -594,7 +595,7 @@ def _check_compression_options(self):
             if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]:
                 warnings.warn(str(self.__iosettings['compression']) + " compression may not be available "
                               "on all installations of HDF5. Use of gzip is recommended to ensure portability of "
-                              "the generated HDF5 files.")
+                              "the generated HDF5 files.", stacklevel=3)
 
     @staticmethod
     def filter_available(filter, allow_plugin_filters):

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
@@ -324,7 +324,9 @@ def copy_file(self, **kwargs):
         """
 
         warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of "
-                      "HDMF. Please use the export method or h5py.File.copy method instead.", DeprecationWarning)
+                      "HDMF. Please use the export method or h5py.File.copy method instead.",
+                      category=DeprecationWarning,
+                      stacklevel=2)
 
         source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename',
                                                                                             'dest_filename',

diff --git a/src/hdmf/build/map.py b/src/hdmf/build/map.py
@@ -4,4 +4,4 @@
 
 import warnings
 warnings.warn('Classes in map.py should be imported from hdmf.build. Importing from hdmf.build.map will be removed '
-              'in HDMF 3.0.', DeprecationWarning)
+              'in HDMF 3.0.', DeprecationWarning, stacklevel=2)
diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py
@@ -628,7 +628,7 @@ def add_ref(self, **kwargs):
             if entity_uri is not None:
                 entity_uri = entity.entity_uri
                 msg = 'This entity already exists. Ignoring new entity uri'
-                warn(msg)
+                warn(msg, stacklevel=2)
 
         #################
         # Validate Object

diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py
@@ -503,7 +503,7 @@ def __set_table_attr(self, col):
             msg = ("An attribute '%s' already exists on %s '%s' so this column cannot be accessed as an attribute, "
                    "e.g., table.%s; it can only be accessed using other methods, e.g., table['%s']."
                    % (col.name, self.__class__.__name__, self.name, col.name, col.name))
-            warn(msg)
+            warn(msg, stacklevel=2)
         else:
             setattr(self, col.name, col)
 
@@ -764,7 +764,7 @@ def add_column(self, **kwargs):  # noqa: C901
 
         if isinstance(index, VectorIndex):
             warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be "
-                 "deprecated in a future version of HDMF.", FutureWarning)
+                 "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=2)
 
         if name in self.__colids:  # column has already been added
             msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name)
@@ -781,7 +781,7 @@ def add_column(self, **kwargs):  # noqa: C901
                        "Please ensure the new column complies with the spec. "
                        "This will raise an error in a future version of HDMF."
                        % (name, self.__class__.__name__, spec_table))
-                warn(msg)
+                warn(msg, stacklevel=2)
 
             index_bool = index or not isinstance(index, bool)
             spec_index = self.__uninit_cols[name].get('index', False)
@@ -791,7 +791,7 @@ def add_column(self, **kwargs):  # noqa: C901
                        "Please ensure the new column complies with the spec. "
                        "This will raise an error in a future version of HDMF."
                        % (name, self.__class__.__name__, spec_index))
-                warn(msg)
+                warn(msg, stacklevel=2)
 
             spec_col_cls = self.__uninit_cols[name].get('class', VectorData)
             if col_cls != spec_col_cls:
@@ -800,7 +800,7 @@ def add_column(self, **kwargs):  # noqa: C901
                        "Please ensure the new column complies with the spec. "
                        "This will raise an error in a future version of HDMF."
                        % (name, self.__class__.__name__, spec_col_cls))
-                warn(msg)
+                warn(msg, stacklevel=2)
 
         ckwargs = dict(kwargs)
 
@@ -1517,7 +1517,7 @@ def _validate_on_set_parent(self):
         if set(table_ancestor_ids).isdisjoint(self_ancestor_ids):
             msg = (f"The linked table for DynamicTableRegion '{self.name}' does not share an ancestor with the "
                    "DynamicTableRegion.")
-            warn(msg)
+            warn(msg, stacklevel=2)
         return super()._validate_on_set_parent()
 
 

diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py
@@ -50,13 +50,13 @@ def __init__(self, **kwargs):
             self['full_name'] = full_name
         if version == str(SpecNamespace.UNVERSIONED):
             # the unversioned version may be written to file as a string and read from file as a string
-            warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name)
+            warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name, stacklevel=2)
             version = SpecNamespace.UNVERSIONED
         if version is None:
             # version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to
             # be able to read older files with extensions that are missing the version key.
             warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. "
-                  "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED))
+                  "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED), stacklevel=2)
             version = SpecNamespace.UNVERSIONED
         self['version'] = version
         if date is not None:
@@ -529,7 +529,7 @@ def load_namespaces(self, **kwargs):
                 if ns['version'] != self.__namespaces.get(ns['name'])['version']:
                     # warn if the cached namespace differs from the already loaded namespace
                     warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
-                         % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']))
+                         % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']), stacklevel=2)
             else:
                 to_load.append(ns)
         # now load specs into namespace

diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py
@@ -318,7 +318,7 @@ def __init__(self, **kwargs):
         default_name = getargs('default_name', kwargs)
         if default_name:
             if name is not None:
-                warn("found 'default_name' with 'name' - ignoring 'default_name'")
+                warn("found 'default_name' with 'name' - ignoring 'default_name'", stacklevel=2)
             else:
                 self['default_name'] = default_name
         self.__attributes = dict()

diff --git a/src/hdmf/spec/write.py b/src/hdmf/spec/write.py
@@ -247,7 +247,7 @@ def export_spec(ns_builder, new_data_types, output_dir):
     """
 
     if len(new_data_types) == 0:
-        warnings.warn('No data types specified. Exiting.')
+        warnings.warn('No data types specified. Exiting.', stacklevel=2)
         return
 
     ns_path = ns_builder.name + '.namespace.yaml'

diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py
@@ -434,7 +434,7 @@ def fmt_docval_args(func, kwargs):
                   "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
                   "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
                   "is set), then you will need to pop the extra arguments out of kwargs before calling the function.",
-                  PendingDeprecationWarning)
+                  PendingDeprecationWarning, stacklevel=2)
     func_docval = getattr(func, docval_attr_name, None)
     ret_args = list()
     ret_kwargs = dict()
@@ -488,7 +488,7 @@ def call_docval_func(func, kwargs):
                   "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
                   "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
                   "is set), then you will need to pop the extra arguments out of kwargs before calling the function.",
-                  PendingDeprecationWarning)
+                  PendingDeprecationWarning, stacklevel=2)
     with warnings.catch_warnings(record=True):
         # catch and ignore only PendingDeprecationWarnings from fmt_docval_args so that two
         # PendingDeprecationWarnings saying the same thing are not raised
@@ -645,7 +645,7 @@ def _check_args(args, kwargs):
             parse_warnings = parsed.get('future_warnings')
             if parse_warnings:
                 msg = '%s: %s' % (func.__qualname__, ', '.join(parse_warnings))
-                warnings.warn(msg, FutureWarning)
+                warnings.warn(msg, category=FutureWarning, stacklevel=3)
 
             for error_type, ExceptionType in (('type_errors', TypeError),
                                               ('value_errors', ValueError),

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
@@ -42,7 +42,7 @@
 __allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k))
 
 
-def check_type(expected, received):
+def check_type(expected, received, string_format=None):
     '''
     *expected* should come from the spec
     *received* should come from the data
@@ -52,6 +52,12 @@ def check_type(expected, received):
             raise ValueError('compound type shorter than expected')
         for i, exp in enumerate(DtypeHelper.simplify_cpd_type(expected)):
             rec = received[i]
+            if exp == "isodatetime":  # short circuit for isodatetime
+                sub_string_format = string_format[i]
+                return (
+                    rec in __allowable[exp] or
+                    rec in ("utf", "ascii") and sub_string_format == "isodatetime"
+                )
             if rec not in __allowable[exp]:
                 return False
         return True
@@ -71,6 +77,11 @@ def check_type(expected, received):
                 received = received.name
         elif isinstance(received, type):
             received = received.__name__
+        if expected == "isodatetime":  # short circuit for isodatetime
+            return (
+                received in __allowable[expected] or
+                (received in ("utf", "ascii") and string_format == "isodatetime")
+            )
         if isinstance(expected, RefSpec):
             expected = expected.reftype
         elif isinstance(expected, type):
@@ -89,48 +100,58 @@ def get_iso8601_regex():
 _iso_re = get_iso8601_regex()
 
 
-def _check_isodatetime(s, default=None):
+def get_string_format(data):
+    """Return the string format of the given data. Possible outputs are "isodatetime" and None.
+    """
+    assert isinstance(data, (str, bytes))
     try:
-        if _iso_re.match(pystr(s)) is not None:
+        if _iso_re.match(pystr(data)) is not None:
             return 'isodatetime'
     except Exception:
         pass
-    return default
+    return None
 
 
 class EmptyArrayError(Exception):
     pass
 
 
-def get_type(data):
+def get_type(data, builder_dtype=None):
+    """Return a tuple of (the string representation of the type, the format of the string data) for the given data."""
     if isinstance(data, str):
-        return _check_isodatetime(data, 'utf')
+        return 'utf', get_string_format(data)
     elif isinstance(data, bytes):
-        return _check_isodatetime(data, 'ascii')
+        return 'ascii', get_string_format(data)
     elif isinstance(data, RegionBuilder):
-        return 'region'
+        return 'region', None
     elif isinstance(data, ReferenceBuilder):
-        return 'object'
+        return 'object', None
     elif isinstance(data, ReferenceResolver):
-        return data.dtype
+        return data.dtype, None
     elif isinstance(data, np.ndarray):
         if data.size == 0:
             raise EmptyArrayError()
-        return get_type(data[0])
+        return get_type(data[0], builder_dtype)
     elif isinstance(data, np.bool_):
-        return 'bool'
+        return 'bool', None
     if not hasattr(data, '__len__'):
-        return type(data).__name__
+        return type(data).__name__, None
     else:
+        if builder_dtype and isinstance(builder_dtype, list):  # compound dtype
+            dtypes = []
+            string_formats = []
+            for i in range(len(builder_dtype)):
+                dtype, string_format = get_type(data[0][i])
+                dtypes.append(dtype)
+                string_formats.append(string_format)
+            return dtypes, string_formats
         if hasattr(data, 'dtype'):
-            if isinstance(data.dtype, list):
-                return [get_type(data[0][i]) for i in range(len(data.dtype))]
             if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None:
                 return get_type(data[0])
-            return data.dtype
+            return data.dtype, None
         if len(data) == 0:
             raise EmptyArrayError()
-        return get_type(data[0])
+        return get_type(data[0], builder_dtype)
 
 
 def check_shape(expected, received):
@@ -310,7 +331,7 @@ def validate(self, **kwargs):
                 if not isinstance(value, BaseBuilder):
                     expected = '%s reference' % spec.dtype.reftype
                     try:
-                        value_type = get_type(value)
+                        value_type, _ = get_type(value)
                         ret.append(DtypeError(self.get_spec_loc(spec), expected, value_type))
                     except EmptyArrayError:
                         # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
@@ -323,8 +344,8 @@ def validate(self, **kwargs):
                         ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type))
             else:
                 try:
-                    dtype = get_type(value)
-                    if not check_type(spec.dtype, dtype):
+                    dtype, string_format = get_type(value)
+                    if not check_type(spec.dtype, dtype, string_format):
                         ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype))
                 except EmptyArrayError:
                     # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
@@ -385,14 +406,17 @@ def validate(self, **kwargs):
         data = builder.data
         if self.spec.dtype is not None:
             try:
-                dtype = get_type(data)
-                if not check_type(self.spec.dtype, dtype):
+                dtype, string_format = get_type(data, builder.dtype)
+                if not check_type(self.spec.dtype, dtype, string_format):
                     ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype,
                                           location=self.get_builder_loc(builder)))
             except EmptyArrayError:
                 # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                 pass
-        shape = get_data_shape(data)
+        if isinstance(builder.dtype, list):
+            shape = (len(builder.data), )  # only 1D datasets with compound types are supported
+        else:
+            shape = get_data_shape(data)
         if not check_shape(self.spec.shape, shape):
             if shape is None:
                 ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data),