From 6cf77526210c5cbd586a77ed55258e510d436854 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 11 Nov 2024 13:28:42 -0800 Subject: [PATCH 1/3] add string dataset condition for data type conversion on export (#1205) * add strdataset condition for dtype conversion * add strdataset conversion test * update CHANGELOG --- CHANGELOG.md | 1 + src/hdmf/build/objectmapper.py | 7 +++++-- tests/unit/build_tests/test_convert_dtype.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4f0fde80..9ac153581 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Bug fixes - Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199) +- Fixed incorrect dtype conversion of a StrDataset. @stephprince [#1205](https://github.com/hdmf-dev/hdmf/pull/1205) ## HDMF 3.14.5 (October 6, 2024) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 493a55bab..3394ebb91 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -21,7 +21,7 @@ from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec from ..spec.spec import BaseStorageSpec -from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape +from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape, StrDataset _const_arg = '__constructor_arg' @@ -212,7 +212,10 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 if (isinstance(value, np.ndarray) or (hasattr(value, 'astype') and hasattr(value, 'dtype'))): if spec_dtype_type is _unicode: - ret = value.astype('U') + if isinstance(value, StrDataset): + ret = value + else: + ret = value.astype('U') ret_dtype = "utf8" elif spec_dtype_type is _ascii: ret = value.astype('S') diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index 8f9e49239..8f30386d8 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -1,12 +1,17 @@ from datetime import datetime, date import numpy as np +import h5py +import unittest + from hdmf.backends.hdf5 import H5DataIO from hdmf.build import ObjectMapper from hdmf.data_utils import DataChunkIterator from hdmf.spec import DatasetSpec, RefSpec, DtypeSpec from hdmf.testing import TestCase +from hdmf.utils import StrDataset +H5PY_3 = h5py.__version__.startswith('3') class TestConvertDtype(TestCase): @@ -321,6 +326,19 @@ def test_text_spec(self): self.assertIs(ret, value) self.assertEqual(ret_dtype, 'utf8') + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_text_spec_str_dataset(self): + text_spec_types = ['text', 'utf', 'utf8', 'utf-8'] + for spec_type in text_spec_types: + with self.subTest(spec_type=spec_type): + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + spec = DatasetSpec('an example dataset', spec_type, name='data') + + value = StrDataset(f.create_dataset('data', data=['a', 'b', 'c']), None) + ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) # no conversion + self.assertIs(ret, value) + self.assertEqual(ret_dtype, 'utf8') + def test_ascii_spec(self): ascii_spec_types = ['ascii', 'bytes'] for spec_type in ascii_spec_types: From a8e496b637e9a84bc2ff0c0e24e040682a6a8653 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:40:11 -0800 Subject: [PATCH 2/3] Raise error when using colon for container name (#1202) Co-authored-by: Ryan Ly Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 3 ++- src/hdmf/container.py | 4 ++-- tests/unit/test_container.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ac153581..adcbd6bb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,8 @@ ### Enhancements - Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188) -- Improved html representation of data in `Containers` @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100) +- Improved html representation of data in `Container` objects. @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100) +- Added error when using colon for `Container` name. A colon cannot be used as a group name when writing to Zarr on Windows. @stephprince [#1202](https://github.com/hdmf-dev/hdmf/pull/1202) ### Bug fixes - Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 4ee24ec2d..a61dc19e8 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -303,8 +303,8 @@ def __new__(cls, *args, **kwargs): @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}) def __init__(self, **kwargs): name = getargs('name', kwargs) - if '/' in name: - raise ValueError("name '" + name + "' cannot contain '/'") + if ('/' in name or ':' in name) and not self._in_construct_mode: + raise ValueError(f"name '{name}' cannot contain a '/' or ':'") self.__name = name self.__field_values = dict() self.__read_io = None diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 35d8e480c..c12247de7 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -180,6 +180,17 @@ def test_set_parent_overwrite_proxy(self): def test_slash_restriction(self): self.assertRaises(ValueError, Container, 'bad/name') + # check no error raised in construct mode + child_obj = Container.__new__(Container, in_construct_mode=True) + child_obj.__init__('bad/name') + + def test_colon_restriction(self): + self.assertRaises(ValueError, Container, 'bad:name') + + # check no error raised in construct mode + child_obj = Container.__new__(Container, in_construct_mode=True) + child_obj.__init__('bad:name') + def test_set_modified_parent(self): """Test that set modified properly sets parent modified """ From 8c3eecbc312ef0431bb7a2ac178ce55dc3d4da22 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:22:17 -0800 Subject: [PATCH 3/3] [pre-commit.ci] pre-commit autoupdate (#1200) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4122f041f..0cc5da870 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.7.3 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate