From 6cf77526210c5cbd586a77ed55258e510d436854 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 11 Nov 2024 13:28:42 -0800 Subject: [PATCH] add string dataset condition for data type conversion on export (#1205) * add strdataset condition for dtype conversion * add strdataset conversion test * update CHANGELOG --- CHANGELOG.md | 1 + src/hdmf/build/objectmapper.py | 7 +++++-- tests/unit/build_tests/test_convert_dtype.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4f0fde80..9ac153581 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Bug fixes - Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199) +- Fixed incorrect dtype conversion of a StrDataset. @stephprince [#1205](https://github.com/hdmf-dev/hdmf/pull/1205) ## HDMF 3.14.5 (October 6, 2024) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 493a55bab..3394ebb91 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -21,7 +21,7 @@ from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec from ..spec.spec import BaseStorageSpec -from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape +from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape, StrDataset _const_arg = '__constructor_arg' @@ -212,7 +212,10 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 if (isinstance(value, np.ndarray) or (hasattr(value, 'astype') and hasattr(value, 'dtype'))): if spec_dtype_type is _unicode: - ret = value.astype('U') + if isinstance(value, StrDataset): + ret = value + else: + ret = value.astype('U') ret_dtype = "utf8" elif spec_dtype_type is _ascii: ret = value.astype('S') diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index 8f9e49239..8f30386d8 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -1,12 +1,17 @@ from datetime import datetime, date import numpy as np +import h5py +import unittest + from hdmf.backends.hdf5 import H5DataIO from hdmf.build import ObjectMapper from hdmf.data_utils import DataChunkIterator from hdmf.spec import DatasetSpec, RefSpec, DtypeSpec from hdmf.testing import TestCase +from hdmf.utils import StrDataset +H5PY_3 = h5py.__version__.startswith('3') class TestConvertDtype(TestCase): @@ -321,6 +326,19 @@ def test_text_spec(self): self.assertIs(ret, value) self.assertEqual(ret_dtype, 'utf8') + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_text_spec_str_dataset(self): + text_spec_types = ['text', 'utf', 'utf8', 'utf-8'] + for spec_type in text_spec_types: + with self.subTest(spec_type=spec_type): + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + spec = DatasetSpec('an example dataset', spec_type, name='data') + + value = StrDataset(f.create_dataset('data', data=['a', 'b', 'c']), None) + ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) # no conversion + self.assertIs(ret, value) + self.assertEqual(ret_dtype, 'utf8') + def test_ascii_spec(self): ascii_spec_types = ['ascii', 'bytes'] for spec_type in ascii_spec_types: