Fix validation of datetime-formatted strings (#1026)
rly authored Jan 12, 2024
1 parent 1a1d5ab commit beb22b4
Showing 3 changed files with 155 additions and 36 deletions.
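
For context, a minimal sketch of the scenario this commit fixes, adapted from the new TestStringDatetime test added below: a text-typed dataset or attribute whose value happens to match the ISO 8601 pattern should pass validation instead of being reported as a dtype mismatch. The namespace name 'test_ns' is an arbitrary placeholder for this sketch, not something defined by the library.

from hdmf.build import DatasetBuilder, GroupBuilder
from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace
from hdmf.validate import ValidatorMap

# Register a spec whose fields are plain text.
spec_catalog = SpecCatalog()
spec = GroupSpec(
    doc='A test group specification with a data type',
    data_type_def='Bar',
    datasets=[DatasetSpec(doc='an example scalar dataset', dtype='text', name='data1')],
    attributes=[AttributeSpec(name='attr1', doc='an example scalar attribute', dtype='text')],
)
spec_catalog.register_spec(spec, 'test.yaml')
namespace = SpecNamespace(
    'a test namespace', 'test_ns', [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog
)
vmap = ValidatorMap(namespace)

# The values below look like ISO dates, but the spec says 'text'. Before this
# fix, the validator typed them as 'isodatetime' and reported a DtypeError.
builder = GroupBuilder(
    name='my_bar',
    attributes={'data_type': 'Bar', 'attr1': '2023-01-01'},
    datasets=[DatasetBuilder(name='data1', data='2023-01-01')],
)
assert len(vmap.validate(builder)) == 0
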
4 changes: 1 addition & 3 deletions CHANGELOG.md
@@ -4,8 +4,6 @@

### Enhancements
- Add Data.set_data_io(), which allows for setting a `DataIO` to a data object after-the-fact. @bendichter and @CodyCBakerPhD [#1013](https://github.com/hdmf-dev/hdmf/pull/1013)

### Enhancements
- Added `add_ref_termset`, updated helper methods for `HERD`, revised `add_ref` to support validations prior to populating the tables
and added `add_ref_container`. @mavaylon1 [#968](https://github.com/hdmf-dev/hdmf/pull/968)
- Use `stacklevel` in most warnings. @rly [#1027](https://github.com/hdmf-dev/hdmf/pull/1027)
@@ -19,8 +17,8 @@
### Bug fixes
- Fixed issue with custom class generation when a spec has a `name`. @rly [#1006](https://github.com/hdmf-dev/hdmf/pull/1006)
- Fixed issue with usage of deprecated `ruamel.yaml.safe_load` in `src/hdmf/testing/validate_spec.py`. @rly [#1008](https://github.com/hdmf-dev/hdmf/pull/1008)

- Fixed issue where `ElementIdentifiers` data could be set to non-integer values. @rly [#1009](https://github.com/hdmf-dev/hdmf/pull/1009)
- Fixed issue where string datasets/attributes with isodatetime-formatted values failed validation against a text spec. @rly [#1026](https://github.com/hdmf-dev/hdmf/pull/1026)

## HDMF 3.11.0 (October 30, 2023)

70 changes: 47 additions & 23 deletions src/hdmf/validate/validator.py
@@ -42,7 +42,7 @@
__allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k))


def check_type(expected, received):
def check_type(expected, received, string_format=None):
'''
*expected* should come from the spec
*received* should come from the data
@@ -52,6 +52,12 @@ def check_type(expected, received):
raise ValueError('compound type shorter than expected')
for i, exp in enumerate(DtypeHelper.simplify_cpd_type(expected)):
rec = received[i]
if exp == "isodatetime": # short circuit for isodatetime
sub_string_format = string_format[i]
return (
rec in __allowable[exp] or
rec in ("utf", "ascii") and sub_string_format == "isodatetime"
)
if rec not in __allowable[exp]:
return False
return True
@@ -71,6 +77,11 @@ def check_type(expected, received):
received = received.name
elif isinstance(received, type):
received = received.__name__
if expected == "isodatetime": # short circuit for isodatetime
return (
received in __allowable[expected] or
(received in ("utf", "ascii") and string_format == "isodatetime")
)
if isinstance(expected, RefSpec):
expected = expected.reftype
elif isinstance(expected, type):
@@ -89,48 +100,58 @@ def get_iso8601_regex():
_iso_re = get_iso8601_regex()


def _check_isodatetime(s, default=None):
def get_string_format(data):
"""Return the string format of the given data. Possible outputs are "isodatetime" and None.
"""
assert isinstance(data, (str, bytes))
try:
if _iso_re.match(pystr(s)) is not None:
if _iso_re.match(pystr(data)) is not None:
return 'isodatetime'
except Exception:
pass
return default
return None


class EmptyArrayError(Exception):
pass


def get_type(data):
def get_type(data, builder_dtype=None):
"""Return a tuple of (the string representation of the type, the format of the string data) for the given data."""
if isinstance(data, str):
return _check_isodatetime(data, 'utf')
return 'utf', get_string_format(data)
elif isinstance(data, bytes):
return _check_isodatetime(data, 'ascii')
return 'ascii', get_string_format(data)
elif isinstance(data, RegionBuilder):
return 'region'
return 'region', None
elif isinstance(data, ReferenceBuilder):
return 'object'
return 'object', None
elif isinstance(data, ReferenceResolver):
return data.dtype
return data.dtype, None
elif isinstance(data, np.ndarray):
if data.size == 0:
raise EmptyArrayError()
return get_type(data[0])
return get_type(data[0], builder_dtype)
elif isinstance(data, np.bool_):
return 'bool'
return 'bool', None
if not hasattr(data, '__len__'):
return type(data).__name__
return type(data).__name__, None
else:
if builder_dtype and isinstance(builder_dtype, list): # compound dtype
dtypes = []
string_formats = []
for i in range(len(builder_dtype)):
dtype, string_format = get_type(data[0][i])
dtypes.append(dtype)
string_formats.append(string_format)
return dtypes, string_formats
if hasattr(data, 'dtype'):
if isinstance(data.dtype, list):
return [get_type(data[0][i]) for i in range(len(data.dtype))]
if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None:
return get_type(data[0])
return data.dtype
return data.dtype, None
if len(data) == 0:
raise EmptyArrayError()
return get_type(data[0])
return get_type(data[0], builder_dtype)


def check_shape(expected, received):
@@ -310,7 +331,7 @@ def validate(self, **kwargs):
if not isinstance(value, BaseBuilder):
expected = '%s reference' % spec.dtype.reftype
try:
value_type = get_type(value)
value_type, _ = get_type(value)
ret.append(DtypeError(self.get_spec_loc(spec), expected, value_type))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
@@ -323,8 +344,8 @@ def validate(self, **kwargs):
ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type))
else:
try:
dtype = get_type(value)
if not check_type(spec.dtype, dtype):
dtype, string_format = get_type(value)
if not check_type(spec.dtype, dtype, string_format):
ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
Expand Down Expand Up @@ -385,14 +406,17 @@ def validate(self, **kwargs):
data = builder.data
if self.spec.dtype is not None:
try:
dtype = get_type(data)
if not check_type(self.spec.dtype, dtype):
dtype, string_format = get_type(data, builder.dtype)
if not check_type(self.spec.dtype, dtype, string_format):
ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype,
location=self.get_builder_loc(builder)))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
pass
shape = get_data_shape(data)
if isinstance(builder.dtype, list):
shape = (len(builder.data), ) # only 1D datasets with compound types are supported
else:
shape = get_data_shape(data)
if not check_shape(self.spec.shape, shape):
if shape is None:
ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data),
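
To summarize the validator changes above outside of the diff: get_type now returns a (dtype, string format) pair, and check_type takes the string format as an optional third argument, so a string that merely looks like a datetime is still typed as 'utf'/'ascii' and judged against the spec's dtype. get_type and check_type are internal helpers of hdmf.validate.validator, so the snippet below is only an illustrative sketch; the commented results are inferred from the new code and tests in this commit, not guaranteed API.

from hdmf.validate.validator import check_type, get_type

dtype, string_format = get_type("2023-01-01")   # expected: ('utf', 'isodatetime')
# A 'text' spec accepts the value even though it looks like a datetime:
assert check_type("text", dtype, string_format)
# An 'isodatetime' spec accepts it because the string format matches:
assert check_type("isodatetime", dtype, string_format)

dtype, string_format = get_type("not a date")   # expected: ('utf', None)
# A non-ISO string still satisfies 'text', but should not satisfy 'isodatetime'
# (the new short circuit only accepts utf/ascii when the format is 'isodatetime'):
assert check_type("text", dtype, string_format)
assert not check_type("isodatetime", dtype, string_format)
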
117 changes: 107 additions & 10 deletions tests/unit/validator_tests/test_validate.py
@@ -116,7 +116,18 @@ def getSpecs(self):
),
DatasetSpec('an example time dataset', 'isodatetime', name='datetime'),
DatasetSpec('an example time dataset', 'isodatetime', name='date', quantity='?'),
DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,))
DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,)),
DatasetSpec(
doc='an array with compound dtype that includes an isodatetime',
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='isodatetime'),
],
name='cpd_array',
dims=('num_times',),
shape=(None,),
quantity="?",
),
],
attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')])
return ret,
@@ -129,7 +140,15 @@ def test_valid_isodatetime(self):
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)),
DatasetBuilder('date', date(2017, 5, 1)),
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]),
DatasetBuilder(
name='cpd_array',
data=[(1, datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))],
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='isodatetime'),
],
),
]
)
validator = self.vmap.get_validator('Bar')
@@ -143,7 +162,7 @@ def test_invalid_isodatetime(self):
datasets=[
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', 100),
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]),
]
)
validator = self.vmap.get_validator('Bar')
@@ -152,18 +171,44 @@ def test_invalid_isodatetime_array(self):
self.assertValidationError(result[0], DtypeError, name='Bar/datetime')

def test_invalid_isodatetime_array(self):
builder = GroupBuilder('my_bar',
attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime',
datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())),
DatasetBuilder('time_array',
datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))])
builder = GroupBuilder(
'my_bar',
attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
datasets=[
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())),
DatasetBuilder('time_array', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())),
],
)
validator = self.vmap.get_validator('Bar')
result = validator.validate(builder)
self.assertEqual(len(result), 1)
self.assertValidationError(result[0], ExpectedArrayError, name='Bar/time_array')

def test_invalid_cpd_isodatetime_array(self):
builder = GroupBuilder(
'my_bar',
attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
datasets=[
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)),
DatasetBuilder('date', date(2017, 5, 1)),
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]),
DatasetBuilder(
name='cpd_array',
data=[(1, "wrong")],
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='isodatetime'),
],
),
],
)
validator = self.vmap.get_validator('Bar')
result = validator.validate(builder)
self.assertEqual(len(result), 1)
self.assertValidationError(result[0], DtypeError, name='Bar/cpd_array')


class TestNestedTypes(ValidatorTestBase):

@@ -508,6 +553,58 @@ def test_empty_nparray(self):
# TODO test shape validation more completely


class TestStringDatetime(TestCase):

def test_str_coincidental_isodatetime(self):
"""Test validation of a text spec allows a string that coincidentally matches the isodatetime format."""
spec_catalog = SpecCatalog()
spec = GroupSpec(
doc='A test group specification with a data type',
data_type_def='Bar',
datasets=[
DatasetSpec(doc='an example scalar dataset', dtype="text", name='data1'),
DatasetSpec(doc='an example 1D dataset', dtype="text", name='data2', shape=(None, )),
DatasetSpec(
doc='an example 1D compound dtype dataset',
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='text'),
],
name='data3',
shape=(None, ),
),
],
attributes=[
AttributeSpec(name='attr1', doc='an example scalar attribute', dtype="text"),
AttributeSpec(name='attr2', doc='an example 1D attribute', dtype="text", shape=(None, )),
]
)
spec_catalog.register_spec(spec, 'test.yaml')
namespace = SpecNamespace(
'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog
)
vmap = ValidatorMap(namespace)

bar_builder = GroupBuilder(
name='my_bar',
attributes={'data_type': 'Bar', 'attr1': "2023-01-01", 'attr2': ["2023-01-01"]},
datasets=[
DatasetBuilder(name='data1', data="2023-01-01"),
DatasetBuilder(name='data2', data=["2023-01-01"]),
DatasetBuilder(
name='data3',
data=[(1, "2023-01-01")],
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='text'),
],
),
],
)
results = vmap.validate(bar_builder)
self.assertEqual(len(results), 0)


class TestLinkable(TestCase):

def set_up_spec(self):
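
As a companion to the new cpd_array tests above, a sketch of how the compound-dtype path behaves: when the builder dtype is a list, get_type returns parallel lists of element dtypes and string formats, and check_type applies the isodatetime rule per field. Again, these are internal helpers, and the commented values are inferred from the new code rather than guaranteed API.

from datetime import datetime
from hdmf.spec import DtypeSpec
from hdmf.validate.validator import check_type, get_type

cpd_dtype = [
    DtypeSpec('x', doc='x', dtype='int'),
    DtypeSpec('y', doc='y', dtype='isodatetime'),
]

# A datetime object in the isodatetime slot validates as before:
dtypes, formats = get_type([(1, datetime(2017, 5, 1, 12, 0, 0))], builder_dtype=cpd_dtype)
# dtypes -> ['int', 'datetime'], formats -> [None, None]
assert check_type(cpd_dtype, dtypes, formats)

# An ISO 8601 string in the isodatetime slot now also passes, via the string format:
dtypes, formats = get_type([(1, '2017-05-01T12:00:00')], builder_dtype=cpd_dtype)
# dtypes -> ['int', 'utf'], formats -> [None, 'isodatetime']
assert check_type(cpd_dtype, dtypes, formats)
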
