Fix validation of datetime-formatted strings #1026

Merged · 5 commits · Jan 12, 2024
4 changes: 1 addition & 3 deletions CHANGELOG.md
@@ -4,8 +4,6 @@

### Enhancements
- Add Data.set_data_io(), which allows for setting a `DataIO` to a data object after-the-fact. @bendichter and @CodyCBakerPhD [#1013](https://github.com/hdmf-dev/hdmf/pull/1013)

### Enhancements
- Added `add_ref_termset`, updated helper methods for `HERD`, revised `add_ref` to support validations prior to populating the tables
and added `add_ref_container`. @mavaylon1 [#968](https://github.com/hdmf-dev/hdmf/pull/968)
- Use `stacklevel` in most warnings. @rly [#1027](https://github.com/hdmf-dev/hdmf/pull/1027)
@@ -19,8 +17,8 @@
### Bug fixes
- Fixed issue with custom class generation when a spec has a `name`. @rly [#1006](https://github.com/hdmf-dev/hdmf/pull/1006)
- Fixed issue with usage of deprecated `ruamel.yaml.safe_load` in `src/hdmf/testing/validate_spec.py`. @rly [#1008](https://github.com/hdmf-dev/hdmf/pull/1008)

- Fixed issue where `ElementIdentifiers` data could be set to non-integer values. @rly [#1009](https://github.com/hdmf-dev/hdmf/pull/1009)
- Fixed issue where string datasets/attributes with isodatetime-formatted values failed validation against a text spec. @rly [#1026](https://github.com/hdmf-dev/hdmf/pull/1026)

## HDMF 3.11.0 (October 30, 2023)

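For context before the diffs, here is a minimal sketch of the scenario the new changelog entry describes, assembled from the same spec/builder classes the added tests use. The namespace name `test_core` and all other names here are illustrative, not part of this PR:

```python
from hdmf.build import DatasetBuilder, GroupBuilder
from hdmf.spec import DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace
from hdmf.validate import ValidatorMap

# Register a hypothetical type "Bar" whose only dataset has dtype "text".
spec_catalog = SpecCatalog()
bar_spec = GroupSpec(
    doc='a test group with a scalar text dataset',
    data_type_def='Bar',
    datasets=[DatasetSpec(doc='an example scalar dataset', dtype='text', name='data1')],
)
spec_catalog.register_spec(bar_spec, 'test.yaml')
namespace = SpecNamespace(
    'a test namespace', 'test_core', [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog
)
vmap = ValidatorMap(namespace)

# The string below is ordinary text that merely looks like an ISO 8601 date.
builder = GroupBuilder(
    name='my_bar',
    attributes={'data_type': 'Bar'},
    datasets=[DatasetBuilder(name='data1', data='2023-01-01')],
)

# Before this PR the validator typed the value as "isodatetime" and reported a
# DtypeError against the "text" spec; with this PR the result list is empty.
print(vmap.validate(builder))  # expected: []
```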
70 changes: 47 additions & 23 deletions src/hdmf/validate/validator.py
@@ -42,7 +42,7 @@
__allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k))


def check_type(expected, received):
def check_type(expected, received, string_format=None):
'''
*expected* should come from the spec
*received* should come from the data
@@ -52,6 +52,12 @@
raise ValueError('compound type shorter than expected')
for i, exp in enumerate(DtypeHelper.simplify_cpd_type(expected)):
rec = received[i]
if exp == "isodatetime": # short circuit for isodatetime
sub_string_format = string_format[i]
return (
rec in __allowable[exp] or
rec in ("utf", "ascii") and sub_string_format == "isodatetime"
)
if rec not in __allowable[exp]:
return False
return True
@@ -71,6 +77,11 @@
received = received.name
elif isinstance(received, type):
received = received.__name__
if expected == "isodatetime": # short circuit for isodatetime
return (
received in __allowable[expected] or
(received in ("utf", "ascii") and string_format == "isodatetime")
)
if isinstance(expected, RefSpec):
expected = expected.reftype
elif isinstance(expected, type):
@@ -89,48 +100,58 @@
_iso_re = get_iso8601_regex()


def _check_isodatetime(s, default=None):
def get_string_format(data):
"""Return the string format of the given data. Possible outputs are "isodatetime" and None.
"""
assert isinstance(data, (str, bytes))
try:
if _iso_re.match(pystr(s)) is not None:
if _iso_re.match(pystr(data)) is not None:
return 'isodatetime'
except Exception:
pass
return default
return None


class EmptyArrayError(Exception):
pass


def get_type(data):
def get_type(data, builder_dtype=None):
"""Return a tuple of (the string representation of the type, the format of the string data) for the given data."""
if isinstance(data, str):
return _check_isodatetime(data, 'utf')
return 'utf', get_string_format(data)
elif isinstance(data, bytes):
return _check_isodatetime(data, 'ascii')
return 'ascii', get_string_format(data)
elif isinstance(data, RegionBuilder):
return 'region'
return 'region', None

elif isinstance(data, ReferenceBuilder):
return 'object'
return 'object', None

elif isinstance(data, ReferenceResolver):
return data.dtype
return data.dtype, None
elif isinstance(data, np.ndarray):
if data.size == 0:
raise EmptyArrayError()
return get_type(data[0])
return get_type(data[0], builder_dtype)
elif isinstance(data, np.bool_):
return 'bool'
return 'bool', None
if not hasattr(data, '__len__'):
return type(data).__name__
return type(data).__name__, None
else:
if builder_dtype and isinstance(builder_dtype, list): # compound dtype
dtypes = []
string_formats = []
for i in range(len(builder_dtype)):
dtype, string_format = get_type(data[0][i])
dtypes.append(dtype)
string_formats.append(string_format)
return dtypes, string_formats
if hasattr(data, 'dtype'):
if isinstance(data.dtype, list):
return [get_type(data[0][i]) for i in range(len(data.dtype))]
if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None:
return get_type(data[0])
return data.dtype
return data.dtype, None
if len(data) == 0:
raise EmptyArrayError()
return get_type(data[0])
return get_type(data[0], builder_dtype)


def check_shape(expected, received):
@@ -310,7 +331,7 @@
if not isinstance(value, BaseBuilder):
expected = '%s reference' % spec.dtype.reftype
try:
value_type = get_type(value)
value_type, _ = get_type(value)

ret.append(DtypeError(self.get_spec_loc(spec), expected, value_type))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
@@ -323,8 +344,8 @@
ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type))
else:
try:
dtype = get_type(value)
if not check_type(spec.dtype, dtype):
dtype, string_format = get_type(value)
if not check_type(spec.dtype, dtype, string_format):
ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
@@ -385,14 +406,17 @@
data = builder.data
if self.spec.dtype is not None:
try:
dtype = get_type(data)
if not check_type(self.spec.dtype, dtype):
dtype, string_format = get_type(data, builder.dtype)
if not check_type(self.spec.dtype, dtype, string_format):
ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype,
location=self.get_builder_loc(builder)))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
pass
shape = get_data_shape(data)
if isinstance(builder.dtype, list):
shape = (len(builder.data), ) # only 1D datasets with compound types are supported
else:
shape = get_data_shape(data)
if not check_shape(self.spec.shape, shape):
if shape is None:
ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data),
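The heart of the validator change above: `get_type` now returns a `(type, string format)` pair instead of a bare type name, and `check_type` accepts the format as an optional third argument, so a `utf`/`ascii` value satisfies an `isodatetime` spec only when the string actually matches the ISO 8601 regex. A rough sketch of the new contract; the printed values are what the diff implies, not captured output:

```python
from hdmf.validate.validator import check_type, get_type

# get_type returns (type name, string format). The format is "isodatetime"
# only when the string matches the ISO 8601 regex, otherwise None.
print(get_type('hello'))       # ('utf', None)
print(get_type('2023-01-01'))  # ('utf', 'isodatetime')

# check_type consults the format only when the spec expects "isodatetime".
dtype, fmt = get_type('2023-01-01')
print(check_type('text', dtype, fmt))         # True: any utf/ascii string satisfies a text spec
print(check_type('isodatetime', dtype, fmt))  # True: the string format matches the spec
print(check_type('isodatetime', *get_type('hello')))  # False: plain text is not an isodatetime
```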
117 changes: 107 additions & 10 deletions tests/unit/validator_tests/test_validate.py
@@ -116,7 +116,7 @@ def getSpecs(self):
),
DatasetSpec('an example time dataset', 'isodatetime', name='datetime'),
DatasetSpec('an example time dataset', 'isodatetime', name='date', quantity='?'),
DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,))
DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,)),
DatasetSpec(
doc='an array with compound dtype that includes an isodatetime',
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='isodatetime'),
],
name='cpd_array',
dims=('num_times',),
shape=(None,),
quantity="?",
),
],
attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')])
return ret,
@@ -129,7 +140,15 @@ def test_valid_isodatetime(self):
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)),
DatasetBuilder('date', date(2017, 5, 1)),
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]),
DatasetBuilder(
name='cpd_array',
data=[(1, datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))],
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='isodatetime'),
],
),
]
)
validator = self.vmap.get_validator('Bar')
@@ -143,7 +162,7 @@ def test_invalid_isodatetime(self):
datasets=[
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', 100),
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]),
]
)
validator = self.vmap.get_validator('Bar')
@@ -152,18 +171,44 @@
self.assertValidationError(result[0], DtypeError, name='Bar/datetime')

def test_invalid_isodatetime_array(self):
builder = GroupBuilder('my_bar',
attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime',
datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())),
DatasetBuilder('time_array',
datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))])
builder = GroupBuilder(
'my_bar',
attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
datasets=[
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())),
DatasetBuilder('time_array', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())),
],
)
validator = self.vmap.get_validator('Bar')
result = validator.validate(builder)
self.assertEqual(len(result), 1)
self.assertValidationError(result[0], ExpectedArrayError, name='Bar/time_array')

def test_invalid_cpd_isodatetime_array(self):
builder = GroupBuilder(
'my_bar',
attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
datasets=[
DatasetBuilder('data', 100, attributes={'attr2': 10}),
DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)),
DatasetBuilder('date', date(2017, 5, 1)),
DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]),
DatasetBuilder(
name='cpd_array',
data=[(1, "wrong")],
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='isodatetime'),
],
),
],
)
validator = self.vmap.get_validator('Bar')
result = validator.validate(builder)
self.assertEqual(len(result), 1)
self.assertValidationError(result[0], DtypeError, name='Bar/cpd_array')


class TestNestedTypes(ValidatorTestBase):

@@ -508,6 +553,58 @@ def test_empty_nparray(self):
# TODO test shape validation more completely


class TestStringDatetime(TestCase):

def test_str_coincidental_isodatetime(self):
"""Test validation of a text spec allows a string that coincidentally matches the isodatetime format."""
spec_catalog = SpecCatalog()
spec = GroupSpec(
doc='A test group specification with a data type',
data_type_def='Bar',
datasets=[
DatasetSpec(doc='an example scalar dataset', dtype="text", name='data1'),
DatasetSpec(doc='an example 1D dataset', dtype="text", name='data2', shape=(None, )),
DatasetSpec(
doc='an example 1D compound dtype dataset',
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='text'),
],
name='data3',
shape=(None, ),
),
],
attributes=[
AttributeSpec(name='attr1', doc='an example scalar attribute', dtype="text"),
AttributeSpec(name='attr2', doc='an example 1D attribute', dtype="text", shape=(None, )),
]
)
spec_catalog.register_spec(spec, 'test.yaml')
namespace = SpecNamespace(
'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog
)
vmap = ValidatorMap(namespace)

bar_builder = GroupBuilder(
name='my_bar',
attributes={'data_type': 'Bar', 'attr1': "2023-01-01", 'attr2': ["2023-01-01"]},
datasets=[
DatasetBuilder(name='data1', data="2023-01-01"),
DatasetBuilder(name='data2', data=["2023-01-01"]),
DatasetBuilder(
name='data3',
data=[(1, "2023-01-01")],
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='text'),
],
),
],
)
results = vmap.validate(bar_builder)
self.assertEqual(len(results), 0)


class TestLinkable(TestCase):

def set_up_spec(self):
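Finally, the compound-dtype path the new `cpd_array` tests exercise: with a list builder dtype, `get_type` returns parallel lists of per-field types and string formats, which `check_type` then checks field by field against the compound spec. A sketch under the assumption that a plain Python list of tuples stands in for the on-disk data, with illustrative values:

```python
from hdmf.spec import DtypeSpec
from hdmf.validate.validator import check_type, get_type

# A 1-D compound dataset of (int, isodatetime), mirroring the cpd_array spec above.
cpd_dtype = [
    DtypeSpec('x', doc='x', dtype='int'),
    DtypeSpec('y', doc='y', dtype='isodatetime'),
]
data = [(1, '2017-05-01T12:00:00')]  # the datetime field is stored as an ISO 8601 string

# Passing the builder dtype triggers the compound branch of get_type.
dtypes, string_formats = get_type(data, cpd_dtype)
print(dtypes)          # expected: ['int', 'utf']
print(string_formats)  # expected: [None, 'isodatetime']

# check_type accepts a utf/ascii field for an isodatetime sub-dtype only when
# its string format is "isodatetime".
print(check_type(cpd_dtype, dtypes, string_formats))                # expected: True
print(check_type(cpd_dtype, *get_type([(1, 'wrong')], cpd_dtype)))  # expected: False
```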