From 2a28e9dd044044a704057194a23bd4512e481ed3 Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Wed, 20 Mar 2024 19:02:00 -0400 Subject: [PATCH] handle NaN, Infinity, -Infinity --- .../LindiH5ZarrStore/_h5_attr_to_zarr_attr.py | 12 +++++-- lindi/LindiH5pyFile/LindiH5pyAttributes.py | 32 ++++++++++++------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/lindi/LindiH5ZarrStore/_h5_attr_to_zarr_attr.py b/lindi/LindiH5ZarrStore/_h5_attr_to_zarr_attr.py index 21285a3..054b290 100644 --- a/lindi/LindiH5ZarrStore/_h5_attr_to_zarr_attr.py +++ b/lindi/LindiH5ZarrStore/_h5_attr_to_zarr_attr.py @@ -14,6 +14,14 @@ def _h5_attr_to_zarr_attr(attr: Any, *, label: str = '', h5f: h5py.File): Otherwise, raise NotImplementedError """ + + # first disallow special strings + special_strings = ['NaN', 'Infinity', '-Infinity'] + if isinstance(attr, str) and attr in special_strings: + raise ValueError(f"Special string {attr} not allowed in attribute value at {label}") + if isinstance(attr, bytes) and attr in [x.encode('utf-8') for x in special_strings]: + raise ValueError(f"Special string {attr} not allowed in attribute value at {label}") + if attr is None: return None elif isinstance(attr, bytes): @@ -26,8 +34,8 @@ def _h5_attr_to_zarr_attr(attr: Any, *, label: str = '', h5f: h5py.File): return float(attr) elif np.issubdtype(type(attr), np.bool_): return bool(attr) - elif np.issubdtype(type(attr), np.bytes_): - return attr.decode('utf-8') + elif type(attr) is np.bytes_: + return attr.tobytes().decode('utf-8') elif isinstance(attr, h5py.Reference): return _h5_ref_to_zarr_attr(attr, label=label + '._REFERENCE', h5f=h5f) elif isinstance(attr, list): diff --git a/lindi/LindiH5pyFile/LindiH5pyAttributes.py b/lindi/LindiH5pyFile/LindiH5pyAttributes.py index a5b2f03..4ffb010 100644 --- a/lindi/LindiH5pyFile/LindiH5pyAttributes.py +++ b/lindi/LindiH5pyFile/LindiH5pyAttributes.py @@ -44,18 +44,11 @@ def __getitem__(self, key): if isinstance(val, dict) and "_REFERENCE" in val: return LindiH5pyReference(val["_REFERENCE"]) - # Convert special values - # @rly: doing this saves a lot of headache when loading the nwb file - # but then how can we represent strings that should not be converted? - elif val == 'NaN': - return float('nan') - elif val == 'Infinity': - return float('inf') - elif val == '-Infinity': - return float('-inf') - - else: - return val + # Convert special float values to actual floats (NaN, Inf, -Inf) + # Note that string versions of these values are not supported + val = _decode_nan_inf_ninf_in_attr_val(val) + + return val else: raise ValueError(f"Unknown attrs_type: {self._attrs_type}") @@ -90,3 +83,18 @@ def __repr__(self): def __str__(self): return str(self._attrs) + + +def _decode_nan_inf_ninf_in_attr_val(val): + if isinstance(val, list): + return [_decode_nan_inf_ninf_in_attr_val(v) for v in val] + elif isinstance(val, dict): + return {k: _decode_nan_inf_ninf_in_attr_val(v) for k, v in val.items()} + elif val == 'NaN': + return float('nan') + elif val == 'Infinity': + return float('inf') + elif val == '-Infinity': + return float('-inf') + else: + return val