From 9169b19bf54285a032cc0d08c963d7ec9c335b2a Mon Sep 17 00:00:00 2001
From: Joshua Klein
Date: Tue, 14 Nov 2023 17:33:25 -0500
Subject: [PATCH] Use fixed-width, null-padded strings

---
 psims/mzmlb/writer.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/psims/mzmlb/writer.py b/psims/mzmlb/writer.py
index d2f39c3..7217142 100644
--- a/psims/mzmlb/writer.py
+++ b/psims/mzmlb/writer.py
@@ -426,4 +426,23 @@ def create_buffer(self, name, content):
 
     def _create_fixed_length_attribute(self, group: h5py.Group, name: str, value: str, encoding: str='ascii'):
+        """Write ``value`` to ``group`` as a fixed-length, null-terminated HDF5 string attribute.
+
+        ``encoding`` selects the HDF5 character set: ``'ascii'`` or ``'utf8'``.
+        Any other encoding triggers a warning and the value is stored as UTF-8.
+        """
         encoded = value.encode(encoding)
-        group.attrs.create(name, encoded, dtype=h5py.string_dtype('ascii', len(encoded)))
+        type_id = h5py.h5t.TypeID.copy(h5py.h5t.C_S1)
+        if encoding == 'ascii':
+            type_id.set_cset(h5py.h5t.CSET_ASCII)
+        elif encoding == 'utf8':
+            type_id.set_cset(h5py.h5t.CSET_UTF8)
+        else:
+            warnings.warn(f"{encoding} is not compatible with HDF5, defaulting to UTF8")
+            # Re-encode so the stored bytes match the declared character set.
+            encoded = value.encode('utf8')
+            type_id.set_cset(h5py.h5t.CSET_UTF8)
+        # HDF5 string sizes are measured in bytes, not characters, so size the
+        # type from the encoded payload plus one byte for the NUL terminator.
+        type_id.set_size(len(encoded) + 1)
+        type_id.set_strpad(h5py.h5t.STR_NULLTERM)
+        group.attrs.create(name, encoded, dtype=type_id)