Skip to content

Commit

Permalink
GE P-file reader: adaptive character encoding
Browse files Browse the repository at this point in the history
`ge_read_pfile` and `ge_pfile` assumed UTF-8 encoding for character strings within the P-file; this does not appear to be standard across systems. The suggested patch tries a few likely candidate encodings before falling back on a permissive ASCII decoding.
  • Loading branch information
alexcraven authored Nov 5, 2024
1 parent 3ba4b3c commit bdf8334
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 17 deletions.
32 changes: 19 additions & 13 deletions spec2nii/GE/ge_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,15 @@ def _process_svs_pfile(pfile):
:return: List of NIFTI MRS data objects
:return: List of file name suffixes
"""
psd = pfile.hdr.rhi_psdname.decode('utf-8').lower()
proto = pfile.hdr.rhs_se_desc.decode('utf-8').lower()

assert(pfile.encoding is not None) # encoding should have been set in ge_read_pfile get_mapper

psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower()
proto = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower()
if psd == 'hbcd' and "press" in proto:
print('\nPSD was: ', psd)
print('Proto is: ', proto)
psd = pfile.hdr.rhs_se_desc.decode('utf-8').lower()
psd = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower()
print('PSD updated to: ', psd)

# MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path
Expand Down Expand Up @@ -429,7 +432,10 @@ def _process_mrsi_pfile(pfile):
:return: List of NIFTI MRS data objects
:return: List of file name suffixes
"""
psd = pfile.hdr.rhi_psdname.decode('utf-8').lower()

assert(pfile.encoding is not None) # encoding should have been set in ge_read_pfile get_mapper

psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower()

known_formats = ('probe-p', 'probe-sl', 'slaser_cni', 'presscsi')
if psd not in known_formats:
Expand Down Expand Up @@ -573,37 +579,37 @@ def _populate_metadata(pfile, water_suppressed=True, data_dimensions=None):
# 'Manufacturer'
meta.set_standard_def('Manufacturer', 'GE')
# 'ManufacturersModelName'
meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode('utf-8'))
meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode(pfile.encoding, errors='replace'))
# 'DeviceSerialNumber'
meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode('utf-8'))
meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode(pfile.encoding, errors='replace'))
# 'SoftwareVersions'
meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode('utf-8'))
meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode(pfile.encoding, errors='replace'))
# 'InstitutionName'
meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode('utf-8'))
meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode(pfile.encoding, errors='replace'))
# 'InstitutionAddress'
# Not known
# 'TxCoil'
# Not Known
# 'RxCoil'
meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode('utf-8'), doc='Rx coil name.')
meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode(pfile.encoding, errors='replace'), doc='Rx coil name.')

# # 5.3 Sequence information
# 'SequenceName'
meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode('utf-8'))
meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode(pfile.encoding, errors='replace'))
# 'ProtocolName'
meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode('utf-8'))
meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode(pfile.encoding, errors='replace'))

# # 5.4 Sequence information
# 'PatientPosition'
# Not known
# 'PatientName'
meta.set_standard_def('PatientName', hdr.rhe_patname.decode('utf-8'))
meta.set_standard_def('PatientName', hdr.rhe_patname.decode(pfile.encoding, errors='replace'))
# 'PatientID'
# Not known
# 'PatientWeight'
# Not known
# 'PatientDoB'
meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode('utf-8'))
meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode(pfile.encoding, errors='replace'))
# 'PatientSex'
if hdr.rhe_patsex == 1:
sex_str = 'M'
Expand Down
30 changes: 26 additions & 4 deletions spec2nii/GE/ge_read_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def __init__(self, fname):
self.hdr = None
self.map = None
self.endian = 'little' # def for version >= 11
self.encoding = None

self.read_header()

Expand Down Expand Up @@ -176,10 +177,31 @@ def get_mapper(self):
if self.hdr is None:
return None

psd = self.hdr.rhi_psdname.decode('utf-8').lower()
proto = self.hdr.rhs_se_desc.decode('utf-8').lower()
if psd == 'hbcd' and "press" in proto:
psd = self.hdr.rhs_se_desc.decode('utf-8').lower()
# ARC 20241105 : utf-8 codec is not standard across systems; here, we try a
# couple of likely candidates, falling back on permissive ascii

for encoding, errors in [
("utf-8", "strict"),
("ISO-8859-1", "strict"),
("ascii", "replace"),
]:
try:
psd = self.hdr.rhi_psdname.decode(encoding, errors).lower()
proto = self.hdr.rhs_se_desc.decode(encoding, errors).lower()

# the following is unused in this context, but can inform codec selection
_ = self.hdr.rhe_patname.decode(encoding, errors)

if psd == "hbcd" and "press" in proto:
psd = self.hdr.rhs_se_desc.decode(encoding, errors).lower()
except UnicodeDecodeError as err:
psd = ""
proto = ""
continue
self.encoding = encoding
break

assert(self.encoding is not None) # final codec must have succeeded

# MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path
if psd.endswith('gaba'):
Expand Down

0 comments on commit bdf8334

Please sign in to comment.