Skip to content

Commit

Permalink
Merge pull request #449 from European-XFEL/units
Browse files Browse the repository at this point in the history
Expose units symbol & name on KeyData objects
  • Loading branch information
takluyver authored Oct 9, 2023
2 parents 377efa3 + 5926e94 commit b532940
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 3 deletions.
4 changes: 4 additions & 0 deletions docs/reading_files.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ below, e.g.::

.. versionadded:: 1.9

.. autoattribute:: units

.. autoattribute:: units_name


The run or file object (a :class:`DataCollection`) also has methods to load
data by sources and keys. :meth:`get_array`, :meth:`get_dask_array` and
Expand Down
50 changes: 48 additions & 2 deletions extra_data/keydata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List, Optional, Tuple

import h5py
import numpy as np

from .exceptions import TrainIDError, NoDataError
Expand Down Expand Up @@ -106,6 +107,30 @@ def size_gb(self):
"""The size of the data in memory in gigabytes."""
return self.nbytes / 1e9

@property
def units(self):
    """Unit symbol of this data with any metric prefix, e.g. 'μJ'.

    Evaluates to None when the stored attributes have no 'unitSymbol'.
    """
    metadata = self.attributes()
    symbol = metadata.get('unitSymbol')
    if symbol is not None:
        scale = metadata.get('metricPrefixSymbol', '')
        # Swap the ASCII stand-in 'u' for a real 'μ'
        return ('μ' if scale == 'u' else scale) + symbol
    return None

@property
def units_name(self):
    """Full unit name of this data with any metric prefix, e.g. 'microjoule'.

    Evaluates to None when the stored attributes have no 'unitName'.
    """
    metadata = self.attributes()
    name = metadata.get('unitName')
    # A missing prefix counts as an empty string, giving the bare unit name
    return None if name is None else metadata.get('metricPrefixName', '') + name

@property
def source_file_paths(self):
paths = []
Expand All @@ -129,6 +154,24 @@ def source_file_paths(self):
from pathlib import Path
return [Path(p) for p in paths]

def attributes(self):
    """Return the attributes stored with this data's dataset as a dict.

    The raw attributes may be awkward to use. See .units and .units_name
    for more convenient forms.
    """
    dataset = self.files[0].file[self.hdf5_data_path]
    found = dict(dataset.attrs)
    if found or not dataset.is_virtual:
        return found

    # Virtual datasets were initially created without these attributes, so
    # read them from a source file instead. source_file_paths is not used
    # for this, as it can give [].
    _vspace, source_filename, _dset_name, _src_space = \
        dataset.virtual_sources()[0]
    # A plain h5py.File is enough here; FileAccess is not used because the
    # train and source lists are not needed.
    with h5py.File(source_filename, 'r') as source_file:
        return dict(source_file[self.hdf5_data_path].attrs)

def select_trains(self, trains):
"""Select a subset of trains in this data as a new :class:`KeyData` object.
Expand All @@ -144,8 +187,11 @@ def __getitem__(self, item):

def _only_tids(self, tids):
tids_arr = np.array(tids)
files = [f for f in self.files
if f.has_train_ids(tids_arr, self.inc_suspect_trains)]
# Keep 1 file, even if 0 trains selected.
files = [
f for f in self.files
if f.has_train_ids(tids_arr, self.inc_suspect_trains)
] or [self.files[0]]

return KeyData(
self.source,
Expand Down
15 changes: 15 additions & 0 deletions extra_data/tests/mockdata/xgm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import numpy as np

from .base import DeviceBase

class XGM(DeviceBase):
Expand Down Expand Up @@ -53,3 +55,16 @@ class XGM(DeviceBase):
('xTD', 'f4', (1000,)),
('yTD', 'f4', (1000,)),
]

def write_instrument(self, f):
    """Write the instrument data, then attach unit attributes to intensityTD."""
    super().write_instrument(f)

    # Annotate intensityTD with some units so tests can retrieve them.
    # Karabo stores ASCII strings; assigning bytes is a shortcut to mimic that.
    intensity = f[f'INSTRUMENT/{self.device_id}:output/data/intensityTD']
    unit_attrs = {
        'metricPrefixEnum': np.array([14], dtype=np.int32),
        'metricPrefixName': b'micro',
        'metricPrefixSymbol': b'u',
        'unitEnum': np.array([15], dtype=np.int32),
        'unitName': b'joule',
        'unitSymbol': b'J',
    }
    for attr_name, attr_value in unit_attrs.items():
        intensity.attrs[attr_name] = attr_value
23 changes: 22 additions & 1 deletion extra_data/tests/test_keydata.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_select_trains(mock_spb_raw_run):
# Empty selection
sel2 = xgm_beam_x[80:]
assert sel2.shape == (0,)
assert len(sel2.files) == 0
assert len(sel2.files) == 1
assert sel2.xarray().shape == (0,)

# Single train
Expand Down Expand Up @@ -339,3 +339,24 @@ def test_file_no_trains(run_with_file_no_trains):
run = RunDirectory(run_with_file_no_trains)
xpos = run['SPB_XTD9_XGM/DOOCS/MAIN', 'beamPosition.ixPos'].ndarray()
assert xpos.shape == (64,)


def test_attributes(mock_sa3_control_data):
    """attributes() gives a plain dict containing the stored unit metadata."""
    h5file = H5File(mock_sa3_control_data)
    intensity_kd = h5file['SA3_XTD10_XGM/XGM/DOOCS:output', 'data.intensityTD']
    stored = intensity_kd.attributes()

    assert isinstance(stored, dict)
    assert stored['metricPrefixName'] == 'micro'
    assert stored['unitSymbol'] == 'J'


def test_units(mock_sa3_control_data):
    """units and units_name join the metric prefix and the base unit."""
    intensity_kd = H5File(mock_sa3_control_data)[
        'SA3_XTD10_XGM/XGM/DOOCS:output', 'data.intensityTD'
    ]

    assert intensity_kd.units == 'μJ'
    assert intensity_kd.units_name == 'microjoule'

    # The units must still be available after selecting 0 trains
    no_trains = intensity_kd.select_trains(np.s_[:0])
    assert no_trains.units == 'μJ'
2 changes: 2 additions & 0 deletions extra_data/tests/test_voview.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ def test_use_voview(mock_spb_raw_run, tmp_path):
assert {p.name for p in xgm_intens[:30].source_file_paths} == {
'RAW-R0238-DA01-S00000.h5'
}
assert xgm_intens.units == 'μJ'
assert xgm_intens.units_name == 'microjoule'



Expand Down

0 comments on commit b532940

Please sign in to comment.