Skip to content

Commit

Permalink
Raise an error if CellCollection contains a None or np.nan value.
Browse files Browse the repository at this point in the history
Because HDF5 does not have a value for NULL, we can't save such values.
Upon saving, None is saved as '', which is not correct. See
AllenInstitute/sonata#122 for discussion.

Change-Id: Ib7ab8484d465727d74d0a5f323956374ad8e19b2
  • Loading branch information
asanin-epfl committed Jul 17, 2020
1 parent 25bdda1 commit 71103c2
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 8 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Version 2.7.1
- Deprecated the Hierarchy class in favor of the RegionMap. The Hierarchy class should be removed
in 2.8.0. Redo the docs for the RegionMap object.

- Changed saving of `CellCollection`. Raise an error if there is a `None` or `np.NaN` in
`CellCollection`.

Version 2.7.0
-------------

Expand Down
11 changes: 10 additions & 1 deletion tests/test_cell_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ def test_roundtrip_empty():
check_roundtrip(cells)


def test_roundtrip_none():
# A property holding a missing value must make the roundtrip fail with VoxcellError.
# Both missing-value spellings are exercised in turn: np.nan first, then None.
cells = test_module.CellCollection()
cells.properties['y-factor'] = [0.25, np.nan, 0.75]
assert_raises(VoxcellError, check_roundtrip, cells)
# Reassign the same property so that only the None case is under test here.
cells.properties['y-factor'] = [None, 0.1, 0.75]
assert_raises(VoxcellError, check_roundtrip, cells)


def test_roundtrip_properties_numeric_single():
cells = test_module.CellCollection()
cells.properties['y-factor'] = [0.25, 0.5, 0.75]
Expand Down Expand Up @@ -175,8 +183,9 @@ def test_roundtrip_complex():

cells.positions = random_positions(n)
cells.orientations = random_orientations(n)
cells.properties['all_none'] = pd.Categorical.from_codes(codes=[0] * n, categories=[''])
cells.properties['synapse_class'] = pd.Categorical.from_codes(
codes=[0, 1, 1, 0, 0], categories=['EXC', 'INH'])
codes=[0, 1, 1, 0, 0], categories=['', 'INH'])
cells.properties['mtype'] = ['L5_NGC', 'L5_BTC', 'L5_BTC', 'L6_LBC', 'L6_LBC']
cells.properties['etype'] = ['cADpyr', 'dNAC', 'dNAC', 'bSTUT', 'bSTUT']
cells.properties['morphology'] = [
Expand Down
16 changes: 10 additions & 6 deletions voxcell/cell_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ def save(self, filename):
filename: filepath to write. If it ends with '.mvd3' then it is treated as MVD3,
otherwise as SONATA.
"""
none_properties = self.properties.isnull().any(axis=0)
if none_properties.any():
names = none_properties.index[none_properties].to_list()
raise VoxcellError("Replace `None` in {} properties before saving".format(names))
if str(filename).lower().endswith('mvd3'):
self.save_mvd3(filename)
else:
Expand All @@ -150,14 +154,14 @@ def save_mvd3(self, filename):
# http://docs.h5py.org/en/latest/strings.html
str_dt = h5py.special_dtype(vlen=text_type)
for name, series in self.properties.iteritems():
data = series.values
values = series.to_numpy()
if _is_string_enum(series) and not name.startswith(self.SONATA_DYNAMIC_PROPERTY):
unique_values, indices = np.unique(data, return_inverse=True)
unique_values, indices = np.unique(values, return_inverse=True)
f.create_dataset('cells/properties/' + name, data=indices.astype(np.uint32))
f.create_dataset('library/' + name, data=unique_values, dtype=str_dt)
else:
dt = str_dt if data.dtype == np.object else data.dtype
f.create_dataset('cells/properties/' + name, data=data, dtype=dt)
dt = str_dt if values.dtype == np.object else values.dtype
f.create_dataset('cells/properties/' + name, data=values, dtype=dt)

@classmethod
def load(cls, filename):
Expand Down Expand Up @@ -214,14 +218,14 @@ def save_sonata(self, filename):
group = population.create_group('0')
str_dt = h5py.special_dtype(vlen=text_type)
for name, series in self.properties.iteritems():
values = series.values
values = series.to_numpy()
if name.startswith(self.SONATA_DYNAMIC_PROPERTY):
name = name.split(self.SONATA_DYNAMIC_PROPERTY)[1]
dt = str_dt if series.dtype == np.object else series.dtype
group.create_dataset('dynamics_params/' + name, data=values, dtype=dt)
elif _is_string_enum(series):
unique_values, indices = np.unique(values, return_inverse=True)
if len(unique_values) < len(values):
if len(unique_values) < .5 * len(values):
group.create_dataset(name, data=indices.astype(np.uint32))
group.create_dataset('@library/' + name, data=unique_values, dtype=str_dt)
else:
Expand Down
2 changes: 1 addition & 1 deletion voxcell/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""voxcell version"""
VERSION = '2.7.1.dev1'
VERSION = '2.7.1.dev2'

0 comments on commit 71103c2

Please sign in to comment.