Skip to content

Commit

Permalink
Bool indexing, tests (#17)
Browse files Browse the repository at this point in the history
 * add 1-D boolean indexing support
 * add tests
 * add requirements.txt
  • Loading branch information
d-sot authored Feb 7, 2020
1 parent 12437c1 commit aef6710
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 38 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install .
- name: Test with pytest
run: |
Expand Down
41 changes: 25 additions & 16 deletions lazy_ops/lazy_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import h5py
import numpy as np


class DatasetView(h5py.Dataset):

def __init__(self, dataset: h5py.Dataset = None, slice_index=(np.index_exp[:],()), axis_order=None):
Expand All @@ -26,8 +25,10 @@ def __init__(self, dataset: h5py.Dataset = None, slice_index=(np.index_exp[:],()
Returns:
lazy object of the view
"""

h5py.Dataset.__init__(self, dataset.id)
if dataset is None or isinstance(dataset,h5py.Dataset) is False:
raise TypeError("DatasetView requires a h5py.Dataset as positional argument")
else:
h5py.Dataset.__init__(self, dataset.id)
if axis_order is None:
self._axis_order = tuple(range(len(dataset.shape)))
else:
Expand Down Expand Up @@ -72,7 +73,7 @@ def _slice_tuple(self, key):
Returns:
The slice object tuple
"""
if isinstance(key, (slice,int)):
if isinstance(key, (slice,int,np.ndarray)):
key = key,
else:
key = *key,
Expand All @@ -88,9 +89,9 @@ def _slice_shape(self, slice_):
slice_key: An equivalent slice tuple with positive starts and stops
int_index: a nested tuple, int_index records the information needed by dsetread to access data
Each element of int_index, denoted ind is given by:
int_index[2] is the dataset axis at which the integer index operates
int_index[1] is the value of the integer index entered by the user
int_index[0] is the lazy_axis at which the integer index operates
ind[2] is the dataset axis at which the integer index operates
ind[1] is the value of the integer index entered by the user
ind[0] is the lazy_axis at which the integer index operates
,the lazy_axis is the axis number had the operations
been carried out by h5py instead of lazy_ops
axis_order: removes the elements of current axis_order where integer indexing has been applied
Expand Down Expand Up @@ -220,20 +221,28 @@ def _slice_composition(self, new_slice):
slice_result += (new_slice[i],)
else:
try:
if any(not isinstance(el,int) for el in new_slice[i]):
raise ValueError("Indices must be integers")
if not all(isinstance(el,int) for el in new_slice[i]):
if new_slice[i].dtype.kind != 'b':
raise ValueError("Indices must be either integers or booleans")
else:
# boolean indexing
if len(new_slice[i]) != self.shape[i]:
raise IndexError("Length of boolean index %d must be equal to size %d in dim %d" % (len(new_slice[i]),self.shape[i],i))
new_slice_i = new_slice[i].nonzero()[0]
else:
new_slice_i = new_slice[i]
if i < len(self.key):
if any(el >= self._shape[i] or el <= ~self._shape[i] for el in new_slice[i]):
raise IndexError("Index %s out of range, dim %d of size %d" % (str(new_slice[i]),i,self._shape[i]))
if any(el >= self._shape[i] or el <= ~self._shape[i] for el in new_slice_i):
raise IndexError("Index %s out of range, dim %d of size %d" % (str(new_slice_i),i,self._shape[i]))
if isinstance(self.key[i],slice):
slice_result += (tuple(self.key[i].start + self.key[i].step*(ind%self._shape[i]) for ind in new_slice[i]),)
slice_result += (tuple(self.key[i].start + self.key[i].step*(ind%self._shape[i]) for ind in new_slice_i),)
else:
# self.key[i] is an iterator of integers
slice_result += (tuple(self.key[i][ind] for ind in new_slice[i]),)
slice_result += (tuple(self.key[i][ind] for ind in new_slice_i),)
else:
slice_result += (new_slice[i],)
slice_result += (new_slice_i,)
except:
raise IndexError("Indices must be either integers, iterators of integers, or slice objects")
raise IndexError("Indices must be either integers, iterators of integers, slice objects, or numpy boolean arrays")
slice_result += self.key[len(new_slice):]

return slice_result
Expand Down Expand Up @@ -273,7 +282,7 @@ def _ellipsis_slices(self, new_slice):
Returns:
equivalent slices with Ellipsis expanded
"""
ellipsis_count = new_slice.count(Ellipsis)
ellipsis_count = sum(s==Ellipsis for s in new_slice if not isinstance(s,np.ndarray))
if ellipsis_count == 1:
ellipsis_index = new_slice.index(Ellipsis)
if ellipsis_index == len(new_slice)-1:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
h5py
157 changes: 135 additions & 22 deletions tests/test_dsetview.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def setUp(self):

self.ndims = 7
num_datasets = 5
self.srand = secrets.SystemRandom()
self.dset_list = list(self.h5py_file.create_dataset(name='dset'+str(i),
data=np.random.rand(*self.srand.choices(range(1, 90//self.ndims), k=self.ndims)))
for i in range(num_datasets))
Expand Down Expand Up @@ -65,15 +64,46 @@ def _array_indexing(cls,shape):
return tuple(slice(None,None,None) if i != single_array_dim else single_array_indexing
for i in range(len(shape)))

@classmethod
def _bool_indexing(cls, shape):
    """Return an index tuple of full slices plus one random boolean mask.

    One axis of ``shape`` is chosen at random and indexed with a 1-D
    boolean ``np.ndarray`` whose length equals the size of that axis;
    every other axis receives ``slice(None, None, None)``.
    """
    ndim = len(shape)
    mask_axis = cls.srand.randrange(0, ndim)
    mask = np.array(cls.srand.choices([True, False], k=shape[mask_axis]))
    # Start from "take everything" on each axis, then drop the mask in place.
    index = [slice(None, None, None)] * ndim
    index[mask_axis] = mask
    return tuple(index)

@classmethod
def _slices_and_int(cls,shape):
''' find an appropriate tuple of slices and integers '''
return tuple(slice(cls.srand.randint(~s-1, s+1), cls.srand.randint(~s-1, s+1),
cls.srand.randint(1, s))
if cls.srand.choice([True, False]) else
if cls.srand.choice([True, False]) else
cls.srand.randint(0, s-1)
for s in shape)

@classmethod
def _slices_and_array(cls, shape, single_array_dim):
    """Return an index tuple of random slices plus one integer-list index.

    The axis ``single_array_dim`` is indexed with a sorted list of distinct
    positions drawn from that axis (the list may be empty); every other
    axis gets a slice with random start, stop and positive step.
    """
    axis_len = shape[single_array_dim]
    how_many = cls.srand.randrange(0, axis_len)
    picked = cls.srand.sample(range(axis_len), how_many)
    picked.sort()
    draw = cls.srand.randint
    index = []
    for axis, size in enumerate(shape):
        if axis == single_array_dim:
            index.append(picked)
        else:
            start = draw(~size - 1, size + 1)
            stop = draw(~size - 1, size + 1)
            step = draw(1, size)
            index.append(slice(start, stop, step))
    return tuple(index)

@classmethod
def _slices_and_bool(cls, shape, single_array_dim):
    """Return an index tuple of random slices plus one boolean mask.

    The axis ``single_array_dim`` is indexed with a random 1-D boolean
    ``np.ndarray`` of matching length; every other axis gets a slice with
    random start, stop and positive step.
    """
    mask = np.array(cls.srand.choices([True, False], k=shape[single_array_dim]))
    draw = cls.srand.randint
    index = []
    for axis, size in enumerate(shape):
        if axis == single_array_dim:
            index.append(mask)
        else:
            start = draw(~size - 1, size + 1)
            stop = draw(~size - 1, size + 1)
            step = draw(1, size)
            index.append(slice(start, stop, step))
    return tuple(index)


##########################################
# basic tests #
##########################################
Expand Down Expand Up @@ -105,6 +135,12 @@ def test_dsetview_lazy_slice(self):
slices = self._slices(self.dset.shape)
assert_array_equal(self.dset[slices], self.dsetview.lazy_slice[slices])

@dset_iterator
def test_dsetview_lazy_slice_bool(self):
    """Compare a lazy_slice read with h5py for one boolean mask over all dims."""
    mask_index = self._bool_indexing(self.dset.shape)
    expected = self.dset[mask_index]
    # __getitem__ read after lazy_slice, single indexing call
    actual = self.dsetview.lazy_slice[mask_index]
    assert_array_equal(expected, actual)

@dset_iterator
def test_dsetview_lazy_slice_lower_dimensions(self):
for num_slice_dims in range(1, len(self.dset.shape)+1):
Expand Down Expand Up @@ -136,6 +172,19 @@ def test_dsetview_lazy_slice_array_indexing(self):
# array indexing only
assert_array_equal(self.dset[indexing], self.dsetview.lazy_slice[indexing])

@dset_iterator
def test_dsetview_lazy_slice_bool_indexing(self):
    """Check boolean-mask indexing on the leading 2..N dimensions."""
    ndim = len(self.dset.shape)
    # Start at two dimensions: dset[(1-D bool np.ndarray,)] is invalid in h5py,
    # whereas dset[(1-D bool np.ndarray, slice(None))] is valid.
    for num_slice_dims in range(2, ndim + 1):
        mask_index = self._bool_indexing(self.dset.shape[:num_slice_dims])
        # direct __getitem__ read specifying only the lower dimensions
        expected_direct = self.dset[mask_index]
        assert_array_equal(expected_direct, self.dsetview[mask_index])
        # __getitem__ read after lazy_slice, for lower and all dimensions,
        # using boolean indexing only
        expected_lazy = self.dset[mask_index]
        assert_array_equal(expected_lazy, self.dsetview.lazy_slice[mask_index])

@dset_iterator
def test_dsetview_lazy_iter(self):
for axis in range(len(self.dset.shape)):
Expand All @@ -150,6 +199,86 @@ def test_dsetview_lazy_transpose(self):
# test lazy_ops.lazy_transpose
assert_array_equal(np.transpose(self.dset[()], axis),lazy_transpose(self.dsetview, axis))

###########################################
# tests for multiple lazy slice calls #
###########################################

# multi lazy_slice using only slices
@dset_iterator
def test_dsetview_multi_lazy_slice(self):
    """Run the recursive slice-only lazy_slice comparison on the current dataset."""
    reference, view = self.dset, self.dsetview
    self._dsetview_multi_lazy_slice(reference, view)

@classmethod
def _dsetview_multi_lazy_slice(cls, dset, dsetview):
    """Recursively apply random slices to ``dset`` and ``dsetview`` and compare.

    For every leading-dimension count, the same random slices are applied to
    both objects, the results are compared, and the check recurses on the
    results unless the sliced result is empty.
    """
    ndim = len(dset.shape)
    for num_slice_dims in range(1, ndim + 1):
        slices = cls._slices(dset.shape[:num_slice_dims])
        dset_new = dset[slices]
        dsetview_new = dsetview.lazy_slice[slices]
        # __getitem__ read after lazy_slice for lower dimensions
        assert_array_equal(dset_new, dsetview_new)
        if np.prod(dset_new.shape) == 0:
            # nothing left to slice further
            continue
        cls._dsetview_multi_lazy_slice(dset_new, dsetview_new)

# multi lazy_slice using slices and int indexing
@dset_iterator
def test_dsetview_multi_lazy_slice_with_slice_and_int_indexing(self):
    """Run the recursive slice-and-int lazy_slice comparison on the current dataset."""
    reference, view = self.dset, self.dsetview
    self._dsetview_multi_lazy_slice_with_slice_and_int_indexing(reference, view)

@classmethod
def _dsetview_multi_lazy_slice_with_slice_and_int_indexing(cls, dset, dsetview):
    """Recursively compare repeated lazy_slice calls mixing slices and integers.

    For every leading-dimension count, the same random mix of slices and
    integer indices is applied to ``dset`` and ``dsetview``; the results are
    compared and the check recurses unless the sliced result is empty.
    """
    ndim = len(dset.shape)
    for num_slice_dims in range(1, ndim + 1):
        indexing = cls._slices_and_int(dset.shape[:num_slice_dims])
        dset_new = dset[indexing]
        dsetview_new = dsetview.lazy_slice[indexing]
        # __getitem__ read after lazy_slice, for lower and all dimensions,
        # with a combination of slice and int indexing
        assert_array_equal(dset_new, dsetview_new)
        if np.prod(dset_new.shape) == 0:
            continue
        cls._dsetview_multi_lazy_slice_with_slice_and_int_indexing(dset_new, dsetview_new)

# multi lazy_slice using slices and array indexing
@dset_iterator
def test_dsetview_multi_lazy_slice_with_slice_and_array_indexing(self):
    """Run the recursive slice-and-integer-array comparison, depth-limited to 10 calls."""
    remaining_slice_calls = 10
    last_axis = len(self.dset.shape) - 1
    # the axis that receives the integer-array index is chosen once per test
    array_dim = self.srand.randint(0, last_axis)
    self._dsetview_multi_lazy_slice_with_slice_and_array_indexing(
        self.dset, self.dsetview, remaining_slice_calls, array_dim)

@classmethod
def _dsetview_multi_lazy_slice_with_slice_and_array_indexing(cls, dset, dsetview, remaining_slice_calls, array_dim):
    """Recursively compare repeated lazy_slice calls mixing slices and one integer array.

    ``array_dim`` is the axis indexed with an integer list, so at least
    ``array_dim + 1`` leading dimensions are always indexed. Recursion stops
    when the sliced result is empty or ``remaining_slice_calls`` is used up.
    """
    ndim = len(dset.shape)
    for num_slice_dims in range(array_dim + 1, ndim + 1):
        indexing = cls._slices_and_array(dset.shape[:num_slice_dims], array_dim)
        dset_new = dset[indexing]
        dsetview_new = dsetview.lazy_slice[indexing]
        # __getitem__ read after lazy_slice, for lower and all dimensions,
        # with a combination of slice and array indexing
        assert_array_equal(dset_new, dsetview_new)
        if np.prod(dset_new.shape) == 0 or remaining_slice_calls <= 0:
            continue
        cls._dsetview_multi_lazy_slice_with_slice_and_array_indexing(
            dset_new, dsetview_new, remaining_slice_calls - 1, array_dim)

# multi lazy_slice using slices and boolean array indexing
@dset_iterator
def test_dsetview_multi_lazy_slice_with_slice_and_bool_indexing(self):
    """Run the recursive slice-and-boolean-mask comparison, depth-limited to 4 calls."""
    remaining_slice_calls = 4
    last_axis = len(self.dset.shape) - 1
    # The mask axis starts from 1: for axis 0, dset[(1-D bool np.ndarray,)] is
    # invalid in h5py, while dset[(slice(None), 1-D bool np.ndarray)] is valid.
    array_dim = self.srand.randint(1, last_axis)
    self._dsetview_multi_lazy_slice_with_slice_and_bool_indexing(
        self.dset, self.dsetview, remaining_slice_calls, array_dim)

@classmethod
def _dsetview_multi_lazy_slice_with_slice_and_bool_indexing(cls, dset, dsetview, remaining_slice_calls, array_dim):
    """Recursively compare repeated lazy_slice calls mixing slices and one boolean mask.

    ``array_dim`` is the axis indexed with a boolean mask, so at least
    ``array_dim + 1`` leading dimensions are always indexed. Recursion stops
    when the sliced result is empty or ``remaining_slice_calls`` is used up.
    """
    ndim = len(dset.shape)
    for num_slice_dims in range(array_dim + 1, ndim + 1):
        indexing = cls._slices_and_bool(dset.shape[:num_slice_dims], array_dim)
        dset_new = dset[indexing]
        dsetview_new = dsetview.lazy_slice[indexing]
        # __getitem__ read after lazy_slice, for lower and all dimensions,
        # with a combination of slice and bool indexing
        assert_array_equal(dset_new, dsetview_new)
        if np.prod(dset_new.shape) == 0 or remaining_slice_calls <= 0:
            continue
        cls._dsetview_multi_lazy_slice_with_slice_and_bool_indexing(
            dset_new, dsetview_new, remaining_slice_calls - 1, array_dim)

###########################################
# tests for multiple lazy operation calls #
###########################################
Expand Down Expand Up @@ -180,22 +309,6 @@ def _dsetview_multi_lazy_transpose(self, dset, dsetview, remaining_transpose_cal
if remaining_transpose_calls > 0:
self._dsetview_multi_lazy_transpose(dset_new, dsetview_new, remaining_transpose_calls - 1)

# multi lazy_slice using only slices
@dset_iterator
def test_dsetview_multi_lazy_slice(self):
self._dsetview_multi_lazy_slice(self.dset, self.dsetview)

@classmethod
def _dsetview_multi_lazy_slice(cls, dset, dsetview):
for num_slice_dims in range(1, len(dset.shape)+1):
slices = cls._slices(dset.shape[:num_slice_dims])
dset_new = dset[slices]
dsetview_new = dsetview.lazy_slice[slices]
# test __getitem__ read after lazy_slice for lower dimensions
assert_array_equal(dset_new, dsetview_new)
if np.prod(dset_new.shape) != 0:
cls._dsetview_multi_lazy_slice(dset_new, dsetview_new)

# multi lazy_transpose and lazy_slice using only slices
@dset_iterator
def test_dsetview_multi_lazy_ops_with_slice_indexing(self):
Expand Down Expand Up @@ -223,11 +336,11 @@ def _dsetview_multi_lazy_ops_with_slice_indexing(cls, dset, dsetview, remaining_

# multi lazy_transpose and lazy_slice using slices and int
@dset_iterator
def test_dsetview_multi_lazy_slice_with_slice_and_int_indexing(self):
self._dsetview_multi_lazy_slice_with_slice_and_int_indexing(self.dset, self.dsetview)
def test_dsetview_multi_lazy_ops_with_slice_and_int_indexing(self):
self._dsetview_multi_lazy_ops_with_slice_and_int_indexing(self.dset, self.dsetview)

@classmethod
def _dsetview_multi_lazy_slice_with_slice_and_int_indexing(cls, dset, dsetview):
def _dsetview_multi_lazy_ops_with_slice_and_int_indexing(cls, dset, dsetview):
for num_slice_dims in range(1, len(dset.shape)+1):
slices = cls._slices_and_int(dset.shape[:num_slice_dims])
dset_new = dset[slices]
Expand All @@ -237,5 +350,5 @@ def _dsetview_multi_lazy_slice_with_slice_and_int_indexing(cls, dset, dsetview):
# combination of slice and int indexing
assert_array_equal(dset_new, dsetview_new)
if np.prod(dset_new.shape) != 0:
cls._dsetview_multi_lazy_slice_with_slice_and_int_indexing(dset_new, dsetview_new)
cls._dsetview_multi_lazy_ops_with_slice_and_int_indexing(dset_new, dsetview_new)

0 comments on commit aef6710

Please sign in to comment.