diff --git a/dask/benchmarks/array_block.py b/dask/benchmarks/array_block.py
new file mode 100644
index 0000000..1269b9c
--- /dev/null
+++ b/dask/benchmarks/array_block.py
@@ -0,0 +1,106 @@
+from dask.optimization import fuse_linear
+from dask import array as da
+import numpy as np
+from .common import DaskSuite
+
+
+class Block3D(DaskSuite):
+    """This creates a (5n, 5n, 5n) block array.
+
+    This is very similar to the numpy benchmark Block3D.
+
+    In this benchmark, we compare the performance of dask.array.block
+    to that of numpy.block and to a direct memory copy of the
+    constituent arrays.
+
+    We also compare the optimized (linearly fused) and unoptimized
+    versions of the computation, as well as the performance of
+    concatenating raveled (1D) versions of the dask arrays.
+
+    Finally, we check that calling persist on an already-persisted
+    3D block returns in minimal time, i.e. without copying memory
+    around.
+    """
+    # Having all the modes as one parameter puts them on the same
+    # plot, as opposed to being displayed as separate benchmarks.
+    params = [[1, 10, 100],
+              ['block', 'block optimized',
+               'block persist', 'block optimized persist',
+               'concatenate',
+               'np_block', 'np_copy']]
+    param_names = ['n', 'mode']
+
+    def setup(self, n, mode):
+        dtype = 'uint64'
+        # Eight blocks that tile a (5n, 5n, 5n) array: each axis is
+        # split into a part of length 2n and a part of length 3n.
+        self.n000 = np.full((2 * n, 2 * n, 2 * n), fill_value=1, dtype=dtype)
+        self.n001 = np.full((2 * n, 2 * n, 3 * n), fill_value=4, dtype=dtype)
+
+        self.n010 = np.full((2 * n, 3 * n, 2 * n), fill_value=3, dtype=dtype)
+        self.n011 = np.full((2 * n, 3 * n, 3 * n), fill_value=5, dtype=dtype)
+
+        self.n100 = np.full((3 * n, 2 * n, 2 * n), fill_value=2, dtype=dtype)
+        self.n101 = np.full((3 * n, 2 * n, 3 * n), fill_value=6, dtype=dtype)
+
+        self.n110 = np.full((3 * n, 3 * n, 2 * n), fill_value=7, dtype=dtype)
+        self.n111 = np.full((3 * n, 3 * n, 3 * n), fill_value=8, dtype=dtype)
+
+        # Single-chunk dask versions of each block, persisted up front
+        # so the timings measure graph handling rather than loading.
+        self.d000 = da.from_array(self.n000, chunks=-1).persist()
+        self.d001 = da.from_array(self.n001, chunks=-1).persist()
+        self.d010 = da.from_array(self.n010, chunks=-1).persist()
+        self.d011 = da.from_array(self.n011, chunks=-1).persist()
+        self.d100 = da.from_array(self.n100, chunks=-1).persist()
+        self.d101 = da.from_array(self.n101, chunks=-1).persist()
+        self.d110 = da.from_array(self.n110, chunks=-1).persist()
+        self.d111 = da.from_array(self.n111, chunks=-1).persist()
+
+        self.np_block = [
+            [
+                [self.n000, self.n001],
+                [self.n010, self.n011],
+            ],
+            [
+                [self.n100, self.n101],
+                [self.n110, self.n111],
+            ]
+        ]
+        self.np_arr_list = [a.flat
+                            for two_d in self.np_block
+                            for one_d in two_d
+                            for a in one_d]
+
+        self.block = [
+            [
+                [self.d000, self.d001],
+                [self.d010, self.d011],
+            ],
+            [
+                [self.d100, self.d101],
+                [self.d110, self.d111],
+            ]
+        ]
+        self.arr_list = [da.ravel(d)
+                         for two_d in self.block
+                         for one_d in two_d
+                         for d in one_d]
+
+        self.da_block = da.block(self.block)
+        self.da_concatenate = da.concatenate(self.arr_list)
+        if mode.startswith('block optimized'):
+            self.da_block.dask, _ = fuse_linear(self.da_block.dask)
+
+    def time_3d(self, n, mode):
+        if mode.startswith('block'):
+            if mode.endswith('persist'):
+                self.da_block.persist()
+            else:
+                self.da_block.compute()
+        elif mode == 'concatenate':
+            self.da_concatenate.compute()
+        elif mode == 'np_block':
+            np.block(self.np_block)
+        else:  # 'np_copy': a plain memory copy of each flattened block
+            [arr.copy() for arr in self.np_arr_list]
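
For reference, a minimal sketch of the operation this benchmark exercises. The 2D shapes and values below are illustrative only (the benchmark itself uses eight 3D blocks); it uses the same da.block, da.from_array, and fuse_linear calls as the file above:

    import numpy as np
    import dask.array as da
    from dask.optimization import fuse_linear

    # Two-by-two grid of blocks whose shapes tile a (5, 5) array.
    nested = [[np.ones((2, 2)), np.zeros((2, 3))],
              [np.zeros((3, 2)), np.full((3, 3), 2.0)]]

    expected = np.block(nested)  # eager numpy assembly

    # Lazy dask assembly from single-chunk dask arrays.
    blocked = da.block([[da.from_array(a, chunks=-1) for a in row]
                        for row in nested])

    # fuse_linear collapses linear task chains in the graph; this is
    # what the 'block optimized' modes do before timing compute().
    fused, _ = fuse_linear(dict(blocked.__dask_graph__()))

    assert np.array_equal(blocked.compute(), expected)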