From f0eef077a1ae183ad6c2a08c9780ce419a69d405 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 29 Aug 2023 13:29:36 -0700 Subject: [PATCH 01/66] adding support for adding where to unary reduction operations on the python side --- cunumeric/_ufunc/ufunc.py | 6 +---- cunumeric/array.py | 52 ++++++++++++++++++++++++--------------- cunumeric/deferred.py | 6 +++++ cunumeric/eager.py | 2 ++ cunumeric/module.py | 26 +++++++++++--------- 5 files changed, 56 insertions(+), 36 deletions(-) diff --git a/cunumeric/_ufunc/ufunc.py b/cunumeric/_ufunc/ufunc.py index 11800e53f..27692c383 100644 --- a/cunumeric/_ufunc/ufunc.py +++ b/cunumeric/_ufunc/ufunc.py @@ -688,7 +688,7 @@ def reduce( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Union[Any, None] = None, - where: bool = True, + where: Optional[ndarray] = None, ) -> ndarray: """ reduce(array, axis=0, dtype=None, out=None, keepdims=False, initial= ndarray: """a.all(axis=None, out=None, keepdims=False, initial=None, where=True) @@ -1796,7 +1796,7 @@ def any( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Union[int, float, None] = None, - where: Union[bool, ndarray] = True, + where: Union[ndarray, None] = None, ) -> ndarray: """a.any(axis=None, out=None, keepdims=False, initial=None, where=True) @@ -3051,7 +3051,7 @@ def max( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Union[int, float, None] = None, - where: Union[bool, ndarray] = True, + where: Union[ndarray, None] = None, ) -> ndarray: """a.max(axis=None, out=None, keepdims=False, initial=, where=True) @@ -3086,6 +3086,7 @@ def mean( dtype: Union[np.dtype[Any], None] = None, out: Union[ndarray, None] = None, keepdims: bool = False, + where: Union[ndarray, None]=None, ) -> ndarray: """a.mean(axis=None, dtype=None, out=None, keepdims=False) @@ -3120,12 +3121,14 @@ def mean( dtype=dtype, out=out, keepdims=keepdims, + where=where ) else: sum_array = self.sum( axis=axis, dtype=dtype, keepdims=keepdims, + where=where ) if axis is None: divisor = reduce(lambda x, y: x * y, self.shape, 1) @@ -3154,7 +3157,7 @@ def min( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Union[int, float, None] = None, - where: Union[bool, ndarray] = True, + where: Union[ndarray, None] = None, ) -> ndarray: """a.min(axis=None, out=None, keepdims=False, initial=, where=True) @@ -3252,7 +3255,7 @@ def prod( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Union[int, float, None] = None, - where: Union[bool, ndarray] = True, + where: Union[ndarray, None] = None, ) -> ndarray: """a.prod(axis=None, dtype=None, out=None, keepdims=False, initial=1, where=True) @@ -3622,10 +3625,10 @@ def sum( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Union[int, float, None] = None, - where: Union[bool, ndarray] = True, + where: Union[ndarray, None ] = None, ) -> ndarray: """a.sum(axis=None, dtype=None, out=None, keepdims=False, initial=0, - where=True) + where=None) Return the sum of the array elements over the given axis. @@ -4134,7 +4137,7 @@ def _perform_unary_reduction( keepdims: bool = False, args: Union[Any, None] = None, initial: Union[int, float, None] = None, - where: Union[bool, ndarray] = True, + where: Union[ndarray, None] = None, ) -> ndarray: # When 'res_dtype' is not None, the input and output of the reduction # have different types. 
Such reduction operators don't take a dtype of @@ -4156,7 +4159,12 @@ def _perform_unary_reduction( # TODO: Need to require initial to be given when the array is empty # or a where mask is given. - if isinstance(where, ndarray): + + #if not where: + # where = ndarray(shape=(1,), dtype=bool) + # where.fill(True) + + if where and isinstance(where, ndarray): # The where array has to broadcast to the src.shape if np.broadcast_shapes(src.shape, where.shape) != src.shape: raise ValueError( @@ -4191,7 +4199,7 @@ def _perform_unary_reduction( if out is None: out = ndarray( - shape=out_shape, dtype=res_dtype, inputs=(src, where) + shape=out_shape, dtype=res_dtype, inputs=(src,where) ) elif out.shape != out_shape: errmsg = ( @@ -4211,16 +4219,20 @@ def _perform_unary_reduction( ) if where: - result._thunk.unary_reduction( - op, - src._thunk, - cls._get_where_thunk(where, result.shape), - axis, - axes, - keepdims, - args, - initial, - ) + where_thunk = cls._get_where_thunk(where, result.shape) + else: + where_thunk=None + + result._thunk.unary_reduction( + op, + src._thunk, + where_thunk, + axis, + axes, + keepdims, + args, + initial, + ) if result is not out: out._thunk.convert(result._thunk) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 6a56f65db..8dce70e6d 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -3183,6 +3183,9 @@ def unary_reduction( task.add_reduction(lhs, _UNARY_RED_TO_REDUCTION_OPS[op]) task.add_input(rhs_array.base) + if where: + task.add_input(where.base) + task.add_alignment(rhs_array.base, where.base) task.add_scalar_arg(op, ty.int32) task.add_scalar_arg(rhs_array.shape, (ty.int64,)) @@ -3222,6 +3225,9 @@ def unary_reduction( task.add_input(rhs_array.base) task.add_reduction(result, _UNARY_RED_TO_REDUCTION_OPS[op]) + if where: + task.add_input(where.base) + task.add_alignment(rhs_array.base, where.base) task.add_scalar_arg(axis, ty.int32) task.add_scalar_arg(op, ty.int32) diff --git a/cunumeric/eager.py b/cunumeric/eager.py index 680f1b5a1..c16b6ca4a 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -1499,6 +1499,8 @@ def unary_reduction( initial, ) return + if not where: + where=True if op in _UNARY_RED_OPS_WITH_ARG: fn = _UNARY_RED_OPS_WITH_ARG[op] # arg based APIs don't have the following arguments: where, initial diff --git a/cunumeric/module.py b/cunumeric/module.py index e8d933da6..beadb57e8 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -4864,7 +4864,7 @@ def all( axis: Optional[Union[int, tuple[int, ...]]] = None, out: Optional[ndarray] = None, keepdims: bool = False, - where: bool = True, + where: Optional[ndarray] = None, ) -> ndarray: """ Test whether all array elements along a given axis evaluate to True. @@ -4922,7 +4922,7 @@ def any( axis: Optional[Union[int, tuple[int, ...]]] = None, out: Optional[ndarray] = None, keepdims: bool = False, - where: bool = True, + where: Optional[ndarray] = None, ) -> ndarray: """ Test whether any array element along a given axis evaluates to True. 
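The signature change repeated across these hunks is the core of the
Python-side work: `where` stops being a plain `bool` defaulting to `True` and
becomes an optional boolean ndarray defaulting to `None`, which
`_perform_unary_reduction` then validates (the mask must broadcast to
`src.shape`). A minimal usage sketch of the API this series is building, with
made-up values purely for illustration:

    import cunumeric as num

    a = num.array([[1, 2, 3], [4, 5, 6]])
    mask = num.array([[True, False, True], [False, True, False]])

    num.sum(a, where=mask)           # only 1, 3 and 5 contribute, giving 9
    num.mean(a, axis=0, where=mask)  # per-column mean over elements where mask is True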
@@ -5182,7 +5182,7 @@ def prod( out: Optional[ndarray] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: bool = True, + where: Optional[ndarray] = None, ) -> ndarray: """ @@ -5263,7 +5263,7 @@ def sum( out: Optional[ndarray] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: bool = True, + where: Optional[ndarray] = None, ) -> ndarray: """ @@ -5723,7 +5723,7 @@ def nanmin( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Any = True, + where: Optional[ndarray]= None, ) -> ndarray: """ Return minimum of an array or minimum along an axis, ignoring any @@ -5816,7 +5816,7 @@ def nanmax( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Any = True, + where: Optional[dnarray]=None, ) -> ndarray: """ Return the maximum of an array or maximum along an axis, ignoring any @@ -5913,7 +5913,7 @@ def nanprod( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Any = True, + where: Optional[ndarray]=None, ) -> ndarray: """ Return the product of array elements over a given axis treating @@ -6009,7 +6009,7 @@ def nansum( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Any = True, + where: Optionla[ndarray]=None, ) -> ndarray: """ Return the sum of array elements over a given axis treating @@ -6173,7 +6173,7 @@ def amax( out: Optional[ndarray] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: bool = True, + where:Optional[ndarray] = None, ) -> ndarray: """ @@ -6250,7 +6250,7 @@ def amin( out: Optional[ndarray] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: bool = True, + where: Optional[ndarray] = None, ) -> ndarray: """ @@ -7002,6 +7002,7 @@ def mean( dtype: Optional[np.dtype[Any]] = None, out: Optional[ndarray] = None, keepdims: bool = False, + where: Optional[Uniton[ndarray, tuple[bool, ...]]] = None, ) -> ndarray: """ @@ -7043,6 +7044,9 @@ def mean( sub-class' method does not implement `keepdims` any exceptions will be raised. + where : array_like of bool, optional + Elements to include in the mean. 
+ Returns ------- m : ndarray @@ -7058,7 +7062,7 @@ def mean( -------- Multiple GPUs, Multiple CPUs """ - return a.mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims) + return a.mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where) # Histograms From a93748db98c88c12e58033cbb0d2e9a15c51541a Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 29 Aug 2023 21:44:56 -0700 Subject: [PATCH 02/66] adding support for adding where to unary reduction operations on the C++ side --- cunumeric/array.py | 4 - src/cunumeric/unary/scalar_unary_red.h | 2 + .../unary/scalar_unary_red_template.inl | 138 +++++++++++++++++- tests/integration/test_mean.py | 19 +++ 4 files changed, 155 insertions(+), 8 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 527767bee..37b9be051 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -162,10 +162,6 @@ def convert_to_predicate_ndarray(obj: Any) -> bool: # Keep all boolean types as they are if obj is True or obj is False: return obj - # GH #135 - raise NotImplementedError( - "the `where` parameter is currently not supported" - ) def maybe_convert_to_np_ndarray(obj: Any) -> Any: diff --git a/src/cunumeric/unary/scalar_unary_red.h b/src/cunumeric/unary/scalar_unary_red.h index ef96d1bfc..856eeba65 100644 --- a/src/cunumeric/unary/scalar_unary_red.h +++ b/src/cunumeric/unary/scalar_unary_red.h @@ -24,6 +24,8 @@ namespace cunumeric { struct ScalarUnaryRedArgs { const Array& out; const Array& in; + const Array& where; + bool has_where; UnaryRedCode op_code; legate::DomainPoint shape; std::vector args; diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 9f073c716..8739119d4 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -28,8 +28,13 @@ namespace cunumeric { using namespace legate; -template +template struct ScalarUnaryRed { + ScalarUnaryRed(ScalarUnaryRedArgs& args){assert(false);} +}; + +template +struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, false> { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using LHS = typename OP::VAL; @@ -116,6 +121,114 @@ struct ScalarUnaryRed { } }; +template +struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, true> { + using OP = UnaryRedOp; + using LG_OP = typename OP::OP; + using LHS = typename OP::VAL; + using RHS = legate_type_of; + using OUT = AccessorRD; + using IN = AccessorRO; + using WHERE = AccessorRO; + + IN in; + const RHS* inptr; + OUT out; + size_t volume; + Pitches pitches; + Rect rect; + Point origin; + Point shape; + RHS to_find; + bool dense; + WHERE where; + const bool* whereptr; + + struct DenseReduction {}; + struct SparseReduction {}; + + ScalarUnaryRed(ScalarUnaryRedArgs& args) : dense(false) + { + rect = args.in.shape(); + origin = rect.lo; + in = args.in.read_accessor(rect); + volume = pitches.flatten(rect); + shape = args.shape; + + out = args.out.reduce_accessor(); + if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar(); } + + if ((args.has_where==true) && (OP_CODE != UnaryRedCode::CONTAINS)) { + where = args.where.read_accessor(rect); +#ifndef LEGATE_BOUNDS_CHECKS + if (in.accessor.is_dense_row_major(rect) && where.accessor.is_dense_row_major(rect) ) { + dense = true; + inptr = in.ptr(rect); + whereptr = where.ptr(rect); + } +#endif + } + else{ +#ifndef LEGATE_BOUNDS_CHECKS + // Check to see if this is dense or not + if (in.accessor.is_dense_row_major(rect)) { + dense = true; + inptr = 
in.ptr(rect); + } +#endif + } + + } + + __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, DenseReduction) const noexcept + { + if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { + if (inptr[idx] == to_find) { lhs = true; } + } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || + OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { + auto p = pitches.unflatten(idx, origin); + if (whereptr[idx]==true) + OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); + } else { + if (whereptr[idx]==true) + OP::template fold(lhs, OP::convert(inptr[idx], identity)); + } + } + + __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, SparseReduction) const noexcept + { + if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { + auto point = pitches.unflatten(idx, origin); + if (in[point] == to_find) { lhs = true; } + } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || + OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { + auto p = pitches.unflatten(idx, origin); + if (where[p]==true) + OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); + } else { + auto p = pitches.unflatten(idx, origin); + if (where[p]==true) + OP::template fold(lhs, OP::convert(in[p], identity)); + } + } + + void execute() const noexcept + { + auto identity = LG_OP::identity; +#ifndef LEGATE_BOUNDS_CHECKS + // The constexpr if here prevents the DenseReduction from being instantiated for GPU kernels + // which limits compile times and binary sizes. + if constexpr (KIND != VariantKind::GPU) { + // Check to see if this is dense or not + if (dense) { + return ScalarReductionPolicy()(volume, out, identity, *this); + } + } +#endif + return ScalarReductionPolicy()(volume, out, identity, *this); + } +}; + template struct ScalarUnaryRedImpl { template @@ -123,8 +236,13 @@ struct ScalarUnaryRedImpl { { // The operation is always valid for contains if constexpr (UnaryRedOp::valid || OP_CODE == UnaryRedCode::CONTAINS) { - ScalarUnaryRed red(args); - red.execute(); + if (args.has_where){ + ScalarUnaryRed red(args); + red.execute();} + else{ + ScalarUnaryRed red(args); + red.execute(); + } } } }; @@ -155,9 +273,21 @@ static void scalar_unary_red_template(TaskContext& context) shape.dim = 1; shape[0] = 1; } + + bool has_where=false; + if (inputs.size()==2){ + has_where=true; + } + if (has_where==true){ ScalarUnaryRedArgs args{ - context.reductions()[0], inputs[0], op_code, shape, std::move(extra_args)}; + context.reductions()[0], inputs[0], inputs[1], has_where, op_code, shape, std::move(extra_args)}; op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); + }else{ + Array where; + ScalarUnaryRedArgs args{ + context.reductions()[0], inputs[0], where, has_where, op_code, shape, std::move(extra_args)}; + op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); + } } } // namespace cunumeric diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py index 0f9064280..7ecb3cd16 100755 --- a/tests/integration/test_mean.py +++ b/tests/integration/test_mean.py @@ -60,6 +60,13 @@ def test_scalar(val): assert np.array_equal(res_np, res_num) + +@pytest.mark.parametrize("val", (0.0, 10.0, -5, 1 + 1j)) +def test_scalar_where(val): + res_np = np.mean(val, where=True) + res_num = num.mean(val,where=True) + assert np.array_equal(res_np, res_num) + @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_basic(size): arr_np = np.random.randint(-5, 5, size=size) @@ -69,6 +76,18 
@@ def test_basic(size): np.array_equal(res_np, res_num) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_basic_where(size): + arr_np = np.random.randint(-5, 5, size=size) + arr_num = num.array(arr_np) + where_np = arr_np%2 + where_np = arr_np.astype(bool) + where_num = num.array(where_np) + res_np = np.mean(arr_np, where=where_np) + res_num = num.mean(arr_num, where=where_num) + np.array_equal(res_np, res_num) + + @pytest.mark.xfail @pytest.mark.parametrize("axis", ((-3, -1), (-1, 0), (-2, 2), (0, 2))) def test_axis_tuple(axis): From 79c7aca840b531c61a7c4e9d7d019254f7417172 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 29 Aug 2023 22:10:47 -0700 Subject: [PATCH 03/66] formatting --- cunumeric/_ufunc/ufunc.py | 2 +- cunumeric/array.py | 28 +++---- cunumeric/eager.py | 2 +- cunumeric/module.py | 18 ++-- .../unary/scalar_unary_red_template.inl | 82 +++++++++---------- tests/integration/test_mean.py | 6 +- 6 files changed, 66 insertions(+), 72 deletions(-) diff --git a/cunumeric/_ufunc/ufunc.py b/cunumeric/_ufunc/ufunc.py index 27692c383..763efa6db 100644 --- a/cunumeric/_ufunc/ufunc.py +++ b/cunumeric/_ufunc/ufunc.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, Sequence, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Union import numpy as np from legate.core.utils import OrderedSet diff --git a/cunumeric/array.py b/cunumeric/array.py index 37b9be051..70d84f745 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -158,10 +158,11 @@ def convert_to_cunumeric_ndarray(obj: Any, share: bool = False) -> ndarray: return ndarray(shape=None, thunk=thunk, writeable=writeable) -def convert_to_predicate_ndarray(obj: Any) -> bool: +def convert_to_predicate_ndarray(obj: Any) -> Any: # Keep all boolean types as they are if obj is True or obj is False: return obj + return def maybe_convert_to_np_ndarray(obj: Any) -> Any: @@ -3082,7 +3083,7 @@ def mean( dtype: Union[np.dtype[Any], None] = None, out: Union[ndarray, None] = None, keepdims: bool = False, - where: Union[ndarray, None]=None, + where: Union[ndarray, None] = None, ) -> ndarray: """a.mean(axis=None, dtype=None, out=None, keepdims=False) @@ -3113,18 +3114,11 @@ def mean( # Do the sum if out is not None and out.dtype == dtype: sum_array = self.sum( - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - where=where + axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where ) else: sum_array = self.sum( - axis=axis, - dtype=dtype, - keepdims=keepdims, - where=where + axis=axis, dtype=dtype, keepdims=keepdims, where=where ) if axis is None: divisor = reduce(lambda x, y: x * y, self.shape, 1) @@ -3621,7 +3615,7 @@ def sum( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Union[int, float, None] = None, - where: Union[ndarray, None ] = None, + where: Union[ndarray, None] = None, ) -> ndarray: """a.sum(axis=None, dtype=None, out=None, keepdims=False, initial=0, where=None) @@ -4003,7 +3997,7 @@ def find_common_type(*args: ndarray) -> np.dtype[Any]: scalars.append(array.dtype.type(0)) else: array_types.append(array.dtype) - return np.result_type(*array_types, *scalars) + return np.find_common_type(array_types, scalar_types) def _maybe_convert(self, dtype: np.dtype[Any], hints: Any) -> ndarray: if self.dtype == dtype: @@ -4156,7 +4150,7 @@ def _perform_unary_reduction( # TODO: Need to require initial to be given when the array is empty # or a where mask is given. 
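        # A sketch of the flow being tidied here, assuming a boolean mask that
        # broadcasts to src.shape: callers such as sum() and mean() pass the
        # mask through unchanged, it becomes where_thunk a few lines below,
        # and a where of None means no mask reaches the thunk, so every
        # element participates in the reduction.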
- #if not where: + # if not where: # where = ndarray(shape=(1,), dtype=bool) # where.fill(True) @@ -4195,7 +4189,7 @@ def _perform_unary_reduction( if out is None: out = ndarray( - shape=out_shape, dtype=res_dtype, inputs=(src,where) + shape=out_shape, dtype=res_dtype, inputs=(src, where) ) elif out.shape != out_shape: errmsg = ( @@ -4217,8 +4211,8 @@ def _perform_unary_reduction( if where: where_thunk = cls._get_where_thunk(where, result.shape) else: - where_thunk=None - + where_thunk = None + result._thunk.unary_reduction( op, src._thunk, diff --git a/cunumeric/eager.py b/cunumeric/eager.py index c16b6ca4a..d7934d04e 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -1500,7 +1500,7 @@ def unary_reduction( ) return if not where: - where=True + where = True if op in _UNARY_RED_OPS_WITH_ARG: fn = _UNARY_RED_OPS_WITH_ARG[op] # arg based APIs don't have the following arguments: where, initial diff --git a/cunumeric/module.py b/cunumeric/module.py index beadb57e8..f8e60266b 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -5723,7 +5723,7 @@ def nanmin( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Optional[ndarray]= None, + where: Optional[ndarray] = None, ) -> ndarray: """ Return minimum of an array or minimum along an axis, ignoring any @@ -5816,7 +5816,7 @@ def nanmax( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Optional[dnarray]=None, + where: Optional[ndarray] = None, ) -> ndarray: """ Return the maximum of an array or maximum along an axis, ignoring any @@ -5913,7 +5913,7 @@ def nanprod( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Optional[ndarray]=None, + where: Optional[ndarray] = None, ) -> ndarray: """ Return the product of array elements over a given axis treating @@ -6009,7 +6009,7 @@ def nansum( out: Union[ndarray, None] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where: Optionla[ndarray]=None, + where: Optional[ndarray] = None, ) -> ndarray: """ Return the sum of array elements over a given axis treating @@ -6173,7 +6173,7 @@ def amax( out: Optional[ndarray] = None, keepdims: bool = False, initial: Optional[Union[int, float]] = None, - where:Optional[ndarray] = None, + where: Optional[ndarray] = None, ) -> ndarray: """ @@ -7002,7 +7002,7 @@ def mean( dtype: Optional[np.dtype[Any]] = None, out: Optional[ndarray] = None, keepdims: bool = False, - where: Optional[Uniton[ndarray, tuple[bool, ...]]] = None, + where: Optional[ndarray] = None, ) -> ndarray: """ @@ -7045,7 +7045,7 @@ def mean( exceptions will be raised. where : array_like of bool, optional - Elements to include in the mean. + Elements to include in the mean. 
Returns ------- @@ -7062,7 +7062,9 @@ def mean( -------- Multiple GPUs, Multiple CPUs """ - return a.mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where) + return a.mean( + axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where + ) # Histograms diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 8739119d4..710eb334c 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -30,11 +30,11 @@ using namespace legate; template struct ScalarUnaryRed { - ScalarUnaryRed(ScalarUnaryRedArgs& args){assert(false);} + ScalarUnaryRed(ScalarUnaryRedArgs& args) { assert(false); } }; template -struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, false> { +struct ScalarUnaryRed { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using LHS = typename OP::VAL; @@ -122,7 +122,7 @@ struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, false> { }; template -struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, true> { +struct ScalarUnaryRed { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using LHS = typename OP::VAL; @@ -158,26 +158,24 @@ struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, true> { out = args.out.reduce_accessor(); if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar(); } - if ((args.has_where==true) && (OP_CODE != UnaryRedCode::CONTAINS)) { - where = args.where.read_accessor(rect); + if ((args.has_where == true) && (OP_CODE != UnaryRedCode::CONTAINS)) { + where = args.where.read_accessor(rect); #ifndef LEGATE_BOUNDS_CHECKS - if (in.accessor.is_dense_row_major(rect) && where.accessor.is_dense_row_major(rect) ) { - dense = true; - inptr = in.ptr(rect); - whereptr = where.ptr(rect); - } + if (in.accessor.is_dense_row_major(rect) && where.accessor.is_dense_row_major(rect)) { + dense = true; + inptr = in.ptr(rect); + whereptr = where.ptr(rect); + } #endif - } - else{ + } else { #ifndef LEGATE_BOUNDS_CHECKS - // Check to see if this is dense or not - if (in.accessor.is_dense_row_major(rect)) { - dense = true; - inptr = in.ptr(rect); - } + // Check to see if this is dense or not + if (in.accessor.is_dense_row_major(rect)) { + dense = true; + inptr = in.ptr(rect); + } #endif } - } __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, DenseReduction) const noexcept @@ -187,11 +185,10 @@ struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, true> { } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { auto p = pitches.unflatten(idx, origin); - if (whereptr[idx]==true) - OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); + if (whereptr[idx] == true) + OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); } else { - if (whereptr[idx]==true) - OP::template fold(lhs, OP::convert(inptr[idx], identity)); + if (whereptr[idx] == true) OP::template fold(lhs, OP::convert(inptr[idx], identity)); } } @@ -203,12 +200,10 @@ struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, true> { } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { auto p = pitches.unflatten(idx, origin); - if (where[p]==true) - OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); + if (where[p] == true) OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); } else { auto p = 
pitches.unflatten(idx, origin); - if (where[p]==true) - OP::template fold(lhs, OP::convert(in[p], identity)); + if (where[p] == true) OP::template fold(lhs, OP::convert(in[p], identity)); } } @@ -221,7 +216,7 @@ struct ScalarUnaryRed< KIND, OP_CODE, CODE, DIM, true> { if constexpr (KIND != VariantKind::GPU) { // Check to see if this is dense or not if (dense) { - return ScalarReductionPolicy()(volume, out, identity, *this); + return ScalarReductionPolicy()(volume, out, identity, *this); } } #endif @@ -236,10 +231,10 @@ struct ScalarUnaryRedImpl { { // The operation is always valid for contains if constexpr (UnaryRedOp::valid || OP_CODE == UnaryRedCode::CONTAINS) { - if (args.has_where){ - ScalarUnaryRed red(args); - red.execute();} - else{ + if (args.has_where) { + ScalarUnaryRed red(args); + red.execute(); + } else { ScalarUnaryRed red(args); red.execute(); } @@ -274,19 +269,22 @@ static void scalar_unary_red_template(TaskContext& context) shape[0] = 1; } - bool has_where=false; - if (inputs.size()==2){ - has_where=true; - } - if (has_where==true){ - ScalarUnaryRedArgs args{ - context.reductions()[0], inputs[0], inputs[1], has_where, op_code, shape, std::move(extra_args)}; - op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); - }else{ + bool has_where = false; + if (inputs.size() == 2) { has_where = true; } + if (has_where == true) { + ScalarUnaryRedArgs args{context.reductions()[0], + inputs[0], + inputs[1], + has_where, + op_code, + shape, + std::move(extra_args)}; + op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); + } else { Array where; ScalarUnaryRedArgs args{ - context.reductions()[0], inputs[0], where, has_where, op_code, shape, std::move(extra_args)}; - op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); + context.reductions()[0], inputs[0], where, has_where, op_code, shape, std::move(extra_args)}; + op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); } } diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py index 7ecb3cd16..68816cac6 100755 --- a/tests/integration/test_mean.py +++ b/tests/integration/test_mean.py @@ -60,13 +60,13 @@ def test_scalar(val): assert np.array_equal(res_np, res_num) - @pytest.mark.parametrize("val", (0.0, 10.0, -5, 1 + 1j)) def test_scalar_where(val): res_np = np.mean(val, where=True) - res_num = num.mean(val,where=True) + res_num = num.mean(val, where=True) assert np.array_equal(res_np, res_num) + @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_basic(size): arr_np = np.random.randint(-5, 5, size=size) @@ -80,7 +80,7 @@ def test_basic(size): def test_basic_where(size): arr_np = np.random.randint(-5, 5, size=size) arr_num = num.array(arr_np) - where_np = arr_np%2 + where_np = arr_np % 2 where_np = arr_np.astype(bool) where_num = num.array(where_np) res_np = np.mean(arr_np, where=where_np) From a64e82978aaf358dba567d3bd0f9b6697f0b3bf1 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 30 Aug 2023 14:50:59 -0700 Subject: [PATCH 04/66] fixing error when calculation divisor --- cunumeric/array.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 70d84f745..17ac848fd 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3121,9 +3121,15 @@ def mean( axis=axis, dtype=dtype, keepdims=keepdims, where=where ) if axis is None: - divisor = reduce(lambda x, y: x * y, self.shape, 1) + if where: + divisor = np.sum(where) + else: + divisor = reduce(lambda x, y: x * y, self.shape, 1) else: - divisor = self.shape[axis] + if 
where: + divisor = np.sum(where, axis=axis) + else: + divisor = self.shape[axis] # Divide by the number of things in the collapsed dimensions # Pick the right kinds of division based on the dtype if dtype.kind == "f" or dtype.kind == "c": From 6f7b0592924ebba74a13fd51ecbd1fe699f9b71c Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 30 Aug 2023 22:33:04 -0700 Subject: [PATCH 05/66] fixing error with mean(where) --- cunumeric/array.py | 59 ++++++++++++--- cunumeric/deferred.py | 14 ++-- cunumeric/module.py | 74 ++++++++++++++++++- .../unary/scalar_unary_red_template.inl | 12 +-- 4 files changed, 137 insertions(+), 22 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 17ac848fd..855da8ed4 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -161,8 +161,12 @@ def convert_to_cunumeric_ndarray(obj: Any, share: bool = False) -> ndarray: def convert_to_predicate_ndarray(obj: Any) -> Any: # Keep all boolean types as they are if obj is True or obj is False: - return obj - return + where = ndarray(shape=(), dtype=bool) + where.fill(obj) + return where + if obj is None: + return None + return convert_to_cunumeric_ndarray(obj) def maybe_convert_to_np_ndarray(obj: Any) -> Any: @@ -3121,13 +3125,14 @@ def mean( axis=axis, dtype=dtype, keepdims=keepdims, where=where ) if axis is None: - if where: - divisor = np.sum(where) + if where is not None: + divisor = np.sum(where.astype(int)) + else: divisor = reduce(lambda x, y: x * y, self.shape, 1) else: - if where: - divisor = np.sum(where, axis=axis) + if where is not None: + divisor = np.sum(where.astype(int), axis=axis) else: divisor = self.shape[axis] # Divide by the number of things in the collapsed dimensions @@ -3146,6 +3151,40 @@ def mean( else: return sum_array + @add_boilerplate() + def nanmean( + self, + axis: Any = None, + dtype: Union[np.dtype[Any], None] = None, + out: Union[ndarray, None] = None, + keepdims: bool = False, + where: Union[ndarray, None] = None, + ) -> ndarray: + """a.mean(axis=None, dtype=None, out=None, keepdims=False) + + Returns the average of the array elements along given axis, + ignoring NaN entires + + Refer to :func:`cunumeric.mean` for full documentation. 
+ + See Also + -------- + cunumeric.nanmean : equivalent function + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + # nan_where = self != Nan + # if where is not None: + # new_where = where and nan_where + # else: + # new_where = nan_where + new_where = where + + return self.mean(axis, dtype, out, keepdims, new_where) + @add_boilerplate() def min( self, @@ -4160,7 +4199,7 @@ def _perform_unary_reduction( # where = ndarray(shape=(1,), dtype=bool) # where.fill(True) - if where and isinstance(where, ndarray): + if where is not None and isinstance(where, ndarray): # The where array has to broadcast to the src.shape if np.broadcast_shapes(src.shape, where.shape) != src.shape: raise ValueError( @@ -4214,8 +4253,10 @@ def _perform_unary_reduction( shape=out_shape, dtype=res_dtype, inputs=(src, where) ) - if where: - where_thunk = cls._get_where_thunk(where, result.shape) + if where is not None: + where_thunk = ( + where._thunk + ) # cls._get_where_thunk(where, result.shape) else: where_thunk = None diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 8dce70e6d..7f563396b 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -3183,11 +3183,12 @@ def unary_reduction( task.add_reduction(lhs, _UNARY_RED_TO_REDUCTION_OPS[op]) task.add_input(rhs_array.base) - if where: - task.add_input(where.base) - task.add_alignment(rhs_array.base, where.base) task.add_scalar_arg(op, ty.int32) task.add_scalar_arg(rhs_array.shape, (ty.int64,)) + task.add_scalar_arg((where is not None), ty.bool_) + if where is not None: + task.add_input(where.base) + task.add_alignment(rhs_array.base, where.base) self.add_arguments(task, args) @@ -3225,11 +3226,12 @@ def unary_reduction( task.add_input(rhs_array.base) task.add_reduction(result, _UNARY_RED_TO_REDUCTION_OPS[op]) - if where: - task.add_input(where.base) - task.add_alignment(rhs_array.base, where.base) task.add_scalar_arg(axis, ty.int32) task.add_scalar_arg(op, ty.int32) + task.add_scalar_arg((where is not None), ty.bool_) + if where is not None: + task.add_input(where.base) + task.add_alignment(rhs_array.base, where.base) self.add_arguments(task, args) diff --git a/cunumeric/module.py b/cunumeric/module.py index f8e60266b..d738498af 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6995,7 +6995,7 @@ def count_nonzero( # Averages and variances -@add_boilerplate("a") +@add_boilerplate("a", "where") def mean( a: ndarray, axis: Optional[Union[int, tuple[int, ...]]] = None, @@ -7067,6 +7067,78 @@ def mean( ) +@add_boilerplate("a") +def nanmean( + a: ndarray, + axis: Optional[Union[int, tuple[int, ...]]] = None, + dtype: Optional[np.dtype[Any]] = None, + out: Optional[ndarray] = None, + keepdims: bool = False, + where: Optional[ndarray] = None, +) -> ndarray: + """ + + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple[int], optional + Axis or axes along which the means are computed. The default is to + compute the mean of the flattened array. + + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. 
+    dtype : data-type, optional
+        Type to use in computing the mean. For integer inputs, the default
+        is `float64`; for floating point inputs, it is the same as the
+        input dtype.
+    out : ndarray, optional
+        Alternate output array in which to place the result. The default
+        is ``None``; if provided, it must have the same shape as the
+        expected output, but the type will be cast if necessary.
+        See `ufuncs-output-type` for more details.
+
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the input array.
+
+        If the default value is passed, then `keepdims` will not be
+        passed through to the `mean` method of sub-classes of
+        `ndarray`, however any non-default value will be. If the
+        sub-class' method does not implement `keepdims` any
+        exceptions will be raised.
+
+    where : array_like of bool, optional
+        Elements to include in the mean.
+
+    Returns
+    -------
+    m : ndarray
+        If `out=None`, returns a new array of the same dtype as described
+        above, containing the mean values; otherwise a reference to the
+        output array is returned.
+
+    See Also
+    --------
+    numpy.nanmean
+
+    Availability
+    --------
+    Multiple GPUs, Multiple CPUs
+    """
+    return a.nanmean(
+        axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where
+    )
+
+
 # Histograms

diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl
index 710eb334c..034172ba6 100644
--- a/src/cunumeric/unary/scalar_unary_red_template.inl
+++ b/src/cunumeric/unary/scalar_unary_red_template.inl
@@ -258,19 +258,19 @@ static void scalar_unary_red_template(TaskContext& context)
   auto& inputs  = context.inputs();
   auto& scalars = context.scalars();
 
+  auto op_code     = scalars[0].value<UnaryRedCode>();
+  auto shape       = scalars[1].value<DomainPoint>();
+  bool has_where   = scalars[2].value<bool>();
+  size_t start_idx = has_where ? 
2 : 1; std::vector extra_args; - for (size_t idx = 1; idx < inputs.size(); ++idx) extra_args.push_back(std::move(inputs[idx])); - - auto op_code = scalars[0].value(); - auto shape = scalars[1].value(); + for (size_t idx = start_idx; idx < inputs.size(); ++idx) + extra_args.push_back(std::move(inputs[idx])); // If the RHS was a scalar, use (1,) as the shape if (shape.dim == 0) { shape.dim = 1; shape[0] = 1; } - bool has_where = false; - if (inputs.size() == 2) { has_where = true; } if (has_where == true) { ScalarUnaryRedArgs args{context.reductions()[0], inputs[0], From d12fe55f461feb3c19781d5a414fd9bead11dd47 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 1 Sep 2023 09:10:21 -0700 Subject: [PATCH 06/66] updating test --- cunumeric/array.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 855da8ed4..4659cd63e 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -161,9 +161,10 @@ def convert_to_cunumeric_ndarray(obj: Any, share: bool = False) -> ndarray: def convert_to_predicate_ndarray(obj: Any) -> Any: # Keep all boolean types as they are if obj is True or obj is False: - where = ndarray(shape=(), dtype=bool) - where.fill(obj) - return where + # where = ndarray(shape=(), dtype=bool) + # where.fill(obj) + # return where + return obj if obj is None: return None return convert_to_cunumeric_ndarray(obj) @@ -3104,7 +3105,7 @@ def mean( Multiple GPUs, Multiple CPUs """ - if axis is not None and not isinstance(axis, int): + if axis is not None and type(axis) != int: raise NotImplementedError( "cunumeric.mean only supports int types for " "'axis' currently" @@ -3115,6 +3116,13 @@ def mean( dtype = np.dtype(np.float64) else: dtype = self.dtype + + # if self.size == 1: + # if where is True: + # return self.copy() + # if where is False: + # return self.copy() / 0 + # Do the sum if out is not None and out.dtype == dtype: sum_array = self.sum( @@ -3124,12 +3132,14 @@ def mean( sum_array = self.sum( axis=axis, dtype=dtype, keepdims=keepdims, where=where ) + if axis is None: if where is not None: divisor = np.sum(where.astype(int)) else: divisor = reduce(lambda x, y: x * y, self.shape, 1) + else: if where is not None: divisor = np.sum(where.astype(int), axis=axis) @@ -4012,8 +4022,8 @@ def _get_where_thunk( ) -> Union[Literal[True], NumPyThunk]: if where is True: return True - if where is False: - raise RuntimeError("should have caught this earlier") + # if where is False: + # raise RuntimeError("should have caught this earlier") if not isinstance(where, ndarray) or where.dtype != np.bool_: raise RuntimeError("should have converted this earlier") if where.shape != out_shape: @@ -4254,9 +4264,7 @@ def _perform_unary_reduction( ) if where is not None: - where_thunk = ( - where._thunk - ) # cls._get_where_thunk(where, result.shape) + where_thunk = cls._get_where_thunk(where, src.shape) else: where_thunk = None From ec41b527a6929ded12683d314697378b6e7ce4a0 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 1 Sep 2023 11:22:53 -0700 Subject: [PATCH 07/66] adding logic for where in unary_red on C++ --- cunumeric/array.py | 10 +- src/cunumeric/unary/unary_red.cc | 23 ++++ src/cunumeric/unary/unary_red.cu | 124 +++++++++++++++++++++ src/cunumeric/unary/unary_red.h | 1 + src/cunumeric/unary/unary_red_omp.cc | 28 +++++ src/cunumeric/unary/unary_red_template.inl | 41 +++++-- 6 files changed, 213 insertions(+), 14 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 
4659cd63e..d95e2e97e 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3117,11 +3117,11 @@ def mean( else: dtype = self.dtype - # if self.size == 1: - # if where is True: - # return self.copy() - # if where is False: - # return self.copy() / 0 + if self.size == 1: + if where is True: + return self.copy() # type: ignore + if where is False: + return self.copy() / 0 # type: ignore # Do the sum if out is not None and out.dtype == dtype: diff --git a/src/cunumeric/unary/unary_red.cc b/src/cunumeric/unary/unary_red.cc index bec85fc6f..34af69434 100644 --- a/src/cunumeric/unary/unary_red.cc +++ b/src/cunumeric/unary/unary_red.cc @@ -42,6 +42,29 @@ struct UnaryRedImplBody { } }; +template +struct UnaryRedImplBodyWhere { + using OP = UnaryRedOp; + using LG_OP = typename OP::OP; + using RHS = legate_type_of; + + void operator()(AccessorRD lhs, + AccessorRO rhs, + AccessorRO where, + const Rect& rect, + const Pitches& pitches, + int collapsed_dim, + size_t volume) const + { + for (size_t idx = 0; idx < volume; ++idx) { + auto point = pitches.unflatten(idx, rect.lo); + auto identity = LG_OP::identity; + if (where[point] == true) + lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + } + } +}; + /*static*/ void UnaryRedTask::cpu_variant(TaskContext& context) { unary_red_template(context); diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index b417b3638..708d21a17 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -279,6 +279,86 @@ static __device__ __forceinline__ Point local_reduce(LHS& result, return point; } +template +static __device__ __forceinline__ Point local_reduce_where(LHS& result, + AccessorRO in, + AccessorRO where, + LHS identity, + const ThreadBlocks& blocks, + const Rect& domain, + int32_t collapsed_dim) +{ + const coord_t tid = threadIdx.x; + const coord_t bid = blockIdx.x; + + Point point = blocks.point(bid, tid, domain.lo); + if (!domain.contains(point)) return point; + + while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { + if (where[point] == true) { + LHS value = OP::convert(point, collapsed_dim, identity, in[point]); + REDOP::template fold(result, value); + } + blocks.next_point(point); + } + +#if __CUDA_ARCH__ >= 700 + // If we're collapsing the innermost dimension, we perform some optimization + // with shared memory to reduce memory traffic due to atomic updates + if (collapsed_dim == DIM - 1) { + __shared__ uint8_t shmem[THREADS_PER_BLOCK * sizeof(LHS)]; + LHS* trampoline = reinterpret_cast(shmem); + // Check for the case where all the threads in the same warp have + // the same x value in which case they're all going to conflict + // so instead we do a warp-level reduction so just one thread ends + // up doing the full atomic + coord_t bucket = 0; + for (int32_t dim = DIM - 2; dim >= 0; --dim) + bucket = bucket * (domain.hi[dim] - domain.lo[dim] + 1) + point[dim] - domain.lo[dim]; + + const uint32_t same_mask = __match_any_sync(0xffffffff, bucket); + int32_t laneid; + asm volatile("mov.s32 %0, %laneid;" : "=r"(laneid)); + const uint32_t active_mask = __ballot_sync(0xffffffff, same_mask - (1 << laneid)); + if ((active_mask & (1 << laneid)) != 0) { + // Store our data into shared + trampoline[tid] = result; + // Make sure all the threads in the warp are done writing + __syncwarp(active_mask); + // Have the lowest thread in each mask pull in the values + int32_t lowest_index = -1; + for (int32_t i = 0; i < warpSize; i++) + if (same_mask & (1 << i)) { + if 
(lowest_index == -1) { + if (i != laneid) { + // We're not the lowest thread in the warp for + // this value so we're done, set the value back + // to identity to ensure that we don't try to + // perform the reduction out to memory + result = identity; + break; + } else // Make sure we don't do this test again + lowest_index = i; + // It was already our value, so just keep going + } else { + // Pull in the value from shared memory + const int32_t index = tid + i - laneid; + REDOP::template fold(result, trampoline[index]); + } + } + } + } +#endif + +#ifdef LEGATE_BOUNDS_CHECKS + // Note: this isn't necessary because we know that the affine transformation on the output + // accessor will ignore coordinates of the collapsed dimension. However, Legion's bounds checks + // want the accessor to honor the sub-rectangle passed when it was created, so we need to + // put points back in the bounds to appease the checks. + point[collapsed_dim] = domain.lo[collapsed_dim]; +#endif + return point; +} template static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) reduce_with_rd_acc(AccessorRD out, @@ -294,6 +374,22 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) if (result != identity) out.reduce(point, result); } +template +static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + reduce_with_rd_acc_where(AccessorRD out, + AccessorRO in, + AccessorRO where, + LHS identity, + ThreadBlocks blocks, + Rect domain, + int32_t collapsed_dim) +{ + auto result = identity; + auto point = local_reduce_where( + result, in, where, identity, blocks, domain, collapsed_dim); + if (result != identity) out.reduce(point, result); +} + template struct UnaryRedImplBody { using OP = UnaryRedOp; @@ -321,6 +417,34 @@ struct UnaryRedImplBody { } }; +template +struct UnaryRedImplBodyWhere { + using OP = UnaryRedOp; + using LG_OP = typename OP::OP; + using RHS = legate_type_of; + using LHS = typename OP::VAL; + + void operator()(AccessorRD lhs, + AccessorRO rhs, + AccessorRO where, + const Rect& rect, + const Pitches& pitches, + int collapsed_dim, + size_t volume) const + { + auto Kernel = reduce_with_rd_acc_where; + auto stream = get_cached_stream(); + + ThreadBlocks blocks; + blocks.initialize(rect, collapsed_dim); + + blocks.compute_maximum_concurrency(reinterpret_cast(Kernel)); + Kernel<<>>( + lhs, rhs, where, LG_OP::identity, blocks, rect, collapsed_dim); + CHECK_CUDA_STREAM(stream); + } +}; + /*static*/ void UnaryRedTask::gpu_variant(TaskContext& context) { unary_red_template(context); diff --git a/src/cunumeric/unary/unary_red.h b/src/cunumeric/unary/unary_red.h index bea848468..a7b44584f 100644 --- a/src/cunumeric/unary/unary_red.h +++ b/src/cunumeric/unary/unary_red.h @@ -24,6 +24,7 @@ namespace cunumeric { struct UnaryRedArgs { const Array& lhs; const Array& rhs; + const Array& where; int32_t collapsed_dim; UnaryRedCode op_code; }; diff --git a/src/cunumeric/unary/unary_red_omp.cc b/src/cunumeric/unary/unary_red_omp.cc index 21ba49d4a..ab1e071d8 100644 --- a/src/cunumeric/unary/unary_red_omp.cc +++ b/src/cunumeric/unary/unary_red_omp.cc @@ -98,6 +98,34 @@ struct UnaryRedImplBody { } }; +template +struct UnaryRedImplBodyWhere { + using OP = UnaryRedOp; + using LG_OP = typename OP::OP; + using RHS = legate_type_of; + + void operator()(AccessorRD lhs, + AccessorRO rhs, + AccessorRO where, + const Rect& rect, + const Pitches& pitches, + int collapsed_dim, + size_t volume) const + { + Splitter splitter; + auto split = splitter.split(rect, 
collapsed_dim); + +#pragma omp parallel for schedule(static) + for (size_t o_idx = 0; o_idx < split.outer; ++o_idx) + for (size_t i_idx = 0; i_idx < split.inner; ++i_idx) { + auto point = splitter.combine(o_idx, i_idx, rect.lo); + auto identity = LG_OP::identity; + if (where[point] == true) + lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + } + } +}; + /*static*/ void UnaryRedTask::omp_variant(TaskContext& context) { unary_red_template(context); diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl index 1e3b298d3..3c77fdd50 100644 --- a/src/cunumeric/unary/unary_red_template.inl +++ b/src/cunumeric/unary/unary_red_template.inl @@ -30,7 +30,10 @@ using namespace legate; template struct UnaryRedImplBody; -template +template +struct UnaryRedImplBodyWhere; + +template struct UnaryRedImpl { template (rect); auto lhs = args.lhs.reduce_accessor(rect); - UnaryRedImplBody()( - lhs, rhs, rect, pitches, args.collapsed_dim, volume); + + if constexpr (HAS_WHERE) { + auto where = args.where.read_accessor(rect); + UnaryRedImplBodyWhere()( + lhs, rhs, where, rect, pitches, args.collapsed_dim, volume); + } else { + UnaryRedImplBody()( + lhs, rhs, rect, pitches, args.collapsed_dim, volume); + } } template +template struct UnaryRedDispatch { template void operator()(UnaryRedArgs& args) const { auto dim = std::max(1, args.rhs.dim()); - return double_dispatch(dim, args.rhs.code(), UnaryRedImpl{}, args); + return double_dispatch(dim, args.rhs.code(), UnaryRedImpl{}, args); } }; @@ -78,10 +88,23 @@ static void unary_red_template(TaskContext& context) auto& inputs = context.inputs(); auto& reductions = context.reductions(); auto& scalars = context.scalars(); - - UnaryRedArgs args{ - reductions[0], inputs[0], scalars[0].value(), scalars[1].value()}; - op_dispatch(args.op_code, UnaryRedDispatch{}, args); + bool has_where = scalars[2].value(); + if (has_where) { + UnaryRedArgs args{reductions[0], + inputs[0], + inputs[1], + scalars[0].value(), + scalars[1].value()}; + op_dispatch(args.op_code, UnaryRedDispatch{}, args); + } else { + Array where; + UnaryRedArgs args{reductions[0], + inputs[0], + where, + scalars[0].value(), + scalars[1].value()}; + op_dispatch(args.op_code, UnaryRedDispatch{}, args); + } } } // namespace cunumeric From 91285302fb370b5a60160a665e7d0598a4bcc354 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Sat, 2 Sep 2023 10:59:08 -0700 Subject: [PATCH 08/66] towards improving data usage by mean --- cunumeric/array.py | 29 +++++++++++++++++++---------- cunumeric/module.py | 9 +-------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index d95e2e97e..570f6f2ab 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3081,6 +3081,16 @@ def max( where=where, ) + def _count_nonzero(self, axis: Any = None) -> Union[int, ndarray]: + if self.size == 0: + return 0 + return ndarray._perform_unary_reduction( + UnaryRedCode.COUNT_NONZERO, + self, + res_dtype=np.dtype(np.uint64), + axis=axis, + ) + @add_boilerplate() def mean( self, @@ -3119,40 +3129,39 @@ def mean( if self.size == 1: if where is True: - return self.copy() # type: ignore + return self.astype(dtype) # type: ignore if where is False: - return self.copy() / 0 # type: ignore + return self.astype(dtype) / 0 # type: ignore # Do the sum if out is not None and out.dtype == dtype: sum_array = self.sum( - axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where + axis=axis, out=out, keepdims=keepdims, 
where=where ) else: - sum_array = self.sum( - axis=axis, dtype=dtype, keepdims=keepdims, where=where - ) + sum_array = self.sum(axis=axis, keepdims=keepdims, where=where) if axis is None: if where is not None: - divisor = np.sum(where.astype(int)) + divisor = where._count_nonzero() else: divisor = reduce(lambda x, y: x * y, self.shape, 1) else: if where is not None: - divisor = np.sum(where.astype(int), axis=axis) + divisor = where._count_nonzero(axis=axis) else: divisor = self.shape[axis] # Divide by the number of things in the collapsed dimensions # Pick the right kinds of division based on the dtype + sum_array = sum_array.astype(dtype) if dtype.kind == "f" or dtype.kind == "c": sum_array.__itruediv__( - np.array(divisor, dtype=sum_array.dtype), + np.array(divisor, dtype=dtype), ) else: - sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype)) + sum_array.__ifloordiv__(np.array(divisor, dtype=dtype)) # Convert to the output we didn't already put it there if out is not None and sum_array is not out: assert out.dtype != sum_array.dtype diff --git a/cunumeric/module.py b/cunumeric/module.py index d738498af..f66c1bec4 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6978,14 +6978,7 @@ def count_nonzero( -------- Multiple GPUs, Multiple CPUs """ - if a.size == 0: - return 0 - return ndarray._perform_unary_reduction( - UnaryRedCode.COUNT_NONZERO, - a, - res_dtype=np.dtype(np.uint64), - axis=axis, - ) + return a._count_nonzero(axis) ############ From 76dc871c2cbf476543be9fdc3aa1062b8c64cdb8 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Sun, 3 Sep 2023 09:51:27 -0700 Subject: [PATCH 09/66] debugging --- cunumeric/deferred.py | 15 +++++++++++---- tests/integration/test_mean.py | 28 +++++++++++++++++++++------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 7f563396b..7ae1edc3f 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -3158,6 +3158,9 @@ def unary_reduction( inputs=[self], ) + is_where = bool(where is not None) + if is_where: + print("IRINA DEBUG inside cunumeric", where.shape, is_where) # See if we are doing reduction to a point or another region if lhs_array.size == 1: assert axes is None or lhs_array.ndim == rhs_array.ndim - ( @@ -3181,12 +3184,15 @@ def unary_reduction( CuNumericOpCode.SCALAR_UNARY_RED ) + print( + "IRINA DEBUG inside scalar unary_red", lhs.shape, lhs.size + ) task.add_reduction(lhs, _UNARY_RED_TO_REDUCTION_OPS[op]) task.add_input(rhs_array.base) task.add_scalar_arg(op, ty.int32) task.add_scalar_arg(rhs_array.shape, (ty.int64,)) - task.add_scalar_arg((where is not None), ty.bool_) - if where is not None: + task.add_scalar_arg(is_where, ty.bool_) + if is_where: task.add_input(where.base) task.add_alignment(rhs_array.base, where.base) @@ -3223,13 +3229,14 @@ def unary_reduction( with Annotation({"OpCode": op.name, "ArgRed?": str(argred)}): task = self.context.create_auto_task(CuNumericOpCode.UNARY_RED) + print("IRINA DEBUG inside unary_red", result.shape) task.add_input(rhs_array.base) task.add_reduction(result, _UNARY_RED_TO_REDUCTION_OPS[op]) task.add_scalar_arg(axis, ty.int32) task.add_scalar_arg(op, ty.int32) - task.add_scalar_arg((where is not None), ty.bool_) - if where is not None: + task.add_scalar_arg(is_where, ty.bool_) + if is_where: task.add_input(where.base) task.add_alignment(rhs_array.base, where.base) diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py index 68816cac6..ec12e86f7 100755 --- 
a/tests/integration/test_mean.py +++ b/tests/integration/test_mean.py @@ -60,11 +60,11 @@ def test_scalar(val): assert np.array_equal(res_np, res_num) -@pytest.mark.parametrize("val", (0.0, 10.0, -5, 1 + 1j)) -def test_scalar_where(val): - res_np = np.mean(val, where=True) - res_num = num.mean(val, where=True) - assert np.array_equal(res_np, res_num) +# @pytest.mark.parametrize("val", (0.0, 10.0, -5, 1 + 1j)) +# def test_scalar_where(val): +# res_np = np.mean(val, where=True) +# res_num = num.mean(val, where=True) +# assert np.array_equal(res_np, res_num) @pytest.mark.parametrize("size", NO_EMPTY_SIZE) @@ -73,7 +73,7 @@ def test_basic(size): arr_num = num.array(arr_np) res_np = np.mean(arr_np) res_num = num.mean(arr_num) - np.array_equal(res_np, res_num) + assert np.array_equal(res_np, res_num) @pytest.mark.parametrize("size", NO_EMPTY_SIZE) @@ -85,7 +85,7 @@ def test_basic_where(size): where_num = num.array(where_np) res_np = np.mean(arr_np, where=where_np) res_num = num.mean(arr_num, where=where_num) - np.array_equal(res_np, res_num) + assert np.array_equal(res_np, res_num, equal_nan=True) @pytest.mark.xfail @@ -113,6 +113,20 @@ def test_axis_keepdims(size, keepdims): assert np.array_equal(out_np, out_num) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_axis_where(size): + arr_np = np.random.randint(-5, 5, size=size) + arr_num = num.array(arr_np) + where_np = arr_np % 2 + where_np = arr_np.astype(bool) + where_num = num.array(where_np) + ndim = arr_np.ndim + for axis in range(-ndim, ndim): + out_np = np.mean(arr_np, axis=axis, where=where_np) + out_num = num.mean(arr_num, axis=axis, where=where_num) + assert np.array_equal(out_np, out_num, equal_nan=True) + + @pytest.mark.parametrize("array_dt", (np.int32, np.float32, np.complex64)) @pytest.mark.parametrize("dt", (np.int32, np.float32, np.complex64)) @pytest.mark.parametrize("size", NO_EMPTY_SIZE) From cbe93d91afd5f2dccf013d8976df03b86edc8c29 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Sun, 3 Sep 2023 10:06:48 -0700 Subject: [PATCH 10/66] removing debug output --- cunumeric/deferred.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 7ae1edc3f..bb4744653 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -3159,8 +3159,6 @@ def unary_reduction( ) is_where = bool(where is not None) - if is_where: - print("IRINA DEBUG inside cunumeric", where.shape, is_where) # See if we are doing reduction to a point or another region if lhs_array.size == 1: assert axes is None or lhs_array.ndim == rhs_array.ndim - ( @@ -3184,9 +3182,6 @@ def unary_reduction( CuNumericOpCode.SCALAR_UNARY_RED ) - print( - "IRINA DEBUG inside scalar unary_red", lhs.shape, lhs.size - ) task.add_reduction(lhs, _UNARY_RED_TO_REDUCTION_OPS[op]) task.add_input(rhs_array.base) task.add_scalar_arg(op, ty.int32) @@ -3229,7 +3224,6 @@ def unary_reduction( with Annotation({"OpCode": op.name, "ArgRed?": str(argred)}): task = self.context.create_auto_task(CuNumericOpCode.UNARY_RED) - print("IRINA DEBUG inside unary_red", result.shape) task.add_input(rhs_array.base) task.add_reduction(result, _UNARY_RED_TO_REDUCTION_OPS[op]) From 419a5afc9c6ad2a2a5dc2cf1cea003a94ab3ead3 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 10 Oct 2023 17:23:11 -0700 Subject: [PATCH 11/66] adding support for broadcasting where + initial implementation of nanmean --- cunumeric/array.py | 73 +++++++++++++++++++++------------- tests/integration/test_mean.py | 21 +++++++--- 2 files changed, 62 insertions(+), 32 
deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 570f6f2ab..e1442be29 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2021-2023 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -191,6 +191,19 @@ def check_writeable(arr: Union[ndarray, tuple[ndarray, ...], None]) -> None: if any(not arr.flags.writeable for arr in check_list): raise ValueError("array is not writeable") +def broadcast_where(where:Union[ndarray, None], shape:NdShape )-> ndarray: + if where is not None: + where_array = convert_to_cunumeric_ndarray(where) + else: + where_array = None + + if where_array is not None and np.ndim(where_array)!=0 and where_array.shape != shape: + where_array = ndarray( + shape=shape, + thunk=where_array._thunk.broadcast_to(shape), + writeable=False, + ) + return where_array class flagsobj: """ @@ -857,7 +870,7 @@ def __bool__(self) -> bool: def __complex__(self) -> complex: """a.__complex__(/)""" return complex(self.__array__()) - + def __contains__(self, item: Any) -> ndarray: """a.__contains__(key, /) @@ -3126,31 +3139,26 @@ def mean( dtype = np.dtype(np.float64) else: dtype = self.dtype - - if self.size == 1: - if where is True: - return self.astype(dtype) # type: ignore - if where is False: - return self.astype(dtype) / 0 # type: ignore - + + where_array = broadcast_where(where, self.shape) # Do the sum if out is not None and out.dtype == dtype: sum_array = self.sum( - axis=axis, out=out, keepdims=keepdims, where=where + axis=axis, out=out, keepdims=keepdims, where=where_array ) else: - sum_array = self.sum(axis=axis, keepdims=keepdims, where=where) + sum_array = self.sum(axis=axis, keepdims=keepdims, where=where_array) if axis is None: - if where is not None: - divisor = where._count_nonzero() + if where_array is not None: + divisor = where_array._count_nonzero() else: divisor = reduce(lambda x, y: x * y, self.shape, 1) else: - if where is not None: - divisor = where._count_nonzero(axis=axis) + if where_array is not None: + divisor = where_array._count_nonzero(axis=axis) else: divisor = self.shape[axis] # Divide by the number of things in the collapsed dimensions @@ -3195,14 +3203,30 @@ def nanmean( Multiple GPUs, Multiple CPUs """ - # nan_where = self != Nan - # if where is not None: - # new_where = where and nan_where - # else: - # new_where = nan_where - new_where = where + if dtype is None: + dtype = a.dtype + + if np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_): + return a.mean(axis, dtype, out, keepdims, where=where) + + nan_mask = _ufunc.bit_twiddling.bitwise_not(_ufunc.floating.isnan(a)) + normalizer = nan_mask.sum( axis=axis, dtype=np.int32, keepdims=keepdims, where=where) + #normalizer = normalizer.astype(dtype) + sum_array = a.nansum(axis, dtype=dtype, keepdims=keepdims, where=where) + if dtype.kind == "f" or dtype.kind == "c": + sum_array.__itruediv__( + np.array(normalizer, dtype=dtype), + ) + else: + sum_array.__ifloordiv__(np.array(normalizer, dtype=dtype)) + + if out is not None and sum_array is not out: + assert out.dtype != sum_array.dtype + out._thunk.convert(sum_array._thunk) + return out + else: + return sum_array - return self.mean(axis, dtype, out, keepdims, new_where) @add_boilerplate() def min( @@ -4213,11 +4237,6 @@ def _perform_unary_reduction( # TODO: Need to require initial to be given when the array is empty # or a where mask is given. 
- - # if not where: - # where = ndarray(shape=(1,), dtype=bool) - # where.fill(True) - if where is not None and isinstance(where, ndarray): # The where array has to broadcast to the src.shape if np.broadcast_shapes(src.shape, where.shape) != src.shape: diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py index ec12e86f7..4c2fc4f4d 100755 --- a/tests/integration/test_mean.py +++ b/tests/integration/test_mean.py @@ -60,11 +60,11 @@ def test_scalar(val): assert np.array_equal(res_np, res_num) -# @pytest.mark.parametrize("val", (0.0, 10.0, -5, 1 + 1j)) -# def test_scalar_where(val): -# res_np = np.mean(val, where=True) -# res_num = num.mean(val, where=True) -# assert np.array_equal(res_np, res_num) +@pytest.mark.parametrize("val", (0.0, 10.0, -5, 1 + 1j)) +def test_scalar_where(val): + res_np = np.mean(val, where=True) + res_num = num.mean(val, where=True) + assert np.array_equal(res_np, res_num) @pytest.mark.parametrize("size", NO_EMPTY_SIZE) @@ -87,6 +87,17 @@ def test_basic_where(size): res_num = num.mean(arr_num, where=where_num) assert np.array_equal(res_np, res_num, equal_nan=True) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_where_broadcast(size): + arr_np = np.random.randint(-5, 5, size=size) + arr_num = num.array(arr_np) + where_np = np.zeros((1,), bool) + where_num = num.array(where_np) + res_np = np.mean(arr_np, where=where_np) + res_num = num.mean(arr_num, where=where_num) + assert np.array_equal(res_np, res_num, equal_nan=True) + + @pytest.mark.xfail @pytest.mark.parametrize("axis", ((-3, -1), (-1, 0), (-2, 2), (0, 2))) From 0d7d66c6bdd5b2268cba3be8cf71ae9a1e6b5b9e Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 17 Oct 2023 09:39:58 -0700 Subject: [PATCH 12/66] adding test for nanmean --- cunumeric/array.py | 63 ++++++++++-- cunumeric/module.py | 20 +--- tests/integration/test_nanmean.py | 154 ++++++++++++++++++++++++++++++ 3 files changed, 211 insertions(+), 26 deletions(-) create mode 100755 tests/integration/test_nanmean.py diff --git a/cunumeric/array.py b/cunumeric/array.py index e1442be29..d9165512f 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3203,16 +3203,18 @@ def nanmean( Multiple GPUs, Multiple CPUs """ - if dtype is None: - dtype = a.dtype + from . 
import _ufunc if np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_): - return a.mean(axis, dtype, out, keepdims, where=where) + return self.mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where) - nan_mask = _ufunc.bit_twiddling.bitwise_not(_ufunc.floating.isnan(a)) + if dtype is None: + dtype = self.dtype + + nan_mask = _ufunc.bit_twiddling.bitwise_not(_ufunc.floating.isnan(self)) normalizer = nan_mask.sum( axis=axis, dtype=np.int32, keepdims=keepdims, where=where) #normalizer = normalizer.astype(dtype) - sum_array = a.nansum(axis, dtype=dtype, keepdims=keepdims, where=where) + sum_array = self.nansum(axis, dtype=dtype, keepdims=keepdims, where=where) if dtype.kind == "f" or dtype.kind == "c": sum_array.__itruediv__( np.array(normalizer, dtype=dtype), @@ -3221,8 +3223,10 @@ def nanmean( sum_array.__ifloordiv__(np.array(normalizer, dtype=dtype)) if out is not None and sum_array is not out: - assert out.dtype != sum_array.dtype - out._thunk.convert(sum_array._thunk) + if out.dtype != sum_array.dtype: + out._thunk.convert(sum_array._thunk) + else: + out.copy(sum_array) return out else: return sum_array @@ -3742,6 +3746,51 @@ def sum( where=where, ) + @add_boilerplate() + def nansum( + self, + axis: Any = None, + dtype: Any = None, + out: Union[ndarray, None] = None, + keepdims: bool = False, + initial: Optional[Union[int, float]] = None, + where: Optional[ndarray] = None, + ) -> ndarray: + """a.nansum(axis=None, dtype=None, out=None, keepdims=False, initial=0, + where=None) + + Return the sum of the array elements over the given axis ignoring nan. + + Refer to :func:`cunumeric.nansum` for full documentation. + + See Also + -------- + cunumeric.nansum : equivalent function + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + # Note that np.nansum and np.sum allow complex datatypes + # so there are no "disallowed types" for this API + + if self.dtype.kind in ("f", "c"): + unary_red_code = UnaryRedCode.NANSUM + else: + unary_red_code = UnaryRedCode.SUM + + return self._perform_unary_reduction( + unary_red_code, + self, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + def swapaxes(self, axis1: Any, axis2: Any) -> ndarray: """a.swapaxes(axis1, axis2) diff --git a/cunumeric/module.py b/cunumeric/module.py index f66c1bec4..7704eb89f 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6076,25 +6076,7 @@ def nansum( Multiple GPUs, Multiple CPUs """ - # Note that np.nansum and np.sum allow complex datatypes - # so there are no "disallowed types" for this API - - if a.dtype.kind in ("f", "c"): - unary_red_code = UnaryRedCode.NANSUM - else: - unary_red_code = UnaryRedCode.SUM - - return a._perform_unary_reduction( - unary_red_code, - a, - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - + return a.nansum(axis, dtype, out, keepdims, initial, where) # Exponents and logarithms diff --git a/tests/integration/test_nanmean.py b/tests/integration/test_nanmean.py new file mode 100755 index 000000000..d1e8dc433 --- /dev/null +++ b/tests/integration/test_nanmean.py @@ -0,0 +1,154 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest + +import cunumeric as num + +DIM = 7 + +NO_EMPTY_SIZE = ( + (1,), + (DIM,), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +) + + +def gen_out_shape(size, axis): + if axis is None: + return () + if axis < 0: + axis += len(size) + if axis >= 0 and axis < len(size): + return size[:axis] + size[axis + 1 :] + else: + return -1 + + +@pytest.mark.parametrize("arr", ([], [[], []])) +def test_empty_arr(arr): + res_np = np.nanmean(arr) + res_num = num.nanmean(arr) + assert np.isnan(res_np) and np.isnan(res_num) + + +@pytest.mark.parametrize("val", (np.nan, 0.0, 10.0, -5, 1 + 1j)) +def test_scalar(val): + res_np = np.nanmean(val) + res_num = num.nanmean(val) + assert np.array_equal(res_np, res_num, equal_nan=True) + + +@pytest.mark.parametrize("val", (np.nan, 0.0, 10.0, -5, 1 + 1j)) +def test_scalar_where(val): + res_np = np.nanmean(val, where=True) + res_num = num.nanmean(val, where=True) + assert np.array_equal(res_np, res_num, equal_nan=True) + + +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_basic(size): + arr_np = np.random.randint(-5, 5, size=size).astype(float) + arr_np[arr_np%2==0] = np.nan + arr_num = num.array(arr_np) + res_np = np.nanmean(arr_np) + res_num = num.nanmean(arr_num) + assert np.array_equal(res_np, res_num, equal_nan=True) + + +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_basic_where(size): + arr_np = np.random.randint(-5, 5, size=size).astype(float) + arr_np[arr_np%2==0] = np.nan + arr_num = num.array(arr_np) + where_np = arr_np % 2 + where_np = arr_np.astype(bool) + where_num = num.array(where_np) + res_np = np.nanmean(arr_np, where=where_np) + res_num = num.nanmean(arr_num, where=where_num) + assert np.array_equal(res_np, res_num, equal_nan=True) + + +@pytest.mark.xfail +@pytest.mark.parametrize("axis", ((-3, -1), (-1, 0), (-2, 2), (0, 2))) +def test_axis_tuple(axis): + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + size = (3, 4, 7) + arr_np = np.random.randint(-5, 5, size=size).astype(float) + arr_np[arr_np%2==1] = np.nan + arr_num = num.array(arr_np) + out_np = np.nanmean(arr_np, axis=axis) + out_num = num.nanmean(arr_num, axis=axis) + assert np.array_equal(out_np, out_num, equal_nan=True) + + +@pytest.mark.parametrize("keepdims", (False, True)) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_axis_keepdims(size, keepdims): + arr_np = np.random.randint(-5, 5, size=size).astype(float) + arr_np[arr_np%2==1] = np.nan + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in range(-ndim, ndim): + out_np = np.nanmean(arr_np, axis=axis, keepdims=keepdims) + out_num = num.nanmean(arr_num, axis=axis, keepdims=keepdims) + assert np.array_equal(out_np, out_num, equal_nan=True) + + +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_axis_where(size): + arr_np = np.random.randint(-5, 5, size=size).astype(float) + arr_np[arr_np%2==0] = np.nan + arr_num = num.array(arr_np) + where_np = arr_np[arr_np%2==1] % 2 + where_np = arr_np.astype(bool) + where_num = num.array(where_np) + ndim = 
arr_np.ndim + for axis in range(-ndim, ndim): + out_np = np.nanmean(arr_np, axis=axis, where=where_np) + out_num = num.nanmean(arr_num, axis=axis, where=where_num) + assert np.array_equal(out_np, out_num, equal_nan=True) + + +@pytest.mark.parametrize("out_dt", (np.float32, np.complex128)) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_out(size, out_dt): + arr_np = np.random.randint(-5, 5, size=size).astype(float) + arr_np[arr_np%2==0] = np.nan + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in (-1, ndim - 1, None): + out_shape = gen_out_shape(size, axis) + out_np = np.empty(out_shape, dtype=out_dt) + out_num = num.empty(out_shape, dtype=out_dt) + np.nanmean(arr_np, axis=axis, out=out_np) + num.nanmean(arr_num, axis=axis, out=out_num) + np.array_equal(out_np, out_num, equal_nan=True) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) From baf61ed58d94e6a0455163e482ae63c00e3628ee Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 17 Oct 2023 09:56:44 -0700 Subject: [PATCH 13/66] some clean-up + formatting --- cunumeric/array.py | 45 ++++++++++++++++++++----------- cunumeric/module.py | 1 + tests/integration/test_mean.py | 2 +- tests/integration/test_nanmean.py | 14 +++++----- 4 files changed, 38 insertions(+), 24 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index d9165512f..1fea5a905 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -191,13 +191,20 @@ def check_writeable(arr: Union[ndarray, tuple[ndarray, ...], None]) -> None: if any(not arr.flags.writeable for arr in check_list): raise ValueError("array is not writeable") -def broadcast_where(where:Union[ndarray, None], shape:NdShape )-> ndarray: + +def broadcast_where( + where: Union[ndarray, None], shape: NdShape +) -> Union[ndarray, None]: if where is not None: where_array = convert_to_cunumeric_ndarray(where) else: - where_array = None + where_array = None - if where_array is not None and np.ndim(where_array)!=0 and where_array.shape != shape: + if ( + where_array is not None + and np.ndim(where_array) != 0 + and where_array.shape != shape + ): where_array = ndarray( shape=shape, thunk=where_array._thunk.broadcast_to(shape), @@ -205,6 +212,7 @@ def broadcast_where(where:Union[ndarray, None], shape:NdShape )-> ndarray: ) return where_array + class flagsobj: """ Information about the memory layout of the array. @@ -870,7 +878,7 @@ def __bool__(self) -> bool: def __complex__(self) -> complex: """a.__complex__(/)""" return complex(self.__array__()) - + def __contains__(self, item: Any) -> ndarray: """a.__contains__(key, /) @@ -3139,7 +3147,7 @@ def mean( dtype = np.dtype(np.float64) else: dtype = self.dtype - + where_array = broadcast_where(where, self.shape) # Do the sum if out is not None and out.dtype == dtype: @@ -3147,12 +3155,13 @@ def mean( axis=axis, out=out, keepdims=keepdims, where=where_array ) else: - sum_array = self.sum(axis=axis, keepdims=keepdims, where=where_array) + sum_array = self.sum( + axis=axis, keepdims=keepdims, where=where_array + ) if axis is None: if where_array is not None: divisor = where_array._count_nonzero() - else: divisor = reduce(lambda x, y: x * y, self.shape, 1) @@ -3205,16 +3214,21 @@ def nanmean( """ from . 
import _ufunc - if np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_): - return self.mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where) + if np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_): + return self.mean( + axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where + ) if dtype is None: dtype = self.dtype - - nan_mask = _ufunc.bit_twiddling.bitwise_not(_ufunc.floating.isnan(self)) - normalizer = nan_mask.sum( axis=axis, dtype=np.int32, keepdims=keepdims, where=where) - #normalizer = normalizer.astype(dtype) - sum_array = self.nansum(axis, dtype=dtype, keepdims=keepdims, where=where) + + nan_mask = _ufunc.bit_twiddling.bitwise_not( + _ufunc.floating.isnan(self) + ) + normalizer = nan_mask.sum(axis=axis, keepdims=keepdims, where=where) + # normalizer = normalizer.astype(dtype) + sum_array = self.nansum(axis, keepdims=keepdims, where=where) + sum_array = sum_array.astype(dtype) if dtype.kind == "f" or dtype.kind == "c": sum_array.__itruediv__( np.array(normalizer, dtype=dtype), @@ -3226,12 +3240,11 @@ def nanmean( if out.dtype != sum_array.dtype: out._thunk.convert(sum_array._thunk) else: - out.copy(sum_array) + out = sum_array return out else: return sum_array - @add_boilerplate() def min( self, diff --git a/cunumeric/module.py b/cunumeric/module.py index 7704eb89f..9504636e6 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6078,6 +6078,7 @@ def nansum( return a.nansum(axis, dtype, out, keepdims, initial, where) + # Exponents and logarithms diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py index 4c2fc4f4d..f6ffb801a 100755 --- a/tests/integration/test_mean.py +++ b/tests/integration/test_mean.py @@ -87,6 +87,7 @@ def test_basic_where(size): res_num = num.mean(arr_num, where=where_num) assert np.array_equal(res_np, res_num, equal_nan=True) + @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_where_broadcast(size): arr_np = np.random.randint(-5, 5, size=size) @@ -98,7 +99,6 @@ def test_where_broadcast(size): assert np.array_equal(res_np, res_num, equal_nan=True) - @pytest.mark.xfail @pytest.mark.parametrize("axis", ((-3, -1), (-1, 0), (-2, 2), (0, 2))) def test_axis_tuple(axis): diff --git a/tests/integration/test_nanmean.py b/tests/integration/test_nanmean.py index d1e8dc433..9849e62dd 100755 --- a/tests/integration/test_nanmean.py +++ b/tests/integration/test_nanmean.py @@ -70,7 +70,7 @@ def test_scalar_where(val): @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_basic(size): arr_np = np.random.randint(-5, 5, size=size).astype(float) - arr_np[arr_np%2==0] = np.nan + arr_np[arr_np % 2 == 0] = np.nan arr_num = num.array(arr_np) res_np = np.nanmean(arr_np) res_num = num.nanmean(arr_num) @@ -80,7 +80,7 @@ def test_basic(size): @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_basic_where(size): arr_np = np.random.randint(-5, 5, size=size).astype(float) - arr_np[arr_np%2==0] = np.nan + arr_np[arr_np % 2 == 0] = np.nan arr_num = num.array(arr_np) where_np = arr_np % 2 where_np = arr_np.astype(bool) @@ -97,7 +97,7 @@ def test_axis_tuple(axis): # In cuNumeric, it raises NotImplementedError size = (3, 4, 7) arr_np = np.random.randint(-5, 5, size=size).astype(float) - arr_np[arr_np%2==1] = np.nan + arr_np[arr_np % 2 == 1] = np.nan arr_num = num.array(arr_np) out_np = np.nanmean(arr_np, axis=axis) out_num = num.nanmean(arr_num, axis=axis) @@ -108,7 +108,7 @@ def test_axis_tuple(axis): @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_axis_keepdims(size, keepdims): arr_np 
= np.random.randint(-5, 5, size=size).astype(float) - arr_np[arr_np%2==1] = np.nan + arr_np[arr_np % 2 == 1] = np.nan arr_num = num.array(arr_np) ndim = arr_np.ndim for axis in range(-ndim, ndim): @@ -120,9 +120,9 @@ def test_axis_keepdims(size, keepdims): @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_axis_where(size): arr_np = np.random.randint(-5, 5, size=size).astype(float) - arr_np[arr_np%2==0] = np.nan + arr_np[arr_np % 2 == 0] = np.nan arr_num = num.array(arr_np) - where_np = arr_np[arr_np%2==1] % 2 + where_np = arr_np[arr_np % 2 == 1] % 2 where_np = arr_np.astype(bool) where_num = num.array(where_np) ndim = arr_np.ndim @@ -136,7 +136,7 @@ def test_axis_where(size): @pytest.mark.parametrize("size", NO_EMPTY_SIZE) def test_out(size, out_dt): arr_np = np.random.randint(-5, 5, size=size).astype(float) - arr_np[arr_np%2==0] = np.nan + arr_np[arr_np % 2 == 0] = np.nan arr_num = num.array(arr_np) ndim = arr_np.ndim for axis in (-1, ndim - 1, None): From 30b1e1829c9a1eee7d442c46144f83874a1e4bc1 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 17 Oct 2023 13:34:58 -0700 Subject: [PATCH 14/66] fixing issues after rebase --- cunumeric/array.py | 4 ++-- cunumeric/module.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 1fea5a905..6d58be54f 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3136,7 +3136,7 @@ def mean( Multiple GPUs, Multiple CPUs """ - if axis is not None and type(axis) != int: + if axis is not None and not isinstance(axis, int): raise NotImplementedError( "cunumeric.mean only supports int types for " "'axis' currently" @@ -4147,7 +4147,7 @@ def find_common_type(*args: ndarray) -> np.dtype[Any]: scalars.append(array.dtype.type(0)) else: array_types.append(array.dtype) - return np.find_common_type(array_types, scalar_types) + return np.result_type(*array_types, *scalars) def _maybe_convert(self, dtype: np.dtype[Any], hints: Any) -> ndarray: if self.dtype == dtype: diff --git a/cunumeric/module.py b/cunumeric/module.py index 9504636e6..0f984f7e5 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6076,7 +6076,14 @@ def nansum( Multiple GPUs, Multiple CPUs """ - return a.nansum(axis, dtype, out, keepdims, initial, where) + return a.nansum( + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) # Exponents and logarithms From 2277234775b85fad335221497561db2a21528d63 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 17 Oct 2023 15:02:04 -0700 Subject: [PATCH 15/66] code clean-up --- cunumeric/_ufunc/ufunc.py | 2 +- cunumeric/array.py | 34 ++---- cunumeric/deferred.py | 2 +- cunumeric/eager.py | 2 +- cunumeric/module.py | 2 +- src/cunumeric/unary/scalar_unary_red.cc | 2 +- src/cunumeric/unary/scalar_unary_red.cu | 2 +- src/cunumeric/unary/scalar_unary_red.h | 2 +- src/cunumeric/unary/scalar_unary_red_omp.cc | 2 +- .../unary/scalar_unary_red_template.inl | 2 +- src/cunumeric/unary/unary_red.cc | 2 +- src/cunumeric/unary/unary_red.cu | 107 ++++++------------ src/cunumeric/unary/unary_red_omp.cc | 2 +- tests/integration/test_mean.py | 2 +- tests/integration/test_nanmean.py | 2 +- 15 files changed, 57 insertions(+), 110 deletions(-) diff --git a/cunumeric/_ufunc/ufunc.py b/cunumeric/_ufunc/ufunc.py index 763efa6db..01d2fc30b 100644 --- a/cunumeric/_ufunc/ufunc.py +++ b/cunumeric/_ufunc/ufunc.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2021-2023 NVIDIA Corporation # # Licensed under the Apache 
License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cunumeric/array.py b/cunumeric/array.py index 6d58be54f..043d01e16 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -159,14 +159,9 @@ def convert_to_cunumeric_ndarray(obj: Any, share: bool = False) -> ndarray: def convert_to_predicate_ndarray(obj: Any) -> Any: - # Keep all boolean types as they are - if obj is True or obj is False: - # where = ndarray(shape=(), dtype=bool) - # where.fill(obj) - # return where + # Keep all boolean types and None as they are + if obj is True or obj is False or obj is None: return obj - if obj is None: - return None return convert_to_cunumeric_ndarray(obj) @@ -3225,9 +3220,11 @@ def nanmean( nan_mask = _ufunc.bit_twiddling.bitwise_not( _ufunc.floating.isnan(self) ) - normalizer = nan_mask.sum(axis=axis, keepdims=keepdims, where=where) - # normalizer = normalizer.astype(dtype) - sum_array = self.nansum(axis, keepdims=keepdims, where=where) + where_array = broadcast_where(where, nan_mask.shape) + normalizer = nan_mask.sum( + axis=axis, keepdims=keepdims, where=where_array + ) + sum_array = self.nansum(axis, keepdims=keepdims, where=where_array) sum_array = sum_array.astype(dtype) if dtype.kind == "f" or dtype.kind == "c": sum_array.__itruediv__( @@ -4113,12 +4110,10 @@ def unique(self) -> ndarray: @classmethod def _get_where_thunk( - cls, where: Union[bool, ndarray], out_shape: NdShape - ) -> Union[Literal[True], NumPyThunk]: - if where is True: - return True - # if where is False: - # raise RuntimeError("should have caught this earlier") + cls, where: Union[Literal[True], None, ndarray], out_shape: NdShape + ) -> Union[Literal[True], None, NumPyThunk]: + if where is True or where is None: + return where if not isinstance(where, ndarray) or where.dtype != np.bool_: raise RuntimeError("should have converted this earlier") if where.shape != out_shape: @@ -4353,15 +4348,10 @@ def _perform_unary_reduction( shape=out_shape, dtype=res_dtype, inputs=(src, where) ) - if where is not None: - where_thunk = cls._get_where_thunk(where, src.shape) - else: - where_thunk = None - result._thunk.unary_reduction( op, src._thunk, - where_thunk, + cls._get_where_thunk(where, src.shape), axis, axes, keepdims, diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index bb4744653..02dcbb13f 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2021-2023 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cunumeric/eager.py b/cunumeric/eager.py index d7934d04e..20d18cd10 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2021-2023 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cunumeric/module.py b/cunumeric/module.py index 0f984f7e5..900c2ec30 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2021-2023 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/src/cunumeric/unary/scalar_unary_red.cc b/src/cunumeric/unary/scalar_unary_red.cc index 77f746eb4..c10e065f9 100644 --- a/src/cunumeric/unary/scalar_unary_red.cc +++ b/src/cunumeric/unary/scalar_unary_red.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/unary/scalar_unary_red.cu b/src/cunumeric/unary/scalar_unary_red.cu index 6959d05c9..76dcaeb32 100644 --- a/src/cunumeric/unary/scalar_unary_red.cu +++ b/src/cunumeric/unary/scalar_unary_red.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/unary/scalar_unary_red.h b/src/cunumeric/unary/scalar_unary_red.h index 856eeba65..aa7925cf9 100644 --- a/src/cunumeric/unary/scalar_unary_red.h +++ b/src/cunumeric/unary/scalar_unary_red.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/unary/scalar_unary_red_omp.cc b/src/cunumeric/unary/scalar_unary_red_omp.cc index c33576355..646f0193a 100644 --- a/src/cunumeric/unary/scalar_unary_red_omp.cc +++ b/src/cunumeric/unary/scalar_unary_red_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 034172ba6..a061105ab 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/unary/unary_red.cc b/src/cunumeric/unary/unary_red.cc index 34af69434..ddfdf7a59 100644 --- a/src/cunumeric/unary/unary_red.cc +++ b/src/cunumeric/unary/unary_red.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index 708d21a17..de82f3583 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -201,26 +201,14 @@ std::ostream& operator<<(std::ostream& os, const ThreadBlocks& blocks) return os; } -template -static __device__ __forceinline__ Point local_reduce(LHS& result, - AccessorRO in, - LHS identity, - const ThreadBlocks& blocks, - const Rect& domain, - int32_t collapsed_dim) +template +static void __device__ collapse_dims(LHS& result, + Point& point, + const Rect& domain, + int32_t collapsed_dim, + LHS identity, + coord_t tid) { - const coord_t tid = threadIdx.x; - const coord_t bid = blockIdx.x; - - Point point = blocks.point(bid, tid, domain.lo); - if (!domain.contains(point)) return point; - - while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { - LHS value = OP::convert(point, collapsed_dim, identity, in[point]); - REDOP::template fold(result, value); - blocks.next_point(point); - } - #if __CUDA_ARCH__ >= 700 // If we're collapsing the innermost dimension, we perform some optimization // with shared memory to reduce memory traffic due to atomic updates @@ -276,6 +264,29 @@ static __device__ __forceinline__ Point local_reduce(LHS& result, // put points back in the bounds to appease the checks. point[collapsed_dim] = domain.lo[collapsed_dim]; #endif +} + +template +static __device__ __forceinline__ Point local_reduce(LHS& result, + AccessorRO in, + LHS identity, + const ThreadBlocks& blocks, + const Rect& domain, + int32_t collapsed_dim) +{ + const coord_t tid = threadIdx.x; + const coord_t bid = blockIdx.x; + + Point point = blocks.point(bid, tid, domain.lo); + if (!domain.contains(point)) return point; + + while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { + LHS value = OP::convert(point, collapsed_dim, identity, in[point]); + REDOP::template fold(result, value); + blocks.next_point(point); + } + + collapse_dims(result, point, domain, collapsed_dim, identity, tid); return point; } @@ -302,61 +313,7 @@ static __device__ __forceinline__ Point local_reduce_where(LHS& result, blocks.next_point(point); } -#if __CUDA_ARCH__ >= 700 - // If we're collapsing the innermost dimension, we perform some optimization - // with shared memory to reduce memory traffic due to atomic updates - if (collapsed_dim == DIM - 1) { - __shared__ uint8_t shmem[THREADS_PER_BLOCK * sizeof(LHS)]; - LHS* trampoline = reinterpret_cast(shmem); - // Check for the case where all the threads in the same warp have - // the same x value in which case they're all going to conflict - // so instead we do a warp-level reduction so just one thread ends - // up doing the full atomic - coord_t bucket = 0; - for (int32_t dim = DIM - 2; dim >= 0; --dim) - bucket = bucket * (domain.hi[dim] - domain.lo[dim] + 1) + point[dim] - domain.lo[dim]; - - const uint32_t same_mask = __match_any_sync(0xffffffff, bucket); - int32_t laneid; - asm volatile("mov.s32 %0, %laneid;" : "=r"(laneid)); - const uint32_t active_mask = __ballot_sync(0xffffffff, same_mask - (1 << laneid)); - if ((active_mask & (1 << laneid)) != 0) { - // Store our data into shared - trampoline[tid] = result; - // Make sure all the threads in the warp are done writing - __syncwarp(active_mask); - // Have the lowest thread in each mask pull in the values - int32_t lowest_index = -1; - for (int32_t i = 0; i < warpSize; i++) - if (same_mask & (1 << i)) { - if (lowest_index == -1) { - if (i != laneid) { - // We're not the lowest thread in the warp for - // this value so we're done, set the value back - // to identity to ensure that we don't try to - // perform the reduction out to memory - result = identity; - break; - } else // Make sure we don't do 
this test again - lowest_index = i; - // It was already our value, so just keep going - } else { - // Pull in the value from shared memory - const int32_t index = tid + i - laneid; - REDOP::template fold(result, trampoline[index]); - } - } - } - } -#endif - -#ifdef LEGATE_BOUNDS_CHECKS - // Note: this isn't necessary because we know that the affine transformation on the output - // accessor will ignore coordinates of the collapsed dimension. However, Legion's bounds checks - // want the accessor to honor the sub-rectangle passed when it was created, so we need to - // put points back in the bounds to appease the checks. - point[collapsed_dim] = domain.lo[collapsed_dim]; -#endif + collapse_dims(result, point, domain, collapsed_dim, identity, tid); return point; } template diff --git a/src/cunumeric/unary/unary_red_omp.cc b/src/cunumeric/unary/unary_red_omp.cc index ab1e071d8..fdded9b68 100644 --- a/src/cunumeric/unary/unary_red_omp.cc +++ b/src/cunumeric/unary/unary_red_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2021-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py index f6ffb801a..40092455c 100755 --- a/tests/integration/test_mean.py +++ b/tests/integration/test_mean.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2021-2023 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_nanmean.py b/tests/integration/test_nanmean.py index 9849e62dd..98962842b 100755 --- a/tests/integration/test_nanmean.py +++ b/tests/integration/test_nanmean.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2021-2023 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
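The where-mask tests above lean on NumPy's reduction semantics: the boolean mask is broadcast against the input, and a masked-out element behaves as the reduction's identity, which is exactly what the kernels do when they skip the fold for a false mask entry. A minimal NumPy-only sketch of those semantics (illustrative, not code from this series; `a` and `mask` are made-up inputs):

```python
# Illustrative sketch of the `where` reduction semantics these patches
# implement; plain NumPy, not the cuNumeric code itself.
import numpy as np

a = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
mask = np.array([True, False, True])  # must broadcast to a.shape

# Skipping a masked-out element is equivalent to folding the reduction's
# identity: 0 for sum, 1 for prod.
assert np.sum(a, where=mask) == np.sum(np.where(mask, a, 0.0))
assert np.prod(a, where=mask) == np.prod(np.where(mask, a, 1.0))

# The mask can be materialized at full shape first, which is what the
# broadcast_where() helper added earlier does before launching a task.
full = np.broadcast_to(mask, a.shape)
assert np.sum(a, where=full) == np.sum(a, where=mask)
```

Broadcasting the mask up front keeps the C++ tasks simple: with the `add_alignment` call on `where.base`, they can assume the mask is aligned element-for-element with the input store.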
From 73e06091bb9d209c414eef55337b398a05fd3d0e Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 17 Oct 2023 21:30:22 -0700 Subject: [PATCH 16/66] code clean-up --- cunumeric/array.py | 52 +++++++++++++-------------------------------- cunumeric/module.py | 2 +- 2 files changed, 16 insertions(+), 38 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 043d01e16..6c40767b4 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -188,18 +188,14 @@ def check_writeable(arr: Union[ndarray, tuple[ndarray, ...], None]) -> None: def broadcast_where( - where: Union[ndarray, None], shape: NdShape + where: Union[bool, ndarray, None], shape: NdShape ) -> Union[ndarray, None]: if where is not None: where_array = convert_to_cunumeric_ndarray(where) else: where_array = None - if ( - where_array is not None - and np.ndim(where_array) != 0 - and where_array.shape != shape - ): + if where_array is not None and where_array.shape != shape: where_array = ndarray( shape=shape, thunk=where_array._thunk.broadcast_to(shape), @@ -3220,11 +3216,8 @@ def nanmean( nan_mask = _ufunc.bit_twiddling.bitwise_not( _ufunc.floating.isnan(self) ) - where_array = broadcast_where(where, nan_mask.shape) - normalizer = nan_mask.sum( - axis=axis, keepdims=keepdims, where=where_array - ) - sum_array = self.nansum(axis, keepdims=keepdims, where=where_array) + normalizer = nan_mask.sum(axis=axis, keepdims=keepdims, where=where) + sum_array = self._nansum(axis, keepdims=keepdims, where=where) sum_array = sum_array.astype(dtype) if dtype.kind == "f" or dtype.kind == "c": sum_array.__itruediv__( @@ -3757,7 +3750,7 @@ def sum( ) @add_boilerplate() - def nansum( + def _nansum( self, axis: Any = None, dtype: Any = None, @@ -3766,22 +3759,6 @@ def nansum( initial: Optional[Union[int, float]] = None, where: Optional[ndarray] = None, ) -> ndarray: - """a.nansum(axis=None, dtype=None, out=None, keepdims=False, initial=0, - where=None) - - Return the sum of the array elements over the given axis ignoring nan. - - Refer to :func:`cunumeric.nansum` for full documentation. 
- - See Also - -------- - cunumeric.nansum : equivalent function - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ # Note that np.nansum and np.sum allow complex datatypes # so there are no "disallowed types" for this API @@ -4170,7 +4147,7 @@ def _perform_unary_op( out: Union[Any, None] = None, extra_args: Any = None, dtype: Union[np.dtype[Any], None] = None, - where: Union[bool, ndarray] = True, + where: Union[bool, None, ndarray] = True, out_dtype: Union[np.dtype[Any], None] = None, ) -> ndarray: if out is not None: @@ -4212,7 +4189,7 @@ def _perform_unary_op( # Quick exit if where is False: return out - + where_array = broadcast_where(where, out.shape) if out_dtype is None: if out.dtype != src.dtype and not ( op == UnaryOpCode.ABSOLUTE and src.dtype.kind == "c" @@ -4220,12 +4197,12 @@ def _perform_unary_op( temp = ndarray( out.shape, dtype=src.dtype, - inputs=(src, where), + inputs=(src, where_array), ) temp._thunk.unary_op( op, src._thunk, - cls._get_where_thunk(where, out.shape), + cls._get_where_thunk(where_array, out.shape), extra_args, ) out._thunk.convert(temp._thunk) @@ -4233,7 +4210,7 @@ def _perform_unary_op( out._thunk.unary_op( op, src._thunk, - cls._get_where_thunk(where, out.shape), + cls._get_where_thunk(where_array, out.shape), extra_args, ) else: @@ -4241,12 +4218,12 @@ def _perform_unary_op( temp = ndarray( out.shape, dtype=out_dtype, - inputs=(src, where), + inputs=(src, where_array), ) temp._thunk.unary_op( op, src._thunk, - cls._get_where_thunk(where, out.shape), + cls._get_where_thunk(where_array, out.shape), extra_args, ) out._thunk.convert(temp._thunk) @@ -4254,7 +4231,7 @@ def _perform_unary_op( out._thunk.unary_op( op, src._thunk, - cls._get_where_thunk(where, out.shape), + cls._get_where_thunk(where_array, out.shape), extra_args, ) return out @@ -4348,10 +4325,11 @@ def _perform_unary_reduction( shape=out_shape, dtype=res_dtype, inputs=(src, where) ) + where_array = broadcast_where(where, src.shape) result._thunk.unary_reduction( op, src._thunk, - cls._get_where_thunk(where, src.shape), + cls._get_where_thunk(where_array, src.shape), axis, axes, keepdims, diff --git a/cunumeric/module.py b/cunumeric/module.py index 900c2ec30..b6363eef5 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6076,7 +6076,7 @@ def nansum( Multiple GPUs, Multiple CPUs """ - return a.nansum( + return a._nansum( axis=axis, dtype=dtype, out=out, From dfafdcc6e01ada995621b12255fb650ff585f3a3 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 18 Oct 2023 10:05:30 -0700 Subject: [PATCH 17/66] adding test_where for unary operations --- cunumeric/deferred.py | 2 ++ tests/integration/test_logical.py | 15 ++++++++++----- tests/integration/test_nan_reduction.py | 19 +++++++++++++++++++ tests/integration/test_prod.py | 14 ++++++++++---- tests/integration/test_reduction.py | 12 +++++++++--- 5 files changed, 50 insertions(+), 12 deletions(-) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 02dcbb13f..c1a102e73 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -3159,6 +3159,8 @@ def unary_reduction( ) is_where = bool(where is not None) + if is_where: + where = self.runtime.to_deferred_array(where) # See if we are doing reduction to a point or another region if lhs_array.size == 1: assert axes is None or lhs_array.ndim == rhs_array.ndim - ( diff --git a/tests/integration/test_logical.py b/tests/integration/test_logical.py index ca9b99220..b0f83aaa6 100644 --- a/tests/integration/test_logical.py +++ b/tests/integration/test_logical.py 
@@ -102,19 +102,24 @@ def test_nd_inputs(ndim, func): assert np.array_equal(out_np, out_num) -@pytest.mark.skip def test_where(): - # "the `where` parameter is currently not supported" - x = np.array([[True, True, False], [True, True, True]]) y = np.array([[True, False], [True, True]]) cy = num.array(y) + # where needs to be broadcasted assert num.array_equal( - num.all(cy, where=[True, False]), np.all(x, where=[True, False]) + num.all(cy, where=[True, False]), np.all(y, where=[True, False]) ) assert num.array_equal( num.any(cy, where=[[True], [False]]), - np.any(x, where=[[True], [False]]), + np.any(y, where=[[True], [False]]), + ) + + # Where is a boolean + assert num.array_equal(num.all(cy, where=True), np.all(y, where=True)) + assert num.array_equal( + num.any(cy, where=False), + np.any(y, where=False), ) diff --git a/tests/integration/test_nan_reduction.py b/tests/integration/test_nan_reduction.py index 34bbd1447..d3b5fd7e9 100644 --- a/tests/integration/test_nan_reduction.py +++ b/tests/integration/test_nan_reduction.py @@ -291,6 +291,25 @@ def test_all_nans_nansum(self, ndim): assert out_num == 0.0 + def test_where(self): + arr = [[1, np.nan, 3], [2, np.nan, 4]] + out_np = np.nansum(arr, where=[False, True, True]) + out_num = num.nansum(arr, where=[False, True, True]) + assert np.allclose(out_np, out_num) + + out_np = np.nanprod(arr, where=[False, True, True]) + out_num = num.nanprod(arr, where=[False, True, True]) + assert np.allclose(out_np, out_num) + + # where is a boolean + out_np = np.nanmax(arr, where=True) + out_num = num.nanmax(arr, where=True) + assert np.allclose(out_np, out_num) + + out_np = np.nanmin(arr, where=True) + out_num = num.nanmin(arr, where=True) + assert np.allclose(out_np, out_num) + class TestCornerCases: """ diff --git a/tests/integration/test_prod.py b/tests/integration/test_prod.py index ab0f4def8..c004c95a3 100644 --- a/tests/integration/test_prod.py +++ b/tests/integration/test_prod.py @@ -147,15 +147,21 @@ def test_initial_empty_array(self): out_np = np.prod(arr_np, initial=initial_value) assert allclose(out_np, out_num) - @pytest.mark.xfail def test_where(self): arr = [[1, 2], [3, 4]] - out_np = np.prod(arr, where=[False, True]) # return 8 - # cuNumeric raises NotImplementedError: - # the `where` parameter is currently not supported + out_np = np.prod(arr, where=[False, True]) out_num = num.prod(arr, where=[False, True]) assert allclose(out_np, out_num) + # where is boolean + out_np = np.prod(arr, where=True) + out_num = num.prod(arr, where=True) + assert allclose(out_np, out_num) + + out_np = np.prod(arr, where=False) + out_num = num.prod(arr, where=False) + assert allclose(out_np, out_num) + class TestProdPositive(object): """ diff --git a/tests/integration/test_reduction.py b/tests/integration/test_reduction.py index a7a89a6af..f3379265b 100644 --- a/tests/integration/test_reduction.py +++ b/tests/integration/test_reduction.py @@ -151,15 +151,21 @@ def test_initial_empty_array(self): out_np = np.sum(arr_np, initial=initial_value) # return initial_value assert allclose(out_np, out_num) - @pytest.mark.xfail def test_where(self): arr = [[1, 2], [3, 4]] out_np = np.sum(arr, where=[False, True]) # return 6 - # cuNumeric raises NotImplementedError: - # "the `where` parameter is currently not supported" out_num = num.sum(arr, where=[False, True]) assert allclose(out_np, out_num) + # where is a boolean + out_np = np.sum(arr, where=True) + out_num = num.sum(arr, where=True) + assert allclose(out_np, out_num) + + out_np = np.sum(arr, where=False) + out_num 
= num.sum(arr, where=False) + assert allclose(out_np, out_num) + class TestSumPositive(object): """ From 3071edde7e518d87333771ed97450197fa0a7255 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 18 Oct 2023 10:51:05 -0700 Subject: [PATCH 18/66] adding nanmean to docs --- docs/cunumeric/source/api/statistics.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cunumeric/source/api/statistics.rst b/docs/cunumeric/source/api/statistics.rst index 7d844d788..ef3056da8 100644 --- a/docs/cunumeric/source/api/statistics.rst +++ b/docs/cunumeric/source/api/statistics.rst @@ -10,6 +10,7 @@ Averages and variances :toctree: generated/ mean + nanmean Histograms From 67aab76159442fdc0b0ed8896c809fa699fe3b68 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 18 Oct 2023 12:53:22 -0700 Subject: [PATCH 19/66] nanmean is not an official method of ndarray --- cunumeric/array.py | 18 +----------------- cunumeric/module.py | 2 +- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 6c40767b4..03810a3b7 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3179,7 +3179,7 @@ def mean( return sum_array @add_boilerplate() - def nanmean( + def _nanmean( self, axis: Any = None, dtype: Union[np.dtype[Any], None] = None, @@ -3187,22 +3187,6 @@ def nanmean( keepdims: bool = False, where: Union[ndarray, None] = None, ) -> ndarray: - """a.mean(axis=None, dtype=None, out=None, keepdims=False) - - Returns the average of the array elements along given axis, - ignoring NaN entires - - Refer to :func:`cunumeric.mean` for full documentation. - - See Also - -------- - cunumeric.nanmean : equivalent function - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ from . import _ufunc if np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_): diff --git a/cunumeric/module.py b/cunumeric/module.py index b6363eef5..146fcd462 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -7117,7 +7117,7 @@ def nanmean( -------- Multiple GPUs, Multiple CPUs """ - return a.nanmean( + return a._nanmean( axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where ) From 09a4477b8f1a9bc7f3dea0ce8457146474670f06 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 18 Oct 2023 14:44:10 -0700 Subject: [PATCH 20/66] fixing test --- tests/integration/test_nan_reduction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_nan_reduction.py b/tests/integration/test_nan_reduction.py index d3b5fd7e9..b764dbf93 100644 --- a/tests/integration/test_nan_reduction.py +++ b/tests/integration/test_nan_reduction.py @@ -302,12 +302,12 @@ def test_where(self): assert np.allclose(out_np, out_num) # where is a boolean - out_np = np.nanmax(arr, where=True) - out_num = num.nanmax(arr, where=True) + out_np = np.nanmax(arr, where=True, initial=-1) + out_num = num.nanmax(arr, where=True, initial=-1) assert np.allclose(out_np, out_num) - out_np = np.nanmin(arr, where=True) - out_num = num.nanmin(arr, where=True) + out_np = np.nanmin(arr, where=True, initial=10) + out_num = num.nanmin(arr, where=True, initial=10) assert np.allclose(out_np, out_num) From 851dd42ceb15c6c157f11fe9e382f5e0ba8551aa Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 10:57:20 -0800 Subject: [PATCH 21/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Jacob Faibussowitsch --- src/cunumeric/unary/scalar_unary_red_template.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index a061105ab..d30c93154 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -185,7 +185,7 @@ struct ScalarUnaryRed { } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { auto p = pitches.unflatten(idx, origin); - if (whereptr[idx] == true) + if (whereptr[idx]) OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); } else { if (whereptr[idx] == true) OP::template fold(lhs, OP::convert(inptr[idx], identity)); From 83308a65e31281d230eaac80b8963597587ce86e Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 10:57:51 -0800 Subject: [PATCH 22/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Jacob Faibussowitsch --- src/cunumeric/unary/scalar_unary_red_template.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index d30c93154..a8bc5272f 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -188,7 +188,7 @@ struct ScalarUnaryRed { if (whereptr[idx]) OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); } else { - if (whereptr[idx] == true) OP::template fold(lhs, OP::convert(inptr[idx], identity)); + if (whereptr[idx]) OP::template fold(lhs, OP::convert(inptr[idx], identity)); } } From df70b28de1a48b88db9cd0ae6fc6d61f0453f11d Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 10:58:40 -0800 Subject: [PATCH 23/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Jacob Faibussowitsch --- src/cunumeric/unary/scalar_unary_red_template.inl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index a8bc5272f..251d5ae3f 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -216,11 +216,12 @@ struct ScalarUnaryRed { if constexpr (KIND != VariantKind::GPU) { // Check to see if this is dense or not if (dense) { - return ScalarReductionPolicy()(volume, out, identity, *this); + ScalarReductionPolicy()(volume, out, identity, *this); + return; } } #endif - return ScalarReductionPolicy()(volume, out, identity, *this); + ScalarReductionPolicy()(volume, out, identity, *this); } }; From 7070d9f8e8eff697777864edba7912a6bc4474fa Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 10:59:28 -0800 Subject: [PATCH 24/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Jacob Faibussowitsch --- src/cunumeric/unary/scalar_unary_red_template.inl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 251d5ae3f..b1b3a6729 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -264,8 +264,9 @@ static void scalar_unary_red_template(TaskContext& context) bool has_where = scalars[2].value(); size_t start_idx = has_where ? 
2 : 1; std::vector extra_args; + extra_args.reserve(inputs.size()); for (size_t idx = start_idx; idx < inputs.size(); ++idx) - extra_args.push_back(std::move(inputs[idx])); + extra_args.emplace_back(std::move(inputs[idx])); // If the RHS was a scalar, use (1,) as the shape if (shape.dim == 0) { shape.dim = 1; From c00661a434fd1c9af1117657882ef5cb0ed66ba9 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:14:41 -0800 Subject: [PATCH 25/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 03810a3b7..9a9c8d10f 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3154,7 +3154,7 @@ def mean( if where_array is not None: divisor = where_array._count_nonzero() else: - divisor = reduce(lambda x, y: x * y, self.shape, 1) + divisor = np.array(reduce(lambda x, y: x * y, self.shape, 1)) else: if where_array is not None: From ab3bb104a98b752444d9927899b30015359a9465 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:14:51 -0800 Subject: [PATCH 26/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/scalar_unary_red_template.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index b1b3a6729..b0b8b8f86 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -273,7 +273,7 @@ static void scalar_unary_red_template(TaskContext& context) shape[0] = 1; } - if (has_where == true) { + if (has_where) { ScalarUnaryRedArgs args{context.reductions()[0], inputs[0], inputs[1], From df5248e709841643dadf27b258bb985d87b9c844 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:15:37 -0800 Subject: [PATCH 27/66] Update src/cunumeric/unary/unary_red.cu Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/unary_red.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index de82f3583..b89905903 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -202,7 +202,7 @@ std::ostream& operator<<(std::ostream& os, const ThreadBlocks& blocks) } template -static void __device__ collapse_dims(LHS& result, +static void __device__ __forceinline__ collapse_dims(LHS& result, Point& point, const Rect& domain, int32_t collapsed_dim, From 8b3d0b716f67b9b3ca704d8af8820d22b71e6ce4 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:15:55 -0800 Subject: [PATCH 28/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 9a9c8d10f..271dfe92d 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3166,7 +3166,7 @@ def mean( sum_array = sum_array.astype(dtype) if dtype.kind == "f" or dtype.kind == "c": sum_array.__itruediv__( - np.array(divisor, dtype=dtype), + divisor.astype(dtype), ) else: sum_array.__ifloordiv__(np.array(divisor, dtype=dtype)) From 2a2ead5a6181dcc11c38089329430f7f72a44d28 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:16:08 -0800 Subject: [PATCH 29/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 271dfe92d..f0eb480e5 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3169,7 +3169,7 @@ def mean( divisor.astype(dtype), ) else: - sum_array.__ifloordiv__(np.array(divisor, dtype=dtype)) + sum_array.__ifloordiv__(divisor.astype(dtype)) # Convert to the output we didn't already put it there if out is not None and sum_array is not out: assert out.dtype != sum_array.dtype From 329bf5ac13e0546b8f1542f74958d2c7608aabf5 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:16:19 -0800 Subject: [PATCH 30/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index f0eb480e5..60e8cf2b2 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3160,7 +3160,7 @@ def mean( if where_array is not None: divisor = where_array._count_nonzero(axis=axis) else: - divisor = self.shape[axis] + divisor = np.array(self.shape[axis]) # Divide by the number of things in the collapsed dimensions # Pick the right kinds of division based on the dtype sum_array = sum_array.astype(dtype) From dcce884102a1d2dc0ec4210ead38e12c365545bf Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:23:01 -0800 Subject: [PATCH 31/66] Update cunumeric/eager.py Co-authored-by: Manolis Papadakis --- cunumeric/eager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/eager.py b/cunumeric/eager.py index 20d18cd10..e9cce7db4 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -1499,7 +1499,7 @@ def unary_reduction( initial, ) return - if not where: + if where is None: where = True if op in _UNARY_RED_OPS_WITH_ARG: fn = _UNARY_RED_OPS_WITH_ARG[op] From a50f165c9c4af931488d7e71adea519a230ff34d Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:24:07 -0800 Subject: [PATCH 32/66] Update cunumeric/module.py Co-authored-by: Manolis Papadakis --- cunumeric/module.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index 146fcd462..7fc7fe28d 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -7093,11 +7093,6 @@ def nanmean( in the result as dimensions with size one. With this option, the result will broadcast correctly against the input array. - If the default value is passed, then `keepdims` will not be - passed through to the `mean` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. where : array_like of bool, optional Elements to include in the mean. 
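The review fixes above converge on one invariant for `mean`: the divisor is the number of elements that actually participate, and it is kept as an array so that the later `.astype(dtype)` call works uniformly whether it came from a nonzero count or from shape arithmetic. A standalone sketch of that divisor logic with plain NumPy stand-ins (`mean_divisor` is a hypothetical helper, not a function in the patches):

```python
# Hypothetical helper mirroring the divisor logic that the review fixes
# above settle on for ndarray.mean; plain NumPy, not cuNumeric internals.
import numpy as np
from functools import reduce

def mean_divisor(shape, axis=None, where=None):
    # With a mask: count the True entries of the broadcast mask.
    # Without one: the element count of the reduced dimension(s).
    # Wrapping in np.array keeps a uniform type that supports .astype().
    if where is not None:
        mask = np.broadcast_to(where, shape)
        return np.array(np.count_nonzero(mask, axis=axis))
    if axis is None:
        return np.array(reduce(lambda x, y: x * y, shape, 1))
    return np.array(shape[axis])

a = np.arange(6.0).reshape(2, 3)
mask = np.array([[True, False, True],
                 [True, True, False]])
div = mean_divisor(a.shape, axis=0, where=mask)
total = np.sum(a, axis=0, where=mask)
assert np.allclose(total / div.astype(a.dtype),
                   np.mean(a, axis=0, where=mask))
```

Floating and complex dtypes then take the true-division path while integer dtypes use floor division, matching the `__itruediv__`/`__ifloordiv__` split in the diff.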
From c52e0c58f436e6704265d6bddd473684b782ad4b Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:26:08 -0800 Subject: [PATCH 33/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 60e8cf2b2..0a362a166 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3181,7 +3181,7 @@ def mean( @add_boilerplate() def _nanmean( self, - axis: Any = None, + axis: Optional[Union[int, tuple[int, ...]]] = None, dtype: Union[np.dtype[Any], None] = None, out: Union[ndarray, None] = None, keepdims: bool = False, From f7235b1ccce83935b870bfcdcd92961d6d6a12fd Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:30:34 -0800 Subject: [PATCH 34/66] Update cunumeric/module.py Co-authored-by: Manolis Papadakis --- cunumeric/module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index 7fc7fe28d..a59a7a0b4 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -7100,7 +7100,7 @@ def nanmean( Returns ------- m : ndarray - If `out=None`, returns a new array of the same dtype a above + If `out=None`, returns a new array of the same dtype as a above containing the mean values, otherwise a reference to the output array is returned. From 568dfc529c8c7d23de57f319c98f9525473c58fc Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 11:45:08 -0800 Subject: [PATCH 35/66] updating documentation + fixing mypy issue --- cunumeric/array.py | 4 ++-- cunumeric/module.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 0a362a166..d30569135 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3152,13 +3152,13 @@ def mean( if axis is None: if where_array is not None: - divisor = where_array._count_nonzero() + divisor = np.array(where_array._count_nonzero()) else: divisor = np.array(reduce(lambda x, y: x * y, self.shape, 1)) else: if where_array is not None: - divisor = where_array._count_nonzero(axis=axis) + divisor = np.array(where_array._count_nonzero(axis=axis)) else: divisor = np.array(self.shape[axis]) # Divide by the number of things in the collapsed dimensions # Pick the right kinds of division based on the dtype sum_array = sum_array.astype(dtype) diff --git a/cunumeric/module.py b/cunumeric/module.py index a59a7a0b4..e633dcbf3 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -7033,7 +7033,7 @@ def mean( Returns ------- m : ndarray - If `out=None`, returns a new array of the same dtype a above + If `out is None`, returns a new array of the same dtype as a above containing the mean values, otherwise a reference to the output array is returned. @@ -7100,7 +7100,7 @@ def nanmean( Returns ------- m : ndarray - If `out=None`, returns a new array of the same dtype as a above + If `out is None`, returns a new array of the same dtype as a above containing the mean values, otherwise a reference to the output array is returned.
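The divisor bookkeeping refined in PATCHes 33 through 35 above follows NumPy's contract for `mean` with a `where` mask: the masked sum is divided by the number of selected elements, not by the full element count, which is what the `_count_nonzero()` calls compute. A small self-contained check of that contract against plain NumPy (this assumes NumPy 1.20 or newer, where reductions accept the `where` keyword; it is not cuNumeric code):

    import numpy as np

    a = np.array([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])
    w = np.array([[True, False, True],
                  [True, True, False]])

    # The sum over selected elements, divided by how many were selected.
    expected = a.sum(where=w) / np.count_nonzero(w)
    print(np.isclose(a.mean(where=w), expected))   # True: 13.0 / 4 == 3.25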
From 67abe0ca585daebefc1f5c367fb0300095c840c1 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 13:47:12 -0800 Subject: [PATCH 36/66] cleaning up some of the C++ code --- src/cunumeric/unary/scalar_unary_red.h | 1 - .../unary/scalar_unary_red_template.inl | 90 +++++++------------ src/cunumeric/unary/unary_red_template.inl | 23 ++--- 3 files changed, 41 insertions(+), 73 deletions(-) diff --git a/src/cunumeric/unary/scalar_unary_red.h b/src/cunumeric/unary/scalar_unary_red.h index aa7925cf9..570c0d605 100644 --- a/src/cunumeric/unary/scalar_unary_red.h +++ b/src/cunumeric/unary/scalar_unary_red.h @@ -25,7 +25,6 @@ struct ScalarUnaryRedArgs { const Array& out; const Array& in; const Array& where; - bool has_where; UnaryRedCode op_code; legate::DomainPoint shape; std::vector args; diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index b0b8b8f86..d339f557a 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -34,7 +34,7 @@ struct ScalarUnaryRed { }; template -struct ScalarUnaryRed { +struct ScalarUnaryRed { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using LHS = typename OP::VAL; @@ -122,7 +122,7 @@ struct ScalarUnaryRed { }; template -struct ScalarUnaryRed { +struct ScalarUnaryRed { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using LHS = typename OP::VAL; @@ -158,35 +158,24 @@ struct ScalarUnaryRed { out = args.out.reduce_accessor(); if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar(); } - if ((args.has_where == true) && (OP_CODE != UnaryRedCode::CONTAINS)) { - where = args.where.read_accessor(rect); + where = args.where.read_accessor(rect); #ifndef LEGATE_BOUNDS_CHECKS - if (in.accessor.is_dense_row_major(rect) && where.accessor.is_dense_row_major(rect)) { - dense = true; - inptr = in.ptr(rect); - whereptr = where.ptr(rect); - } -#endif - } else { -#ifndef LEGATE_BOUNDS_CHECKS - // Check to see if this is dense or not - if (in.accessor.is_dense_row_major(rect)) { - dense = true; - inptr = in.ptr(rect); - } -#endif + if (in.accessor.is_dense_row_major(rect) && where.accessor.is_dense_row_major(rect)) { + dense = true; + inptr = in.ptr(rect); + whereptr = where.ptr(rect); } +#endif } __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, DenseReduction) const noexcept { if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - if (inptr[idx] == to_find) { lhs = true; } + if (whereptr[idx] && (inptr[idx] == to_find)) { lhs = true; } } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { auto p = pitches.unflatten(idx, origin); - if (whereptr[idx]) - OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); + if (whereptr[idx]) OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); } else { if (whereptr[idx]) OP::template fold(lhs, OP::convert(inptr[idx], identity)); } @@ -194,16 +183,14 @@ struct ScalarUnaryRed { __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, SparseReduction) const noexcept { + auto p = pitches.unflatten(idx, origin); if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - auto point = pitches.unflatten(idx, origin); - if (in[point] == to_find) { lhs = true; } + if (where[p] && (in[p] == to_find)) { lhs = true; } } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == 
UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { - auto p = pitches.unflatten(idx, origin); - if (where[p] == true) OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); + if (where[p]) OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); } else { - auto p = pitches.unflatten(idx, origin); - if (where[p] == true) OP::template fold(lhs, OP::convert(in[p], identity)); + if (where[p]) OP::template fold(lhs, OP::convert(in[p], identity)); } } @@ -216,29 +203,24 @@ struct ScalarUnaryRed { if constexpr (KIND != VariantKind::GPU) { // Check to see if this is dense or not if (dense) { - ScalarReductionPolicy()(volume, out, identity, *this); - return; + ScalarReductionPolicy()(volume, out, identity, *this); + return; } } #endif - ScalarReductionPolicy()(volume, out, identity, *this); + ScalarReductionPolicy()(volume, out, identity, *this); } }; -template +template struct ScalarUnaryRedImpl { template void operator()(ScalarUnaryRedArgs& args) const { // The operation is always valid for contains if constexpr (UnaryRedOp::valid || OP_CODE == UnaryRedCode::CONTAINS) { - if (args.has_where) { - ScalarUnaryRed red(args); - red.execute(); - } else { - ScalarUnaryRed red(args); - red.execute(); - } + ScalarUnaryRed red(args); + red.execute(); } } }; @@ -246,10 +228,13 @@ struct ScalarUnaryRedImpl { template struct ScalarUnaryRedDispatch { template - void operator()(ScalarUnaryRedArgs& args) const + void operator()(ScalarUnaryRedArgs& args, bool has_where) const { auto dim = std::max(1, args.in.dim()); - double_dispatch(dim, args.in.code(), ScalarUnaryRedImpl{}, args); + if (has_where) + double_dispatch(dim, args.in.code(), ScalarUnaryRedImpl{}, args); + else + double_dispatch(dim, args.in.code(), ScalarUnaryRedImpl{}, args); } }; @@ -266,28 +251,21 @@ static void scalar_unary_red_template(TaskContext& context) std::vector extra_args; extra_args.reserve(inputs.size()); for (size_t idx = start_idx; idx < inputs.size(); ++idx) - extra_args.emplace_back(std::move(inputs[idx])); + extra_args.emplace_back(std::move(inputs[idx])); // If the RHS was a scalar, use (1,) as the shape if (shape.dim == 0) { shape.dim = 1; shape[0] = 1; } - if (has_where) { - ScalarUnaryRedArgs args{context.reductions()[0], - inputs[0], - inputs[1], - has_where, - op_code, - shape, - std::move(extra_args)}; - op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); - } else { - Array where; - ScalarUnaryRedArgs args{ - context.reductions()[0], inputs[0], where, has_where, op_code, shape, std::move(extra_args)}; - op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args); - } + Array dummy_where; + ScalarUnaryRedArgs args{context.reductions()[0], + inputs[0], + has_where ? 
inputs[1] : dummy_where, + op_code, + shape, + std::move(extra_args)}; + op_dispatch(args.op_code, ScalarUnaryRedDispatch{}, args, has_where); } } // namespace cunumeric diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl index 3c77fdd50..2a0224370 100644 --- a/src/cunumeric/unary/unary_red_template.inl +++ b/src/cunumeric/unary/unary_red_template.inl @@ -89,22 +89,13 @@ static void unary_red_template(TaskContext& context) auto& reductions = context.reductions(); auto& scalars = context.scalars(); bool has_where = scalars[2].value(); - if (has_where) { - UnaryRedArgs args{reductions[0], - inputs[0], - inputs[1], - scalars[0].value(), - scalars[1].value()}; - op_dispatch(args.op_code, UnaryRedDispatch{}, args); - } else { - Array where; - UnaryRedArgs args{reductions[0], - inputs[0], - where, - scalars[0].value(), - scalars[1].value()}; - op_dispatch(args.op_code, UnaryRedDispatch{}, args); - } + Array dummy_where; + UnaryRedArgs args{reductions[0], + inputs[0], + has_where ? inputs[1] : dummy_where, + scalars[0].value(), + scalars[1].value()}; + op_dispatch(args.op_code, UnaryRedDispatch{}, args); } } // namespace cunumeric From bb8a46274544b546e846d39f782c3f84a2dc98e3 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 9 Nov 2023 13:54:48 -0800 Subject: [PATCH 37/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index d30569135..1b2a73038 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3194,30 +3194,14 @@ def _nanmean( axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where ) - if dtype is None: - dtype = self.dtype - nan_mask = _ufunc.bit_twiddling.bitwise_not( _ufunc.floating.isnan(self) ) - normalizer = nan_mask.sum(axis=axis, keepdims=keepdims, where=where) - sum_array = self._nansum(axis, keepdims=keepdims, where=where) - sum_array = sum_array.astype(dtype) - if dtype.kind == "f" or dtype.kind == "c": - sum_array.__itruediv__( - np.array(normalizer, dtype=dtype), - ) - else: - sum_array.__ifloordiv__(np.array(normalizer, dtype=dtype)) - - if out is not None and sum_array is not out: - if out.dtype != sum_array.dtype: - out._thunk.convert(sum_array._thunk) - else: - out = sum_array - return out - else: - return sum_array + if where is not None: + nan_mask &= where + return self.mean( + axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=nan_mask + ) @add_boilerplate() def min( From 186af6f078b8bf046e864e666ab08c2949312fe2 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 10:46:16 -0800 Subject: [PATCH 38/66] fixing the logic for nanmean after previous commit --- cunumeric/array.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 1b2a73038..2434360a2 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3111,6 +3111,7 @@ def mean( out: Union[ndarray, None] = None, keepdims: bool = False, where: Union[ndarray, None] = None, + ignore_nan: bool = False, ) -> ndarray: """a.mean(axis=None, dtype=None, out=None, keepdims=False) @@ -3141,14 +3142,19 @@ def mean( where_array = broadcast_where(where, self.shape) # Do the sum - if out is not None and out.dtype == dtype: - sum_array = self.sum( + sum_array = ( + self._nansum( axis=axis, out=out, keepdims=keepdims, where=where_array ) - else: - sum_array = self.sum( - axis=axis, 
keepdims=keepdims, where=where_array + axis=axis, out=out, keepdims=keepdims, where=where_array ) + if out is not None and out.dtype == dtype and ignore_nan + else self.sum( + axis=axis, out=out, keepdims=keepdims, where=where_array ) + if out is not None and out.dtype == dtype + else self._nansum(axis=axis, keepdims=keepdims, where=where_array) + if ignore_nan + else self.sum(axis=axis, keepdims=keepdims, where=where_array) + ) if axis is None: if where_array is not None: @@ -3158,9 +3164,12 @@ def mean( else: if where_array is not None: - divisor = np.array(where_array._count_nonzero(axis=axis)) + divisor = np.array( + where_array.sum(axis=axis, keepdims=keepdims) + ) else: divisor = np.array(self.shape[axis]) + # Divide by the number of things in the collapsed dimensions # Pick the right kinds of division based on the dtype sum_array = sum_array.astype(dtype) @@ -3200,7 +3209,12 @@ def _nanmean( if where is not None: nan_mask &= where return self.mean( - axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=nan_mask + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + where=nan_mask, + ignore_nan=True, ) @add_boilerplate() From 2729b55b32ef8cea02cdd1d2cde2900b838d9240 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 11:38:42 -0800 Subject: [PATCH 39/66] cleaning up scalar_unary_red_template.inl --- .../unary/scalar_unary_red_template.inl | 126 +++--------------- 1 file changed, 21 insertions(+), 105 deletions(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index d339f557a..11d0bbe12 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -30,17 +30,13 @@ namespace cunumeric { using namespace legate; template struct ScalarUnaryRed { - ScalarUnaryRed(ScalarUnaryRedArgs& args) { assert(false); } -}; - -template -struct ScalarUnaryRed { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using LHS = typename OP::VAL; using RHS = legate_type_of; using OUT = AccessorRD; using IN = AccessorRO; + using WHERE = AccessorRO; IN in; const RHS* inptr; @@ -52,6 +48,8 @@ struct ScalarUnaryRed { Point shape; RHS to_find; bool dense; + WHERE where; + const bool* whereptr; struct DenseReduction {}; struct SparseReduction {}; @@ -67,40 +65,49 @@ struct ScalarUnaryRed { out = args.out.reduce_accessor(); if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar(); } + if constexpr (HAS_WHERE) where = args.where.read_accessor(rect); #ifndef LEGATE_BOUNDS_CHECKS // Check to see if this is dense or not if (in.accessor.is_dense_row_major(rect)) { dense = true; inptr = in.ptr(rect); } + if constexpr (HAS_WHERE) { + dense = dense && where.accessor.is_dense_row_major(rect); + if (dense) whereptr = where.ptr(rect); + } #endif } __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, DenseReduction) const noexcept { + bool mask = true; + if constexpr (HAS_WHERE) mask = (whereptr[idx] == true); + if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - if (inptr[idx] == to_find) { lhs = true; } + if (mask && (inptr[idx] == to_find)) { lhs = true; } } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { auto p = pitches.unflatten(idx, origin); - OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); + if (mask) OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); } else { - OP::template fold(lhs, OP::convert(inptr[idx], identity)); + if (mask) OP::template fold(lhs,
OP::convert(inptr[idx], identity)); } } __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, SparseReduction) const noexcept { + auto p = pitches.unflatten(idx, origin); + bool mask = true; + if constexpr (HAS_WHERE) mask = (where[p] == true); + if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - auto point = pitches.unflatten(idx, origin); - if (in[point] == to_find) { lhs = true; } + if (mask && (in[p] == to_find)) { lhs = true; } } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { - auto p = pitches.unflatten(idx, origin); - OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); + if (mask) OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); } else { - auto p = pitches.unflatten(idx, origin); - OP::template fold(lhs, OP::convert(in[p], identity)); + if (mask) OP::template fold(lhs, OP::convert(in[p], identity)); } } @@ -121,97 +128,6 @@ struct ScalarUnaryRed { } }; -template -struct ScalarUnaryRed { - using OP = UnaryRedOp; - using LG_OP = typename OP::OP; - using LHS = typename OP::VAL; - using RHS = legate_type_of; - using OUT = AccessorRD; - using IN = AccessorRO; - using WHERE = AccessorRO; - - IN in; - const RHS* inptr; - OUT out; - size_t volume; - Pitches pitches; - Rect rect; - Point origin; - Point shape; - RHS to_find; - bool dense; - WHERE where; - const bool* whereptr; - - struct DenseReduction {}; - struct SparseReduction {}; - - ScalarUnaryRed(ScalarUnaryRedArgs& args) : dense(false) - { - rect = args.in.shape(); - origin = rect.lo; - in = args.in.read_accessor(rect); - volume = pitches.flatten(rect); - shape = args.shape; - - out = args.out.reduce_accessor(); - if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar(); } - - where = args.where.read_accessor(rect); -#ifndef LEGATE_BOUNDS_CHECKS - if (in.accessor.is_dense_row_major(rect) && where.accessor.is_dense_row_major(rect)) { - dense = true; - inptr = in.ptr(rect); - whereptr = where.ptr(rect); - } -#endif - } - - __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, DenseReduction) const noexcept - { - if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - if (whereptr[idx] && (inptr[idx] == to_find)) { lhs = true; } - } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || - OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { - auto p = pitches.unflatten(idx, origin); - if (whereptr[idx]) OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); - } else { - if (whereptr[idx]) OP::template fold(lhs, OP::convert(inptr[idx], identity)); - } - } - - __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, SparseReduction) const noexcept - { - auto p = pitches.unflatten(idx, origin); - if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - if (where[p] && (in[p] == to_find)) { lhs = true; } - } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || - OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { - if (where[p]) OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); - } else { - if (where[p]) OP::template fold(lhs, OP::convert(in[p], identity)); - } - } - - void execute() const noexcept - { - auto identity = LG_OP::identity; -#ifndef LEGATE_BOUNDS_CHECKS - // The constexpr if here prevents the DenseReduction from being instantiated for GPU kernels - // which limits compile times and binary sizes. 
- if constexpr (KIND != VariantKind::GPU) { - // Check to see if this is dense or not - if (dense) { - ScalarReductionPolicy()(volume, out, identity, *this); - return; - } - } -#endif - ScalarReductionPolicy()(volume, out, identity, *this); - } -}; - template struct ScalarUnaryRedImpl { template void operator()(ScalarUnaryRedArgs& args) const From e67537474bdce7b3ad618b758c0eed6b00c87859 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 11:54:44 -0800 Subject: [PATCH 40/66] cleaning up unary_red_* files --- src/cunumeric/unary/unary_red.cc | 30 +++---------- src/cunumeric/unary/unary_red.cu | 51 ++++++---------------- src/cunumeric/unary/unary_red_omp.cc | 35 +++------------ src/cunumeric/unary/unary_red_template.inl | 11 ++--- 4 files changed, 28 insertions(+), 99 deletions(-) diff --git a/src/cunumeric/unary/unary_red.cc b/src/cunumeric/unary/unary_red.cc index ddfdf7a59..bdd1c7012 100644 --- a/src/cunumeric/unary/unary_red.cc +++ b/src/cunumeric/unary/unary_red.cc @@ -21,29 +21,8 @@ namespace cunumeric { using namespace legate; -template -struct UnaryRedImplBody { - using OP = UnaryRedOp; - using LG_OP = typename OP::OP; - using RHS = legate_type_of; - - void operator()(AccessorRD lhs, - AccessorRO rhs, - const Rect& rect, - const Pitches& pitches, - int collapsed_dim, - size_t volume) const - { - for (size_t idx = 0; idx < volume; ++idx) { - auto point = pitches.unflatten(idx, rect.lo); - auto identity = LG_OP::identity; - lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); - } - } -}; - -template -struct UnaryRedImplBodyWhere { +template +struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using RHS = legate_type_of; @@ -59,8 +38,9 @@ struct UnaryRedImplBodyWhere { for (size_t idx = 0; idx < volume; ++idx) { auto point = pitches.unflatten(idx, rect.lo); auto identity = LG_OP::identity; - if (where[point] == true) - lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + bool mask = true; + if constexpr (HAS_WHERE) mask = (where[point] == true); + if (mask) lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); } } }; diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index b89905903..7127046a6 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -347,35 +347,8 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) if (result != identity) out.reduce(point, result); } -template -struct UnaryRedImplBody { - using OP = UnaryRedOp; - using LG_OP = typename OP::OP; - using RHS = legate_type_of; - using LHS = typename OP::VAL; - - void operator()(AccessorRD lhs, - AccessorRO rhs, - const Rect& rect, - const Pitches& pitches, - int collapsed_dim, - size_t volume) const - { - auto Kernel = reduce_with_rd_acc; - auto stream = get_cached_stream(); - - ThreadBlocks blocks; - blocks.initialize(rect, collapsed_dim); - - blocks.compute_maximum_concurrency(reinterpret_cast(Kernel)); - Kernel<<>>( - lhs, rhs, LG_OP::identity, blocks, rect, collapsed_dim); - CHECK_CUDA_STREAM(stream); - } -};
-template -struct UnaryRedImplBodyWhere { +template +struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using RHS = legate_type_of; @@ -389,15 +362,19 @@ struct UnaryRedImplBodyWhere { int collapsed_dim, size_t volume) const { - auto Kernel = reduce_with_rd_acc_where; + auto Kernel = reduce_with_rd_acc; auto stream = get_cached_stream(); ThreadBlocks blocks; blocks.initialize(rect, collapsed_dim); blocks.compute_maximum_concurrency(reinterpret_cast(Kernel)); - Kernel<<>>( - lhs, rhs, where, LG_OP::identity, blocks, rect, collapsed_dim); + if constexpr (HAS_WHERE) + Kernel<<>>( + lhs, rhs, where, LG_OP::identity, blocks, rect, collapsed_dim); + else + Kernel<<>>( + lhs, rhs, LG_OP::identity, blocks, rect, collapsed_dim); CHECK_CUDA_STREAM(stream); } }; diff --git a/src/cunumeric/unary/unary_red_omp.cc b/src/cunumeric/unary/unary_red_omp.cc index fdded9b68..5074031df 100644 --- a/src/cunumeric/unary/unary_red_omp.cc +++ b/src/cunumeric/unary/unary_red_omp.cc @@ -72,34 +72,8 @@ class Splitter { size_t pitches_[DIM]; }; -template -struct UnaryRedImplBody { - using OP = UnaryRedOp; - using LG_OP = typename OP::OP; - using RHS = legate_type_of; - - void operator()(AccessorRD lhs, - AccessorRO rhs, - const Rect& rect, - const Pitches& pitches, - int collapsed_dim, - size_t volume) const - { - Splitter splitter; - auto split = splitter.split(rect, collapsed_dim); - -#pragma omp parallel for schedule(static) - for (size_t o_idx = 0; o_idx < split.outer; ++o_idx) - for (size_t i_idx = 0; i_idx < split.inner; ++i_idx) { - auto point = splitter.combine(o_idx, i_idx, rect.lo); - auto identity = LG_OP::identity; - lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); - } - } -}; - -template -struct UnaryRedImplBodyWhere { +template +struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using RHS = legate_type_of; @@ -120,8 +94,9 @@ struct UnaryRedImplBodyWhere { for (size_t i_idx = 0; i_idx < split.inner; ++i_idx) { auto point = splitter.combine(o_idx, i_idx, rect.lo); auto identity = LG_OP::identity; - if (where[point] == true) - lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + bool mask = true; + if constexpr (HAS_WHERE) mask = (where[point] == true); + if (mask) lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); } } }; diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl index 2a0224370..eecc45ddf 100644 --- a/src/cunumeric/unary/unary_red_template.inl +++ b/src/cunumeric/unary/unary_red_template.inl @@ -27,12 +27,9 @@ namespace cunumeric { using namespace legate; -template +template struct UnaryRedImplBody; -template -struct UnaryRedImplBodyWhere; - template struct UnaryRedImpl { template (rect); - UnaryRedImplBodyWhere()( + UnaryRedImplBody()( lhs, rhs, where, rect, pitches, args.collapsed_dim, volume); } else { - UnaryRedImplBody()( - lhs, rhs, rect, pitches, args.collapsed_dim, volume); + UnaryRedImplBody()( + lhs, rhs, AccessorRO(), rect, pitches, args.collapsed_dim, volume); } } From c17f3d2718bb51ccbb3a00ce9b45e61cf31c29c9 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 13:07:17 -0800 Subject: [PATCH 41/66] removing convert_to_predicate_ndarray --- cunumeric/array.py | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 2434360a2..852f9d34f 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -130,13 
+130,11 @@ def wrapper(*args: Any, **kwargs: Any) -> R: for k, v in kwargs.items(): if v is None: continue - elif k == "where": - kwargs[k] = convert_to_predicate_ndarray(v) elif k == "out": kwargs[k] = convert_to_cunumeric_ndarray(v, share=True) if not kwargs[k].flags.writeable: raise ValueError("out is not writeable") - elif k in keys: + elif (k in keys) or (k == "where"): kwargs[k] = convert_to_cunumeric_ndarray(v) return func(*args, **kwargs) @@ -158,13 +156,6 @@ def convert_to_cunumeric_ndarray(obj: Any, share: bool = False) -> ndarray: return ndarray(shape=None, thunk=thunk, writeable=writeable) -def convert_to_predicate_ndarray(obj: Any) -> Any: - # Keep all boolean types and None as they are - if obj is True or obj is False or obj is None: - return obj - return convert_to_cunumeric_ndarray(obj) - - def maybe_convert_to_np_ndarray(obj: Any) -> Any: """ Converts cuNumeric arrays into NumPy arrays, otherwise has no effect. @@ -188,20 +179,15 @@ def check_writeable(arr: Union[ndarray, tuple[ndarray, ...], None]) -> None: def broadcast_where( - where: Union[bool, ndarray, None], shape: NdShape + where: Union[ndarray, None], shape: NdShape ) -> Union[ndarray, None]: - if where is not None: - where_array = convert_to_cunumeric_ndarray(where) - else: - where_array = None - - if where_array is not None and where_array.shape != shape: - where_array = ndarray( + if where is not None and where.shape != shape: + where = ndarray( shape=shape, - thunk=where_array._thunk.broadcast_to(shape), + thunk=where._thunk.broadcast_to(shape), writeable=False, ) - return where_array + return where class flagsobj: @@ -4129,7 +4115,7 @@ def _perform_unary_op( out: Union[Any, None] = None, extra_args: Any = None, dtype: Union[np.dtype[Any], None] = None, - where: Union[bool, None, ndarray] = True, + where: Union[None, ndarray] = None, out_dtype: Union[np.dtype[Any], None] = None, ) -> ndarray: if out is not None: @@ -4168,9 +4154,6 @@ def _perform_unary_op( inputs=(src, where), ) - # Quick exit - if where is False: - return out where_array = broadcast_where(where, out.shape) if out_dtype is None: if out.dtype != src.dtype and not ( From e7d9096d92222fde2eb48411d799159c7c59497d Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 13:17:45 -0800 Subject: [PATCH 42/66] small clean-up --- cunumeric/array.py | 16 ++++++++-------- cunumeric/deferred.py | 4 +--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 852f9d34f..2443ecc85 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -182,11 +182,9 @@ def broadcast_where( where: Union[ndarray, None], shape: NdShape ) -> Union[ndarray, None]: if where is not None and where.shape != shape: - where = ndarray( - shape=shape, - thunk=where._thunk.broadcast_to(shape), - writeable=False, - ) + from .module import broadcast_to + + where = broadcast_to(where, shape) return where @@ -4059,10 +4057,12 @@ def _get_where_thunk( ) -> Union[Literal[True], None, NumPyThunk]: if where is True or where is None: return where - if not isinstance(where, ndarray) or where.dtype != np.bool_: + if ( + not isinstance(where, ndarray) + or where.dtype != np.bool_ + or where.shape != out_shape + ): raise RuntimeError("should have converted this earlier") - if where.shape != out_shape: - raise ValueError("where parameter must have same shape as output") return where._thunk @staticmethod diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index c1a102e73..ae5c2ae57 100644 --- a/cunumeric/deferred.py +++ 
b/cunumeric/deferred.py @@ -3127,7 +3127,7 @@ def unary_op( # Perform a unary reduction operation from one set of dimensions down to # fewer - @auto_convert("src") + @auto_convert("src", "where") def unary_reduction( self, op: UnaryRedCode, @@ -3159,8 +3159,6 @@ def unary_reduction( ) is_where = bool(where is not None) - if is_where: - where = self.runtime.to_deferred_array(where) # See if we are doing reduction to a point or another region if lhs_array.size == 1: assert axes is None or lhs_array.ndim == rhs_array.ndim - ( From b6a90c9f184396b40a75cf50a396f0d5b7631243 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 13:21:43 -0800 Subject: [PATCH 43/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 2443ecc85..bcc21d11a 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -4053,9 +4053,9 @@ def unique(self) -> ndarray: @classmethod def _get_where_thunk( - cls, where: Union[Literal[True], None, ndarray], out_shape: NdShape - ) -> Union[Literal[True], None, NumPyThunk]: - if where is True or where is None: + cls, where: Union[None, ndarray], out_shape: NdShape + ) -> Union[None, NumPyThunk]: + if where is None: return where if ( not isinstance(where, ndarray) From 7ec317cc9e5d13dbedca33db75759e80d85814c9 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 13:50:00 -0800 Subject: [PATCH 44/66] removing where from perform_unary_op --- cunumeric/array.py | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index bcc21d11a..934a37ee5 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -22,7 +22,6 @@ TYPE_CHECKING, Any, Callable, - Literal, Optional, Sequence, TypeVar, @@ -4115,33 +4114,31 @@ def _perform_unary_op( out: Union[Any, None] = None, extra_args: Any = None, dtype: Union[np.dtype[Any], None] = None, - where: Union[None, ndarray] = None, out_dtype: Union[np.dtype[Any], None] = None, ) -> ndarray: if out is not None: # If the shapes don't match see if we can broadcast # This will raise an exception if they can't be broadcast together - if isinstance(where, ndarray): - np.broadcast_shapes(src.shape, out.shape, where.shape) - else: - np.broadcast_shapes(src.shape, out.shape) + if np.broadcast_shapes(src.shape, out.shape) != out.shape: + raise ValueError( + f"non-broadcastable output operand with shape {out.shape} " + f"doesn't match the broadcast shape {src.shape}" + ) else: # No output yet, so make one - if isinstance(where, ndarray): - out_shape = np.broadcast_shapes(src.shape, where.shape) - else: - out_shape = src.shape + out_shape = src.shape + if dtype is not None: out = ndarray( shape=out_shape, dtype=dtype, - inputs=(src, where), + inputs=(src,), ) elif out_dtype is not None: out = ndarray( shape=out_shape, dtype=out_dtype, - inputs=(src, where), + inputs=(src,), ) else: out = ndarray( @@ -4151,10 +4148,9 @@ def _perform_unary_op( else np.dtype(np.float32) if src.dtype == np.dtype(np.complex64) else np.dtype(np.float64), - inputs=(src, where), + inputs=(src,), ) - where_array = broadcast_where(where, out.shape) if out_dtype is None: if out.dtype != src.dtype and not ( op == UnaryOpCode.ABSOLUTE and src.dtype.kind == "c" @@ -4162,12 +4158,12 @@ def _perform_unary_op( temp = ndarray( out.shape, dtype=src.dtype, - inputs=(src, where_array), + inputs=(src,), ) temp._thunk.unary_op( op, 
src._thunk, - cls._get_where_thunk(where_array, out.shape), + True, extra_args, ) out._thunk.convert(temp._thunk) @@ -4175,7 +4171,7 @@ def _perform_unary_op( out._thunk.unary_op( op, src._thunk, - cls._get_where_thunk(where_array, out.shape), + True, extra_args, ) else: @@ -4183,12 +4179,12 @@ def _perform_unary_op( temp = ndarray( out.shape, dtype=out_dtype, - inputs=(src, where_array), + inputs=(src,), ) temp._thunk.unary_op( op, src._thunk, - cls._get_where_thunk(where_array, out.shape), + True, extra_args, ) out._thunk.convert(temp._thunk) @@ -4196,7 +4192,7 @@ def _perform_unary_op( out._thunk.unary_op( op, src._thunk, - cls._get_where_thunk(where_array, out.shape), + True, extra_args, ) return out @@ -4236,13 +4232,6 @@ def _perform_unary_reduction( # TODO: Need to require initial to be given when the array is empty # or a where mask is given. - if where is not None and isinstance(where, ndarray): - # The where array has to broadcast to the src.shape - if np.broadcast_shapes(src.shape, where.shape) != src.shape: - raise ValueError( - '"where" array must broadcast against source array ' - "for reduction" - ) if ( op in ( From 47096389f6e0658e6ee2631aa841ffaf41ceaff3 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 14:46:15 -0800 Subject: [PATCH 45/66] fixing logic for nanmin/nanmax for where --- cunumeric/module.py | 8 ++++---- tests/integration/test_nan_reduction.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index e633dcbf3..798aac5d9 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -5803,8 +5803,8 @@ def nanmin( ) if cunumeric_settings.numpy_compat() and a.dtype.kind == "f": - where = all(isnan(a), axis=axis, keepdims=keepdims) - putmask(out_array, where, np.nan) # type: ignore + all_nan = all(isnan(a), axis=axis, keepdims=keepdims, where=where) + putmask(out_array, all_nan, np.nan) # type: ignore return out_array @@ -5899,8 +5899,8 @@ def nanmax( ) if cunumeric_settings.numpy_compat() and a.dtype.kind == "f": - where = all(isnan(a), axis=axis, keepdims=keepdims) - putmask(out_array, where, np.nan) # type: ignore + all_nan = all(isnan(a), axis=axis, keepdims=keepdims, where=where) + putmask(out_array, all_nan, np.nan) # type: ignore return out_array diff --git a/tests/integration/test_nan_reduction.py b/tests/integration/test_nan_reduction.py index b764dbf93..e57a46d0b 100644 --- a/tests/integration/test_nan_reduction.py +++ b/tests/integration/test_nan_reduction.py @@ -301,7 +301,31 @@ def test_where(self): out_num = num.nanprod(arr, where=[False, True, True]) assert np.allclose(out_np, out_num) + out_np = np.nanmax( + arr, where=[[False, True, True], [False, False, True]], initial=-1 + ) + out_num = num.nanmax( + arr, where=[[False, True, True], [False, False, True]], initial=-1 + ) + assert np.allclose(out_np, out_num) + + out_np = np.nanmin( + arr, where=[[False, True, True], [False, True, True]], initial=10 + ) + out_num = num.nanmin( + arr, where=[[False, True, True], [False, True, True]], initial=10 + ) + assert np.allclose(out_np, out_num) + # where is a boolean + out_np = np.nansum(arr, where=True) + out_num = num.nansum(arr, where=True) + assert np.allclose(out_np, out_num) + + out_np = np.nanprod(arr, where=False) + out_num = num.nanprod(arr, where=False) + assert np.allclose(out_np, out_num) + out_np = np.nanmax(arr, where=True, initial=-1) out_num = num.nanmax(arr, where=True, initial=-1) assert np.allclose(out_np, out_num) From 
95bfbebbde11ee7b516bb8529bc9cedcbc3d1f96 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 20:47:54 -0800 Subject: [PATCH 46/66] addressing the rest of the comments from Manolis --- cunumeric/_ufunc/ufunc.py | 10 +++++++--- cunumeric/array.py | 1 - cunumeric/module.py | 6 +++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cunumeric/_ufunc/ufunc.py b/cunumeric/_ufunc/ufunc.py index 01d2fc30b..3bba6b8ad 100644 --- a/cunumeric/_ufunc/ufunc.py +++ b/cunumeric/_ufunc/ufunc.py @@ -19,7 +19,12 @@ import numpy as np from legate.core.utils import OrderedSet -from ..array import check_writeable, convert_to_cunumeric_ndarray, ndarray +from ..array import ( + add_boilerplate, + check_writeable, + convert_to_cunumeric_ndarray, + ndarray, +) from ..config import BinaryOpCode, UnaryOpCode, UnaryRedCode from ..types import NdShape @@ -680,6 +685,7 @@ def __call__( return self._maybe_cast_output(out, result) + @add_boilerplate("array") def reduce( self, array: ndarray, @@ -742,8 +748,6 @@ def reduce( -------- numpy.ufunc.reduce """ - array = convert_to_cunumeric_ndarray(array) - if self._red_code is None: raise NotImplementedError( f"reduction for {self} is not yet implemented" diff --git a/cunumeric/array.py b/cunumeric/array.py index 934a37ee5..3eb8939c8 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3170,7 +3170,6 @@ def mean( else: return sum_array - @add_boilerplate() def _nanmean( self, axis: Optional[Union[int, tuple[int, ...]]] = None, diff --git a/cunumeric/module.py b/cunumeric/module.py index 798aac5d9..5d75476ea 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -1487,6 +1487,10 @@ def _broadcast_to( arr = array(arr, copy=False, subok=subok) # 'broadcast_to' returns a read-only view of the original array out_shape = broadcast_shapes(arr.shape, shape) + if out_shape != shape: + raise ValueError( + f"cannot broadcast an array of shape {arr.shape} to {shape}" + ) result = ndarray( shape=out_shape, thunk=arr._thunk.broadcast_to(out_shape), @@ -6978,7 +6982,7 @@ def count_nonzero( # Averages and variances -@add_boilerplate("a", "where") +@add_boilerplate("a") def mean( a: ndarray, axis: Optional[Union[int, tuple[int, ...]]] = None, From 90f27bc67a09f68cd9e9728383c7388371e774c0 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 20:57:45 -0800 Subject: [PATCH 47/66] some C++ code clean-up --- src/cunumeric/unary/unary_red.cc | 10 ++++++---- src/cunumeric/unary/unary_red_omp.cc | 13 ++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/cunumeric/unary/unary_red.cc b/src/cunumeric/unary/unary_red.cc index bdd1c7012..bc1717a47 100644 --- a/src/cunumeric/unary/unary_red.cc +++ b/src/cunumeric/unary/unary_red.cc @@ -36,11 +36,13 @@ struct UnaryRedImplBody { size_t volume) const { for (size_t idx = 0; idx < volume; ++idx) { - auto point = pitches.unflatten(idx, rect.lo); - auto identity = LG_OP::identity; - bool mask = true; + auto point = pitches.unflatten(idx, rect.lo); + bool mask = true; if constexpr (HAS_WHERE) mask = (where[point] == true); - if (mask) lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + if (mask) { + auto identity = LG_OP::identity; + lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + } } } }; diff --git a/src/cunumeric/unary/unary_red_omp.cc b/src/cunumeric/unary/unary_red_omp.cc index 5074031df..a2765ccff 100644 --- a/src/cunumeric/unary/unary_red_omp.cc +++ b/src/cunumeric/unary/unary_red_omp.cc @@ -90,14 +90,17 @@ struct 
UnaryRedImplBody { auto split = splitter.split(rect, collapsed_dim); #pragma omp parallel for schedule(static) - for (size_t o_idx = 0; o_idx < split.outer; ++o_idx) + for (size_t o_idx = 0; o_idx < split.outer; ++o_idx) { for (size_t i_idx = 0; i_idx < split.inner; ++i_idx) { - auto point = splitter.combine(o_idx, i_idx, rect.lo); - auto identity = LG_OP::identity; - bool mask = true; + auto point = splitter.combine(o_idx, i_idx, rect.lo); + bool mask = true; if constexpr (HAS_WHERE) mask = (where[point] == true); - if (mask) lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + if (mask) { + auto identity = LG_OP::identity; + lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); + } } + } } }; From fa19be77aa6bf3e60a930b6e301973de216eec8b Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Mon, 13 Nov 2023 21:49:49 -0800 Subject: [PATCH 48/66] more C++ code clean-up --- cunumeric/array.py | 1 - .../unary/scalar_unary_red_template.inl | 2 +- src/cunumeric/unary/unary_red.cu | 58 ++++--------------- 3 files changed, 12 insertions(+), 49 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 3eb8939c8..33233d284 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3713,7 +3713,6 @@ def sum( where=where, ) - @add_boilerplate() def _nansum( self, axis: Any = None, diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 11d0bbe12..bc9de8077 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -128,7 +128,7 @@ struct ScalarUnaryRed { } }; -template +template struct ScalarUnaryRedImpl { template void operator()(ScalarUnaryRedArgs& args) const diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index 7127046a6..17697e73b 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -266,9 +266,10 @@ static void __device__ __forceinline__ collapse_dims(LHS& result, #endif } -template +template static __device__ __forceinline__ Point local_reduce(LHS& result, AccessorRO in, + AccessorRO where, LHS identity, const ThreadBlocks& blocks, const Rect& domain, @@ -280,33 +281,10 @@ static __device__ __forceinline__ Point local_reduce(LHS& result, Point point = blocks.point(bid, tid, domain.lo); if (!domain.contains(point)) return point; + bool mask = true; + if constexpr (HAS_WHERE) mask = (where[point] == true); while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { - LHS value = OP::convert(point, collapsed_dim, identity, in[point]); - REDOP::template fold(result, value); - blocks.next_point(point); - } - - collapse_dims(result, point, domain, collapsed_dim, identity, tid); - return point; -} - -template -static __device__ __forceinline__ Point local_reduce_where(LHS& result, - AccessorRO in, - AccessorRO where, - LHS identity, - const ThreadBlocks& blocks, - const Rect& domain, - int32_t collapsed_dim) -{ - const coord_t tid = threadIdx.x; - const coord_t bid = blockIdx.x; - - Point point = blocks.point(bid, tid, domain.lo); - if (!domain.contains(point)) return point; - - while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { - if (where[point] == true) { + if (mask) { LHS value = OP::convert(point, collapsed_dim, identity, in[point]); REDOP::template fold(result, value); } @@ -316,33 +294,19 @@ static __device__ __forceinline__ Point local_reduce_where(LHS& result, collapse_dims(result, point, domain, collapsed_dim, identity, tid); 
return point; } -template + +template static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) reduce_with_rd_acc(AccessorRD out, AccessorRO in, + AccessorRO where, LHS identity, ThreadBlocks blocks, Rect domain, int32_t collapsed_dim) { auto result = identity; - auto point = - local_reduce(result, in, identity, blocks, domain, collapsed_dim); - if (result != identity) out.reduce(point, result); -} - -template -static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) - reduce_with_rd_acc_where(AccessorRD out, - AccessorRO in, - AccessorRO where, - LHS identity, - ThreadBlocks blocks, - Rect domain, - int32_t collapsed_dim) -{ - auto result = identity; - auto point = local_reduce_where( + auto point = local_reduce( result, in, where, identity, blocks, domain, collapsed_dim); if (result != identity) out.reduce(point, result); } @@ -362,7 +326,7 @@ struct UnaryRedImplBody { int collapsed_dim, size_t volume) const { - auto Kernel = reduce_with_rd_acc; + auto Kernel = reduce_with_rd_acc; auto stream = get_cached_stream(); ThreadBlocks blocks; @@ -374,7 +338,7 @@ struct UnaryRedImplBody { lhs, rhs, where, LG_OP::identity, blocks, rect, collapsed_dim); else Kernel<<>>( - lhs, rhs, LG_OP::identity, blocks, rect, collapsed_dim); + lhs, rhs, AccessorRO(), LG_OP::identity, blocks, rect, collapsed_dim); CHECK_CUDA_STREAM(stream); } }; From a51135df4d728ed9cca382bf0576cba584416de5 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:35:44 -0800 Subject: [PATCH 49/66] Update src/cunumeric/unary/unary_red.cu Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/unary_red.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index 17697e73b..47ae3ecc4 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -282,7 +282,7 @@ static __device__ __forceinline__ Point local_reduce(LHS& result, if (!domain.contains(point)) return point; bool mask = true; - if constexpr (HAS_WHERE) mask = (where[point] == true); + if constexpr (HAS_WHERE) mask = where[point]; while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { if (mask) { LHS value = OP::convert(point, collapsed_dim, identity, in[point]); From e703cc99e6c80fcf51d1db62f852fee8eea07999 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:35:53 -0800 Subject: [PATCH 50/66] Update src/cunumeric/unary/unary_red.cc Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/unary_red.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/unary_red.cc b/src/cunumeric/unary/unary_red.cc index bc1717a47..b37d1a4b2 100644 --- a/src/cunumeric/unary/unary_red.cc +++ b/src/cunumeric/unary/unary_red.cc @@ -38,7 +38,7 @@ struct UnaryRedImplBody { for (size_t idx = 0; idx < volume; ++idx) { auto point = pitches.unflatten(idx, rect.lo); bool mask = true; - if constexpr (HAS_WHERE) mask = (where[point] == true); + if constexpr (HAS_WHERE) mask = where[point]; if (mask) { auto identity = LG_OP::identity; lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); From 7ea110f10ac7af8a8bd1ce54c61e225b79a146ed Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:36:02 -0800 Subject: [PATCH 51/66] Update src/cunumeric/unary/unary_red_omp.cc Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/unary_red_omp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/unary_red_omp.cc 
b/src/cunumeric/unary/unary_red_omp.cc index a2765ccff..fd9ccce60 100644 --- a/src/cunumeric/unary/unary_red_omp.cc +++ b/src/cunumeric/unary/unary_red_omp.cc @@ -94,7 +94,7 @@ struct UnaryRedImplBody { for (size_t i_idx = 0; i_idx < split.inner; ++i_idx) { auto point = splitter.combine(o_idx, i_idx, rect.lo); bool mask = true; - if constexpr (HAS_WHERE) mask = (where[point] == true); + if constexpr (HAS_WHERE) mask = where[point]; if (mask) { auto identity = LG_OP::identity; lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); From f080a95707b5922426b13adf701cc93bb79624d5 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:39:46 -0800 Subject: [PATCH 52/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/scalar_unary_red_template.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index bc9de8077..9b52cd6bf 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -99,7 +99,7 @@ struct ScalarUnaryRed { { auto p = pitches.unflatten(idx, origin); bool mask = true; - if constexpr (HAS_WHERE) mask = (where[p] == true); + if constexpr (HAS_WHERE) mask = where[p]; if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { if (mask && (in[p] == to_find)) { lhs = true; } From c039cb2b6c9f7bc2b6108f380eff8f81437599c3 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:39:53 -0800 Subject: [PATCH 53/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/scalar_unary_red_template.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 9b52cd6bf..2eb2ed21f 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -82,7 +82,7 @@ struct ScalarUnaryRed { __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, DenseReduction) const noexcept { bool mask = true; - if constexpr (HAS_WHERE) mask = (whereptr[idx] == true); + if constexpr (HAS_WHERE) mask = whereptr[idx]; if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { if (mask && (inptr[idx] == to_find)) { lhs = true; } From 6896b4ea61ef9ca75c56059536258bb4b2afc6db Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:40:21 -0800 Subject: [PATCH 54/66] Update src/cunumeric/unary/unary_red_template.inl Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/unary_red_template.inl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl index eecc45ddf..87750e17c 100644 --- a/src/cunumeric/unary/unary_red_template.inl +++ b/src/cunumeric/unary/unary_red_template.inl @@ -92,7 +92,11 @@ static void unary_red_template(TaskContext& context) has_where ? 
inputs[1] : dummy_where, scalars[0].value(), scalars[1].value()}; - op_dispatch(args.op_code, UnaryRedDispatch{}, args); + if (has_where) { + op_dispatch(args.op_code, UnaryRedDispatch{}, args); + } else { + op_dispatch(args.op_code, UnaryRedDispatch{}, args); + } } } // namespace cunumeric From 40142397687ad4fa017527c98a8ffbf19ab700e9 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:40:37 -0800 Subject: [PATCH 55/66] Update src/cunumeric/unary/scalar_unary_red_template.inl Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/scalar_unary_red_template.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 2eb2ed21f..110a73e7a 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -165,7 +165,7 @@ static void scalar_unary_red_template(TaskContext& context) bool has_where = scalars[2].value(); size_t start_idx = has_where ? 2 : 1; std::vector extra_args; - extra_args.reserve(inputs.size()); + extra_args.reserve(inputs.size() - start_idx); for (size_t idx = start_idx; idx < inputs.size(); ++idx) extra_args.emplace_back(std::move(inputs[idx])); // If the RHS was a scalar, use (1,) as the shape From fe84bb15cf5ffb75ae92bf6ca29dc66e63b0fded Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 10:41:02 -0800 Subject: [PATCH 56/66] Update src/cunumeric/unary/unary_red_template.inl Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/unary_red_template.inl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl index 87750e17c..1b92d5a32 100644 --- a/src/cunumeric/unary/unary_red_template.inl +++ b/src/cunumeric/unary/unary_red_template.inl @@ -50,14 +50,12 @@ struct UnaryRedImpl { auto lhs = args.lhs.reduce_accessor(rect); + AccessorRO where; if constexpr (HAS_WHERE) { - auto where = args.where.read_accessor(rect); - UnaryRedImplBody()( - lhs, rhs, where, rect, pitches, args.collapsed_dim, volume); - } else { - UnaryRedImplBody()( - lhs, rhs, AccessorRO(), rect, pitches, args.collapsed_dim, volume); + where = args.where.read_accessor(rect); } + UnaryRedImplBody()( + lhs, rhs, where, rect, pitches, args.collapsed_dim, volume); } template Date: Fri, 17 Nov 2023 13:00:29 -0800 Subject: [PATCH 57/66] Update src/cunumeric/unary/unary_red.cu Co-authored-by: Manolis Papadakis --- src/cunumeric/unary/unary_red.cu | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index 47ae3ecc4..d6617df71 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -333,12 +333,8 @@ struct UnaryRedImplBody { blocks.initialize(rect, collapsed_dim); blocks.compute_maximum_concurrency(reinterpret_cast(Kernel)); - if constexpr (HAS_WHERE) - Kernel<<>>( - lhs, rhs, where, LG_OP::identity, blocks, rect, collapsed_dim); - else - Kernel<<>>( - lhs, rhs, AccessorRO(), LG_OP::identity, blocks, rect, collapsed_dim); + Kernel<<>>( + lhs, rhs, where, LG_OP::identity, blocks, rect, collapsed_dim); CHECK_CUDA_STREAM(stream); } }; From 9d5b364698c239428297a309632069feae38668d Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 13:21:04 -0800 Subject: [PATCH 58/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 33233d284..411980d82 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3141,7 +3141,7 @@ def mean( if axis is None: if where_array is not None: - divisor = np.array(where_array._count_nonzero()) + divisor = where_array._count_nonzero() else: divisor = np.array(reduce(lambda x, y: x * y, self.shape, 1)) From 2bd62ebc9e5c520440e693236ecf0f0fc2f58353 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 13:21:17 -0800 Subject: [PATCH 59/66] Update cunumeric/array.py Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 411980d82..7a962d22b 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3147,9 +3147,7 @@ def mean( else: if where_array is not None: - divisor = np.array( - where_array.sum(axis=axis, keepdims=keepdims) - ) + divisor = where_array.sum(axis=axis, keepdims=keepdims) else: divisor = np.array(self.shape[axis]) From f504f05a95190de88cac5b5927cbf2d5d37c8ff2 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 13:11:07 -0800 Subject: [PATCH 60/66] fixing bug in cuda code --- src/cunumeric/unary/unary_red.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index d6617df71..b5e0e5eb1 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -282,8 +282,8 @@ static __device__ __forceinline__ Point local_reduce(LHS& result, if (!domain.contains(point)) return point; bool mask = true; - if constexpr (HAS_WHERE) mask = where[point]; while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { + if constexpr (HAS_WHERE) mask = where[point]; if (mask) { LHS value = OP::convert(point, collapsed_dim, identity, in[point]); REDOP::template fold(result, value); From b15f65673def63c090682e30e303d62fbbd34612 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Fri, 17 Nov 2023 13:56:55 -0800 Subject: [PATCH 61/66] adding dtype back to sum --- cunumeric/array.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 7a962d22b..2dd01e0ab 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3127,39 +3127,52 @@ def mean( # Do the sum sum_array = ( self._nansum( - axis=axis, out=out, keepdims=keepdims, where=where_array + axis=axis, + out=out, + keepdims=keepdims, + dtype=dtype, + where=where_array, ) if out is not None and out.dtype == dtype and ignore_nan else self.sum( - axis=axis, out=out, keepdims=keepdims, where=where_array + axis=axis, + out=out, + keepdims=keepdims, + dtype=dtype, + where=where_array, ) if out is not None and out.dtype == dtype - else self._nansum(axis=axis, keepdims=keepdims, where=where_array) + else self._nansum( + axis=axis, keepdims=keepdims, dtype=dtype, where=where_array + ) if ignore_nan - else self.sum(axis=axis, keepdims=keepdims, where=where_array) + else self.sum( + axis=axis, keepdims=keepdims, dtype=dtype, where=where_array + ) ) if axis is None: if where_array is not None: divisor = where_array._count_nonzero() else: - divisor = np.array(reduce(lambda x, y: x * y, self.shape, 1)) + divisor = reduce(lambda x, y: x * y, self.shape, 1) else: if where_array is not None: - divisor = where_array.sum(axis=axis, keepdims=keepdims) + divisor = where_array.sum( + axis=axis, dtype=dtype, 
From b15f65673def63c090682e30e303d62fbbd34612 Mon Sep 17 00:00:00 2001
From: Irina Demeshko 
Date: Fri, 17 Nov 2023 13:56:55 -0800
Subject: [PATCH 61/66] adding dtype back to sum

---
 cunumeric/array.py | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/cunumeric/array.py b/cunumeric/array.py
index 7a962d22b..2dd01e0ab 100644
--- a/cunumeric/array.py
+++ b/cunumeric/array.py
@@ -3127,39 +3127,52 @@ def mean(
         # Do the sum
         sum_array = (
             self._nansum(
-                axis=axis, out=out, keepdims=keepdims, where=where_array
+                axis=axis,
+                out=out,
+                keepdims=keepdims,
+                dtype=dtype,
+                where=where_array,
             )
             if out is not None and out.dtype == dtype and ignore_nan
             else self.sum(
-                axis=axis, out=out, keepdims=keepdims, where=where_array
+                axis=axis,
+                out=out,
+                keepdims=keepdims,
+                dtype=dtype,
+                where=where_array,
             )
             if out is not None and out.dtype == dtype
-            else self._nansum(axis=axis, keepdims=keepdims, where=where_array)
+            else self._nansum(
+                axis=axis, keepdims=keepdims, dtype=dtype, where=where_array
+            )
             if ignore_nan
-            else self.sum(axis=axis, keepdims=keepdims, where=where_array)
+            else self.sum(
+                axis=axis, keepdims=keepdims, dtype=dtype, where=where_array
+            )
         )
         if axis is None:
             if where_array is not None:
                 divisor = where_array._count_nonzero()
             else:
-                divisor = np.array(reduce(lambda x, y: x * y, self.shape, 1))
+                divisor = reduce(lambda x, y: x * y, self.shape, 1)
         else:
             if where_array is not None:
-                divisor = where_array.sum(axis=axis, keepdims=keepdims)
+                divisor = where_array.sum(
+                    axis=axis, dtype=dtype, keepdims=keepdims
+                )
             else:
-                divisor = np.array(self.shape[axis])
+                divisor = self.shape[axis]

         # Divide by the number of things in the collapsed dimensions
         # Pick the right kinds of division based on the dtype
-        sum_array = sum_array.astype(dtype)
         if dtype.kind == "f" or dtype.kind == "c":
             sum_array.__itruediv__(
-                divisor.astype(dtype),
+                divisor,
             )
         else:
-            sum_array.__ifloordiv__(divisor.astype(dtype))
+            sum_array.__ifloordiv__(divisor)

         # Convert to the output we didn't already put it there
         if out is not None and sum_array is not out:
             assert out.dtype != sum_array.dtype

From 7a775350c715d12763b29f1b40b98e3ce1b01038 Mon Sep 17 00:00:00 2001
From: Irina Demeshko 
Date: Fri, 17 Nov 2023 14:20:16 -0800
Subject: [PATCH 62/66] removing ignore_nan

---
 cunumeric/array.py | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/cunumeric/array.py b/cunumeric/array.py
index 2dd01e0ab..0b7ce23c0 100644
--- a/cunumeric/array.py
+++ b/cunumeric/array.py
@@ -3094,7 +3094,6 @@ def mean(
         out: Union[ndarray, None] = None,
         keepdims: bool = False,
         where: Union[ndarray, None] = None,
-        ignore_nan: bool = False,
     ) -> ndarray:
         """a.mean(axis=None, dtype=None, out=None, keepdims=False)

@@ -3126,15 +3125,7 @@ def mean(
             where_array = broadcast_where(where, self.shape)
         # Do the sum
         sum_array = (
-            self._nansum(
-                axis=axis,
-                out=out,
-                keepdims=keepdims,
-                dtype=dtype,
-                where=where_array,
-            )
-            if out is not None and out.dtype == dtype and ignore_nan
-            else self.sum(
+            self.sum(
                 axis=axis,
                 out=out,
                 keepdims=keepdims,
                 dtype=dtype,
                 where=where_array,
             )
             if out is not None and out.dtype == dtype
-            else self._nansum(
-                axis=axis, keepdims=keepdims, dtype=dtype, where=where_array
-            )
-            if ignore_nan
             else self.sum(
                 axis=axis, keepdims=keepdims, dtype=dtype, where=where_array
             )
         )
@@ -3207,7 +3194,6 @@ def _nanmean(
             out=out,
             keepdims=keepdims,
             where=nan_mask,
-            ignore_nan=True,
         )

     @add_boilerplate()
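Patches 61 and 62 settle the divisor for `mean(where=...)`: the sum is taken with the mask applied and then divided by the number of selected elements (per output element when an axis is given), not by the axis length. A NumPy-level check of that rule (illustrative only, not part of the patch series):

    import numpy as np

    a = np.array([[1.0, 2.0], [3.0, 4.0]])
    mask = np.array([[True, False], [True, True]])

    # The divisor is the per-column count of True entries, mask.sum(axis=0),
    # mirroring where_array.sum(axis=axis, ...) in the patch above.
    expected = np.sum(a, axis=0, where=mask) / np.sum(mask, axis=0)
    assert np.allclose(np.mean(a, axis=0, where=mask), expected)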
From bdb49e986055b8e746092ec749048d6b37aefdcd Mon Sep 17 00:00:00 2001
From: Irina Demeshko 
Date: Wed, 29 Nov 2023 21:06:22 -0800
Subject: [PATCH 63/66] fixing var for the case when where is not None

---
 cunumeric/array.py              | 31 ++++++++++++++++++++++++-------
 tests/integration/test_stats.py | 20 +++++++++++++++++++-
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/cunumeric/array.py b/cunumeric/array.py
index 2767e1a0b..6c226d747 100644
--- a/cunumeric/array.py
+++ b/cunumeric/array.py
@@ -3119,16 +3119,25 @@ def _normalize_summation(
             divisor = reduce(lambda x, y: x * y, self.shape, 1) - ddof
         else:
             if where is not None:
-                divisor = (
-                    where.sum(axis=axis, dtype=dtype, keepdims=keepdims) - ddof
-                )
+                divisor = where.sum(
+                    axis=axis, dtype=sum_array.dtype, keepdims=keepdims
+                )
+                if ddof != 0 and not np.isscalar(divisor):
+                    mask = divisor != 0
+                    values = divisor - ddof
+                    divisor._thunk.putmask(mask._thunk, values._thunk)
+                else:
+                    divisor -= ddof
             else:
                 divisor = self.shape[axis] - ddof

         # Divide by the number of things in the collapsed dimensions
         # Pick the right kinds of division based on the dtype
-        if np.ndim(divisor) == 0:
+        if isinstance(divisor, ndarray):
+            divisor = divisor.astype(sum_array.dtype)
+        else:
             divisor = np.array(divisor, dtype=sum_array.dtype)  # type: ignore [assignment] # noqa
+
         if dtype.kind == "f" or dtype.kind == "c":
             sum_array.__itruediv__(divisor)
         else:
@@ -3267,8 +3276,9 @@ def var(
         # mean can be broadcast against the original array
         mu = self.mean(axis=axis, dtype=dtype, keepdims=True, where=where)

+        where_array = broadcast_where(where, self.shape)
+
         # 1D arrays (or equivalent) should benefit from this unary reduction:
-        # if axis is None or calculate_volume(tuple_pop(self.shape, axis)) == 1:
         # this is a scalar reduction and we can optimize this as a single
         # pass through a scalar reduction
             result = self._perform_unary_reduction(
                 UnaryRedCode.VARIANCE,
                 self,
                 axis=axis,
                 dtype=dtype,
                 out=out,
                 keepdims=keepdims,
-                where=where,
+                where=where_array,
                 args=(mu,),
             )
         else:
@@ -3300,10 +3310,17 @@ def var(
                 dtype=dtype,
                 out=out,
                 keepdims=keepdims,
-                where=where,
+                where=where_array,
             )

-        self._normalize_summation(result, axis=axis, dtype=dtype, ddof=ddof)
+        self._normalize_summation(
+            result,
+            axis=axis,
+            dtype=result.dtype,
+            ddof=ddof,
+            keepdims=keepdims,
+            where=where_array,
+        )

         return result

diff --git a/tests/integration/test_stats.py b/tests/integration/test_stats.py
index dfa1b0fa3..256a6b77f 100644
--- a/tests/integration/test_stats.py
+++ b/tests/integration/test_stats.py
@@ -39,7 +39,7 @@ def check_result(in_np, out_np, out_num, **isclose_kwargs):
     is_negative_test = False

     result = (
-        allclose(out_np, out_num, **isclose_kwargs)
+        allclose(out_np, out_num, equal_nan=True, **isclose_kwargs)
         and out_np.dtype == out_num.dtype
     )
     if not result and not is_negative_test:
@@ -131,6 +131,24 @@ def test_var_default_shape(dtype, ddof, axis, keepdims):
     check_op(op_np, op_num, np_in, dtype)


+@pytest.mark.parametrize("dtype", dtypes)
+@pytest.mark.parametrize("ddof", [0, 1])
+@pytest.mark.parametrize("axis", [None, 0, 1])
+@pytest.mark.parametrize("keepdims", [False, True])
+def test_var_where(dtype, ddof, axis, keepdims):
+    np_in = get_op_input(astype=dtype)
+    where = (np_in.astype(int) % 2).astype(bool)
+
+    op_np = functools.partial(
+        np.var, ddof=ddof, axis=axis, keepdims=keepdims, where=where
+    )
+    op_num = functools.partial(
+        num.var, ddof=ddof, axis=axis, keepdims=keepdims, where=where
+    )
+
+    check_op(op_np, op_num, np_in, dtype)
+
+
 @pytest.mark.parametrize("dtype", dtypes)
 @pytest.mark.parametrize("ddof", [0, 1])
 @pytest.mark.parametrize("axis", [None, 0, 1, 2])

From 4ef0749d6674e08d4f88f3d0e9c3076e7b8a1b45 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 30 Nov 2023 05:14:52 +0000
Subject: [PATCH 64/66] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/cunumeric/unary/unary_red_template.inl | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl
index 1b92d5a32..aa038384f 100644
--- a/src/cunumeric/unary/unary_red_template.inl
+++ b/src/cunumeric/unary/unary_red_template.inl
@@ -51,11 +51,9 @@ struct UnaryRedImpl {
     auto lhs = args.lhs.reduce_accessor(rect);
     AccessorRO<bool, DIM> where;
-    if constexpr (HAS_WHERE) {
-      where = args.where.read_accessor<bool, DIM>(rect);
-    }
+    if constexpr (HAS_WHERE) { where = args.where.read_accessor<bool, DIM>(rect); }
     UnaryRedImplBody<KIND, OP_CODE, CODE, DIM, HAS_WHERE>()(
-      lhs, rhs, where, rect, pitches, args.collapsed_dim, volume);
+      lhs, rhs, where, rect, pitches, args.collapsed_dim, volume);
   }

   template 

Date: Thu, 30 Nov 2023 09:38:21 -0800
Subject: [PATCH 65/66] fixing eager logic for var

---
 cunumeric/eager.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/cunumeric/eager.py b/cunumeric/eager.py
index 03929395d..7c58023ef 100644
--- a/cunumeric/eager.py
+++ b/cunumeric/eager.py
@@ -1532,7 +1532,9 @@ def unary_reduction(
                 squared,
                 out=self.array,
                 axis=orig_axis,
-                where=where,
+                where=where
+                if not isinstance(where, EagerArray)
+                else where.array,
                 keepdims=keepdims,
             )
         elif op == UnaryRedCode.VARIANCE:
@@ -1542,7 +1544,9 @@ def unary_reduction(
             np.sum(
                 squares,
                 axis=orig_axis,
-                where=where,
+                where=where
+                if not isinstance(where, EagerArray)
+                else where.array,
                 keepdims=keepdims,
                 out=self.array,
             )
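Patch 63 encodes the `ddof` rule for masked variance (divide by the count of selected elements minus `ddof`, guarding zero counts), and patch 65 applies the same mask unwrapping on the eager path. A NumPy-level statement of the rule being matched (illustrative only, not part of the patch series):

    import numpy as np

    rng = np.random.default_rng(0)
    a = rng.normal(size=(4, 5))
    mask = a > 0

    n = mask.sum()  # number of selected elements
    mu = a.mean(where=mask)
    # Divisor is n - ddof, computed over the mask, not over a.size.
    expected = np.sum((a - mu) ** 2, where=mask) / (n - 1)
    assert np.allclose(np.var(a, ddof=1, where=mask), expected)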
From 869b1762aa1dea0f9e4e9e70f48c4cdcd5d4d914 Mon Sep 17 00:00:00 2001
From: Irina Demeshko 
Date: Fri, 1 Dec 2023 18:54:50 -0800
Subject: [PATCH 66/66] removing dtype from _normalize_summation

---
 cunumeric/array.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/cunumeric/array.py b/cunumeric/array.py
index 3da80916c..8bfc5178a 100644
--- a/cunumeric/array.py
+++ b/cunumeric/array.py
@@ -3094,11 +3094,11 @@ def _normalize_summation(
         self,
         sum_array: Any,
         axis: Any,
-        dtype: np.dtype[Any],
         ddof: int = 0,
         keepdims: bool = False,
         where: Union[ndarray, None] = None,
     ) -> None:
+        dtype = sum_array.dtype
         if axis is None:
             if where is not None:
                 divisor = where._count_nonzero() - ddof
@@ -3106,9 +3106,7 @@ def _normalize_summation(
                 divisor = reduce(lambda x, y: x * y, self.shape, 1) - ddof
         else:
             if where is not None:
-                divisor = where.sum(
-                    axis=axis, dtype=sum_array.dtype, keepdims=keepdims
-                )
+                divisor = where.sum(axis=axis, dtype=dtype, keepdims=keepdims)
                 if ddof != 0 and not np.isscalar(divisor):
                     mask = divisor != 0
                     values = divisor - ddof
@@ -3121,9 +3119,9 @@ def _normalize_summation(
         # Divide by the number of things in the collapsed dimensions
         # Pick the right kinds of division based on the dtype
         if isinstance(divisor, ndarray):
-            divisor = divisor.astype(sum_array.dtype)
+            divisor = divisor.astype(dtype)
         else:
-            divisor = np.array(divisor, dtype=sum_array.dtype)  # type: ignore [assignment] # noqa
+            divisor = np.array(divisor, dtype=dtype)  # type: ignore [assignment] # noqa

         if dtype.kind == "f" or dtype.kind == "c":
             sum_array.__itruediv__(divisor)
         else:
@@ -3179,7 +3177,7 @@ def mean(
         )

         self._normalize_summation(
-            sum_array, axis, dtype, keepdims=keepdims, where=where_array
+            sum_array, axis, keepdims=keepdims, where=where_array
         )

         # Convert to the output we didn't already put it there
@@ -3303,7 +3301,6 @@ def var(
         self._normalize_summation(
             result,
             axis=axis,
-            dtype=result.dtype,
             ddof=ddof,
             keepdims=keepdims,
             where=where_array,
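Taken together, the series gives cunumeric reductions a NumPy-compatible boolean `where=` mask, with the divisor rules above applied in `mean` and `var`. A usage sketch (assumes a cunumeric build with this series applied; semantics mirror the NumPy calls shown earlier):

    import cunumeric as num

    a = num.array([[1.0, 2.0], [3.0, 4.0]])
    mask = num.array([[True, False], [True, True]])

    num.sum(a, where=mask)          # 8.0: the masked-off 2.0 is skipped
    a.mean(axis=0, where=mask)      # divisor comes from the mask counts
    num.var(a, ddof=1, where=mask)  # ddof applied to the mask count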