From 558999e62d8b41c27883e37f3f0faeaaff0f89bd Mon Sep 17 00:00:00 2001 From: liqun Fu Date: Thu, 4 Jan 2024 17:41:01 -0800 Subject: [PATCH] reduce max/min 20 (#17805) ### Description reducemax/min have been updated in onnx(20). implement it in ort ### Motivation and Context this is for ort1.17.0 release --------- Signed-off-by: Liqun Fu --- docs/OperatorKernels.md | 6 +- .../providers/cpu/cpu_execution_provider.cc | 100 +++-- .../cpu/reduction/reduction_kernel_base.h | 40 ++ .../providers/cpu/reduction/reduction_ops.cc | 101 ++++- .../providers/cpu/reduction/reduction_ops.h | 175 +++++--- .../providers/cuda/reduction/reduction_ops.h | 2 +- onnxruntime/test/onnx/TestCase.cc | 2 +- .../cpu/reduction/reduction_ops_test.cc | 398 +++++++++++++++++- .../onnx_backend_test_series_filters.jsonc | 55 ++- 9 files changed, 737 insertions(+), 142 deletions(-) create mode 100644 onnxruntime/core/providers/cpu/reduction/reduction_kernel_base.h diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index e401baae2d803..f985cf10ded60 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -278,7 +278,8 @@ Do not modify directly.* |||[13, 17]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||[11, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||[1, 10]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|ReduceMax|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| +|ReduceMax|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|20+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| +|||[18, 19]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||[13, 17]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||12|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||11|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| @@ -287,7 +288,8 @@ Do not modify directly.* |||[13, 17]|**T** = tensor(double), tensor(float), tensor(int32)| |||[11, 12]|**T** = tensor(double), tensor(float), tensor(int32)| |||[1, 10]|**T** = tensor(double), tensor(float), tensor(int32)| -|ReduceMin|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| +|ReduceMin|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|20+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| +|||[18, 19]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||[13, 17]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||12|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||11|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 1390f60243174..f60c7ddac5c05 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -850,21 +850,21 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceLogSumExp); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceLogSumExp); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int64_t, ReduceLogSumExp); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceMax); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceMax); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMax); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int64_t, ReduceMax); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, ReduceMax); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, uint8_t, ReduceMax); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, 
float, ReduceMax); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, double, ReduceMax); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, int32_t, ReduceMax); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, int64_t, ReduceMax); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, int8_t, ReduceMax); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, uint8_t, ReduceMax); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceMean); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceMean); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMean); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceMin); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceMin); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMin); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int64_t, ReduceMin); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, ReduceMin); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, uint8_t, ReduceMin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, float, ReduceMin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, double, ReduceMin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, int32_t, ReduceMin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, 
kOnnxDomain, 18, 19, int64_t, ReduceMin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, int8_t, ReduceMin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 19, uint8_t, ReduceMin); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceProd); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceProd); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int64_t, ReduceProd); @@ -960,6 +960,20 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, Sh // Opset 20 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, ConstantOfShape); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, bool, ReduceMax); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, ReduceMax); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, ReduceMax); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int32_t, ReduceMax); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int64_t, ReduceMax); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int8_t, ReduceMax); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, uint8_t, ReduceMax); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, bool, ReduceMin); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, ReduceMin); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, ReduceMin); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int32_t, ReduceMin); +class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int64_t, ReduceMin); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int8_t, ReduceMin); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, uint8_t, ReduceMin); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, DFT); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, GridSample); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, GridSample); @@ -2263,36 +2277,36 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { ReduceLogSumExp)>, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/cpu/reduction/reduction_kernel_base.h 
b/onnxruntime/core/providers/cpu/reduction/reduction_kernel_base.h new file mode 100644 index 0000000000000..5725e85f8e1e4 --- /dev/null +++ b/onnxruntime/core/providers/cpu/reduction/reduction_kernel_base.h @@ -0,0 +1,40 @@ +#ifndef CORE_PROVIDERS_CPU_REDUCTION_KERNEL_BASE_H +#define CORE_PROVIDERS_CPU_REDUCTION_KERNEL_BASE_H + +#ifndef SHARED_PROVIDER +#include "core/common/optional.h" +#include "core/framework/op_kernel.h" +#endif + +namespace onnxruntime { + +template +class ReduceKernelBase { + protected: + ReduceKernelBase(const OpKernelInfo& info, optional keepdims_override = {}) { + if (allow_multi_axes) { + axes_ = ToShapeVector(info.GetAttrsOrDefault("axes")); + } else { + auto v = info.GetAttrOrDefault("axis", 0); + axes_.push_back(v); + } + int64_t keepdims = 1; + if (keepdims_override.has_value()) { + keepdims = *keepdims_override; + } else { + ORT_ENFORCE(info.GetAttr("keepdims", &keepdims).IsOK()); + } + keepdims_ = (keepdims == 1); + int64_t noop_with_empty_axes = info.GetAttrOrDefault("noop_with_empty_axes", 0); + noop_with_empty_axes_ = (noop_with_empty_axes == 1); + int64_t select_last_index = info.GetAttrOrDefault("select_last_index", 0); + select_last_index_ = (select_last_index != 0); + } + + TensorShapeVector axes_; + bool keepdims_; + bool noop_with_empty_axes_; + bool select_last_index_; +}; +} // namespace onnxruntime +#endif // !CORE_PROVIDERS_CPU_REDUCTION_KERNEL_BASE_H diff --git a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc index 3c83394fb0bf4..244da35427f49 100644 --- a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc +++ b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc @@ -114,6 +114,14 @@ namespace onnxruntime { KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ x); +#define REGISTER_UNARY_ELEMENTWISE_KERNEL_BOOL_ONLY(x, sinceVersion) \ + ONNX_CPU_OPERATOR_TYPED_KERNEL( \ + x, \ + sinceVersion, \ + bool, \ + 
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ + x); + REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL(ReduceL1, 1, 10); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT64_ONLY(ReduceL1, 1, 10); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL(ReduceL1, 11, 12); @@ -173,11 +181,18 @@ REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_DOUBLE_ONLY(ReduceMax, 13, 17); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT8_ONLY(ReduceMax, 13, 17); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_UINT8_ONLY(ReduceMax, 13, 17); -REGISTER_UNARY_ELEMENTWISE_KERNEL(ReduceMax, 18); -REGISTER_UNARY_ELEMENTWISE_KERNEL_INT64_ONLY(ReduceMax, 18); -REGISTER_UNARY_ELEMENTWISE_KERNEL_DOUBLE_ONLY(ReduceMax, 18); -REGISTER_UNARY_ELEMENTWISE_KERNEL_INT8_ONLY(ReduceMax, 18); -REGISTER_UNARY_ELEMENTWISE_KERNEL_UINT8_ONLY(ReduceMax, 18); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL(ReduceMax, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT64_ONLY(ReduceMax, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_DOUBLE_ONLY(ReduceMax, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT8_ONLY(ReduceMax, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_UINT8_ONLY(ReduceMax, 18, 19); + +REGISTER_UNARY_ELEMENTWISE_KERNEL(ReduceMax, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_INT64_ONLY(ReduceMax, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_DOUBLE_ONLY(ReduceMax, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_INT8_ONLY(ReduceMax, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_UINT8_ONLY(ReduceMax, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_BOOL_ONLY(ReduceMax, 20); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL(ReduceMean, 1, 10); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL(ReduceMean, 11, 12); @@ -207,11 +222,18 @@ REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_DOUBLE_ONLY(ReduceMin, 13, 17); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT8_ONLY(ReduceMin, 13, 17); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_UINT8_ONLY(ReduceMin, 13, 17); -REGISTER_UNARY_ELEMENTWISE_KERNEL(ReduceMin, 18); 
-REGISTER_UNARY_ELEMENTWISE_KERNEL_INT64_ONLY(ReduceMin, 18); -REGISTER_UNARY_ELEMENTWISE_KERNEL_DOUBLE_ONLY(ReduceMin, 18); -REGISTER_UNARY_ELEMENTWISE_KERNEL_INT8_ONLY(ReduceMin, 18); -REGISTER_UNARY_ELEMENTWISE_KERNEL_UINT8_ONLY(ReduceMin, 18); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL(ReduceMin, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT64_ONLY(ReduceMin, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_DOUBLE_ONLY(ReduceMin, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT8_ONLY(ReduceMin, 18, 19); +REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_UINT8_ONLY(ReduceMin, 18, 19); + +REGISTER_UNARY_ELEMENTWISE_KERNEL(ReduceMin, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_INT64_ONLY(ReduceMin, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_DOUBLE_ONLY(ReduceMin, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_INT8_ONLY(ReduceMin, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_UINT8_ONLY(ReduceMin, 20); +REGISTER_UNARY_ELEMENTWISE_KERNEL_BOOL_ONLY(ReduceMin, 20); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL(ReduceProd, 1, 10); REGISTER_UNARY_ELEMENTWISE_VERSIONED_KERNEL_INT64_ONLY(ReduceProd, 1, 10); @@ -822,10 +844,57 @@ static void ValidateKeepDims(const Tensor* input, int64_t keepdims) { ValidateKeepDims(input->Shape(), keepdims); } +template +bool check_and_reduce_empty_set_input(OpKernelContext* ctx, const gsl::span axes, bool keepdims) { + const Tensor* input = ctx->Input(0); + const TensorShape& input_shape = input->Shape(); + if (input_shape.Size() != 0) { + return false; + } + + // input is an empty set + std::vector input_axes; + if (ctx->InputCount() == 2) { + ORT_ENFORCE(axes.empty(), "Axes input and attribute should not both be present for reduction."); + // second input holds the axes. 
+ const Tensor* axes_tensor = ctx->Input(1); + auto nDims = static_cast(axes_tensor->Shape()[0]); + const auto* data = axes_tensor->Data(); + input_axes.insert(input_axes.begin(), data, data + nDims); + } else { + input_axes.resize(axes.size()); + std::copy(axes.begin(), axes.end(), input_axes.begin()); + } + + gsl::span shape_dims = input_shape.GetDims(); + const int64_t input_shape_size = narrow(shape_dims.size()); + TensorShapeVector output_shape_vector; + for (int64_t i = 0; i < input_shape_size; ++i) { + if (input_axes.empty() || std::find(input_axes.begin(), input_axes.end(), i) != input_axes.end()) { + if (keepdims) { + output_shape_vector.push_back(1); + } + } else { + output_shape_vector.push_back(input_shape[onnxruntime::narrow(i)]); + } + } + + TensorShape output_shape(output_shape_vector); + Tensor* output = ctx->Output(0, output_shape); + if (output_shape.Size() != 0) { + AGG::fill_for_empty_set(*output); + } + return true; +} + template void CommonReduce1Loop(OpKernelContext* ctx, const gsl::span& axes_, int64_t keepdims_, bool noop_with_empty_axes) { + if (check_and_reduce_empty_set_input(ctx, axes_, keepdims_ != 0)) { + return; + } + FastReduceKind fast_kind; TensorShapeVector fast_shape; TensorShapeVector output_shape; @@ -838,8 +907,8 @@ void CommonReduce1Loop(OpKernelContext* ctx, const Tensor* input = ctx->Input(0); Tensor* output = ctx->Output(0, output_shape); if (fast_kind == FastReduceKind::kEmpty) { - const TensorShape& new_input_shape = input->Shape(); - if (new_input_shape.Size() == 1) { + const TensorShape& input_shape = input->Shape(); + if (input_shape.Size() == 1) { const typename AGG::input_type* from_data = input->Data(); typename AGG::value_type* to_data = output->MutableData(); AGG agg(1, *from_data); @@ -859,6 +928,10 @@ template void CommonReduce2Loops(OpKernelContext* ctx, const gsl::span& axes_, int64_t keepdims_, bool noop_with_empty_axes) { + if (check_and_reduce_empty_set_input(ctx, axes_, keepdims_ != 0)) { + return; + } + 
FastReduceKind fast_kind; TensorShapeVector fast_shape, output_shape, fast_axes; if (CommonFastReduce(ctx, axes_, keepdims_, noop_with_empty_axes, @@ -869,8 +942,8 @@ void CommonReduce2Loops(OpKernelContext* ctx, const Tensor* input = ctx->Input(0); Tensor* output = ctx->Output(0, output_shape); if (fast_kind == FastReduceKind::kEmpty) { - const TensorShape& new_input_shape = input->Shape(); - if (new_input_shape.Size() == 1) { + const TensorShape& input_shape = input->Shape(); + if (input_shape.Size() == 1) { const typename AGG::input_type* from_data = input->Data(); typename AGG::value_type* to_data = output->MutableData(); AGG agg(1, *from_data); diff --git a/onnxruntime/core/providers/cpu/reduction/reduction_ops.h b/onnxruntime/core/providers/cpu/reduction/reduction_ops.h index 7105fd2ddad2e..4d205acaa015a 100644 --- a/onnxruntime/core/providers/cpu/reduction/reduction_ops.h +++ b/onnxruntime/core/providers/cpu/reduction/reduction_ops.h @@ -11,8 +11,10 @@ #include "core/providers/cpu/containers.h" #include "core/util/math.h" #endif +#include "core/framework/math.h" #include "core/util/math_cpuonly.h" #include "core/platform/threadpool.h" +#include "core/providers/cpu/reduction/reduction_kernel_base.h" #include "core/common/safeint.h" #include @@ -178,6 +180,7 @@ class ReduceAggregator : public ReduceAggregatorBase { inline void update0(const T&) {} inline TVAL aggall(const T*) {} inline TVAL get_value() { return accumulator_; } + static void fill_for_empty_set(Tensor&) { ORT_NOT_IMPLEMENTED(); } protected: static void CommonFastReduceRKR(const Tensor& input, const gsl::span& fast_shape, @@ -217,6 +220,10 @@ class ReduceAggregatorSum : public ReduceAggregator { return aggall(from_data, this->N_); } + static void fill_for_empty_set(Tensor& output) { + EigenMap(output).array() = static_cast(0); + } + // Fast reduction static inline FastReduceKind WhichFastReduce() { return FastReduceKind::kKR | FastReduceKind::kRK | FastReduceKind::kKRK | FastReduceKind::kRKR; @@ 
-290,6 +297,9 @@ class ReduceAggregatorSumSquare : public ReduceAggregator { return Eigen::Map>(from_data, onnxruntime::narrow(this->N_)).squaredNorm(); } inline void update(const T& v) { this->accumulator_ += v * v; } + static void fill_for_empty_set(Tensor& output) { + EigenMap(output).array() = static_cast(0); + } }; template @@ -363,7 +373,11 @@ class ReduceAggregatorMax : public ReduceAggregator { public: inline ReduceAggregatorMax(int64_t N, const T& init) : ReduceAggregator(N, init) {} static T aggall(const T* from_data, int64_t size) { - return Eigen::Map>(from_data, onnxruntime::narrow(size)).maxCoeff(); + if constexpr (std::is_same_v) { /* bool specific impl */ + return Eigen::Map>(from_data, onnxruntime::narrow(size)).cast().maxCoeff(); + } else { /* generic impl */ + return Eigen::Map>(from_data, onnxruntime::narrow(size)).maxCoeff(); + } } inline T aggall(const T* from_data) { return aggall(from_data, this->N_); @@ -383,10 +397,19 @@ class ReduceAggregatorMax : public ReduceAggregator { concurrency::ThreadPool::TryParallelFor( tp, onnxruntime::narrow(fast_shape[0]), ParallelReduceFastCost(1, stridei, sizeof(T), 6), [data, stridei, out](std::ptrdiff_t first, std::ptrdiff_t last) { - EigenVectorMap(out + first, last - first) = ConstEigenMatrixMap( - data + first * stridei, onnxruntime::narrow(stridei), last - first) - .colwise() - .maxCoeff(); + if constexpr (std::is_same_v) { /* bool specific impl */ + EigenVectorMap(out + first, last - first) = ConstEigenMatrixMap( + data + first * stridei, onnxruntime::narrow(stridei), last - first) + .cast() + .colwise() + .maxCoeff() + .cast(); + } else { + EigenVectorMap(out + first, last - first) = ConstEigenMatrixMap( + data + first * stridei, onnxruntime::narrow(stridei), last - first) + .colwise() + .maxCoeff(); + } }); } @@ -405,8 +428,12 @@ class ReduceAggregatorMax : public ReduceAggregator { for (int64_t row = 1; row < n_rows; ++row) { p = data + row * N; for (int64_t j = begin; j < end; ++j) { - if (out[j] 
< p[j]) - out[j] = p[j]; + if constexpr (std::is_same_v) { /* bool specific impl */ + out[j] = out[j] || p[j]; + } else { + if (out[j] < p[j]) + out[j] = p[j]; + } } } }); @@ -422,11 +449,21 @@ class ReduceAggregatorMax : public ReduceAggregator { tp, onnxruntime::narrow(fast_shape[0]), ParallelReduceFastCost(fast_shape[1], fast_shape[2], sizeof(T), 6), [data, fast_shape, stridei, strideo, out](ptrdiff_t begin, ptrdiff_t end) { for (ptrdiff_t j = begin; j < end; ++j) { - EigenVectorMap(out + j * strideo, onnxruntime::narrow(strideo)) = - ConstEigenMatrixMap( - data + j * stridei, onnxruntime::narrow(fast_shape[2]), onnxruntime::narrow(fast_shape[1])) - .rowwise() - .maxCoeff(); + if constexpr (std::is_same_v) { /* bool specific impl */ + EigenVectorMap(out + j * strideo, onnxruntime::narrow(strideo)) = + ConstEigenMatrixMap( + data + j * stridei, onnxruntime::narrow(fast_shape[2]), onnxruntime::narrow(fast_shape[1])) + .cast() + .rowwise() + .maxCoeff() + .cast(); + } else { + EigenVectorMap(out + j * strideo, onnxruntime::narrow(strideo)) = + ConstEigenMatrixMap( + data + j * stridei, onnxruntime::narrow(fast_shape[2]), onnxruntime::narrow(fast_shape[1])) + .rowwise() + .maxCoeff(); + } } }); } @@ -438,8 +475,12 @@ class ReduceAggregatorMax : public ReduceAggregator { [=](const T* p) -> T { return p[0]; }, [=](T& value, const T* p, int64_t size) { T v = aggall(p, size); - if (v > value) - value = v; + if constexpr (std::is_same_v) { /* bool specific impl */ + value = value || v; + } else { + if (v > value) + value = v; + } }); } }; @@ -545,6 +586,14 @@ class ReduceAggregatorMin : public ReduceAggregator { } inline void update(const T& v) { this->accumulator_ = v < this->accumulator_ ? 
v : this->accumulator_; } + static void fill_for_empty_set(Tensor& output) { + if constexpr (std::is_same_v) { /* bool specific impl */ + ORT_NOT_IMPLEMENTED(); + } else { + EigenMap(output).array() = std::numeric_limits::infinity(); + } + } + // Fast reduction static inline FastReduceKind WhichFastReduce() { return FastReduceKind::kKR | FastReduceKind::kRK | FastReduceKind::kKRK | FastReduceKind::kRKR; @@ -558,10 +607,19 @@ class ReduceAggregatorMin : public ReduceAggregator { concurrency::ThreadPool::TryParallelFor( tp, onnxruntime::narrow(fast_shape[0]), ParallelReduceFastCost(1, stridei, sizeof(T), 6), [data, stridei, out](std::ptrdiff_t first, std::ptrdiff_t last) { - EigenVectorMap(out + first, last - first) = ConstEigenMatrixMap( - data + first * stridei, onnxruntime::narrow(stridei), last - first) - .colwise() - .minCoeff(); + if constexpr (std::is_same_v) { /* bool specific impl */ + EigenVectorMap(out + first, last - first) = ConstEigenMatrixMap( + data + first * stridei, onnxruntime::narrow(stridei), last - first) + .cast() + .colwise() + .minCoeff() + .cast(); + } else { + EigenVectorMap(out + first, last - first) = ConstEigenMatrixMap( + data + first * stridei, onnxruntime::narrow(stridei), last - first) + .colwise() + .minCoeff(); + } }); } @@ -580,8 +638,12 @@ class ReduceAggregatorMin : public ReduceAggregator { for (int64_t row = 1; row < n_rows; ++row) { p = data + row * N; for (int64_t j = begin; j < end; ++j) { - if (out[j] > p[j]) - out[j] = p[j]; + if constexpr (std::is_same_v) { /* bool specific impl */ + out[j] = out[j] && p[j]; + } else { + if (out[j] > p[j]) + out[j] = p[j]; + } } } }); @@ -597,11 +659,21 @@ class ReduceAggregatorMin : public ReduceAggregator { tp, onnxruntime::narrow(fast_shape[0]), ParallelReduceFastCost(fast_shape[1], fast_shape[2], sizeof(T), 6), [data, fast_shape, stridei, strideo, out](ptrdiff_t begin, ptrdiff_t end) { for (ptrdiff_t j = begin; j < end; ++j) { - EigenVectorMap(out + j * strideo, 
onnxruntime::narrow(strideo)) = - ConstEigenMatrixMap( - data + j * stridei, onnxruntime::narrow(fast_shape[2]), onnxruntime::narrow(fast_shape[1])) - .rowwise() - .minCoeff(); + if constexpr (std::is_same_v) { /* bool specific impl */ + EigenVectorMap(out + j * strideo, onnxruntime::narrow(strideo)) = + ConstEigenMatrixMap( + data + j * stridei, onnxruntime::narrow(fast_shape[2]), onnxruntime::narrow(fast_shape[1])) + .cast() + .rowwise() + .minCoeff() + .cast(); + } else { + EigenVectorMap(out + j * strideo, onnxruntime::narrow(strideo)) = + ConstEigenMatrixMap( + data + j * stridei, onnxruntime::narrow(fast_shape[2]), onnxruntime::narrow(fast_shape[1])) + .rowwise() + .minCoeff(); + } } }); } @@ -613,8 +685,12 @@ class ReduceAggregatorMin : public ReduceAggregator { [=](const T* p) -> T { return p[0]; }, [=](T& value, const T* p, int64_t size) { T v = aggall(p, size); - if (v < value) - value = v; + if constexpr (std::is_same_v) { /* bool specific impl */ + value = value && v; + } else { + if (v < value) + value = v; + } }); } }; @@ -627,6 +703,9 @@ class ReduceAggregatorProd : public ReduceAggregator { return Eigen::Map>(from_data, onnxruntime::narrow(this->N_)).prod(); } inline void update(const T& v) { this->accumulator_ *= v; } + static void fill_for_empty_set(Tensor& output) { + EigenMap(output).array() = static_cast(1); + } }; template @@ -637,6 +716,10 @@ class ReduceAggregatorL1 : public ReduceAggregator { return Eigen::Map>(from_data, onnxruntime::narrow(this->N_)).cwiseAbs().sum(); } inline void update(const T& v) { this->accumulator_ += v > 0 ? 
v : -v; } + + static void fill_for_empty_set(Tensor& output) { + EigenMap(output).array() = static_cast(0); + } }; template @@ -648,6 +731,9 @@ class ReduceAggregatorL2 : public ReduceAggregator { } inline void update(const T& v) { this->accumulator_ += v * v; } inline T get_value() { return reduce_sqrt(this->accumulator_); } + static void fill_for_empty_set(Tensor& output) { + EigenMap(output).array() = static_cast(0); + } }; template @@ -659,6 +745,9 @@ class ReduceAggregatorLogSum : public ReduceAggregator { } inline void update(const T& v) { this->accumulator_ += v; } inline T get_value() { return reduce_log(this->accumulator_); } + static void fill_for_empty_set(Tensor& output) { + EigenMap(output).array() = -std::numeric_limits::infinity(); + } }; template @@ -682,6 +771,9 @@ class ReduceAggregatorLogSumExp : public ReduceAggregator { } inline void update(const T& v) { this->accumulator_ += reduce_exp(v - max_); } inline T get_value() { return reduce_log(this->accumulator_) + max_; } + static void fill_for_empty_set(Tensor& output) { + EigenMap(output).array() = -std::numeric_limits::infinity(); + } }; void NoTransposePrepareForReduce(const TensorShape& new_input_shape, @@ -710,35 +802,6 @@ void CommonReduce2Loops(OpKernelContext* ctx, const gsl::span& axes_, int64_t keepdims_, bool noop_with_empty_axes = false); -template -class ReduceKernelBase { - protected: - ReduceKernelBase(const OpKernelInfo& info, optional keepdims_override = {}) { - if (allow_multi_axes) { - axes_ = ToShapeVector(info.GetAttrsOrDefault("axes")); - } else { - auto v = info.GetAttrOrDefault("axis", 0); - axes_.push_back(v); - } - int64_t keepdims = 1; - if (keepdims_override.has_value()) { - keepdims = *keepdims_override; - } else { - ORT_ENFORCE(info.GetAttr("keepdims", &keepdims).IsOK()); - } - keepdims_ = (keepdims == 1); - int64_t noop_with_empty_axes = info.GetAttrOrDefault("noop_with_empty_axes", 0); - noop_with_empty_axes_ = (noop_with_empty_axes == 1); - int64_t 
select_last_index = info.GetAttrOrDefault("select_last_index", 0); - select_last_index_ = (select_last_index != 0); - } - - TensorShapeVector axes_; - bool keepdims_; - bool noop_with_empty_axes_; - bool select_last_index_; -}; - template class ReduceKernel : public OpKernel, public ReduceKernelBase { protected: diff --git a/onnxruntime/core/providers/cuda/reduction/reduction_ops.h b/onnxruntime/core/providers/cuda/reduction/reduction_ops.h index ee8e13db2eb53..c22ff2d01a37d 100644 --- a/onnxruntime/core/providers/cuda/reduction/reduction_ops.h +++ b/onnxruntime/core/providers/cuda/reduction/reduction_ops.h @@ -4,7 +4,7 @@ #pragma once #include "core/common/optional.h" #include "core/providers/cuda/cuda_kernel.h" -#include "core/providers/cpu/reduction/reduction_ops.h" +#include "core/providers/cpu/reduction/reduction_kernel_base.h" #include "core/providers/cuda/reduction/reduction_functions.h" namespace onnxruntime { diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index 6d07ddde5c442..57c2061883736 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -954,7 +954,6 @@ std::unique_ptr> GetBrokenTests(const std::string& provider {"reduce_log_sum_exp_empty_set_expanded", "unknown version", {}}, {"reduce_prod_empty_set", "unknown version", {}}, {"reduce_sum_empty_set", "unknown version", {}}, - {"reduce_sum_square_empty_set", "unknown version", {}}, {"reduce_sum_square_empty_set_expanded", "unknown version", {}}, #ifdef ENABLE_TRAINING_CORE {"adagrad", "not a registered function/op", {}}, // Op not registered. 
@@ -1352,6 +1351,7 @@ std::unique_ptr> GetBrokenTests(const std::string& provider broken_tests->insert({"gridsample_volumetric_nearest_align_corners_0", "unknown version"}); broken_tests->insert({"gridsample_volumetric_nearest_align_corners_1", "unknown version"}); broken_tests->insert({"spacetodepth", "result differs"}); + broken_tests->insert({"reduce_sum_square_empty_set_expanded", "unknown version"}); // Fails with QNN SDK 2.17.0: // expected 7.70947 (40f6b3f3), got 7.84096 (40fae920), diff: 0.131491, tol=0.00870947 idx=419. 100 of 1715 differ broken_tests->insert({"facedetection_op8_qdq", "result differs"}); diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc index 79da8004a9edd..b0e0a0dd0d564 100644 --- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc +++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc @@ -924,7 +924,280 @@ TEST(ReductionOpTest, ReduceMax_default_axes_do_not_keep_dims) { 55.0f, 1.0f, 60.0f, 2.0f}); test.AddOutput("reduced", {}, {60.0f}); - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: full reduce without keepDimensions is not supported with explicit batch //TensorRT: axis must be 0 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: full reduce without keepDimensions is not supported with explicit batch //TensorRT: axis must be 0 +} + +TEST(ReductionOpTest, test_bool_ReduceMax_0) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {-1, 1}); + test.AddOutput("reduced", {2}, {true, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_1) { + OpTester 
test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {-1, 1}); + test.AddOutput("reduced", {2}, {false, false}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_2) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {-1, 1}); + test.AddOutput("reduced", {2, 1, 1}, {true, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + } + + ); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_3) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {-1, 1}); + test.AddOutput("reduced", {2, 1, 1}, {false, false}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_4) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {2, 1}); + test.AddOutput("reduced", {2}, {true, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_5) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {2, 
1}); + test.AddOutput("reduced", {2}, {false, false}); + test.Run(); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_6) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {2, 1}); + test.AddOutput("reduced", {2, 1, 1}, {true, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_7) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {2}, {2, 1}); + test.AddOutput("reduced", {2, 1, 1}, {false, false}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_8) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {1}, {0}); + test.AddOutput("reduced", {3, 2}, {false, true, true, true, false, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_9) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {1}, {0}); + test.AddOutput("reduced", {3, 2}, {false, false, false, true, false, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_10) { + OpTester test("ReduceMax", 20); + 
test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {1}, {0}); + test.AddOutput("reduced", {1, 3, 2}, {false, true, true, true, false, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_11) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {1}, {0}); + test.AddOutput("reduced", {1, 3, 2}, {false, false, false, true, false, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_12) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {1}, {2}); + test.AddOutput("reduced", {2, 3}, {false, true, true, true, true, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_13) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {1}, {2}); + test.AddOutput("reduced", {2, 3}, {false, true, false, false, false, false}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_14) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, 
true, false, true, false, true}); + test.AddInput("axes", {1}, {2}); + test.AddOutput("reduced", {2, 3, 1}, {false, true, true, true, true, true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_15) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddInput("axes", {1}, {2}); + test.AddOutput("reduced", {2, 3, 1}, {false, true, false, false, false, false}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_16) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddOutput("reduced", {}, {true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_17) { + OpTester test("ReduceMin", 20); + test.AddAttribute("keepdims", static_cast(0)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddOutput("reduced", {}, {false}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMax_18) { + OpTester test("ReduceMax", 20); + test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddOutput("reduced", {1, 1, 1}, {true}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); +} + +TEST(ReductionOpTest, test_bool_ReduceMin_19) { + OpTester test("ReduceMin", 20); + 
test.AddAttribute("keepdims", static_cast(1)); + test.AddInput("data", {2, 3, 2}, {false, false, true, true, false, true, false, true, false, true, false, true}); + test.AddOutput("reduced", {1, 1, 1}, {false}); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kOpenVINOExecutionProvider, + }); } TEST(ReductionOpTest, ReduceMax_do_not_keepdims) { @@ -3254,7 +3527,7 @@ TEST(ReductionOpTest, OptimizeShapeForFastReduce_ReduceDimWithZero1b) { // test that PrepareForReduce handles this case. Called by all reduction ops so any op can be used in the test TEST(ReductionOpTest, ReduceDimWithZero1) { // TODO: Unskip when fixed #41968513 - if (DefaultDmlExecutionProvider().get() != nullptr) { + if (DefaultDmlExecutionProvider().get() != nullptr || DefaultRocmExecutionProvider().get() != nullptr) { GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{1,0,1}] did not match run output shape [{1,1,1}] for reduced"; } @@ -3264,8 +3537,12 @@ TEST(ReductionOpTest, ReduceDimWithZero1) { tester.Run(expect, error_msg, // exclude EPs that don't handle this + // TODO: fix reduce kernel for zero set cases. see: https://github.com/microsoft/onnxruntime/issues/18588 { kCoreMLExecutionProvider, + kCudaExecutionProvider, + kDnnlExecutionProvider, + kMIGraphXExecutionProvider, kOpenVINOExecutionProvider, kQnnExecutionProvider, kTensorrtExecutionProvider, @@ -3275,9 +3552,8 @@ TEST(ReductionOpTest, ReduceDimWithZero1) { // reduce on all axes keeping dims. 
should allow the 0 to be the reduced value OpTester test("ReduceSum", 10); test.AddAttribute("keepdims", int64_t(1)); - test.AddShapeToTensorData(true, 1); // make second dim symbolic so that we don't break during shape inferencing test.AddInput("data", {3, 0, 2}, {}); - test.AddOutput("reduced", {1, 0, 1}, {}); + test.AddOutput("reduced", {1, 1, 1}, {0.0f}); run(test); } @@ -3301,8 +3577,8 @@ TEST(ReductionOpTest, OptimizeShapeForFastReduce_ReduceDimWithZero2) { TEST(ReductionOpTest, ReduceDimWithZero2) { // TODO: Unskip when fixed #41968513 - if (DefaultDmlExecutionProvider().get() != nullptr) { - GTEST_SKIP() << "Skipping because of the following error: Can't reduce on dim with value of 0 if 'keepdims' is false. Invalid output shape would be produced. input_shape:{3,0,2}"; + if (DefaultDmlExecutionProvider().get() != nullptr || DefaultRocmExecutionProvider().get() != nullptr) { + GTEST_SKIP() << "Skipping because of the following error: Can't reduce on dim with value of 0 if 'keepdims' is false. Invalid output shape would be produced. input_shape:{?,0,?}"; } auto run = [](OpTester& tester, const std::string& error_msg = "") { @@ -3311,23 +3587,25 @@ TEST(ReductionOpTest, ReduceDimWithZero2) { tester.Run(expect, error_msg, // exclude EPs that don't handle this + // TODO: fix reduce kernel for zero set cases. see: https://github.com/microsoft/onnxruntime/issues/18588 { + kCoreMLExecutionProvider, + kCudaExecutionProvider, + kDnnlExecutionProvider, + kMIGraphXExecutionProvider, kOpenVINOExecutionProvider, kQnnExecutionProvider, kTensorrtExecutionProvider, - kCoreMLExecutionProvider, }); }; - // reduction without keeping dims on all axes. can't reduce on an axis with value of 0 + // reducing on all axes including one or more with 0 dimension, with keepdims=0, results in a scalar of 0. 
OpTester test2("ReduceSum", 10); test2.AddAttribute("keepdims", int64_t(0)); test2.AddShapeToTensorData(true, 1); test2.AddInput("data", {3, 0, 2}, {}); - test2.AddOutput("reduced", {}, {0.f}); - run(test2, - "Can't reduce on dim with value of 0 if 'keepdims' is false. " - "Invalid output shape would be produced. input_shape:{3,0,2}"); + test2.AddOutput("reduced", {}, {0.0f}); + run(test2); } TEST(ReductionOpTest, OptimizeShapeForFastReduce_ReduceDimWithZero3) { @@ -5478,5 +5756,101 @@ TEST(ReductionOpTest, ReduceSum_RKRK_keepdims) { test.Run(); } +void test_empty_set(const std::string& op, int opset, bool axes_as_input, float empty_value) { + OpTester test(op, opset); + std::vector input_shape = {2, 0, 4}; + int64_t input_size = std::accumulate(input_shape.begin(), input_shape.end(), static_cast(1), std::multiplies()); + std::vector data(input_size); + test.AddInput("data", input_shape, data); + std::vector axes = {1}; + if (axes_as_input) { + test.AddInput("axes", {(int64_t)(axes.size())}, axes); + } else { + test.AddAttribute("axes", axes); + } + + std::vector output_shape = {2, 1, 4}; + int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), static_cast(1), std::multiplies()); + std::vector reduced(output_size, empty_value); + test.AddOutput("reduced", output_shape, reduced); + test.Run( + OpTester::ExpectResult::kExpectSuccess, + "", + { + kCoreMLExecutionProvider, + kCudaExecutionProvider, + kDmlExecutionProvider, + kDnnlExecutionProvider, + kMIGraphXExecutionProvider, + kOpenVINOExecutionProvider, + kQnnExecutionProvider, + kRocmExecutionProvider, + kTensorrtExecutionProvider, + }); +} + +TEST(ReductionOpTest, empty_set_ReduceL1) { + test_empty_set("ReduceL1", 20, true, 0); +} + +TEST(ReductionOpTest, empty_set_ReduceL1_13) { + test_empty_set("ReduceL1", 13, false, 0); +} + +TEST(ReductionOpTest, empty_set_ReduceL2) { + test_empty_set("ReduceL2", 20, true, 0); +} + +TEST(ReductionOpTest, empty_set_ReduceL2_13) { + 
test_empty_set("ReduceL2", 13, false, 0); +} + +TEST(ReductionOpTest, empty_set_ReduceLogSum) { + test_empty_set("ReduceLogSum", 20, true, -std::numeric_limits::infinity()); +} + +TEST(ReductionOpTest, empty_set_ReduceLogSum_13) { + test_empty_set("ReduceLogSum", 13, false, -std::numeric_limits::infinity()); +} + +TEST(ReductionOpTest, empty_set_ReduceLogSumExp) { + test_empty_set("ReduceLogSumExp", 20, true, -std::numeric_limits::infinity()); +} + +TEST(ReductionOpTest, empty_set_ReduceLogSumExp_13) { + test_empty_set("ReduceLogSumExp", 13, false, -std::numeric_limits::infinity()); +} + +TEST(ReductionOpTest, empty_set_ReduceMin) { + test_empty_set("ReduceMin", 20, true, std::numeric_limits::infinity()); +} + +TEST(ReductionOpTest, empty_set_ReduceMin_13) { + test_empty_set("ReduceMin", 13, false, std::numeric_limits::infinity()); +} + +TEST(ReductionOpTest, empty_set_ReduceProd) { + test_empty_set("ReduceProd", 20, true, 1.0f); +} + +TEST(ReductionOpTest, empty_set_ReduceProd_13) { + test_empty_set("ReduceProd", 13, false, 1.0f); +} + +TEST(ReductionOpTest, empty_set_ReduceSum) { + test_empty_set("ReduceSum", 20, true, 0.0f); +} + +TEST(ReductionOpTest, empty_set_ReduceSum_13) { + test_empty_set("ReduceSum", 11, false, 0.0f); +} + +TEST(ReductionOpTest, empty_set_ReduceSumSquare) { + test_empty_set("ReduceSumSquare", 20, true, 0.0f); +} + +TEST(ReductionOpTest, empty_set_ReduceSumSquare_13) { + test_empty_set("ReduceSumSquare", 13, false, 0.0f); +} } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 49d8d7150a117..3a13e39702904 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -262,22 +262,18 @@ "^test_string_split_empty_tensor", "^test_string_split_maxsplit", "^test_string_split_no_delimiter", - 
"^test_reduce_max_bool_inputs", - "^test_reduce_min_bool_inputs", - "^test_reduce_min_empty_set", - "^test_reduce_l1_empty_set", - "^test_reduce_l1_empty_set_expanded", - "^test_reduce_l2_empty_set", - "^test_reduce_l2_empty_set_expanded", - "^test_reduce_log_sum_empty_set", - "^test_reduce_log_sum_empty_set_expanded", - "^test_reduce_log_sum_exp_empty_set", - "^test_reduce_log_sum_exp_empty_set_expanded", - "^test_reduce_prod_empty_set", - "^test_reduce_sum_empty_set", - "^test_reduce_sum_empty_set_non_reduced_axis_zero", - "^test_reduce_sum_square_empty_set", - "^test_reduce_sum_square_empty_set_expanded" + "^test_reduce_l1_empty_set_cuda", + "^test_reduce_l1_empty_set_expanded_cuda", + "^test_reduce_l2_empty_set_cuda", + "^test_reduce_l2_empty_set_expanded_cuda", + "^test_reduce_log_sum_empty_set_cuda", + "^test_reduce_log_sum_empty_set_expanded_cuda", + "^test_reduce_log_sum_exp_empty_set_cuda", + "^test_reduce_log_sum_exp_empty_set_expanded_cuda", + "^test_reduce_prod_empty_set_cuda", + "^test_reduce_sum_empty_set_cuda", + "^test_reduce_sum_square_empty_set_cuda", + "^test_reduce_sum_square_empty_set_expanded_cuda" ], "current_failing_tests_x86": [ "^test_vgg19", @@ -377,7 +373,23 @@ "^test_constantofshape_int_zeros", // https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1141563&view=logs&j=a018b46d-e41a-509d-6581-c95fdaa42fcd&t=d61c1d37-f101-5d28-982f-e5931b720302 "^test_gelu_tanh_2_cpu", - "^test_gelu_tanh_2_expanded_cpu" + "^test_gelu_tanh_2_expanded_cpu", + "^test_reduce_max_bool_inputs", + "^test_reduce_min_bool_inputs", + "^test_reduce_min_empty_set", + "^test_reduce_l1_empty_set", + "^test_reduce_l1_empty_set_expanded", + "^test_reduce_l2_empty_set", + "^test_reduce_l2_empty_set_expanded", + "^test_reduce_log_sum_empty_set", + "^test_reduce_log_sum_empty_set_expanded", + "^test_reduce_log_sum_exp_empty_set", + "^test_reduce_log_sum_exp_empty_set_expanded", + "^test_reduce_prod_empty_set", + "^test_reduce_sum_empty_set", + 
"^test_reduce_sum_empty_set_non_reduced_axis_zero", + "^test_reduce_sum_square_empty_set", + "^test_reduce_sum_square_empty_set_expanded" ], "current_failing_tests_NNAPI": [ "^test_maxpool_2d_uint8", @@ -498,7 +510,8 @@ "test_range_int32_type_negative_delta_expanded_cpu", // Error but not a failure. "test_range_float_type_positive_delta_expanded_cpu", // Error but not a failure. "test_scan_sum_cpu", // Disabled due to output mismatch with tolerance. - "test_scan9_sum_cpu" // Disabled due to output mismatch with tolerance. + "test_scan9_sum_cpu", // Disabled due to output mismatch with tolerance. + "test_reduce_max_bool_inputs_cpu" ], "current_failing_tests_OPENVINO_NPU_FP16": [ "^test_prelu_broadcast", @@ -656,8 +669,10 @@ "^test_affine_grid_3d_expanded", "^test_constantofshape_float_ones", "^test_constantofshape_int_shape_zero", - "^test_constantofshape_int_zeros" - + "^test_constantofshape_int_zeros", + "^test_reduce_log_sum_empty_set_cpu", + "^test_reduce_log_sum_exp_empty_set_cpu", + "^test_reduce_prod_empty_set_cpu" ], // ORT first supported opset 7, so models with nodes that require versions prior to opset 7 are not supported "tests_with_pre_opset7_dependencies": [