Support reduction operations with keep_dims == false
nkogteva committed Aug 21, 2023
1 parent e5023f9 commit b61f749
Showing 6 changed files with 267 additions and 20 deletions.
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp
@@ -149,7 +149,7 @@ class OperationBuffersExtractor {
*/
template <typename TNode>
static std::size_t GetTensorByteSize(const TNode& node) {
return node.get_element_type().size() * shape_size(node.get_shape());
return node.get_element_type().size() * std::max(std::size_t(1), shape_size(node.get_shape()));
}

/**
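The one-line change above guards against zero-element tensors: ov::shape_size is the product of a shape's dimensions, so any shape containing a 0 (for example the empty Shape{0} reshape constant used for the reduce-to-scalar case in the tests below) would otherwise yield a zero-byte buffer. A standalone sketch of the arithmetic, using a hand-rolled stand-in for shape_size rather than the OpenVINO helper:

// Standalone sketch (not plugin code): shape_size_product mimics ov::shape_size
// as the plain product of dimensions, to show what the std::max guard changes.
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

static std::size_t shape_size_product(const std::vector<std::size_t>& shape) {
    return std::accumulate(shape.begin(), shape.end(), std::size_t{1}, std::multiplies<std::size_t>{});
}

int main() {
    const std::size_t element_size = 4;          // e.g. f32 or i32
    const std::vector<std::size_t> empty{0};     // zero-element tensor, e.g. a Shape{0} constant

    const std::size_t without_guard = element_size * shape_size_product(empty);
    const std::size_t with_guard = element_size * std::max(std::size_t(1), shape_size_product(empty));
    std::cout << without_guard << " -> " << with_guard << "\n";  // prints "0 -> 4"
    return 0;
}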
21 changes: 5 additions & 16 deletions modules/nvidia_plugin/src/transformer/cuda_graph_transformer.cpp
@@ -30,6 +30,7 @@
#include "concat_transformation.hpp"
#include "fuse_matmul_add.hpp"
#include "matmul_transformations.hpp"
#include "reduce_transformation.hpp"
#include "remove_duplicated_results_transformation.hpp"
#include "remove_redundant_convert_transformation.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
@@ -84,22 +85,9 @@ void GraphTransformer::transform(const CUDA::Device& device,
pass_config->disable<ov::pass::Gelu7Downgrade>();
pass_config->disable<ov::pass::ConvertGELU>();
pass_config->disable<ov::pass::HSwishDecomposition>();

auto is_reduce_op_supported = [](const std::shared_ptr<const ov::Node> &node) -> bool {
if (const auto &reduce_op = std::dynamic_pointer_cast<const ov::op::util::ArithmeticReductionKeepDims>(node)) {
// Each dimension of the output tensor C must match the corresponding dimension
// of the input tensor A or must be equal to 1
return reduce_op->get_keep_dims() ||
reduce_op->input(0).get_shape().size() == reduce_op->output(0).get_shape().size();
}
return false;
};
pass_config->set_callback<ov::pass::ConvertReduceMaxToPooling,
ov::pass::ConvertReduceMeanToPooling,
ov::pass::ConvertReduceSumToPooling>(
[is_reduce_op_supported](const std::shared_ptr<const ov::Node> &node) -> bool {
return is_reduce_op_supported(node);
});
pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
pass_config->disable<ov::pass::ConvertReduceSumToPooling>();

[[maybe_unused]] const auto& originOps = model->get_ordered_ops();
[[maybe_unused]] const auto& originOpsSize = originOps.size();
@@ -154,6 +142,7 @@ void GraphTransformer::transform(const CUDA::Device& device,
pass_manager.register_pass<ov::nvidia_gpu::pass::TransposeMatMulTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::ConcatTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::ReduceTransformation>();
// Do we actually need to eliminate broadcast one more time at the end?
pass_manager.register_pass<ov::pass::NopElimination>();

83 changes: 83 additions & 0 deletions modules/nvidia_plugin/src/transformer/reduce_transformation.cpp
@@ -0,0 +1,83 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/cc/pass/itt.hpp"

#include "exec_graph_info.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "openvino/op/reduce_max.hpp"
#include "openvino/op/reduce_mean.hpp"
#include "openvino/op/reduce_min.hpp"
#include "openvino/op/reduce_prod.hpp"
#include "openvino/op/reduce_sum.hpp"
#include "openvino/op/reshape.hpp"
#include "reduce_transformation.hpp"

using namespace ov::pass::pattern;

namespace ov::nvidia_gpu::pass {

namespace {

bool is_reduce_to_be_transformed(const ov::Output<ov::Node>& output) {
auto node = std::dynamic_pointer_cast<ov::op::util::ArithmeticReductionKeepDims>(output.get_node_shared_ptr());
if (!node) {
return false;
}
if (node->is_dynamic()) {
return false;
}
return !node->get_keep_dims();
}

bool transform_reduce(Matcher &m) {
auto reduce = std::dynamic_pointer_cast<ov::op::util::ArithmeticReductionKeepDims>(m.get_match_root());
const ov::Shape output_shape = reduce->output(0).get_shape();
auto consumers = reduce->get_output_target_inputs(0);

std::shared_ptr<ov::Node> new_reduce;
if (ov::as_type_ptr<ov::op::v1::ReduceMax>(reduce)) {
new_reduce = std::make_shared<ov::op::v1::ReduceMax>(reduce->input_value(0), reduce->input_value(1), true);
} else if (ov::as_type_ptr<ov::op::v1::ReduceMean>(reduce)) {
new_reduce = std::make_shared<ov::op::v1::ReduceMean>(reduce->input_value(0), reduce->input_value(1), true);
} else if (ov::as_type_ptr<ov::op::v1::ReduceMin>(reduce)) {
new_reduce = std::make_shared<ov::op::v1::ReduceMin>(reduce->input_value(0), reduce->input_value(1), true);
} else if (ov::as_type_ptr<ov::op::v1::ReduceProd>(reduce)) {
new_reduce = std::make_shared<ov::op::v1::ReduceProd>(reduce->input_value(0), reduce->input_value(1), true);
} else if (ov::as_type_ptr<ov::op::v1::ReduceSum>(reduce)) {
new_reduce = std::make_shared<ov::op::v1::ReduceSum>(reduce->input_value(0), reduce->input_value(1), true);
} else {
return false;
}
new_reduce->set_friendly_name(reduce->get_friendly_name());
auto reshape_const = std::make_shared<ov::op::v0::Constant>(element::i32, Shape{output_shape.size()}, output_shape);
auto reshape = std::make_shared<ov::op::v1::Reshape>(new_reduce, reshape_const, false);
for (auto consumer : consumers) {
consumer.replace_source_output(reshape);
}
ov::NodeVector new_ops = {new_reduce, reshape_const, reshape};
ov::copy_runtime_info(reduce, new_ops);
for (auto& new_op : new_ops) {
new_op->get_rt_info()[ExecGraphInfoSerialization::ORIGINAL_NAMES] = reduce->get_friendly_name();
}
return true;
}
} // namespace

ReduceTransformation::ReduceTransformation() {
MATCHER_SCOPE(ReduceTransformation);
auto reduce = wrap_type<ov::op::v1::ReduceMax,
ov::op::v1::ReduceMean,
ov::op::v1::ReduceMin,
ov::op::v1::ReduceProd,
ov::op::v1::ReduceSum>(is_reduce_to_be_transformed);
matcher_pass_callback callback = [](Matcher &m) {
return transform_reduce(m);
};
auto m = std::make_shared<Matcher>(reduce, matcher_name);
register_matcher(m, callback);
}

} // namespace ov::nvidia_gpu::pass
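To make the rewrite concrete, the following hand-built sketch (it mirrors the reference model from the reduce_max_keep_dims_false test further down, so the shapes and ops are illustrative rather than new API) shows what the pass produces for a ReduceMax over axes {0, 3} of a {10, 20, 30, 40} input: the keep_dims == true reduce yields {1, 20, 30, 1}, and the trailing Reshape restores the expected {20, 30} output.

// Illustrative only; structurally identical to the reference model built in the
// reduce_max_keep_dims_false test below.
#include <memory>
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/reduce_max.hpp"
#include "openvino/op/reshape.hpp"

std::shared_ptr<ov::Node> rewritten_reduce_max() {
    auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{10, 20, 30, 40});
    auto axes = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{2}, {0, 3});
    // keep_dims == true keeps the reduced axes as size 1: output shape {1, 20, 30, 1}
    auto reduce = std::make_shared<ov::op::v1::ReduceMax>(input, axes, true);
    // Reshape back to the original keep_dims == false shape {20, 30}
    auto target = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{2}, {20, 30});
    return std::make_shared<ov::op::v1::Reshape>(reduce, target, false);
}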
17 changes: 17 additions & 0 deletions modules/nvidia_plugin/src/transformer/reduce_transformation.hpp
@@ -0,0 +1,17 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/graph_rewrite.hpp"

namespace ov::nvidia_gpu::pass {

class ReduceTransformation : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("ReduceTransformation", "0");
ReduceTransformation();
};

} // namespace ov::nvidia_gpu::pass
@@ -18,9 +18,7 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {

const std::vector<bool> keepDims = {
true,
// Each dimension of the output tensor C must match
// the corresponding dimension of the input tensor A or must be equal to 1
// false
false
};

const std::vector<std::vector<size_t>> inputShapes = {
@@ -0,0 +1,160 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "common_test_utils/ngraph_test_utils.hpp"
#include "openvino/core/model.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"
#include "transformer/reduce_transformation.hpp"

using namespace ov;
using namespace std;

TEST(reduce_transformation, reduce_max_keep_dims_true) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{2}, {0, 3});
auto reduce = make_shared<op::v1::ReduceMax>(input, axis, true);
model = make_shared<Model>(reduce, ParameterVector{input});
model_ref = model->clone();
}
pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::ReduceTransformation>();
pass_manager.run_passes(model);

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(reduce_transformation, reduce_max_keep_dims_false) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{2}, {0, 3});
auto reduce = make_shared<op::v1::ReduceMax>(input, axis, false);
model = make_shared<Model>(reduce, ParameterVector{input});
}
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{2}, {0, 3});
auto reduce = make_shared<op::v1::ReduceMax>(input, axis, true);
auto reshape_const = op::v0::Constant::create(element::i32, Shape{2}, {20, 30});
auto reshape = make_shared<op::v1::Reshape>(reduce, reshape_const, false);
model_ref = make_shared<Model>(reshape, ParameterVector{input});
}
pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::ReduceTransformation>();
pass_manager.run_passes(model);

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(reduce_transformation, reduce_mean_keep_dims_false) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{1}, {1});
auto reduce = make_shared<op::v1::ReduceMean>(input, axis, false);
model = make_shared<Model>(reduce, ParameterVector{input});
}
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{1}, {1});
auto reduce = make_shared<op::v1::ReduceMean>(input, axis, true);
auto reshape_const = op::v0::Constant::create(element::i32, Shape{3}, {10, 30, 40});
auto reshape = make_shared<op::v1::Reshape>(reduce, reshape_const, false);
model_ref = make_shared<Model>(reshape, ParameterVector{input});
}
pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::ReduceTransformation>();
pass_manager.run_passes(model);

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(reduce_transformation, reduce_min_keep_dims_false) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{4}, {0, 1, 2, 3});
auto reduce = make_shared<op::v1::ReduceMin>(input, axis, false);
model = make_shared<Model>(reduce, ParameterVector{input});
}
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{4}, {0, 1, 2, 3});
auto reduce = make_shared<op::v1::ReduceMin>(input, axis, true);
auto reshape_const = op::v0::Constant::create(element::i32, Shape{0}, {});
auto reshape = make_shared<op::v1::Reshape>(reduce, reshape_const, false);
model_ref = make_shared<Model>(reshape, ParameterVector{input});
}
pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::ReduceTransformation>();
pass_manager.run_passes(model);

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(reduce_transformation, reduce_prod_keep_dims_false) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{1}, {3});
auto reduce = make_shared<op::v1::ReduceProd>(input, axis, false);
model = make_shared<Model>(reduce, ParameterVector{input});
}
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{1}, {3});
auto reduce = make_shared<op::v1::ReduceProd>(input, axis, true);
auto reshape_const = op::v0::Constant::create(element::i32, Shape{3}, {10, 20, 30});
auto reshape = make_shared<op::v1::Reshape>(reduce, reshape_const, false);
model_ref = make_shared<Model>(reshape, ParameterVector{input});
}
pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::ReduceTransformation>();
pass_manager.run_passes(model);

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(reduce_transformation, reduce_sum_keep_dims_false) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{2}, {1, 2});
auto reduce = make_shared<op::v1::ReduceSum>(input, axis, false);
model = make_shared<Model>(reduce, ParameterVector{input});
}
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{10, 20, 30, 40});
auto axis = op::v0::Constant::create(element::i32, Shape{2}, {1, 2});
auto reduce = make_shared<op::v1::ReduceSum>(input, axis, true);
auto reshape_const = op::v0::Constant::create(element::i32, Shape{2}, {10, 40});
auto reshape = make_shared<op::v1::Reshape>(reduce, reshape_const, false);
model_ref = make_shared<Model>(reshape, ParameterVector{input});
}
pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::ReduceTransformation>();
pass_manager.run_passes(model);

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}
