From b61f7492675d97bba68f79b5a57a365f82b14d19 Mon Sep 17 00:00:00 2001 From: Nadezhda Date: Mon, 21 Aug 2023 15:17:12 +0000 Subject: [PATCH] Support reduction operations with keep_dims == false --- .../src/cuda_op_buffers_extractor.hpp | 2 +- .../transformer/cuda_graph_transformer.cpp | 21 +-- .../src/transformer/reduce_transformation.cpp | 83 +++++++++ .../src/transformer/reduce_transformation.hpp | 17 ++ .../{reduce_ops.cpp => reduce.cpp} | 4 +- .../transformations/reduce_transformation.cpp | 160 ++++++++++++++++++ 6 files changed, 267 insertions(+), 20 deletions(-) create mode 100644 modules/nvidia_plugin/src/transformer/reduce_transformation.cpp create mode 100644 modules/nvidia_plugin/src/transformer/reduce_transformation.hpp rename modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/{reduce_ops.cpp => reduce.cpp} (98%) create mode 100644 modules/nvidia_plugin/tests/unit/transformations/reduce_transformation.cpp diff --git a/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp b/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp index 9583db28cb..f25d377edb 100644 --- a/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp +++ b/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp @@ -149,7 +149,7 @@ class OperationBuffersExtractor { */ template static std::size_t GetTensorByteSize(const TNode& node) { - return node.get_element_type().size() * shape_size(node.get_shape()); + return node.get_element_type().size() * std::max(std::size_t(1), shape_size(node.get_shape())); } /** diff --git a/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.cpp b/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.cpp index abfa0b177d..90f1a97420 100644 --- a/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.cpp +++ b/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.cpp @@ -30,6 +30,7 @@ #include "concat_transformation.hpp" #include "fuse_matmul_add.hpp" #include "matmul_transformations.hpp" 
+#include "reduce_transformation.hpp" #include "remove_duplicated_results_transformation.hpp" #include "remove_redundant_convert_transformation.hpp" #include "transformations/op_conversions/convert_divide.hpp" @@ -84,22 +85,9 @@ void GraphTransformer::transform(const CUDA::Device& device, pass_config->disable(); pass_config->disable(); pass_config->disable(); - - auto is_reduce_op_supported = [](const std::shared_ptr &node) -> bool { - if (const auto &reduce_op = std::dynamic_pointer_cast(node)) { - // Each dimension of the output tensor C must match the corresponding dimension - // of the input tensor A or must be equal to 1 - return reduce_op->get_keep_dims() || - reduce_op->input(0).get_shape().size() == reduce_op->output(0).get_shape().size(); - } - return false; - }; - pass_config->set_callback( - [is_reduce_op_supported](const std::shared_ptr &node) -> bool { - return is_reduce_op_supported(node); - }); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); [[maybe_unused]] const auto& originOps = model->get_ordered_ops(); [[maybe_unused]] const auto& originOpsSize = originOps.size(); @@ -154,6 +142,7 @@ void GraphTransformer::transform(const CUDA::Device& device, pass_manager.register_pass(); pass_manager.register_pass(); pass_manager.register_pass(); + pass_manager.register_pass(); // Do we actually need to eliminate broadcast one more time at the end? 
pass_manager.register_pass(); diff --git a/modules/nvidia_plugin/src/transformer/reduce_transformation.cpp b/modules/nvidia_plugin/src/transformer/reduce_transformation.cpp new file mode 100644 index 0000000000..20d5b977e9 --- /dev/null +++ b/modules/nvidia_plugin/src/transformer/reduce_transformation.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/cc/pass/itt.hpp" + +#include "exec_graph_info.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/op/reduce_max.hpp" +#include "openvino/op/reduce_mean.hpp" +#include "openvino/op/reduce_min.hpp" +#include "openvino/op/reduce_prod.hpp" +#include "openvino/op/reduce_sum.hpp" +#include "openvino/op/reshape.hpp" +#include "reduce_transformation.hpp" + +using namespace ov::pass::pattern; + +namespace ov::nvidia_gpu::pass { + +namespace { + +bool is_reduce_to_be_transformed(const ov::Output& output) { + auto node = std::dynamic_pointer_cast(output.get_node_shared_ptr()); + if (!node) { + return false; + } + if (node->is_dynamic()) { + return false; + } + return !node->get_keep_dims(); +} + +bool transform_reduce(Matcher &m) { + auto reduce = std::dynamic_pointer_cast(m.get_match_root()); + const ov::Shape output_shape = reduce->output(0).get_shape(); + auto consumers = reduce->get_output_target_inputs(0); + + std::shared_ptr new_reduce; + if (ov::as_type_ptr(reduce)) { + new_reduce = std::make_shared(reduce->input_value(0), reduce->input_value(1), true); + } else if (ov::as_type_ptr(reduce)) { + new_reduce = std::make_shared(reduce->input_value(0), reduce->input_value(1), true); + } else if (ov::as_type_ptr(reduce)) { + new_reduce = std::make_shared(reduce->input_value(0), reduce->input_value(1), true); + } else if (ov::as_type_ptr(reduce)) { + new_reduce = std::make_shared(reduce->input_value(0), reduce->input_value(1), true); + } else if (ov::as_type_ptr(reduce)) { + new_reduce = 
std::make_shared(reduce->input_value(0), reduce->input_value(1), true); + } else { + return false; + } + new_reduce->set_friendly_name(reduce->get_friendly_name()); + auto reshape_const = std::make_shared(element::i32, Shape{output_shape.size()}, output_shape); + auto reshape = std::make_shared(new_reduce, reshape_const, false); + for (auto consumer : consumers) { + consumer.replace_source_output(reshape); + } + ov::NodeVector new_ops = {new_reduce, reshape_const, reshape}; + ov::copy_runtime_info(reduce, new_ops); + for (auto& new_op : new_ops) { + new_op->get_rt_info()[ExecGraphInfoSerialization::ORIGINAL_NAMES] = reduce->get_friendly_name(); + } + return true; +} +} // namespace + +ReduceTransformation::ReduceTransformation() { + MATCHER_SCOPE(ReduceTransformation); + auto reduce = wrap_type(is_reduce_to_be_transformed); + matcher_pass_callback callback = [](Matcher &m) { + return transform_reduce(m); + }; + auto m = std::make_shared(reduce, matcher_name); + register_matcher(m, callback); +} + +} // namespace ov::nvidia_gpu::pass \ No newline at end of file diff --git a/modules/nvidia_plugin/src/transformer/reduce_transformation.hpp b/modules/nvidia_plugin/src/transformer/reduce_transformation.hpp new file mode 100644 index 0000000000..61574191d5 --- /dev/null +++ b/modules/nvidia_plugin/src/transformer/reduce_transformation.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov::nvidia_gpu::pass { + +class ReduceTransformation : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ReduceTransformation", "0"); + ReduceTransformation(); +}; + +} // namespace ov::nvidia_gpu::pass diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/reduce.cpp similarity index 98% rename from 
modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp rename to modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/reduce.cpp index b19991d979..d7391d5032 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/reduce.cpp @@ -18,9 +18,7 @@ const std::vector netPrecisions = { const std::vector keepDims = { true, - // Each dimension of the output tensor C must match - // the corresponding dimension of the input tensor A or must be equal to 1 - // false + false }; const std::vector> inputShapes = { diff --git a/modules/nvidia_plugin/tests/unit/transformations/reduce_transformation.cpp b/modules/nvidia_plugin/tests/unit/transformations/reduce_transformation.cpp new file mode 100644 index 0000000000..6f7fed2372 --- /dev/null +++ b/modules/nvidia_plugin/tests/unit/transformations/reduce_transformation.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/opsets/opset10.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/init_node_info.hpp" +#include "transformations/utils/utils.hpp" +#include "transformer/reduce_transformation.hpp" + +using namespace ov; +using namespace std; + +TEST(reduce_transformation, reduce_max_keep_dims_true) { + shared_ptr model, model_ref; + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{2}, {0, 3}); + auto reduce = make_shared(input, axis, true); + model = make_shared(reduce, ParameterVector{input}); + model_ref = model->clone(); + } + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + 
pass_manager.run_passes(model); + + auto res = compare_functions(model, model_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(reduce_transformation, reduce_max_keep_dims_false) { + shared_ptr model, model_ref; + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{2}, {0, 3}); + auto reduce = make_shared(input, axis, false); + model = make_shared(reduce, ParameterVector{input}); + } + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{2}, {0, 3}); + auto reduce = make_shared(input, axis, true); + auto reshape_const = op::v0::Constant::create(element::i32, Shape{2}, {20, 30}); + auto reshape = make_shared(reduce, reshape_const, false); + model_ref = make_shared(reshape, ParameterVector{input}); + } + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.run_passes(model); + + auto res = compare_functions(model, model_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(reduce_transformation, reduce_mean_keep_dims_false) { + shared_ptr model, model_ref; + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{1}, {1}); + auto reduce = make_shared(input, axis, false); + model = make_shared(reduce, ParameterVector{input}); + } + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{1}, {1}); + auto reduce = make_shared(input, axis, true); + auto reshape_const = op::v0::Constant::create(element::i32, Shape{3}, {10, 30, 40}); + auto reshape = make_shared(reduce, reshape_const, false); + model_ref = make_shared(reshape, ParameterVector{input}); + } + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.run_passes(model); + + auto res = compare_functions(model, 
model_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(reduce_transformation, reduce_min_keep_dims_false) { + shared_ptr model, model_ref; + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{4}, {0, 1, 2, 3}); + auto reduce = make_shared(input, axis, false); + model = make_shared(reduce, ParameterVector{input}); + } + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{4}, {0, 1, 2, 3}); + auto reduce = make_shared(input, axis, true); + auto reshape_const = op::v0::Constant::create(element::i32, Shape{0}, {}); + auto reshape = make_shared(reduce, reshape_const, false); + model_ref = make_shared(reshape, ParameterVector{input}); + } + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.run_passes(model); + + auto res = compare_functions(model, model_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(reduce_transformation, reduce_prod_keep_dims_false) { + shared_ptr model, model_ref; + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{1}, {3}); + auto reduce = make_shared(input, axis, false); + model = make_shared(reduce, ParameterVector{input}); + } + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{1}, {3}); + auto reduce = make_shared(input, axis, true); + auto reshape_const = op::v0::Constant::create(element::i32, Shape{3}, {10, 20, 30}); + auto reshape = make_shared(reduce, reshape_const, false); + model_ref = make_shared(reshape, ParameterVector{input}); + } + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.run_passes(model); + + auto res = compare_functions(model, model_ref); + ASSERT_TRUE(res.first) << res.second; +} + 
+TEST(reduce_transformation, reduce_sum_keep_dims_false) { + shared_ptr model, model_ref; + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{2}, {1, 2}); + auto reduce = make_shared(input, axis, false); + model = make_shared(reduce, ParameterVector{input}); + } + { + auto input = make_shared(element::f32, Shape{10, 20, 30, 40}); + auto axis = op::v0::Constant::create(element::i32, Shape{2}, {1, 2}); + auto reduce = make_shared(input, axis, true); + auto reshape_const = op::v0::Constant::create(element::i32, Shape{2}, {10, 40}); + auto reshape = make_shared(reduce, reshape_const, false); + model_ref = make_shared(reshape, ParameterVector{input}); + } + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.run_passes(model); + + auto res = compare_functions(model, model_ref); + ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file