diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index f1edeaa18ff1ef..b64690b61abb1b 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -72,7 +72,8 @@ void regmodule_properties(py::module m) {
         .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);

     py::enum_<ov::hint::ModelDistributionPolicy>(m_hint, "ModelDistributionPolicy", py::arithmetic())
-        .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL);
+        .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)
+        .value("PIPELINE_PARALLEL", ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL);

     py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
         .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE)
diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp
index ca0c86aa924062..9badb007d526b9 100644
--- a/src/core/include/openvino/core/any.hpp
+++ b/src/core/include/openvino/core/any.hpp
@@ -10,6 +10,7 @@
 #include <map>
 #include <memory>
+#include <set>
 #include <string>
 #include <type_traits>
 #include <typeindex>
@@ -209,6 +210,18 @@ struct Read<std::vector<T, A>, typename std::enable_if<std::is_default_constructible<T>::value>::type> {
+template <typename K, typename C, typename A>
+struct Read<std::set<K, C, A>, typename std::enable_if<std::is_default_constructible<K>::value>::type> {
+    void operator()(std::istream& is, std::set<K, C, A>& set) const {
+        while (is.good()) {
+            std::string str;
+            is >> str;
+            auto v = from_string<K>(str);
+            set.insert(std::move(v));
+        }
+    }
+};
+
 template <typename K, typename T, typename C, typename A>
 struct Read<
     std::map<K, T, C, A>,
@@ -343,6 +356,21 @@ struct Write<std::vector<T, A>> {
     }
 };

+template <typename K, typename C, typename A>
+struct Write<std::set<K, C, A>> {
+    void operator()(std::ostream& os, const std::set<K, C, A>& set) const {
+        if (!set.empty()) {
+            std::size_t i = 0;
+            for (auto&& v : set) {
+                os << to_string(v);
+                if (i < (set.size() - 1))
+                    os << ' ';
+                ++i;
+            }
+        }
+    }
+};
+
 template <typename K, typename T, typename C, typename A>
 struct Write<std::map<K, T, C, A>> {
     void operator()(std::ostream& os, const std::map<K, T, C, A>& map) const {
diff --git a/src/core/tests/any.cpp b/src/core/tests/any.cpp
index 426bf1373a3e74..7d9e3d4edc1126 100644
--- a/src/core/tests/any.cpp
+++ b/src/core/tests/any.cpp
@@ -158,6 +158,23 @@ TEST_F(AnyTests, AnyAsMapOfAnys) {
     ASSERT_EQ(refMap["testParamString"].as<std::string>(), testString);
 }

+TEST_F(AnyTests, AnyAsSetOfAnys) {
+    std::set<std::string> refSet0;
+    std::set<int> refSet1;
+    refSet0.insert("test");
+    refSet1.insert(4);
+    Any s0 = refSet0;
+    Any s1 = refSet1;
+    bool isSet0 = s0.is<std::set<std::string>>();
+    bool isSet1 = s1.is<std::set<int>>();
+    ASSERT_TRUE(isSet0);
+    ASSERT_TRUE(isSet1);
+    auto testSet0 = s0.as<std::set<std::string>>();
+    auto testSet1 = s1.as<std::set<int>>();
+    ASSERT_NE(testSet0.count("test"), 0);
+    ASSERT_NE(testSet1.count(4), 0);
+}
+
 TEST_F(AnyTests, AnyAsMapOfMapOfAnys) {
     std::map<std::string, Any> refMap1;
     refMap1["testParamInt"] = 4;
diff --git a/src/frontends/onnx/frontend/src/op/batch_norm.cpp b/src/frontends/onnx/frontend/src/op/batch_norm.cpp
index 04a613bc10bdb7..fc4a3c2a4fd9d1 100644
--- a/src/frontends/onnx/frontend/src/op/batch_norm.cpp
+++ b/src/frontends/onnx/frontend/src/op/batch_norm.cpp
@@ -52,6 +52,12 @@ ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node) {
     OPENVINO_THROW("Cannot create OpenVINO batch norm with unsupported number of inputs");
 }
 }  // namespace set_1
+/*
+    Opset 6 is skipped because there is no significant difference between opset 1 and opset 6.
+    The difference found is:
+    1. In training, the computation of ReduceMean and ReduceVar uses float
+       to avoid overflow for float16 inputs.
+ */ namespace set_7 { // This version supports ONNX BatchNormalization-7 and BatchNormalization-9 @@ -71,8 +77,42 @@ ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node) { return {std::make_shared(x, scale, bias, mean, var, epsilon)}; } - } // namespace set_7 +/* + Opset 9 is skipped because there are no significant difference between opset7 and opset9. + Found difference is: + 1. removed -> spatial : int (default is 1) + If true, compute the mean and variance across per activation. If false, compute the mean and variance across + per feature over each mini-batch. + + */ + +namespace set_14 { +// This version supports ONNX BatchNormalization-14 BatchNormalization-15 +ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node) { + ov::OutputVector inputs{node.get_ov_inputs()}; + auto x = inputs.at(0); + auto scale = inputs.at(1); + auto bias = inputs.at(2); + auto mean = inputs.at(3); + auto var = inputs.at(4); + + double epsilon{node.get_attribute_value("epsilon", 1e-5)}; + int64_t training_mode{node.get_attribute_value("training_mode", 0)}; + + CHECK_VALID_NODE(node, + training_mode == false && node.get_outputs_size() == 1, + "Training mode of BatchNormalization is not supported."); + return {std::make_shared(x, scale, bias, mean, var, epsilon)}; +} +} // namespace set_14 +/* + Opset 15 is skipped because there are no significant difference between opset14 and opset15. + Found difference is: + 1. In Training, the computation of ReduceMean and ReduceVar uses float + to avoid overflow for float16 inputs. + */ + } // namespace op } // namespace onnx } // namespace frontend diff --git a/src/frontends/onnx/frontend/src/op/batch_norm.hpp b/src/frontends/onnx/frontend/src/op/batch_norm.hpp index fbf4c715bb15de..29a79d444152d2 100644 --- a/src/frontends/onnx/frontend/src/op/batch_norm.hpp +++ b/src/frontends/onnx/frontend/src/op/batch_norm.hpp @@ -19,6 +19,11 @@ namespace set_7 { ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node); } // namespace set_7 + +namespace set_14 { +ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node); + +} // namespace set_14 } // namespace op } // namespace onnx } // namespace frontend diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index 0acbb0c9a8c2f4..02255b673ca576 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -360,6 +360,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("AveragePool", 1, average_pool); REGISTER_OPERATOR("BatchNormalization", 1, batch_norm); REGISTER_OPERATOR("BatchNormalization", 7, batch_norm); + REGISTER_OPERATOR("BatchNormalization", 14, batch_norm); REGISTER_OPERATOR("BitShift", 1, bitshift); REGISTER_OPERATOR("BitwiseAnd", 1, bitwise_and); REGISTER_OPERATOR("BitwiseNot", 1, bitwise_not); diff --git a/src/frontends/onnx/tests/models/batchnorm_opset1.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset1.prototxt new file mode 100644 index 00000000000000..11bed1195afa2b --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset1.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + 
dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 1 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset14.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset14.prototxt new file mode 100644 index 00000000000000..48edc903669cc4 --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset14.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 14 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset15.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset15.prototxt new file mode 100644 index 00000000000000..cf0a43fec08c0f --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset15.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + 
version: 15 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset6.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset6.prototxt new file mode 100644 index 00000000000000..31217f90df3b47 --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset6.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 6 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset7.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset7.prototxt new file mode 100644 index 00000000000000..cdc60d2c2a038f --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset7.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 7 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset9.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset9.prototxt new file mode 100644 index 00000000000000..e7e7459f9d0d3b --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset9.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + 
input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 74ace1949c8177..7b3e39e71399aa 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -320,6 +320,95 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_default) { test_case.run(); } +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset1) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset1.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset6) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset6.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset7) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset7.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset9) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset9.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, 
onnx_model_batch_norm_opset14) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset14.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset15) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset15.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + OPENVINO_TEST(${BACKEND_NAME}, onnx_model_relu) { // Simple ReLU test auto model = convert_model("relu.onnx"); diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 8af71f2bd90154..eb4bc9bee916a7 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -69,5 +69,12 @@ static constexpr Property compiled_model_ru static constexpr Property compiled_model_runtime_properties_supported{ "COMPILED_MODEL_RUNTIME_PROPERTIES_SUPPORTED"}; +/** + * @brief Read-write property to set the percentage of the estimated model size which is used to determine the query + * model results for further processing + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property query_model_ratio{"QUERY_MODEL_RATIO"}; + } // namespace internal } // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index a569ce711b6520..8165e658c206f0 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -230,12 +230,14 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this get_supported_nodes( const std::shared_ptr& model, std::function&)> transform, - std::function)> is_node_supported); + std::function)> is_node_supported, + float query_model_ratio = 1.0f); /** * @private diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 2ddd8702eb87fd..d759988d6c5d2d 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -400,9 +400,11 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; enum class ModelDistributionPolicy { - TENSOR_PARALLEL = 0, // Split tensor into several parts and distribute them between sockets/devices during model - // compilation. At inference time sockets/devices process tensors in parallel and do - // syncronization at the end ensuring mathematical correctness. 
+    TENSOR_PARALLEL = 0,    // Distribute tensors to multiple sockets/devices during model compilation. At inference
+                            // time, sockets/devices process individual tensors in parallel.
+    PIPELINE_PARALLEL = 1,  // Distribute tensors to multiple sockets/devices during model compilation. At inference
+                            // time, sockets/devices process individual tensors one by one, and each socket/device
+                            // processes a portion of a different tensor in parallel.
 };

 /** @cond INTERNAL */
@@ -410,6 +412,8 @@ inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) {
     switch (stream_mode) {
     case ModelDistributionPolicy::TENSOR_PARALLEL:
         return os << "TENSOR_PARALLEL";
+    case ModelDistributionPolicy::PIPELINE_PARALLEL:
+        return os << "PIPELINE_PARALLEL";
     default:
         OPENVINO_THROW("Unsupported model distribution policy!");
     }
@@ -420,6 +424,8 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
     is >> str;
     if (str == "TENSOR_PARALLEL") {
         stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL;
+    } else if (str == "PIPELINE_PARALLEL") {
+        stream_mode = ModelDistributionPolicy::PIPELINE_PARALLEL;
     } else {
         OPENVINO_THROW("Unsupported model distribution policy: ", str);
     }
@@ -430,17 +436,19 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
 /**
  * @brief This property defines model distribution policy for inference with multiple sockets/devices.
  * @ingroup ov_runtime_cpp_prop_api
- *
  * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA
 * nodes or between different GPUs).
- * -- TENSOR_PARALLEL : Split tensor into several parts and distribute them between sockets/devices during model
- *                      compilation. At inference time sockets/devices process tensors in parallel and do syncronization
- *                      at the end ensuring mathematical correctness.
+ * -- TENSOR_PARALLEL   : Distribute tensors to multiple sockets/devices during model compilation. At inference time,
+ *                        sockets/devices process individual tensors in parallel.
+ * -- PIPELINE_PARALLEL : Distribute tensors to multiple sockets/devices during model compilation. At inference time,
+ *                        sockets/devices process individual tensors one by one, and each socket/device processes a
+ *                        portion of a different tensor in parallel.
 *
- * The following code is an example how TENSOR_PARALLEL model disrtibution policy might be enabled.
+ * The following code is an example of how the TENSOR_PARALLEL or PIPELINE_PARALLEL model distribution policy might be enabled.
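+ * Note that the property value is a set of policies (its value type is std::set<ov::hint::ModelDistributionPolicy>),
+ * which is why the examples below pass the value in braces.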
* * @code * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})); + * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL})); * @endcode */ static constexpr Property> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"}; diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index 16b9d3be97830f..6532aed839044d 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -4,27 +4,30 @@ #include "openvino/runtime/iplugin.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/util/op_types.hpp" +#include "openvino/op/util/shape_of_base.hpp" #include "openvino/pass/manager.hpp" #include "transformations/common_optimizations/fused_names_cleanup.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" namespace { -std::unordered_set get_removed_nodes(const std::shared_ptr& originalFunction, - const std::shared_ptr& transformedFunction) { +std::unordered_set get_removed_nodes(const std::shared_ptr& original_model, + const std::shared_ptr& transformed_model) { std::unordered_set result = {}; - std::unordered_set transformedNodeNames = {}; + std::unordered_set transformed_node_names = {}; - for (auto&& node : transformedFunction->get_ops()) { - transformedNodeNames.emplace(node->get_friendly_name()); - for (auto&& fusedLayerName : ov::getFusedNamesVector(node)) - transformedNodeNames.emplace(fusedLayerName); + for (auto&& node : transformed_model->get_ops()) { + transformed_node_names.emplace(node->get_friendly_name()); + for (auto&& fused_layer_name : ov::getFusedNamesVector(node)) { + transformed_node_names.emplace(fused_layer_name); + } } - for (auto&& originalNode : originalFunction->get_ops()) { - if (!transformedNodeNames.count(originalNode->get_friendly_name())) - result.emplace(originalNode->get_friendly_name()); + for (auto&& original_node : original_model->get_ops()) { + if (!transformed_node_names.count(original_node->get_friendly_name())) + result.emplace(original_node->get_friendly_name()); } return result; @@ -75,7 +78,15 @@ std::shared_ptr ov::IPlugin::compile_model(const std::string std::unordered_set ov::get_supported_nodes( const std::shared_ptr& model, std::function&)> transform, - std::function)> is_node_supported) { + std::function)> is_node_supported, + float query_model_ratio) { + using NameSet = std::unordered_set; + using NodePtr = std::shared_ptr; + NameSet res; + if (query_model_ratio <= 0) { + return res; + } + bool query_by_memory_control = query_model_ratio < 1; // Collect original operation names std::unordered_set original_ops; for (auto&& node : model->get_ops()) { @@ -83,7 +94,6 @@ std::unordered_set ov::get_supported_nodes( } auto transformed_model = model->clone(); - // Cleanup fused names if there are present in original model ov::pass::Manager m; m.register_pass(); @@ -92,13 +102,13 @@ std::unordered_set ov::get_supported_nodes( transform(transformed_model); auto ops = transformed_model->get_ordered_ops(); - // Mark removed nodes as supported - std::unordered_set supported = get_removed_nodes(model, transformed_model); - std::unordered_set unsupported; + NameSet supported; + NameSet unsupported; + NameSet removed_nodes = get_removed_nodes(model, transformed_model); - auto get_names_set = [](const std::shared_ptr& op) -> std::unordered_set { + auto get_names_set = [](const NodePtr& op) -> NameSet { auto fused_names = ov::getFusedNamesVector(op); - std::unordered_set 
names(fused_names.begin(), fused_names.end()); + NameSet names(fused_names.begin(), fused_names.end()); names.insert(op->get_friendly_name()); return names; }; @@ -119,20 +129,66 @@ std::unordered_set ov::get_supported_nodes( supported.erase(name); } - auto has_all_consumers_unsupported = [&supported](const std::shared_ptr& node) { - for (auto&& input : node->output(0).get_target_inputs()) { - if (supported.count(input.get_node()->get_friendly_name())) { - return false; + auto copy_set = [](NameSet& source, NameSet& dest) { + dest.clear(); + copy(source.begin(), source.end(), inserter(dest, dest.end())); + }; + + auto get_output_node = [](const ov::Output& output) -> NodePtr { + return output.get_node_shared_ptr(); + }; + + auto get_input_node = [&get_output_node](const ov::Input& input) -> NodePtr { + return get_output_node(input.get_source_output()); + }; + + auto has_all_consumers_unsupported = [&](const NameSet& supported, const NodePtr& node) -> bool { + bool has_consumers = false; + for (auto&& output : node->outputs()) { + for (auto&& input : output.get_target_inputs()) { + has_consumers = true; + if (supported.count(input.get_node()->get_friendly_name())) { + return false; + } } } - return (node->output(0).get_target_inputs().size() != 0); + return has_consumers; }; - auto has_unsupported_source = [&supported](const std::shared_ptr& node) { - return !supported.count(node->input_values().begin()->get_node()->get_friendly_name()); + auto has_users_supported = [&](const NameSet& supported, const NodePtr& node) -> bool { + auto users = node->get_users(); + for (auto& user : users) { + if (supported.count(user->get_friendly_name())) { + return true; + } + } + return false; + }; + + auto has_users_unsupported = [&](const NameSet& supported, const NodePtr& node) -> bool { + auto users = node->get_users(); + for (auto& user : users) { + if (!supported.count(user->get_friendly_name()) && !ov::is_type(user)) { + return true; + } + } + return false; }; - auto remove_op_from_supported = [&](const std::shared_ptr& node) { + auto has_unsupported_source = + [&get_input_node](const NameSet& supported, const NodePtr& op, bool const_only = false) -> bool { + for (auto& input : op->inputs()) { + const auto& node = get_input_node(input); + if (const_only && !ov::op::util::is_constant(node)) + continue; + if (!supported.count(node->get_friendly_name())) { + return true; + } + } + return false; + }; + + auto remove_op_from_supported = [&](const NodePtr& node) { auto names = get_names_set(node); for (auto& name : get_names_set(node)) { supported.erase(name); @@ -169,36 +225,204 @@ std::unordered_set ov::get_supported_nodes( } } - // Walk over transformed model for special handing of Parameters/Constants/Results for (auto&& op : ops) { // Mark Constants and all fused names as unsupported if they are have no // supported consumers/sources if (ov::op::util::is_constant(op)) { - if (has_all_consumers_unsupported(op)) { + if (has_all_consumers_unsupported(supported, op)) { remove_op_from_supported(op); + continue; } } } + size_t total_ops_size = 0; + for (auto&& op : ops) { + if (ov::op::util::is_constant(op)) { + const auto const_byte_size = op->get_element_type().size() * shape_size(op->get_shape()); + total_ops_size += const_byte_size; + } + } + // If there is no constant or supported nodes in the model, mark query_by_memory_control as false + if (total_ops_size == 0 || supported.size() == 0) { + query_by_memory_control = false; + } + + if (query_by_memory_control) { + NameSet temp_supported; + NameSet 
temp_unsupported; + NameSet temp_supported_1; + NameSet temp_unsupported_1; + bool cancel_split = false; + std::set split_node_set; + int64_t last_total_len = 0; + int search_times = 0; + size_t last_total_size = 0; + double min_query_size = query_model_ratio * total_ops_size * 0.95; + double max_query_size = query_model_ratio * total_ops_size * 1.05; + copy_set(supported, temp_supported); + copy_set(unsupported, temp_unsupported); + // Search the smallest transmission node within the user's requested ratio range of 0.95-1.05 times + do { + std::map temp_pair_checker; + bool ready_split = false; + bool start_split = false; + bool has_min_graph = false; + size_t total_size = 0; + search_times++; + copy_set(temp_supported, supported); + copy_set(temp_unsupported, unsupported); + // Walk over transformed model for special handing of Parameters/Constants/Results + for (auto&& op : ops) { + if (supported.count(op->get_friendly_name()) && !cancel_split) { + if (const auto& assign = std::dynamic_pointer_cast(op)) { + if (temp_pair_checker.count(assign->get_variable_id()) == 0) { + temp_pair_checker[assign->get_variable_id()] = 1; + } else { + temp_pair_checker[assign->get_variable_id()]++; + } + } + if (ov::op::util::is_constant(op) && !ready_split) { + const auto const_byte_size = op->get_element_type().size() * shape_size(op->get_shape()); + total_size += const_byte_size; + // If the total size is 1.05 times larger than the user's requirement: + // - If has_min_graph = false, it means there is no nodes meets requirement, so need cancel + // split and break + // - If th split_node_set > 1, it means this is not the first search in do-while, so cancel + // split and break + if (total_size <= max_query_size) { + has_min_graph = true; + } else if (!has_min_graph || search_times > 1) { + cancel_split = true; + break; + } + // Ready to split if total size meets user's requirement and Assign-ReadValue operations in + // pairs on the network + if (total_size >= min_query_size) { + if (!ready_split && split_node_set.find(op->get_friendly_name()) == split_node_set.end()) { + ready_split = check_pairs(temp_pair_checker); + if (ready_split) { + split_node_set.insert(op->get_friendly_name()); + // Judge if the current constant op should be removed from supported + if (total_size < max_query_size) + continue; + } + } + } + } + // Start splitting when ready and the ops is constant + if (ready_split) { + if (ov::op::util::is_constant(op)) { + remove_op_from_supported(op); + start_split = true; + } else if (start_split) { + remove_op_from_supported(op); + for (auto& input : op->inputs()) { + const auto& node = get_input_node(input); + if (ov::op::util::is_constant(node)) { + remove_op_from_supported(node); + } + } + } + } + } + } + // Add the ops to supported that removed by transformations and it has supported users + // + // constant_compressed(to be marked as supported) + // | + // convert(to be marked as supported) + // | + // divide(already in supported) + // + // In case the dependency relationships of some nodes, so traverse the entire model to ensure accurate + // split. For example: In the graph above, constant_compressed op will be first obtained by + // get_ordered_ops(), but it depends on convert op, so need loop again to mark constant_compressed op after + // convert op is marked. 
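+        // (Illustrative walk-through of the sweep below, not part of the original patch: in the diagram above,
+        // the first pass marks `convert` supported because its user `divide` already is; the second pass then
+        // marks `constant_compressed` because `convert` is now supported; a third pass adds nothing and the
+        // loop terminates.)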
+ bool update_supported = true; + while (update_supported) { + update_supported = false; + for (auto& op : model->get_ordered_ops()) { + if (!supported.count(op->get_friendly_name()) && has_users_supported(supported, op) && + !unsupported.count(op->get_friendly_name())) { + supported.insert(op->get_friendly_name()); + update_supported = true; + } + } + } + // Calculate the data size that needs to be transmitted after the current model is split + int64_t total_len = 0; + for (auto& op : model->get_ordered_ops()) { + if (supported.count(op->get_friendly_name()) && !ov::op::util::is_constant(op) && + !ov::op::util::is_parameter(op)) { + if (has_users_unsupported(supported, op)) { + int64_t op_size = 1; + for (size_t shape_id = 0; shape_id < op->get_output_partial_shape(0).size(); shape_id++) { + if (!op->get_output_partial_shape(0)[shape_id].is_dynamic()) { + int64_t len = op->get_output_partial_shape(0)[shape_id].get_length(); + if (len >= 1) + op_size *= len; + } + } + total_len += op_size; + } + } + } + if ((total_len < last_total_len || last_total_len == 0) && !cancel_split) { + last_total_len = total_len; + copy_set(supported, temp_supported_1); + copy_set(unsupported, temp_unsupported_1); + } + // Cancel split when total size is unchanged in loop + if (total_size != last_total_size) { + last_total_size = total_size; + } else { + cancel_split = true; + } + } while (!cancel_split); + copy_set(temp_supported_1, supported); + copy_set(temp_unsupported_1, unsupported); + } else { + // If memory control is off + // mark all removed nodes as supported + supported.insert(removed_nodes.begin(), removed_nodes.end()); + } + // Finally get intersection of all supported operation names // and operation names from original model - std::unordered_set res; for (auto&& name : supported) { if (original_ops.count(name)) { res.insert(name); } } - // Remove parameters which has no supported consumers + // Remove parameters (or parameter + convert) which has no supported consumers + // and results (or result + convert) which has no supported source node + for (auto& op : model->get_ordered_ops()) { + if (ov::is_type(op)) { + if (ov::op::util::is_parameter(get_input_node(op->input(0))) && has_all_consumers_unsupported(res, op)) { + res.erase(op->get_friendly_name()); + } + } else { + auto outputs = op->outputs(); + auto all_consumers_are_results = + std::all_of(outputs.begin(), outputs.end(), [&](const ov::Output& output) -> bool { + return ov::op::util::is_output(get_output_node(output)); + }); + if (all_consumers_are_results && has_unsupported_source(res, op, true)) { + res.erase(op->get_friendly_name()); + } + } + } + for (auto& param : model->get_parameters()) { - if (has_all_consumers_unsupported(param)) { + if (has_all_consumers_unsupported(res, param)) { res.erase(param->get_friendly_name()); } } - // Remove results which has no supported source node for (auto& result : model->get_results()) { - if (has_unsupported_source(result)) { + if (has_unsupported_source(res, result)) { res.erase(result->get_friendly_name()); } } diff --git a/src/inference/tests/unit/query_model_test.cpp b/src/inference/tests/unit/query_model_test.cpp index 91e3c79f2a928c..f7a4ea80794134 100644 --- a/src/inference/tests/unit/query_model_test.cpp +++ b/src/inference/tests/unit/query_model_test.cpp @@ -21,12 +21,15 @@ #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/convert_precision.hpp" #include "transformations/init_node_info.hpp" +#include 
"transformations/op_conversions/convert_divide.hpp" #include "transformations/op_conversions/convert_reduce_to_pooling.hpp" #include "transformations/op_conversions/log_softmax_decomposition.hpp" #include "transformations/op_conversions/reduce_l2_decomposition.hpp" #include "transformations/rt_info/decompression.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" +using ConfigParams = std::tuple>; + std::ostream& operator<<(std::ostream& os, const std::unordered_set& s); std::ostream& operator<<(std::ostream& os, const std::unordered_set& s) { @@ -40,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const std::unordered_set return os; } -class GetSupportedNodesTest : public ::testing::Test { +class GetSupportedNodesTest : public ::testing::TestWithParam { protected: ov::Shape m_shape{1, 84}; std::shared_ptr m_function; @@ -48,8 +51,9 @@ class GetSupportedNodesTest : public ::testing::Test { public: void Run(std::function&)> transform, std::function)> is_node_supported, - const std::unordered_set& expected) { - auto supported = ov::get_supported_nodes(m_function, transform, is_node_supported); + const std::unordered_set& expected, + float query_model_ratio = 1.0f) { + auto supported = ov::get_supported_nodes(m_function, transform, is_node_supported, query_model_ratio); auto const is_in_expected = [&expected](const std::string& x) { return expected.find(x) != expected.end(); }; @@ -157,7 +161,7 @@ TEST_F(GetSupportedNodesTest, SupportedCompressedConstantNop) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "constant_compressed", "constant", "add", "result"}); } @@ -180,7 +184,7 @@ TEST_F(GetSupportedNodesTest, SupportedConstantInsertAdditionalOp) { m.register_pass(); m.run_passes(model); for (auto& op : model->get_ops()) { - if (std::dynamic_pointer_cast(op) != nullptr) { + if (ov::is_type(op)) { // Add one more dummy operation auto consumers = op->output(0).get_target_inputs(); auto shape = op->get_shape(); @@ -197,8 +201,7 @@ TEST_F(GetSupportedNodesTest, SupportedConstantInsertAdditionalOp) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op) || ov::is_type(op); }, {"input", "constant", "output_operation", "result"}); } @@ -235,7 +238,7 @@ TEST_F(GetSupportedNodesTest, PartiallySupportedCompressedConstant) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input2", "constant_compressed", "constant", "mul", "result2"}); } @@ -277,7 +280,7 @@ TEST_F(GetSupportedNodesTest, ConstantSubgraphSupported) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "weights", @@ -317,7 +320,7 @@ TEST_F(GetSupportedNodesTest, UnmarkedSupportedInputsOutputs) { }, [&](const std::shared_ptr& op) { // Plugin don't mark input, constant and result as supported - return (std::dynamic_pointer_cast(op) != nullptr); + return ov::is_type(op); }, {"add"}); } @@ -347,7 +350,7 @@ TEST_F(GetSupportedNodesTest, 
WrongFusedNamesInOriginalModel) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "weights", "matmul"}); } @@ -374,10 +377,8 @@ TEST_F(GetSupportedNodesTest, FusedNamesSupportedUnsupportedBoth) { [&](const std::shared_ptr& op) { // Exp is not supported and all constants are missing return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op) || ov::is_type(op) || + ov::is_type(op) || ov::is_type(op); }, {"dummy_param"}); // kepp dummy only since it has no unsupported consumers } @@ -421,7 +422,7 @@ TEST_F(GetSupportedNodesTest, ShapeOfNonConstantNode) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "slope_compressed", "slope", "prelu"}); // keep dummy only since it has no unsupported consumers } @@ -490,7 +491,7 @@ TEST_F(GetSupportedNodesTest, FusedNameReduceL2Test) { [&](const std::shared_ptr& op) { // Pooling is supported, but Sqrt is not return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::op::util::is_constant(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {}); // Check that constant axis is removed from supported } @@ -520,3 +521,214 @@ TEST_F(GetSupportedNodesTest, AssignReadValueTest) { }, {}); } + +TEST_F(GetSupportedNodesTest, NoSupportedOpsTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value->set_friendly_name("const_val"); + auto add = std::make_shared(param, const_value); + add->set_friendly_name("add"); + auto res = std::make_shared(add); + res->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{param}); + } + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return false; + }, + {}, + 0.9f); +} + +TEST_F(GetSupportedNodesTest, NoConstOpTest) { + { + auto param1 = std::make_shared(ov::element::f32, ov::Shape{1, 512}); + param1->set_friendly_name("input1"); + auto param2 = std::make_shared(ov::element::f32, ov::Shape{1, 512}); + param2->set_friendly_name("input2"); + auto add = std::make_shared(param1, param2); + add->set_friendly_name("add"); + auto res = std::make_shared(add); + res->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{param1, param2}); + } + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::is_type(op); + }, + {"input1", "input2", "add", "res"}, + 0.9f); +} + +TEST_F(GetSupportedNodesTest, DivideWillRemoveConvertAndConstant) { + { + auto param = std::make_shared(ov::element::f32, ov::Shape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto constant_compressed = 
ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1, 3, 2, 2}, {1}); + constant_compressed->set_friendly_name("constant_compressed"); + auto convert = std::make_shared(constant_compressed, ov::element::f32); + convert->set_friendly_name("convert"); + auto divide = std::make_shared(param, convert); + divide->set_friendly_name("divide"); + auto const_value = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value->set_friendly_name("const_val"); + auto add = std::make_shared(divide, const_value); + add->set_friendly_name("add"); + auto result = std::make_shared(add); + result->set_friendly_name("result"); + m_function = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); + } + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + const bool keep_precision_sensitive_in_fp32_1 = true; + const bool convert_input_output_precision = false; + const bool store_original_precision_as_rt_attribute = true; + type_to_fuse_map empty_fuse_map = {}; + precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}}; + m.register_pass(fp_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32_1, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return true; + }, + {"input", "constant_compressed", "divide", "const_val", "add", "convert", "result"}, + 0.98f); +} + +using GetSupportedNodesCommonTest = GetSupportedNodesTest; +using GetSupportedNodesOneConstOp = GetSupportedNodesTest; +using GetSupportedNodesStopSplit = GetSupportedNodesTest; + +TEST_P(GetSupportedNodesCommonTest, SplitModelWithDifferentRatioTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value1 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value1->set_friendly_name("const_val1"); + auto add1 = std::make_shared(param, const_value1); + add1->set_friendly_name("add1"); + auto const_value2 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value2->set_friendly_name("const_val2"); + auto add2 = std::make_shared(add1, const_value2); + add2->set_friendly_name("add2"); + auto result = std::make_shared(add2); + result->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); + } + float query_model_ratio; + std::unordered_set expected; + std::tie(query_model_ratio, expected) = this->GetParam(); + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::op::util::is_constant(op) || + ov::is_type(op) || ov::is_type(op); + }, + expected, + query_model_ratio); +} + +TEST_P(GetSupportedNodesOneConstOp, OneConstOpTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value->set_friendly_name("const_val"); + auto add = std::make_shared(param, const_value); + add->set_friendly_name("add"); + auto res = std::make_shared(add); + res->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{param}); + } + float 
query_model_ratio; + std::unordered_set expected; + std::tie(query_model_ratio, expected) = this->GetParam(); + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::op::util::is_constant(op) || + ov::is_type(op); + }, + expected, + query_model_ratio); +} + +TEST_P(GetSupportedNodesStopSplit, StopSplitTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value1 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value1->set_friendly_name("const_val1"); + auto add = std::make_shared(param, const_value1); + add->set_friendly_name("add"); + auto const_value2 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value2->set_friendly_name("const_val2"); + auto mul_scale = std::make_shared(add, const_value2); + mul_scale->set_friendly_name("mul_scale"); + auto result = std::make_shared(mul_scale); + result->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); + } + float query_model_ratio; + std::unordered_set expected; + std::tie(query_model_ratio, expected) = this->GetParam(); + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::is_type(op) || + ov::op::util::is_constant(op); + }, + expected, + query_model_ratio); +} + +const std::vector testConfigs = { + ConfigParams{0.0f, std::unordered_set{}}, + ConfigParams{0.5f, std::unordered_set{"input", "const_val1", "add1"}}, + ConfigParams{1.0f, std::unordered_set{"input", "const_val1", "add1", "const_val2", "add2", "res"}}}; + +const std::vector testConfigs1 = { + ConfigParams{0.0f, std::unordered_set{}}, + ConfigParams{0.5f, std::unordered_set{}}, + ConfigParams{1.0f, std::unordered_set{"input", "const_val", "add", "res"}}}; + +const std::vector testConfigs2 = { + ConfigParams{0.0f, std::unordered_set{}}, + ConfigParams{0.3f, std::unordered_set{}}, + ConfigParams{0.9f, std::unordered_set{"input", "const_val1", "add"}}, + ConfigParams{1.0f, std::unordered_set{"input", "const_val1", "add"}}}; + +INSTANTIATE_TEST_SUITE_P(GetSupportedNodesTest, GetSupportedNodesCommonTest, ::testing::ValuesIn(testConfigs)); +INSTANTIATE_TEST_SUITE_P(GetSupportedNodesTest, GetSupportedNodesOneConstOp, ::testing::ValuesIn(testConfigs1)); +INSTANTIATE_TEST_SUITE_P(GetSupportedNodesTest, GetSupportedNodesStopSplit, ::testing::ValuesIn(testConfigs2)); diff --git a/src/plugins/hetero/src/config.cpp b/src/plugins/hetero/src/config.cpp index 5cdb5cc125a673..a54a91fa19d900 100644 --- a/src/plugins/hetero/src/config.cpp +++ b/src/plugins/hetero/src/config.cpp @@ -6,6 +6,7 @@ #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" +#include "properties.hpp" using namespace ov::hetero; @@ -20,6 +21,18 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defa if (ov::device::priorities == key) { device_priorities = value.as(); + } else if (ov::hint::model_distribution_policy == key) { + for (auto& row : value.as>()) { + if (row != ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL) { + OPENVINO_THROW( + "Wrong value ", + row, + " for property key ", + 
ov::hint::model_distribution_policy.name(),
+                    ". HETERO plugin only supports {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}");
+            }
+        }
+        modelDistributionPolicy = value.as<std::set<ov::hint::ModelDistributionPolicy>>();
     } else {
         if (throwOnUnsupported)
             OPENVINO_THROW("Property was not found: ", key);
@@ -31,6 +44,8 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defaultCfg, bool throwOnUnsupported)
 ov::Any Configuration::get(const std::string& name) const {
     if (name == ov::device::priorities) {
         return {device_priorities};
+    } else if (name == ov::hint::model_distribution_policy) {
+        return {modelDistributionPolicy};
     } else {
         OPENVINO_THROW("Property was not found: ", name);
     }
@@ -42,7 +57,8 @@ std::vector<std::string> Configuration::get_supported() const {
 }

 ov::AnyMap Configuration::get_hetero_properties() const {
-    return {{ov::device::priorities.name(), device_priorities}};
+    return {{ov::device::priorities.name(), device_priorities},
+            {ov::hint::model_distribution_policy.name(), modelDistributionPolicy}};
 }

 ov::AnyMap Configuration::get_device_properties() const {
diff --git a/src/plugins/hetero/src/config.hpp b/src/plugins/hetero/src/config.hpp
index 55647b710d76b1..42d972c021343d 100644
--- a/src/plugins/hetero/src/config.hpp
+++ b/src/plugins/hetero/src/config.hpp
@@ -8,6 +8,7 @@
 #include <string>

 #include "openvino/runtime/properties.hpp"
+#include "properties.hpp"

 namespace ov {
 namespace hetero {
@@ -34,6 +35,9 @@ struct Configuration {
     bool dump_dot_files() const;

     std::string device_priorities;
+
+    std::set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {};
+
     ov::AnyMap device_properties;
 };
 }  // namespace hetero
diff --git a/src/plugins/hetero/src/plugin.cpp b/src/plugins/hetero/src/plugin.cpp
index b4258881e3e686..0c48703aabd404 100644
--- a/src/plugins/hetero/src/plugin.cpp
+++ b/src/plugins/hetero/src/plugin.cpp
@@ -14,8 +14,12 @@

 #include "compiled_model.hpp"
 #include "itt.hpp"
+#include "op/device_subgraph.hpp"
+#include "openvino/core/graph_util.hpp"
 #include "openvino/core/rt_info.hpp"
+#include "openvino/op/util/op_types.hpp"
 #include "openvino/runtime/device_id_parser.hpp"
+#include "openvino/runtime/intel_gpu/properties.hpp"
 #include "openvino/runtime/internal_properties.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/util/common_util.hpp"
@@ -77,23 +81,105 @@ ov::hetero::Plugin::DeviceProperties ov::hetero::Plugin::get_properties_per_device(
     return device_properties;
 }

+void ov::hetero::Plugin::get_device_memory_map(const std::vector<std::string>& device_names,
+                                               std::map<std::string, size_t>& available_device_mem_map) const {
+    // TODO: add a unified API to get device memory.
+    // There is no unified API to get device memory, so this feature retrieves the memory of each specific
+    // device with a device-specific method. Devices whose memory size cannot be obtained are skipped.
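+    // (Illustrative note, not from the original patch: assuming the map holds size_t values, the -1 stored
+    // for CPU below wraps to the maximum size_t, i.e. the CPU is treated as having effectively unlimited
+    // memory, e.g. {"CPU": SIZE_MAX, "GPU.0": <bytes from ov::intel_gpu::device_total_mem_size>}.)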
+    for (const auto& device_name : device_names) {
+        if (device_name.find("CPU") != std::string::npos) {
+            // Assuming the CPU has enough memory
+            available_device_mem_map["CPU"] = -1;
+        } else if (device_name.find("GPU") != std::string::npos) {
+            try {
+                size_t device_mem = get_core()->get_property(device_name, ov::intel_gpu::device_total_mem_size);
+                available_device_mem_map[device_name] = device_mem;
+            } catch (const ov::Exception&) {
+            }
+        }
+    }
+}
+
 std::pair<ov::SupportedOpsMap, ov::hetero::SubgraphsMappingInfo> ov::hetero::Plugin::query_model_update(
     std::shared_ptr<ov::Model>& model,
     const ov::AnyMap& properties,
     bool allow_exception) const {
+    std::map<std::string, size_t> available_device_mem_map;
     Configuration full_config{properties, m_cfg};
     DeviceProperties properties_per_device =
         get_properties_per_device(full_config.device_priorities, full_config.get_device_properties());

     // WARNING: Here is devices with user set priority
     auto device_names = ov::DeviceIDParser::get_hetero_devices(full_config.device_priorities);
+    bool hetero_query_model_by_device = false;
+    if (full_config.modelDistributionPolicy.count(ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL) != 0) {
+        get_device_memory_map(device_names, available_device_mem_map);
+        // Disable per-device hetero query model if no device's available memory could be obtained.
+        if (available_device_mem_map.size() != 0) {
+            hetero_query_model_by_device = true;
+        }
+    }

     auto update_supported_ops = [](ov::SupportedOpsMap& final_results, const ov::SupportedOpsMap& device_results) {
         for (const auto& layer_query_result : device_results)
             final_results.emplace(layer_query_result);
     };

+    auto has_subgraph_ops = [](std::shared_ptr<ov::Model>& model) {
+        for (auto& op : model->get_ordered_ops()) {
+            if (ov::as_type_ptr<ov::hetero::op::DeviceSubgraph>(op)) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    auto update_config = [&](ov::AnyMap& device_config,
+                             const std::shared_ptr<ov::Model>& model,
+                             std::string device_name,
+                             bool fallback_device) {
+        auto internal_supported_properties = get_core()->get_property(device_name, ov::internal::supported_properties);
+        if (ov::util::contains(internal_supported_properties, ov::internal::query_model_ratio)) {
+            if (fallback_device) {
+                device_config[ov::internal::query_model_ratio.name()] = 1.0f;
+            } else if (available_device_mem_map.count(device_name)) {
+                size_t total_ops_size = 0;
+                size_t available_discrete_device_memory = 0;
+                for (auto&& op : model->get_ordered_ops()) {
+                    if (ov::op::util::is_constant(op)) {
+                        total_ops_size += op->get_element_type().size() * shape_size(op->get_shape());
+                    }
+                }
+                for (auto& device_mem_info : available_device_mem_map) {
+                    if (device_mem_info.first.find("CPU") != 0)
+                        available_discrete_device_memory += device_mem_info.second;
+                }
+                // Estimate the memory required for the model as 1.2 * total_ops_size
+                // 1. Check if the current device can take the entire model
+                // 2.
+ ov::SupportedOpsMap supported_ops_temp; + ov::SupportedOpsMap supported_ops_temp_1; ov::SupportedOpsMap supported_ops_final; std::map<std::string, ov::SupportedOpsMap> query_results; ov::hetero::SubgraphsMappingInfo mapping_info; @@ -109,15 +195,46 @@ std::pair<ov::SupportedOpsMap, ov::hetero::SubgraphsMappingInfo> ov::hetero::Plu for (const auto& device_name : device_names) { // If there are some unsupported operations and it is the last device // an exception should be raised when allowed - const auto& default_device = (!allow_exception || device_name != device_names.back()) ? get_device_name() : ""; - const auto& device_config = properties_per_device.at(device_name); - query_results[device_name] = get_core()->query_model(model, device_name, device_config); - // Update supported operations map which includes new operations - update_supported_ops(supported_ops_temp, query_results[device_name]); - // Update supported operations map which includes original operations only - update_supported_ops(supported_ops_final, query_results[device_name]); - mapping_info = - ov::hetero::mask_model_subgraphs_by_ops(model, supported_ops_temp, m_cfg.dump_dot_files(), default_device); + bool fallback_device = (device_name == device_names.back()); + const auto& default_device = (!allow_exception || !fallback_device) ?
get_device_name() : ""; + auto& device_config = properties_per_device.at(device_name); + if (!has_subgraph_ops(model)) { + if (hetero_query_model_by_device) + update_config(device_config, model, device_name, fallback_device); + query_results[device_name] = get_core()->query_model(model, device_name, device_config); + update_supported_ops(supported_ops_temp, query_results[device_name]); + update_supported_ops(supported_ops_final, query_results[device_name]); + mapping_info = ov::hetero::mask_model_subgraphs_by_ops(model, + supported_ops_temp, + m_cfg.dump_dot_files(), + default_device); + } else { + // Mask supported nodes and left nodes to Subgraph in graph, and query model use subgraph, keep the + // model in query_model same as compile + auto temp_model = model->clone(); + update_supported_ops(supported_ops_temp_1, supported_ops_temp); + for (auto&& node : temp_model->get_ops()) { + supported_ops_temp_1.emplace(node->get_friendly_name(), "HETERO-TEMP"); + } + auto mapping_info_temp = + ov::hetero::mask_model_subgraphs_by_ops(temp_model, supported_ops_temp_1, false, default_device); + for (const auto& op : temp_model->get_ordered_ops()) { + if (const auto& subgraph = ov::as_type_ptr(op)) { + if (subgraph->get_affinity() == "HETERO-TEMP") { + if (hetero_query_model_by_device) + update_config(device_config, subgraph->get_function(), device_name, fallback_device); + query_results[device_name] = + get_core()->query_model(subgraph->get_function(), device_name, device_config); + update_supported_ops(supported_ops_temp, query_results[device_name]); + update_supported_ops(supported_ops_final, query_results[device_name]); + } + } + } + mapping_info = ov::hetero::mask_model_subgraphs_by_ops(model, + supported_ops_temp, + m_cfg.dump_dot_files(), + default_device); + } } return {supported_ops_final, mapping_info}; } @@ -145,7 +262,7 @@ ov::Any ov::hetero::Plugin::get_property(const std::string& name, const ov::AnyM return ro_properties; }; const auto& default_rw_properties = []() { - std::vector rw_properties{ov::device::priorities}; + std::vector rw_properties{ov::device::priorities, ov::hint::model_distribution_policy}; return rw_properties; }; diff --git a/src/plugins/hetero/src/plugin.hpp b/src/plugins/hetero/src/plugin.hpp index bd9b897c1c2af7..d3038642d56c76 100644 --- a/src/plugins/hetero/src/plugin.hpp +++ b/src/plugins/hetero/src/plugin.hpp @@ -60,6 +60,9 @@ class Plugin : public ov::IPlugin { DeviceProperties get_properties_per_device(const std::string& device_priorities, const ov::AnyMap& properties) const; + void get_device_memory_map(const std::vector& device_names, + std::map& device_mem_map) const; + std::pair query_model_update( std::shared_ptr& model, const ov::AnyMap& properties, diff --git a/src/plugins/hetero/src/properties.hpp b/src/plugins/hetero/src/properties.hpp index f5f2b2a1c7693e..c008bf1155f09f 100644 --- a/src/plugins/hetero/src/properties.hpp +++ b/src/plugins/hetero/src/properties.hpp @@ -17,6 +17,5 @@ static constexpr Property caching_device_pr * @brief Read-only property showing number of compiled submodels */ static constexpr Property number_of_submodels{"HETERO_NUMBER_OF_SUBMODELS"}; - } // namespace hetero } // namespace ov diff --git a/src/plugins/hetero/src/sync_infer_request.cpp b/src/plugins/hetero/src/sync_infer_request.cpp index cdd1825b86cf23..556b1749755df7 100644 --- a/src/plugins/hetero/src/sync_infer_request.cpp +++ b/src/plugins/hetero/src/sync_infer_request.cpp @@ -13,6 +13,7 @@ #include "compiled_model.hpp" #include "itt.hpp" #include 
"openvino/core/except.hpp" +#include "openvino/runtime/make_tensor.hpp" #include "plugin.hpp" ov::hetero::InferRequest::InferRequest(const std::shared_ptr& compiled_model) @@ -33,6 +34,7 @@ ov::hetero::InferRequest::InferRequest(const std::shared_ptr, ov::SoPtr> temp_tensor_map; for (const auto& kvp : compiled_model->m_mapping_info._submodels_input_to_prev_output) { const auto& submodel_idx_in = kvp.first.first; const auto& port_idx_in = kvp.first.second; @@ -41,8 +43,14 @@ ov::hetero::InferRequest::InferRequest(const std::shared_ptrget_compiled_model()->outputs()[port_idx_out]; const auto& output_tensor = m_subrequests[submodel_idx_out]->get_tensor(output_port); + if (temp_tensor_map.find(output_port) == temp_tensor_map.end()) { + temp_tensor_map[output_port] = { + ov::make_tensor(output_tensor->get_element_type(), output_tensor->get_shape()), + nullptr}; + } + m_subrequests[submodel_idx_out]->set_tensor(output_port, temp_tensor_map[output_port]); const auto& input_port = m_subrequests[submodel_idx_in]->get_compiled_model()->inputs()[port_idx_in]; - m_subrequests[submodel_idx_in]->set_tensor(input_port, output_tensor); + m_subrequests[submodel_idx_in]->set_tensor(input_port, temp_tensor_map[output_port]); } } diff --git a/src/plugins/hetero/tests/functional/CMakeLists.txt b/src/plugins/hetero/tests/functional/CMakeLists.txt index 678e7cface695b..196a8269080664 100644 --- a/src/plugins/hetero/tests/functional/CMakeLists.txt +++ b/src/plugins/hetero/tests/functional/CMakeLists.txt @@ -15,6 +15,9 @@ ov_add_test_target( gtest gtest_main common_test_utils + INCLUDES + PUBLIC + $/src ADD_CLANG_FORMAT LABELS OV UNIT HETERO diff --git a/src/plugins/hetero/tests/functional/hetero_tests.cpp b/src/plugins/hetero/tests/functional/hetero_tests.cpp index a7e33605ad8dc6..d375c9a70e6a29 100644 --- a/src/plugins/hetero/tests/functional/hetero_tests.cpp +++ b/src/plugins/hetero/tests/functional/hetero_tests.cpp @@ -15,6 +15,7 @@ #include "openvino/pass/manager.hpp" #include "openvino/pass/serialize.hpp" #include "openvino/runtime/exec_model_info.hpp" +#include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/iremote_context.hpp" @@ -177,6 +178,29 @@ std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_ind return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param1, param2}); } +std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_multi_add() { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 1, 1}); + param->set_friendly_name("input"); + auto const_value1 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value1->set_friendly_name("const_val1"); + auto add1 = std::make_shared(param, const_value1); + add1->set_friendly_name("add1"); + auto const_value2 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value2->set_friendly_name("const_val2"); + auto add2 = std::make_shared(add1, const_value2); + add2->set_friendly_name("add2"); + auto const_value3 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value3->set_friendly_name("const_val3"); + auto add3 = std::make_shared(add2, const_value3); + add3->set_friendly_name("add3"); + auto const_value4 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value4->set_friendly_name("const_val4"); + auto add4 = std::make_shared(add3, const_value4); + 
add4->set_friendly_name("add4"); + auto result = std::make_shared(add4); + result->set_friendly_name("res"); + return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); +} // Mock plugins class MockCompiledModel : public ov::ICompiledModel { @@ -546,7 +570,9 @@ class MockPluginBase : public ov::IPlugin { auto device_id = properties.count(ov::device::id.name()) ? properties.at(ov::device::id.name()).as() : m_default_device_id; - + float query_model_ratio = properties.count(ov::internal::query_model_ratio.name()) + ? properties.at(ov::internal::query_model_ratio.name()).as() + : 1.0f; auto supported = ov::get_supported_nodes( model, [&](std::shared_ptr& model) { @@ -561,7 +587,8 @@ class MockPluginBase : public ov::IPlugin { if (m_supported_ops.find(op->get_type_info().name) == m_supported_ops.end()) return false; return true; - }); + }, + query_model_ratio); for (auto&& op_name : supported) { res.emplace(op_name, get_device_name() + "." + device_id); } @@ -743,6 +770,117 @@ class MockPluginSubtract : public MockPluginBase { } }; +class MockPluginGPU : public MockPluginBase { +public: + MockPluginGPU(const std::string& name) + : MockPluginBase(name, {"Parameter", "Result", "Add", "Constant", "Reshape"}, true) {} + + const ov::Version& get_const_version() override { + static const ov::Version version = {CI_BUILD_NUMBER, "openvino_mock_reshape_plugin"}; + return version; + } + void set_property(const ov::AnyMap& properties) override { + for (const auto& it : properties) { + if (it.first == ov::num_streams.name()) + num_streams = it.second.as(); + else if (it.first == ov::enable_profiling.name()) + m_profiling = it.second.as(); + else if (it.first == ov::internal::exclusive_async_requests.name()) + exclusive_async_requests = it.second.as(); + else if (it.first == ov::device::id.name()) + continue; + else + OPENVINO_THROW(get_device_name(), " set config: " + it.first); + } + } + + ov::Any get_property(const std::string& name, const ov::AnyMap& arguments) const override { + const static std::vector device_ids = {"0", "1", "2"}; + const std::vector roProperties{RO_property(ov::supported_properties.name()), + RO_property(ov::optimal_batch_size.name()), + RO_property(ov::device::capabilities.name()), + RO_property(ov::device::type.name()), + RO_property(ov::device::uuid.name()), + RO_property(ov::device::id.name()), + RO_property(ov::intel_gpu::memory_statistics.name()), + RO_property(ov::intel_gpu::device_total_mem_size.name())}; + // the whole config is RW before network is loaded. 
+ const std::vector rwProperties{RW_property(ov::num_streams.name()), + RW_property(ov::enable_profiling.name()), + RW_property(ov::compilation_num_threads.name()), + RW_property(ov::hint::performance_mode.name()), + RW_property(ov::hint::num_requests.name())}; + std::string device_id; + if (arguments.find(ov::device::id.name()) != arguments.end()) { + device_id = arguments.find(ov::device::id.name())->second.as(); + } + if (name == ov::supported_properties) { + std::vector supportedProperties; + supportedProperties.reserve(roProperties.size() + rwProperties.size()); + supportedProperties.insert(supportedProperties.end(), roProperties.begin(), roProperties.end()); + supportedProperties.insert(supportedProperties.end(), rwProperties.begin(), rwProperties.end()); + + return decltype(ov::supported_properties)::value_type(supportedProperties); + } else if (name == ov::internal::supported_properties) { + return decltype(ov::internal::supported_properties)::value_type( + {ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::query_model_ratio.name(), ov::PropertyMutability::RW}}); + } else if (name == ov::internal::exclusive_async_requests) { + return decltype(ov::internal::exclusive_async_requests)::value_type{exclusive_async_requests}; + } else if (name == ov::device::uuid) { + ov::device::UUID uuid; + for (size_t i = 0; i < uuid.MAX_UUID_SIZE; i++) { + if (device_id == device_ids[0]) + uuid.uuid[i] = static_cast(i); + else if (device_id == device_ids[1]) + uuid.uuid[i] = static_cast(i * 2); + else if (device_id == device_ids[2]) + uuid.uuid[i] = static_cast(i * 3); + } + return decltype(ov::device::uuid)::value_type{uuid}; + } else if (name == ov::available_devices) { + return decltype(ov::available_devices)::value_type(device_ids); + } else if (name == ov::device::capabilities) { + std::vector capabilities; + capabilities.push_back(ov::device::capability::EXPORT_IMPORT); + return decltype(ov::device::capabilities)::value_type(capabilities); + } else if (ov::internal::caching_properties == name) { + std::vector caching_properties = {ov::device::uuid}; + return decltype(ov::internal::caching_properties)::value_type(caching_properties); + } else if (name == ov::loaded_from_cache.name()) { + return m_loaded_from_cache; + } else if (name == ov::enable_profiling.name()) { + return decltype(ov::enable_profiling)::value_type{m_profiling}; + } else if (name == ov::streams::num.name()) { + return decltype(ov::streams::num)::value_type{num_streams}; + } else if (name == ov::intel_gpu::device_total_mem_size.name()) { + size_t mem_size = 0; + if (device_id == "0") + mem_size = 64; + else if (device_id == "1") + mem_size = 16; + else if (device_id == "2") + mem_size = 32; + return decltype(ov::intel_gpu::device_total_mem_size)::value_type{mem_size}; + } else if (name == ov::device::type.name()) { + ov::device::Type device_type = ov::device::Type::INTEGRATED; + if (device_id == "0") + device_type = ov::device::Type::INTEGRATED; + else if (device_id == "1") + device_type = ov::device::Type::DISCRETE; + else if (device_id == "2") + device_type = ov::device::Type::DISCRETE; + return decltype(ov::device::type)::value_type(device_type); + } + OPENVINO_THROW("Unsupported property: ", name); + } + +private: + int32_t num_streams{0}; + bool exclusive_async_requests = false; +}; + void ov::hetero::tests::HeteroTests::reg_plugin(std::shared_ptr& plugin) { 
std::string library_path = get_mock_engine_path(); if (!m_so) @@ -766,5 +904,6 @@ void ov::hetero::tests::HeteroTests::SetUp() { if (m_mock_plugins.empty()) { reg_plugin_type("MOCK0"); reg_plugin_type("MOCK1"); + reg_plugin_type("MOCKGPU"); } } \ No newline at end of file diff --git a/src/plugins/hetero/tests/functional/hetero_tests.hpp b/src/plugins/hetero/tests/functional/hetero_tests.hpp index f25d9d0feedcce..3890a91d21495a 100644 --- a/src/plugins/hetero/tests/functional/hetero_tests.hpp +++ b/src/plugins/hetero/tests/functional/hetero_tests.hpp @@ -26,6 +26,7 @@ class HeteroTests : public ::testing::Test { std::shared_ptr create_model_with_reshape(bool dynamic = false); std::shared_ptr create_model_with_subtract_shapeof_reshape(bool dynamic = false); std::shared_ptr create_model_with_independent_parameter(bool dynamic = false); + std::shared_ptr create_model_with_multi_add(); ov::Tensor create_and_fill_tensor(const ov::element::Type& type, const ov::Shape& shape); private: diff --git a/src/plugins/hetero/tests/functional/properties_tests.cpp b/src/plugins/hetero/tests/functional/properties_tests.cpp index a9e596181a3076..474ada15ca69c6 100644 --- a/src/plugins/hetero/tests/functional/properties_tests.cpp +++ b/src/plugins/hetero/tests/functional/properties_tests.cpp @@ -3,6 +3,7 @@ // #include "hetero_tests.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "properties.hpp" using namespace ov::hetero::tests; @@ -10,7 +11,8 @@ TEST_F(HeteroTests, get_property_supported_properties) { const std::vector supported_properties = {ov::supported_properties, ov::device::full_name, ov::device::capabilities, - ov::device::priorities}; + ov::device::priorities, + ov::hint::model_distribution_policy}; auto actual_supported_properties = core.get_property("HETERO", ov::supported_properties); EXPECT_EQ(supported_properties.size(), actual_supported_properties.size()); for (auto& supported_property : supported_properties) { @@ -41,4 +43,19 @@ TEST_F(HeteroTests, set_property_device_priorities) { EXPECT_EQ("", core.get_property("HETERO", ov::device::priorities)); core.set_property("HETERO", ov::device::priorities("MOCK0,MOCK1")); EXPECT_EQ("MOCK0,MOCK1", core.get_property("HETERO", ov::device::priorities)); +} + +TEST_F(HeteroTests, set_property_ModelDistributionPolicy) { + std::set value = {}; + std::set model_policy = {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}; + + ASSERT_NO_THROW(core.set_property("HETERO", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = core.get_property("HETERO", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); + + model_policy = {}; + + ASSERT_NO_THROW(core.set_property("HETERO", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = core.get_property("HETERO", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); } \ No newline at end of file diff --git a/src/plugins/hetero/tests/functional/query_model_tests.cpp b/src/plugins/hetero/tests/functional/query_model_tests.cpp index 6ec4f17f053803..fab5e78220a82f 100644 --- a/src/plugins/hetero/tests/functional/query_model_tests.cpp +++ b/src/plugins/hetero/tests/functional/query_model_tests.cpp @@ -114,3 +114,61 @@ TEST_F(HeteroTests, query_model_on_independent_parameter) { } EXPECT_EQ(0, names.size()); } + +TEST_F(HeteroTests, query_model_by_three_device) { + const std::string dev_name0 = "MOCKGPU.2"; + const std::string dev_name1 = "MOCKGPU.1"; + const std::string dev_name2 = "MOCKGPU.0"; + std::set 
model_policy = {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}; + // This workaround is needed because mock plugins are loaded one by one + EXPECT_NO_THROW(core.get_available_devices()); + const auto model = create_model_with_multi_add(); + const auto supported_ops = core.query_model(model, + "HETERO", + {ov::device::priorities(dev_name0 + "," + dev_name1 + "," + dev_name2), + ov::hint::model_distribution_policy(model_policy)}); + std::map<std::string, std::string> expect_result = {{"input", "MOCKGPU.2"}, + {"const_val1", "MOCKGPU.2"}, + {"const_val2", "MOCKGPU.2"}, + {"add1", "MOCKGPU.2"}, + {"add2", "MOCKGPU.2"}, + {"const_val3", "MOCKGPU.1"}, + {"add3", "MOCKGPU.1"}, + {"const_val4", "MOCKGPU.0"}, + {"add4", "MOCKGPU.0"}, + {"res", "MOCKGPU.0"}}; + for (const auto& op : supported_ops) { + if (expect_result.find(op.first) != expect_result.end()) { + EXPECT_EQ(op.second, expect_result[op.first]); + } + } +} + +TEST_F(HeteroTests, query_model_by_two_device) { + const std::string dev_name0 = "MOCKGPU.2"; + const std::string dev_name1 = "MOCKGPU.0"; + std::set<ov::hint::ModelDistributionPolicy> model_policy = {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}; + + // This workaround is needed because mock plugins are loaded one by one + EXPECT_NO_THROW(core.get_available_devices()); + const auto model = create_model_with_multi_add(); + const auto supported_ops = core.query_model( + model, + "HETERO", + {ov::device::priorities(dev_name0 + "," + dev_name1), ov::hint::model_distribution_policy(model_policy)}); + std::map<std::string, std::string> expect_result = {{"input", "MOCKGPU.2"}, + {"const_val1", "MOCKGPU.2"}, + {"const_val2", "MOCKGPU.2"}, + {"add1", "MOCKGPU.2"}, + {"add2", "MOCKGPU.2"}, + {"const_val3", "MOCKGPU.0"}, + {"add3", "MOCKGPU.0"}, + {"const_val4", "MOCKGPU.0"}, + {"add4", "MOCKGPU.0"}, + {"res", "MOCKGPU.0"}}; + for (const auto& op : supported_ops) { + if (expect_result.find(op.first) != expect_result.end()) { + EXPECT_EQ(op.second, expect_result[op.first]); + } + } +} \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index cf992e4a678127..323e848c8bc96e 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -587,6 +587,26 @@ static void attn_reduce(T* dst, float* temp, size_t M, size_t S, size_t temp_str } } +template <typename T0, typename T1, typename T2, typename F> +void parallel_for3d_dynamic(const T0& D0, const T1& D1, const T2& D2, const F& func) { +#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) + tbb::parallel_for(tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2), + [=](const tbb::blocked_range3d<T0, T1, T2>& r) { + for (T0 d0 = r.pages().begin(); d0 < r.pages().end(); d0++) { + for (T1 d1 = r.rows().begin(); d1 < r.rows().end(); d1++) { + for (T2 d2 = r.cols().begin(); d2 < r.cols().end(); d2++) { + func(d0, d1, d2); + } + } + } + }); +#else + parallel_for3d(D0, D1, D2, [&](size_t d0, size_t d1, size_t d2) { + func(d0, d1, d2); + }); +#endif +} +
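parallel_for3d_dynamic above replaces the static splitter with TBB's recursive range splitting, so threads steal work when per-batch context lengths make iteration costs uneven. The same pattern in standalone form (illustrative sizes; requires linking against TBB):

#include <tbb/blocked_range3d.h>
#include <tbb/parallel_for.h>

int main() {
    const size_t B = 4, H = 8, L = 1024;
    // TBB splits the 3D range into blocks and rebalances them across threads,
    // unlike a static splitter that fixes each thread's share up front.
    tbb::parallel_for(tbb::blocked_range3d<size_t>(0, B, 0, H, 0, L),
                      [](const tbb::blocked_range3d<size_t>& r) {
                          for (size_t b = r.pages().begin(); b < r.pages().end(); b++)
                              for (size_t h = r.rows().begin(); h < r.rows().end(); h++)
                                  for (size_t i = r.cols().begin(); i < r.cols().end(); i++) {
                                      // per-element work goes here
                                  }
                      });
    return 0;
}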
template static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, const ov::intel_cpu::PlainTensor& present_key, @@ -641,32 +661,32 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, }); } #endif - parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { - size_t start{0}, end{0}; - splitter(B * h_group_num * kv_len, nthr, ithr, start, end); - - size_t b, h_group, pk; - if (start < end) { - parallel_it_init(start, b, B, h_group, h_group_num, pk, kv_len); - if (is_pagedattn) { - for (size_t iwork = start; iwork < end; ++iwork) { - auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); - // kv_len must be valid - if (pk < context_len) { - auto block_idx = beams.ptr<int32_t>(b)[pk]; - OPENVINO_ASSERT(block_idx >= 0, "block idx must be greater or equal than 0"); - for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { - buf_attn_w.ptr<float>(b, h, pq)[pk] = - dot_product(query.ptr<T>(b, h, pq), present_key.ptr<T2>(block_idx, h_group), - S, nullptr, nullptr, nullptr); - } - } + if (is_pagedattn) { + parallel_for3d_dynamic(B, h_group_num, kv_len, [&](size_t b, size_t h_group, size_t pk) { + auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); + // only positions within this batch's context length are valid + if (pk < context_len) { + auto block_idx = beams.ptr<int32_t>(b)[pk]; + OPENVINO_ASSERT(block_idx >= 0, "block idx must be greater than or equal to 0"); + + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + buf_attn_w.ptr<float>(b, h, pq)[pk] = + dot_product(query.ptr<T>(b, h, pq), present_key.ptr<T2>(block_idx, h_group), + S, nullptr, nullptr, nullptr); } - parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); } - } else { + } + }); + } else { + parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { + size_t start{0}, end{0}; + splitter(B * h_group_num * kv_len, nthr, ithr, start, end); + + size_t b, h_group, pk; + if (start < end) { + parallel_it_init(start, b, B, h_group, h_group_num, pk, kv_len); if (q_len == 1 && h_each_group_len == 1) { if (B == 1) { // the memory will be contiguous when B == 1 @@ -705,70 +725,96 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, } } } - } - }); + }); + } - parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { - auto cur_kv_len = kv_len; - auto ncausal = auto_causal ? (cur_kv_len - q_len + pq + 1) : cur_kv_len; - if (is_pagedattn) { - cur_kv_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); - ncausal = cur_kv_len; - } - // apply attention mask & sofmax - float* alibi_ptr = alibi_mask ? &alibi_mask.at<float>({b, h, pq, 0}, true) : nullptr; - uint8_t* attn_mask_ptr = nullptr; - auto attn_mask_prec = attention_mask.get_precision(); - if (attention_mask) - attn_mask_ptr = reinterpret_cast<uint8_t*>(&attention_mask.at<T>({b, h, pq, 0}, true)); - uint8_t* cmask_ptr = causal_mask ? &causal_mask.at<uint8_t>({b, h, pq, 0}, true) : nullptr; - attn_softmax_kernel(buf_attn_w.ptr<float>(b, h, pq), - buf_attn_w.ptr<float>(b, h, pq), - d_scale, - alibi_ptr, - attn_mask_ptr, - cmask_ptr, - select_nfltmax_at_0, - ncausal, - cur_kv_len, - attn_mask_prec, - ov::element::f32); - }); + if (is_pagedattn) { + parallel_for3d_dynamic(B, H, q_len, [&](size_t b, size_t h, size_t pq) { + auto cur_kv_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); + auto ncausal = cur_kv_len; + // apply attention mask & softmax + float* alibi_ptr = alibi_mask ? &alibi_mask.at<float>({b, h, pq, 0}, true) : nullptr; + uint8_t* attn_mask_ptr = nullptr; + auto attn_mask_prec = attention_mask.get_precision(); + if (attention_mask) + attn_mask_ptr = reinterpret_cast<uint8_t*>(&attention_mask.at<T>({b, h, pq, 0}, true)); + uint8_t* cmask_ptr = causal_mask ? &causal_mask.at<uint8_t>({b, h, pq, 0}, true) : nullptr; + attn_softmax_kernel(buf_attn_w.ptr<float>(b, h, pq), + buf_attn_w.ptr<float>(b, h, pq), + d_scale, + alibi_ptr, + attn_mask_ptr, + cmask_ptr, + select_nfltmax_at_0, + ncausal, + cur_kv_len, + attn_mask_prec, + ov::element::f32); + }); + } else { + parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { + auto cur_kv_len = kv_len; + auto ncausal = auto_causal ? (cur_kv_len - q_len + pq + 1) : cur_kv_len; + // apply attention mask & softmax + float* alibi_ptr = alibi_mask ? &alibi_mask.at<float>({b, h, pq, 0}, true) : nullptr; + uint8_t* attn_mask_ptr = nullptr; + auto attn_mask_prec = attention_mask.get_precision(); + if (attention_mask) + attn_mask_ptr = reinterpret_cast<uint8_t*>(&attention_mask.at<T>({b, h, pq, 0}, true)); + uint8_t* cmask_ptr = causal_mask ? &causal_mask.at<uint8_t>({b, h, pq, 0}, true) : nullptr; + attn_softmax_kernel(buf_attn_w.ptr<float>(b, h, pq), + buf_attn_w.ptr<float>(b, h, pq), + d_scale, + alibi_ptr, + attn_mask_ptr, + cmask_ptr, + select_nfltmax_at_0, + ncausal, + cur_kv_len, + attn_mask_prec, + ov::element::f32); + }); + } // attn_w * V buf_attn_score.resize({static_cast<size_t>(nthr), B, q_len, H, S}); // buf_attn_w {B, H, q_len, kv_len} - parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { - size_t start{0}, end{0}; - splitter(B * h_group_num * kv_len, nthr, ithr, start, end); - - memset(buf_attn_score.ptr<float>(ithr, 0, 0, 0, 0), 0, buf_attn_score.stride(0) * sizeof(float)); - - size_t b, h_group, pv; - if (start < end) { - parallel_it_init(start, b, B, h_group, h_group_num, pv, kv_len); - if (is_pagedattn) { - for (size_t iwork = start; iwork < end; ++iwork) { - auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); - // kv_len must be valid - if (pv < context_len) { - auto block_idx = beams.ptr<int32_t>(b)[pv]; - OPENVINO_ASSERT(block_idx >= 0, "block idx in vcache must be greater or equal than 0"); - auto* v = present_value.ptr<T2>(block_idx, h_group); - for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { - attn_acc_value(buf_attn_score.ptr<float>(ithr, b, pq, h), - buf_attn_w.ptr<float>(b, h, pq)[pv], - v, - S, - nullptr, - nullptr); - } - } + + if (is_pagedattn) { + parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { + memset(buf_attn_score.ptr<float>(ithr, 0, 0, 0, 0), 0, buf_attn_score.stride(0) * sizeof(float)); + }); + + parallel_for3d_dynamic(B, h_group_num, kv_len, [&](size_t b, size_t h_group, size_t pv) { + auto ithr = parallel_get_thread_num(); + auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); + // only positions within this batch's context length are valid + if (pv < context_len) { + auto block_idx = beams.ptr<int32_t>(b)[pv]; + OPENVINO_ASSERT(block_idx >= 0, "block idx in vcache must be greater than or equal to 0"); + auto* v = present_value.ptr<T2>(block_idx, h_group); + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + attn_acc_value(buf_attn_score.ptr<float>(ithr, b, pq, h), + buf_attn_w.ptr<float>(b, h, pq)[pv], + v, + S, + nullptr, + nullptr); } - parallel_it_step(b, B, h_group, h_group_num, pv, kv_len); } - } else { + } + }); + } else { + parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { + size_t start{0}, end{0}; + splitter(B * h_group_num * kv_len, nthr, ithr, start, end); + + memset(buf_attn_score.ptr<float>(ithr, 0, 0, 0, 0), 0, buf_attn_score.stride(0) * sizeof(float)); + + size_t b, h_group, pv; + if (start < end) { + parallel_it_init(start, b, B, h_group, h_group_num, pv, kv_len); if (q_len == 1 && h_each_group_len == 1) { for
(size_t iwork = start; iwork < end; ++iwork) { auto b_kv = beams ? beams.ptr(b)[pv] : b; @@ -801,8 +847,8 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, } } } - } - }); + }); + } parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { auto* temp = buf_attn_score.ptr(0, b, pq, h); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl index 189b2ea62736d4..90dc0f07e0ffb0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl @@ -375,7 +375,7 @@ KERNEL(gemm_tiled_opt)( #endif // TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST // Loading A tile and tile C calculation -#if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING && !HAS_DYNAMIC_N_PADDING && TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST +#if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING && TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST #if TILE_K_NOT_DIVISIBLE A_FLOATN a_read = TILE_K_NOT_DIVISIBLE_CALC ? a_ptr[sglid] : BLOCK_READ_A(a_ptr, 0); #else @@ -413,7 +413,6 @@ KERNEL(gemm_tiled_opt)( b_tile[subtile_k_id * SIMD_WIDTH + simd_local_id], c_tile[dot_id]); #else // TILE_K > SIMD_WIDTH #if IS_DYNAMIC && B_VEC_SIZE > 1 - A_FLOATN a_read_tmp = sub_group_broadcast(a_read, simd_local_id); #if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE) b_tile_tmp; unroll_for (uint b_elem = 0; b_elem < B_VEC_SIZE; ++b_elem) { @@ -429,7 +428,7 @@ KERNEL(gemm_tiled_opt)( #endif // TILE_K > SIMD_WIDTH } } - #if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING && !HAS_DYNAMIC_N_PADDING + #if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING // Read A for next dot_id #if TILE_K_NOT_DIVISIBLE a_read = (dot_id + 1 < tile_m_iterations) ? TILE_K_NOT_DIVISIBLE_CALC ? 
a_ptr[sglid] : BLOCK_READ_A(a_ptr, 0) : 0; @@ -592,12 +591,11 @@ KERNEL(gemm_tiled_opt)( unroll_for (uint simd_id = 0; simd_id < TILE_K_LEFTOVER; simd_id++) { #if B_VEC_SIZE > 1 - A_FLOATN a_read_tmp = sub_group_broadcast(a_read, simd_id); #if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE) b_tile_tmp = {b_tile[0][simd_id], b_tile[1][simd_id]}; - c_tile[dot_id] = mad((MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE))(a_read_tmp), b_tile_tmp, c_tile[dot_id]); + c_tile[dot_id] = mad((INPUT0_TYPE)sub_group_broadcast(a_read, simd_id), b_tile_tmp, c_tile[dot_id]); #else - c_tile[dot_id] = mad((MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE))(a_read_tmp), b_tile[simd_id], c_tile[dot_id]); + c_tile[dot_id] = mad((INPUT0_TYPE)sub_group_broadcast(a_read, simd_id), b_tile[simd_id], c_tile[dot_id]); #endif #else c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_id)), b_tile[simd_id], c_tile[dot_id]); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 3f26c86a241644..1e47ebecab8512 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -256,6 +256,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& ProgramBuilder prog(ctx->get_engine(), config); + float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + auto supported = ov::get_supported_nodes(model, [&config,this](std::shared_ptr& model) { std::map shapes; @@ -264,7 +266,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& }, [&prog](std::shared_ptr node) { return prog.is_op_supported(node); - }); + }, + query_model_ratio); for (auto&& op_name : supported) { res.emplace(op_name, ctx->get_device_name()); @@ -562,7 +565,8 @@ std::vector Plugin::get_supported_internal_properties() const ov::PropertyName{ov::internal::config_device_id.name(), ov::PropertyMutability::WO}, ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}}; + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::query_model_ratio.name(), PropertyMutability::RW}}; return supported_internal_properties; } diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 2b55f9d93d4449..cfc60af6663293 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -536,7 +536,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->enable(); pass_config->set_callback( - [](const_node_ptr &node) -> bool { + [&](const_node_ptr &node) -> bool { + OPENVINO_ASSERT(node->input_value(0).get_partial_shape().rank().is_static(), + node->get_friendly_name() + " has dynamic rank!"); return node->input_value(0).get_partial_shape().rank().get_length() <= 5; }); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index f3569d0cb2a2ee..7d80eddcde66fa 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -53,6 +53,7 @@ void 
ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), std::make_tuple(ov::internal::exclusive_async_requests, false), + std::make_tuple(ov::internal::query_model_ratio, 1.0f), std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), // Legacy API properties diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 70c7c20c0b7555..9220fad47269b4 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -361,7 +361,7 @@ class gemm_gpu_tests: public ::testing::Test { } } - void test_dynamic_padding(bool is_caching_test) { + void test_dynamic_padding(bool is_caching_test, bool n_dim_only) { tests::random_generator rg; rg.set_seed(GET_SUITE_NAME); @@ -414,10 +414,19 @@ class gemm_gpu_tests: public ::testing::Test { ov::Shape in2_shape_aligned = { aligned_batch1_size, aligned_batch2_size, aligned_k_size, aligned_n_size }; // Use dynamic padding for all BFYX dimensions - tensor dyn_pad_dims_input({1, 1, 1, 1}, 0); + tensor dyn_pad_dims_input1({0, 0, 0, 0}, 0); + tensor dyn_pad_dims_input2({0, 0, 0, 0}, 0); - auto in1_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input)}; - auto in2_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input)}; + if (n_dim_only) { + dyn_pad_dims_input1 = tensor({0, 0, 0, 0}, 0); + dyn_pad_dims_input2 = tensor({0, 0, 1, 0}, 0); + } else { + dyn_pad_dims_input1 = tensor({1, 1, 1, 1}, 0); + dyn_pad_dims_input2 = tensor({1, 1, 1, 1}, 0); + } + + auto in1_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input1)}; + auto in2_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input2)}; auto aligned_input1_mem = engine.allocate_memory({ov::PartialShape(in1_shape_aligned), data_types::f16, format::bfyx}); auto aligned_input2_mem = engine.allocate_memory({ov::PartialShape(in2_shape_aligned), data_types::f16, format::bfyx}); @@ -425,14 +434,14 @@ class gemm_gpu_tests: public ::testing::Test { auto input1_mem = engine.reinterpret_buffer(*aligned_input1_mem, layout{ov::PartialShape(in1_shape), data_types::f16, format::bfyx, - padding({padding_size_batch1, 0, 0, 0}, - {0, padding_size_batch2, padding_size_k, padding_size_m}, 0.0f, dyn_pad_dims_input)}); + n_dim_only ? padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input1) : + padding({padding_size_batch1, 0, 0, 0}, {0, padding_size_batch2, padding_size_k, padding_size_m}, 0.0f, dyn_pad_dims_input1)}); auto input2_mem = engine.reinterpret_buffer(*aligned_input2_mem, layout{ov::PartialShape(in2_shape), data_types::f16, format::bfyx, - padding({0, padding_size_batch2, 0, 0}, - {padding_size_batch1, 0, padding_size_n, padding_size_k}, 0.0f, dyn_pad_dims_input)}); + n_dim_only ? 
padding({0, 0, 0, 0}, {0, 0, padding_size_n, 0}, 0.0f, dyn_pad_dims_input2) : + padding({0, padding_size_batch2, 0, 0}, {padding_size_batch1, 0, padding_size_n, padding_size_k}, 0.0f, dyn_pad_dims_input2)}); auto input_1_data = rg.generate_random_1d(ov::shape_size(in1_shape), -2, 2); auto input_2_data = rg.generate_random_1d(ov::shape_size(in2_shape), -2, 2); @@ -1574,10 +1583,15 @@ TEST_F(gemm_gpu_tests, dynamic) { this->test_dynamic(false); } -TEST_F(gemm_gpu_tests, dynamic_padding) { - this->test_dynamic_padding(false); +TEST_F(gemm_gpu_tests, dynamic_padding_all_dim) { + this->test_dynamic_padding(false, false); } +TEST_F(gemm_gpu_tests, dynamic_padding_n_dim_only) { + this->test_dynamic_padding(false, true); +} + + TEST_F(gemm_gpu_tests, dynamic_multi_inference_same_shape) { this->test_dynamic_multi_inference_same_shape(false); }
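The functional tests above show the user-visible outcome of this PR: with PIPELINE_PARALLEL set, query_model reports the device each operation was assigned to. A minimal sketch of the same flow outside the test fixture, assuming two real GPUs are present; the device names and model path are placeholders:

#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    auto supported = core.query_model(
        model,
        "HETERO",
        {ov::device::priorities("GPU.0,GPU.1"),
         ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL})});
    for (const auto& kv : supported)  // op friendly name -> assigned device
        std::cout << kv.first << " -> " << kv.second << "\n";
    return 0;
}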