diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index f1edeaa18ff1ef..b64690b61abb1b 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -72,7 +72,8 @@ void regmodule_properties(py::module m) {
         .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY);

     py::enum_<ov::hint::ModelDistributionPolicy>(m_hint, "ModelDistributionPolicy", py::arithmetic())
-        .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL);
+        .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)
+        .value("PIPELINE_PARALLEL", ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL);

     py::enum_<ov::hint::ExecutionMode>(m_hint, "ExecutionMode", py::arithmetic())
         .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE)
diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp
index ca0c86aa924062..9badb007d526b9 100644
--- a/src/core/include/openvino/core/any.hpp
+++ b/src/core/include/openvino/core/any.hpp
@@ -10,6 +10,7 @@
 #include <map>
 #include <memory>
+#include <set>
 #include <string>
 #include <type_traits>
 #include <typeindex>
@@ -209,6 +210,18 @@ struct Read<std::vector<T, A>, typename std::enable_if<std::is_default_constructible<T>::value>::type> {
+template <typename K, typename C, typename A>
+struct Read<std::set<K, C, A>, typename std::enable_if<std::is_default_constructible<K>::value>::type> {
+    void operator()(std::istream& is, std::set<K, C, A>& set) const {
+        while (is.good()) {
+            std::string str;
+            is >> str;
+            auto v = from_string<K>(str);
+            set.insert(std::move(v));
+        }
+    }
+};
+
 template <typename K, typename T, typename C, typename A>
 struct Read<
     std::map<K, T, C, A>,
@@ -343,6 +356,21 @@ struct Write<std::vector<T, A>> {
     }
 };

+template <typename K, typename C, typename A>
+struct Write<std::set<K, C, A>> {
+    void operator()(std::ostream& os, const std::set<K, C, A>& set) const {
+        if (!set.empty()) {
+            std::size_t i = 0;
+            for (auto&& v : set) {
+                os << to_string(v);
+                if (i < (set.size() - 1))
+                    os << ' ';
+                ++i;
+            }
+        }
+    }
+};
+
 template <typename K, typename T, typename C, typename A>
 struct Write<std::map<K, T, C, A>> {
     void operator()(std::ostream& os, const std::map<K, T, C, A>& map) const {
diff --git a/src/core/tests/any.cpp b/src/core/tests/any.cpp
index 426bf1373a3e74..7d9e3d4edc1126 100644
--- a/src/core/tests/any.cpp
+++ b/src/core/tests/any.cpp
@@ -158,6 +158,23 @@ TEST_F(AnyTests, AnyAsMapOfAnys) {
     ASSERT_EQ(refMap["testParamString"].as<std::string>(), testString);
 }

+TEST_F(AnyTests, AnyAsSetOfAnys) {
+    std::set<std::string> refSet0;
+    std::set<int> refSet1;
+    refSet0.insert("test");
+    refSet1.insert(4);
+    Any s0 = refSet0;
+    Any s1 = refSet1;
+    bool isSet0 = s0.is<std::set<std::string>>();
+    bool isSet1 = s1.is<std::set<int>>();
+    ASSERT_TRUE(isSet0);
+    ASSERT_TRUE(isSet1);
+    auto testSet0 = s0.as<std::set<std::string>>();
+    auto testSet1 = s1.as<std::set<int>>();
+    ASSERT_NE(testSet0.count("test"), 0);
+    ASSERT_NE(testSet1.count(4), 0);
+}
+
 TEST_F(AnyTests, AnyAsMapOfMapOfAnys) {
     std::map<std::string, Any> refMap1;
     refMap1["testParamInt"] = 4;
diff --git a/src/frontends/onnx/frontend/src/op/batch_norm.cpp b/src/frontends/onnx/frontend/src/op/batch_norm.cpp
index 04a613bc10bdb7..fc4a3c2a4fd9d1 100644
--- a/src/frontends/onnx/frontend/src/op/batch_norm.cpp
+++ b/src/frontends/onnx/frontend/src/op/batch_norm.cpp
@@ -52,6 +52,12 @@ ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node) {
     OPENVINO_THROW("Cannot create OpenVINO batch norm with unsupported number of inputs");
 }
 }  // namespace set_1
+/*
+    Opset 6 is skipped because there is no significant difference between opset 1 and opset 6.
+    The difference found is:
+    1. In training, the computation of ReduceMean and ReduceVar uses float
+       to avoid overflow for float16 inputs.
+ */ namespace set_7 { // This version supports ONNX BatchNormalization-7 and BatchNormalization-9 @@ -71,8 +77,42 @@ ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node) { return {std::make_shared(x, scale, bias, mean, var, epsilon)}; } - } // namespace set_7 +/* + Opset 9 is skipped because there are no significant difference between opset7 and opset9. + Found difference is: + 1. removed -> spatial : int (default is 1) + If true, compute the mean and variance across per activation. If false, compute the mean and variance across + per feature over each mini-batch. + + */ + +namespace set_14 { +// This version supports ONNX BatchNormalization-14 BatchNormalization-15 +ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node) { + ov::OutputVector inputs{node.get_ov_inputs()}; + auto x = inputs.at(0); + auto scale = inputs.at(1); + auto bias = inputs.at(2); + auto mean = inputs.at(3); + auto var = inputs.at(4); + + double epsilon{node.get_attribute_value("epsilon", 1e-5)}; + int64_t training_mode{node.get_attribute_value("training_mode", 0)}; + + CHECK_VALID_NODE(node, + training_mode == false && node.get_outputs_size() == 1, + "Training mode of BatchNormalization is not supported."); + return {std::make_shared(x, scale, bias, mean, var, epsilon)}; +} +} // namespace set_14 +/* + Opset 15 is skipped because there are no significant difference between opset14 and opset15. + Found difference is: + 1. In Training, the computation of ReduceMean and ReduceVar uses float + to avoid overflow for float16 inputs. + */ + } // namespace op } // namespace onnx } // namespace frontend diff --git a/src/frontends/onnx/frontend/src/op/batch_norm.hpp b/src/frontends/onnx/frontend/src/op/batch_norm.hpp index fbf4c715bb15de..29a79d444152d2 100644 --- a/src/frontends/onnx/frontend/src/op/batch_norm.hpp +++ b/src/frontends/onnx/frontend/src/op/batch_norm.hpp @@ -19,6 +19,11 @@ namespace set_7 { ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node); } // namespace set_7 + +namespace set_14 { +ov::OutputVector batch_norm(const ov::frontend::onnx::Node& node); + +} // namespace set_14 } // namespace op } // namespace onnx } // namespace frontend diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index 0acbb0c9a8c2f4..02255b673ca576 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -360,6 +360,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("AveragePool", 1, average_pool); REGISTER_OPERATOR("BatchNormalization", 1, batch_norm); REGISTER_OPERATOR("BatchNormalization", 7, batch_norm); + REGISTER_OPERATOR("BatchNormalization", 14, batch_norm); REGISTER_OPERATOR("BitShift", 1, bitshift); REGISTER_OPERATOR("BitwiseAnd", 1, bitwise_and); REGISTER_OPERATOR("BitwiseNot", 1, bitwise_not); diff --git a/src/frontends/onnx/tests/models/batchnorm_opset1.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset1.prototxt new file mode 100644 index 00000000000000..11bed1195afa2b --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset1.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + 
dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 1 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset14.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset14.prototxt new file mode 100644 index 00000000000000..48edc903669cc4 --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset14.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 14 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset15.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset15.prototxt new file mode 100644 index 00000000000000..cf0a43fec08c0f --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset15.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + 
version: 15 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset6.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset6.prototxt new file mode 100644 index 00000000000000..31217f90df3b47 --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset6.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 6 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset7.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset7.prototxt new file mode 100644 index 00000000000000..cdc60d2c2a038f --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset7.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 7 +} diff --git a/src/frontends/onnx/tests/models/batchnorm_opset9.prototxt b/src/frontends/onnx/tests/models/batchnorm_opset9.prototxt new file mode 100644 index 00000000000000..e7e7459f9d0d3b --- /dev/null +++ b/src/frontends/onnx/tests/models/batchnorm_opset9.prototxt @@ -0,0 +1,113 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "x" + input: "s" + input: "bias" + input: "mean" + input: "var" + output: "y" + op_type: "BatchNormalization" + } + name: "test_batchnorm_example" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + 
input { + name: "s" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "mean" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "var" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 74ace1949c8177..7b3e39e71399aa 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -320,6 +320,95 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_default) { test_case.run(); } +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset1) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset1.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset6) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset6.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset7) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset7.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset9) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset9.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, 
onnx_model_batch_norm_opset14) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset14.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_batch_norm_opset15) { + // Batch Normalization with default parameters + auto model = convert_model("batchnorm_opset15.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_input({-1.f, 0.f, 1.f, 2.f, 3.f, 4.f}); // data {1, 2, 1, 3} + test_case.add_input({1.f, 1.5f}); // scale + test_case.add_input({0.f, 1.f}); // bias + test_case.add_input({0.f, 3.f}); // mean + test_case.add_input({1.f, 1.5f}); // var + test_case.add_expected_output(Shape{1, 2, 1, 3}, + {-0.999995f, 0.f, 0.999995f, -0.22474074f, 1.f, 2.2247407f}); + test_case.run(); +} + OPENVINO_TEST(${BACKEND_NAME}, onnx_model_relu) { // Simple ReLU test auto model = convert_model("relu.onnx"); diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 8af71f2bd90154..eb4bc9bee916a7 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -69,5 +69,12 @@ static constexpr Property compiled_model_ru static constexpr Property compiled_model_runtime_properties_supported{ "COMPILED_MODEL_RUNTIME_PROPERTIES_SUPPORTED"}; +/** + * @brief Read-write property to set the percentage of the estimated model size which is used to determine the query + * model results for further processing + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property query_model_ratio{"QUERY_MODEL_RATIO"}; + } // namespace internal } // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index a569ce711b6520..8165e658c206f0 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -230,12 +230,14 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this get_supported_nodes( const std::shared_ptr& model, std::function&)> transform, - std::function)> is_node_supported); + std::function)> is_node_supported, + float query_model_ratio = 1.0f); /** * @private diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 2ddd8702eb87fd..d759988d6c5d2d 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -400,9 +400,11 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; enum class ModelDistributionPolicy { - TENSOR_PARALLEL = 0, // Split tensor into several parts and distribute them between sockets/devices during model - // compilation. At inference time sockets/devices process tensors in parallel and do - // syncronization at the end ensuring mathematical correctness. 
+    TENSOR_PARALLEL = 0,    // Distribute tensors to multiple sockets/devices during model compilation. At inference
+                            // time, sockets/devices process individual tensors in parallel.
+    PIPELINE_PARALLEL = 1,  // Distribute tensors to multiple sockets/devices during model compilation. At inference
+                            // time, sockets/devices process individual tensors one by one, and each socket/device
+                            // processes a portion of a different tensor in parallel.
 };

 /** @cond INTERNAL */
@@ -410,6 +412,8 @@ inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) {
     switch (stream_mode) {
     case ModelDistributionPolicy::TENSOR_PARALLEL:
         return os << "TENSOR_PARALLEL";
+    case ModelDistributionPolicy::PIPELINE_PARALLEL:
+        return os << "PIPELINE_PARALLEL";
     default:
         OPENVINO_THROW("Unsupported model distribution policy!");
     }
@@ -420,6 +424,8 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
     is >> str;
     if (str == "TENSOR_PARALLEL") {
         stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL;
+    } else if (str == "PIPELINE_PARALLEL") {
+        stream_mode = ModelDistributionPolicy::PIPELINE_PARALLEL;
     } else {
         OPENVINO_THROW("Unsupported model distribution policy: ", str);
     }
@@ -430,17 +436,19 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) {
 /**
  * @brief This property defines model distribution policy for inference with multiple sockets/devices.
  * @ingroup ov_runtime_cpp_prop_api
- *
  * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA
 * nodes or between different GPUs).
- * -- TENSOR_PARALLEL : Split tensor into several parts and distribute them between sockets/devices during model
- *                      compilation. At inference time sockets/devices process tensors in parallel and do syncronization
- *                      at the end ensuring mathematical correctness.
+ * -- TENSOR_PARALLEL   : Distribute tensors to multiple sockets/devices during model compilation. At inference time,
+ *                        sockets/devices process individual tensors in parallel.
+ * -- PIPELINE_PARALLEL : Distribute tensors to multiple sockets/devices during model compilation. At inference time,
+ *                        sockets/devices process individual tensors one by one, and each socket/device processes a
+ *                        portion of a different tensor in parallel.
 *
- * The following code is an example how TENSOR_PARALLEL model disrtibution policy might be enabled.
+ * The following code is an example of how the TENSOR_PARALLEL or PIPELINE_PARALLEL model distribution policy might be enabled.
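+ * Note that the property value is a set of policies (its value type is std::set<ov::hint::ModelDistributionPolicy>),
+ * which is why the examples below pass the value in braces.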
* * @code * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})); + * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL})); * @endcode */ static constexpr Property> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"}; diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index 16b9d3be97830f..6532aed839044d 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -4,27 +4,30 @@ #include "openvino/runtime/iplugin.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/util/op_types.hpp" +#include "openvino/op/util/shape_of_base.hpp" #include "openvino/pass/manager.hpp" #include "transformations/common_optimizations/fused_names_cleanup.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" namespace { -std::unordered_set get_removed_nodes(const std::shared_ptr& originalFunction, - const std::shared_ptr& transformedFunction) { +std::unordered_set get_removed_nodes(const std::shared_ptr& original_model, + const std::shared_ptr& transformed_model) { std::unordered_set result = {}; - std::unordered_set transformedNodeNames = {}; + std::unordered_set transformed_node_names = {}; - for (auto&& node : transformedFunction->get_ops()) { - transformedNodeNames.emplace(node->get_friendly_name()); - for (auto&& fusedLayerName : ov::getFusedNamesVector(node)) - transformedNodeNames.emplace(fusedLayerName); + for (auto&& node : transformed_model->get_ops()) { + transformed_node_names.emplace(node->get_friendly_name()); + for (auto&& fused_layer_name : ov::getFusedNamesVector(node)) { + transformed_node_names.emplace(fused_layer_name); + } } - for (auto&& originalNode : originalFunction->get_ops()) { - if (!transformedNodeNames.count(originalNode->get_friendly_name())) - result.emplace(originalNode->get_friendly_name()); + for (auto&& original_node : original_model->get_ops()) { + if (!transformed_node_names.count(original_node->get_friendly_name())) + result.emplace(original_node->get_friendly_name()); } return result; @@ -75,7 +78,15 @@ std::shared_ptr ov::IPlugin::compile_model(const std::string std::unordered_set ov::get_supported_nodes( const std::shared_ptr& model, std::function&)> transform, - std::function)> is_node_supported) { + std::function)> is_node_supported, + float query_model_ratio) { + using NameSet = std::unordered_set; + using NodePtr = std::shared_ptr; + NameSet res; + if (query_model_ratio <= 0) { + return res; + } + bool query_by_memory_control = query_model_ratio < 1; // Collect original operation names std::unordered_set original_ops; for (auto&& node : model->get_ops()) { @@ -83,7 +94,6 @@ std::unordered_set ov::get_supported_nodes( } auto transformed_model = model->clone(); - // Cleanup fused names if there are present in original model ov::pass::Manager m; m.register_pass(); @@ -92,13 +102,13 @@ std::unordered_set ov::get_supported_nodes( transform(transformed_model); auto ops = transformed_model->get_ordered_ops(); - // Mark removed nodes as supported - std::unordered_set supported = get_removed_nodes(model, transformed_model); - std::unordered_set unsupported; + NameSet supported; + NameSet unsupported; + NameSet removed_nodes = get_removed_nodes(model, transformed_model); - auto get_names_set = [](const std::shared_ptr& op) -> std::unordered_set { + auto get_names_set = [](const NodePtr& op) -> NameSet { auto fused_names = ov::getFusedNamesVector(op); - std::unordered_set 
names(fused_names.begin(), fused_names.end()); + NameSet names(fused_names.begin(), fused_names.end()); names.insert(op->get_friendly_name()); return names; }; @@ -119,20 +129,66 @@ std::unordered_set ov::get_supported_nodes( supported.erase(name); } - auto has_all_consumers_unsupported = [&supported](const std::shared_ptr& node) { - for (auto&& input : node->output(0).get_target_inputs()) { - if (supported.count(input.get_node()->get_friendly_name())) { - return false; + auto copy_set = [](NameSet& source, NameSet& dest) { + dest.clear(); + copy(source.begin(), source.end(), inserter(dest, dest.end())); + }; + + auto get_output_node = [](const ov::Output& output) -> NodePtr { + return output.get_node_shared_ptr(); + }; + + auto get_input_node = [&get_output_node](const ov::Input& input) -> NodePtr { + return get_output_node(input.get_source_output()); + }; + + auto has_all_consumers_unsupported = [&](const NameSet& supported, const NodePtr& node) -> bool { + bool has_consumers = false; + for (auto&& output : node->outputs()) { + for (auto&& input : output.get_target_inputs()) { + has_consumers = true; + if (supported.count(input.get_node()->get_friendly_name())) { + return false; + } } } - return (node->output(0).get_target_inputs().size() != 0); + return has_consumers; }; - auto has_unsupported_source = [&supported](const std::shared_ptr& node) { - return !supported.count(node->input_values().begin()->get_node()->get_friendly_name()); + auto has_users_supported = [&](const NameSet& supported, const NodePtr& node) -> bool { + auto users = node->get_users(); + for (auto& user : users) { + if (supported.count(user->get_friendly_name())) { + return true; + } + } + return false; + }; + + auto has_users_unsupported = [&](const NameSet& supported, const NodePtr& node) -> bool { + auto users = node->get_users(); + for (auto& user : users) { + if (!supported.count(user->get_friendly_name()) && !ov::is_type(user)) { + return true; + } + } + return false; }; - auto remove_op_from_supported = [&](const std::shared_ptr& node) { + auto has_unsupported_source = + [&get_input_node](const NameSet& supported, const NodePtr& op, bool const_only = false) -> bool { + for (auto& input : op->inputs()) { + const auto& node = get_input_node(input); + if (const_only && !ov::op::util::is_constant(node)) + continue; + if (!supported.count(node->get_friendly_name())) { + return true; + } + } + return false; + }; + + auto remove_op_from_supported = [&](const NodePtr& node) { auto names = get_names_set(node); for (auto& name : get_names_set(node)) { supported.erase(name); @@ -169,36 +225,204 @@ std::unordered_set ov::get_supported_nodes( } } - // Walk over transformed model for special handing of Parameters/Constants/Results for (auto&& op : ops) { // Mark Constants and all fused names as unsupported if they are have no // supported consumers/sources if (ov::op::util::is_constant(op)) { - if (has_all_consumers_unsupported(op)) { + if (has_all_consumers_unsupported(supported, op)) { remove_op_from_supported(op); + continue; } } } + size_t total_ops_size = 0; + for (auto&& op : ops) { + if (ov::op::util::is_constant(op)) { + const auto const_byte_size = op->get_element_type().size() * shape_size(op->get_shape()); + total_ops_size += const_byte_size; + } + } + // If there is no constant or supported nodes in the model, mark query_by_memory_control as false + if (total_ops_size == 0 || supported.size() == 0) { + query_by_memory_control = false; + } + + if (query_by_memory_control) { + NameSet temp_supported; + NameSet 
temp_unsupported; + NameSet temp_supported_1; + NameSet temp_unsupported_1; + bool cancel_split = false; + std::set split_node_set; + int64_t last_total_len = 0; + int search_times = 0; + size_t last_total_size = 0; + double min_query_size = query_model_ratio * total_ops_size * 0.95; + double max_query_size = query_model_ratio * total_ops_size * 1.05; + copy_set(supported, temp_supported); + copy_set(unsupported, temp_unsupported); + // Search the smallest transmission node within the user's requested ratio range of 0.95-1.05 times + do { + std::map temp_pair_checker; + bool ready_split = false; + bool start_split = false; + bool has_min_graph = false; + size_t total_size = 0; + search_times++; + copy_set(temp_supported, supported); + copy_set(temp_unsupported, unsupported); + // Walk over transformed model for special handing of Parameters/Constants/Results + for (auto&& op : ops) { + if (supported.count(op->get_friendly_name()) && !cancel_split) { + if (const auto& assign = std::dynamic_pointer_cast(op)) { + if (temp_pair_checker.count(assign->get_variable_id()) == 0) { + temp_pair_checker[assign->get_variable_id()] = 1; + } else { + temp_pair_checker[assign->get_variable_id()]++; + } + } + if (ov::op::util::is_constant(op) && !ready_split) { + const auto const_byte_size = op->get_element_type().size() * shape_size(op->get_shape()); + total_size += const_byte_size; + // If the total size is 1.05 times larger than the user's requirement: + // - If has_min_graph = false, it means there is no nodes meets requirement, so need cancel + // split and break + // - If th split_node_set > 1, it means this is not the first search in do-while, so cancel + // split and break + if (total_size <= max_query_size) { + has_min_graph = true; + } else if (!has_min_graph || search_times > 1) { + cancel_split = true; + break; + } + // Ready to split if total size meets user's requirement and Assign-ReadValue operations in + // pairs on the network + if (total_size >= min_query_size) { + if (!ready_split && split_node_set.find(op->get_friendly_name()) == split_node_set.end()) { + ready_split = check_pairs(temp_pair_checker); + if (ready_split) { + split_node_set.insert(op->get_friendly_name()); + // Judge if the current constant op should be removed from supported + if (total_size < max_query_size) + continue; + } + } + } + } + // Start splitting when ready and the ops is constant + if (ready_split) { + if (ov::op::util::is_constant(op)) { + remove_op_from_supported(op); + start_split = true; + } else if (start_split) { + remove_op_from_supported(op); + for (auto& input : op->inputs()) { + const auto& node = get_input_node(input); + if (ov::op::util::is_constant(node)) { + remove_op_from_supported(node); + } + } + } + } + } + } + // Add the ops to supported that removed by transformations and it has supported users + // + // constant_compressed(to be marked as supported) + // | + // convert(to be marked as supported) + // | + // divide(already in supported) + // + // In case the dependency relationships of some nodes, so traverse the entire model to ensure accurate + // split. For example: In the graph above, constant_compressed op will be first obtained by + // get_ordered_ops(), but it depends on convert op, so need loop again to mark constant_compressed op after + // convert op is marked. 
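+        // (Illustrative walk-through of the sweep below, not part of the original patch: in the diagram above,
+        // the first pass marks `convert` supported because its user `divide` already is; the second pass then
+        // marks `constant_compressed` because `convert` is now supported; a third pass adds nothing and the
+        // loop terminates.)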
+ bool update_supported = true; + while (update_supported) { + update_supported = false; + for (auto& op : model->get_ordered_ops()) { + if (!supported.count(op->get_friendly_name()) && has_users_supported(supported, op) && + !unsupported.count(op->get_friendly_name())) { + supported.insert(op->get_friendly_name()); + update_supported = true; + } + } + } + // Calculate the data size that needs to be transmitted after the current model is split + int64_t total_len = 0; + for (auto& op : model->get_ordered_ops()) { + if (supported.count(op->get_friendly_name()) && !ov::op::util::is_constant(op) && + !ov::op::util::is_parameter(op)) { + if (has_users_unsupported(supported, op)) { + int64_t op_size = 1; + for (size_t shape_id = 0; shape_id < op->get_output_partial_shape(0).size(); shape_id++) { + if (!op->get_output_partial_shape(0)[shape_id].is_dynamic()) { + int64_t len = op->get_output_partial_shape(0)[shape_id].get_length(); + if (len >= 1) + op_size *= len; + } + } + total_len += op_size; + } + } + } + if ((total_len < last_total_len || last_total_len == 0) && !cancel_split) { + last_total_len = total_len; + copy_set(supported, temp_supported_1); + copy_set(unsupported, temp_unsupported_1); + } + // Cancel split when total size is unchanged in loop + if (total_size != last_total_size) { + last_total_size = total_size; + } else { + cancel_split = true; + } + } while (!cancel_split); + copy_set(temp_supported_1, supported); + copy_set(temp_unsupported_1, unsupported); + } else { + // If memory control is off + // mark all removed nodes as supported + supported.insert(removed_nodes.begin(), removed_nodes.end()); + } + // Finally get intersection of all supported operation names // and operation names from original model - std::unordered_set res; for (auto&& name : supported) { if (original_ops.count(name)) { res.insert(name); } } - // Remove parameters which has no supported consumers + // Remove parameters (or parameter + convert) which has no supported consumers + // and results (or result + convert) which has no supported source node + for (auto& op : model->get_ordered_ops()) { + if (ov::is_type(op)) { + if (ov::op::util::is_parameter(get_input_node(op->input(0))) && has_all_consumers_unsupported(res, op)) { + res.erase(op->get_friendly_name()); + } + } else { + auto outputs = op->outputs(); + auto all_consumers_are_results = + std::all_of(outputs.begin(), outputs.end(), [&](const ov::Output& output) -> bool { + return ov::op::util::is_output(get_output_node(output)); + }); + if (all_consumers_are_results && has_unsupported_source(res, op, true)) { + res.erase(op->get_friendly_name()); + } + } + } + for (auto& param : model->get_parameters()) { - if (has_all_consumers_unsupported(param)) { + if (has_all_consumers_unsupported(res, param)) { res.erase(param->get_friendly_name()); } } - // Remove results which has no supported source node for (auto& result : model->get_results()) { - if (has_unsupported_source(result)) { + if (has_unsupported_source(res, result)) { res.erase(result->get_friendly_name()); } } diff --git a/src/inference/tests/unit/query_model_test.cpp b/src/inference/tests/unit/query_model_test.cpp index 91e3c79f2a928c..f7a4ea80794134 100644 --- a/src/inference/tests/unit/query_model_test.cpp +++ b/src/inference/tests/unit/query_model_test.cpp @@ -21,12 +21,15 @@ #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/convert_precision.hpp" #include "transformations/init_node_info.hpp" +#include 
"transformations/op_conversions/convert_divide.hpp" #include "transformations/op_conversions/convert_reduce_to_pooling.hpp" #include "transformations/op_conversions/log_softmax_decomposition.hpp" #include "transformations/op_conversions/reduce_l2_decomposition.hpp" #include "transformations/rt_info/decompression.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" +using ConfigParams = std::tuple>; + std::ostream& operator<<(std::ostream& os, const std::unordered_set& s); std::ostream& operator<<(std::ostream& os, const std::unordered_set& s) { @@ -40,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const std::unordered_set return os; } -class GetSupportedNodesTest : public ::testing::Test { +class GetSupportedNodesTest : public ::testing::TestWithParam { protected: ov::Shape m_shape{1, 84}; std::shared_ptr m_function; @@ -48,8 +51,9 @@ class GetSupportedNodesTest : public ::testing::Test { public: void Run(std::function&)> transform, std::function)> is_node_supported, - const std::unordered_set& expected) { - auto supported = ov::get_supported_nodes(m_function, transform, is_node_supported); + const std::unordered_set& expected, + float query_model_ratio = 1.0f) { + auto supported = ov::get_supported_nodes(m_function, transform, is_node_supported, query_model_ratio); auto const is_in_expected = [&expected](const std::string& x) { return expected.find(x) != expected.end(); }; @@ -157,7 +161,7 @@ TEST_F(GetSupportedNodesTest, SupportedCompressedConstantNop) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "constant_compressed", "constant", "add", "result"}); } @@ -180,7 +184,7 @@ TEST_F(GetSupportedNodesTest, SupportedConstantInsertAdditionalOp) { m.register_pass(); m.run_passes(model); for (auto& op : model->get_ops()) { - if (std::dynamic_pointer_cast(op) != nullptr) { + if (ov::is_type(op)) { // Add one more dummy operation auto consumers = op->output(0).get_target_inputs(); auto shape = op->get_shape(); @@ -197,8 +201,7 @@ TEST_F(GetSupportedNodesTest, SupportedConstantInsertAdditionalOp) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op) || ov::is_type(op); }, {"input", "constant", "output_operation", "result"}); } @@ -235,7 +238,7 @@ TEST_F(GetSupportedNodesTest, PartiallySupportedCompressedConstant) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input2", "constant_compressed", "constant", "mul", "result2"}); } @@ -277,7 +280,7 @@ TEST_F(GetSupportedNodesTest, ConstantSubgraphSupported) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "weights", @@ -317,7 +320,7 @@ TEST_F(GetSupportedNodesTest, UnmarkedSupportedInputsOutputs) { }, [&](const std::shared_ptr& op) { // Plugin don't mark input, constant and result as supported - return (std::dynamic_pointer_cast(op) != nullptr); + return ov::is_type(op); }, {"add"}); } @@ -347,7 +350,7 @@ TEST_F(GetSupportedNodesTest, 
WrongFusedNamesInOriginalModel) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "weights", "matmul"}); } @@ -374,10 +377,8 @@ TEST_F(GetSupportedNodesTest, FusedNamesSupportedUnsupportedBoth) { [&](const std::shared_ptr& op) { // Exp is not supported and all constants are missing return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op) || ov::is_type(op) || + ov::is_type(op) || ov::is_type(op); }, {"dummy_param"}); // kepp dummy only since it has no unsupported consumers } @@ -421,7 +422,7 @@ TEST_F(GetSupportedNodesTest, ShapeOfNonConstantNode) { }, [&](const std::shared_ptr& op) { return ov::op::util::is_parameter(op) || ov::op::util::is_constant(op) || ov::op::util::is_output(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {"input", "slope_compressed", "slope", "prelu"}); // keep dummy only since it has no unsupported consumers } @@ -490,7 +491,7 @@ TEST_F(GetSupportedNodesTest, FusedNameReduceL2Test) { [&](const std::shared_ptr& op) { // Pooling is supported, but Sqrt is not return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::op::util::is_constant(op) || - (std::dynamic_pointer_cast(op) != nullptr); + ov::is_type(op); }, {}); // Check that constant axis is removed from supported } @@ -520,3 +521,214 @@ TEST_F(GetSupportedNodesTest, AssignReadValueTest) { }, {}); } + +TEST_F(GetSupportedNodesTest, NoSupportedOpsTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value->set_friendly_name("const_val"); + auto add = std::make_shared(param, const_value); + add->set_friendly_name("add"); + auto res = std::make_shared(add); + res->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{param}); + } + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return false; + }, + {}, + 0.9f); +} + +TEST_F(GetSupportedNodesTest, NoConstOpTest) { + { + auto param1 = std::make_shared(ov::element::f32, ov::Shape{1, 512}); + param1->set_friendly_name("input1"); + auto param2 = std::make_shared(ov::element::f32, ov::Shape{1, 512}); + param2->set_friendly_name("input2"); + auto add = std::make_shared(param1, param2); + add->set_friendly_name("add"); + auto res = std::make_shared(add); + res->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{param1, param2}); + } + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::is_type(op); + }, + {"input1", "input2", "add", "res"}, + 0.9f); +} + +TEST_F(GetSupportedNodesTest, DivideWillRemoveConvertAndConstant) { + { + auto param = std::make_shared(ov::element::f32, ov::Shape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto constant_compressed = 
ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1, 3, 2, 2}, {1}); + constant_compressed->set_friendly_name("constant_compressed"); + auto convert = std::make_shared(constant_compressed, ov::element::f32); + convert->set_friendly_name("convert"); + auto divide = std::make_shared(param, convert); + divide->set_friendly_name("divide"); + auto const_value = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value->set_friendly_name("const_val"); + auto add = std::make_shared(divide, const_value); + add->set_friendly_name("add"); + auto result = std::make_shared(add); + result->set_friendly_name("result"); + m_function = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); + } + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + const bool keep_precision_sensitive_in_fp32_1 = true; + const bool convert_input_output_precision = false; + const bool store_original_precision_as_rt_attribute = true; + type_to_fuse_map empty_fuse_map = {}; + precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}}; + m.register_pass(fp_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32_1, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return true; + }, + {"input", "constant_compressed", "divide", "const_val", "add", "convert", "result"}, + 0.98f); +} + +using GetSupportedNodesCommonTest = GetSupportedNodesTest; +using GetSupportedNodesOneConstOp = GetSupportedNodesTest; +using GetSupportedNodesStopSplit = GetSupportedNodesTest; + +TEST_P(GetSupportedNodesCommonTest, SplitModelWithDifferentRatioTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value1 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value1->set_friendly_name("const_val1"); + auto add1 = std::make_shared(param, const_value1); + add1->set_friendly_name("add1"); + auto const_value2 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value2->set_friendly_name("const_val2"); + auto add2 = std::make_shared(add1, const_value2); + add2->set_friendly_name("add2"); + auto result = std::make_shared(add2); + result->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); + } + float query_model_ratio; + std::unordered_set expected; + std::tie(query_model_ratio, expected) = this->GetParam(); + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::op::util::is_constant(op) || + ov::is_type(op) || ov::is_type(op); + }, + expected, + query_model_ratio); +} + +TEST_P(GetSupportedNodesOneConstOp, OneConstOpTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value->set_friendly_name("const_val"); + auto add = std::make_shared(param, const_value); + add->set_friendly_name("add"); + auto res = std::make_shared(add); + res->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{param}); + } + float 
query_model_ratio; + std::unordered_set expected; + std::tie(query_model_ratio, expected) = this->GetParam(); + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::op::util::is_constant(op) || + ov::is_type(op); + }, + expected, + query_model_ratio); +} + +TEST_P(GetSupportedNodesStopSplit, StopSplitTest) { + { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 2, 2}); + param->set_friendly_name("input"); + auto const_value1 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value1->set_friendly_name("const_val1"); + auto add = std::make_shared(param, const_value1); + add->set_friendly_name("add"); + auto const_value2 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 2, 2}, {1}); + const_value2->set_friendly_name("const_val2"); + auto mul_scale = std::make_shared(add, const_value2); + mul_scale->set_friendly_name("mul_scale"); + auto result = std::make_shared(mul_scale); + result->set_friendly_name("res"); + m_function = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); + } + float query_model_ratio; + std::unordered_set expected; + std::tie(query_model_ratio, expected) = this->GetParam(); + Run( + [&](std::shared_ptr& model) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(model); + }, + [&](const std::shared_ptr& op) { + return ov::op::util::is_parameter(op) || ov::op::util::is_output(op) || ov::is_type(op) || + ov::op::util::is_constant(op); + }, + expected, + query_model_ratio); +} + +const std::vector testConfigs = { + ConfigParams{0.0f, std::unordered_set{}}, + ConfigParams{0.5f, std::unordered_set{"input", "const_val1", "add1"}}, + ConfigParams{1.0f, std::unordered_set{"input", "const_val1", "add1", "const_val2", "add2", "res"}}}; + +const std::vector testConfigs1 = { + ConfigParams{0.0f, std::unordered_set{}}, + ConfigParams{0.5f, std::unordered_set{}}, + ConfigParams{1.0f, std::unordered_set{"input", "const_val", "add", "res"}}}; + +const std::vector testConfigs2 = { + ConfigParams{0.0f, std::unordered_set{}}, + ConfigParams{0.3f, std::unordered_set{}}, + ConfigParams{0.9f, std::unordered_set{"input", "const_val1", "add"}}, + ConfigParams{1.0f, std::unordered_set{"input", "const_val1", "add"}}}; + +INSTANTIATE_TEST_SUITE_P(GetSupportedNodesTest, GetSupportedNodesCommonTest, ::testing::ValuesIn(testConfigs)); +INSTANTIATE_TEST_SUITE_P(GetSupportedNodesTest, GetSupportedNodesOneConstOp, ::testing::ValuesIn(testConfigs1)); +INSTANTIATE_TEST_SUITE_P(GetSupportedNodesTest, GetSupportedNodesStopSplit, ::testing::ValuesIn(testConfigs2)); diff --git a/src/plugins/hetero/src/config.cpp b/src/plugins/hetero/src/config.cpp index 5cdb5cc125a673..a54a91fa19d900 100644 --- a/src/plugins/hetero/src/config.cpp +++ b/src/plugins/hetero/src/config.cpp @@ -6,6 +6,7 @@ #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" +#include "properties.hpp" using namespace ov::hetero; @@ -20,6 +21,18 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defa if (ov::device::priorities == key) { device_priorities = value.as(); + } else if (ov::hint::model_distribution_policy == key) { + for (auto& row : value.as>()) { + if (row != ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL) { + OPENVINO_THROW( + "Wrong value ", + row, + " for property key ", + 
ov::hint::model_distribution_policy.name(),
+                    ". HETERO plugin only supports {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}");
+            }
+        }
+        modelDistributionPolicy = value.as<std::set<ov::hint::ModelDistributionPolicy>>();
     } else {
         if (throwOnUnsupported)
             OPENVINO_THROW("Property was not found: ", key);
@@ -31,6 +44,8 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defaultCfg, bool throwOnUnsupported)
 ov::Any Configuration::get(const std::string& name) const {
     if (name == ov::device::priorities) {
         return {device_priorities};
+    } else if (name == ov::hint::model_distribution_policy) {
+        return {modelDistributionPolicy};
     } else {
         OPENVINO_THROW("Property was not found: ", name);
     }
@@ -42,7 +57,8 @@ std::vector<std::string> Configuration::get_supported() const {
 }

 ov::AnyMap Configuration::get_hetero_properties() const {
-    return {{ov::device::priorities.name(), device_priorities}};
+    return {{ov::device::priorities.name(), device_priorities},
+            {ov::hint::model_distribution_policy.name(), modelDistributionPolicy}};
 }

 ov::AnyMap Configuration::get_device_properties() const {
diff --git a/src/plugins/hetero/src/config.hpp b/src/plugins/hetero/src/config.hpp
index 55647b710d76b1..42d972c021343d 100644
--- a/src/plugins/hetero/src/config.hpp
+++ b/src/plugins/hetero/src/config.hpp
@@ -8,6 +8,7 @@
 #include <string>

 #include "openvino/runtime/properties.hpp"
+#include "properties.hpp"

 namespace ov {
 namespace hetero {
@@ -34,6 +35,9 @@ struct Configuration {
     bool dump_dot_files() const;

     std::string device_priorities;
+
+    std::set<ov::hint::ModelDistributionPolicy> modelDistributionPolicy = {};
+
     ov::AnyMap device_properties;
 };
 }  // namespace hetero
diff --git a/src/plugins/hetero/src/plugin.cpp b/src/plugins/hetero/src/plugin.cpp
index b4258881e3e686..0c48703aabd404 100644
--- a/src/plugins/hetero/src/plugin.cpp
+++ b/src/plugins/hetero/src/plugin.cpp
@@ -14,8 +14,12 @@

 #include "compiled_model.hpp"
 #include "itt.hpp"
+#include "op/device_subgraph.hpp"
+#include "openvino/core/graph_util.hpp"
 #include "openvino/core/rt_info.hpp"
+#include "openvino/op/util/op_types.hpp"
 #include "openvino/runtime/device_id_parser.hpp"
+#include "openvino/runtime/intel_gpu/properties.hpp"
 #include "openvino/runtime/internal_properties.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/util/common_util.hpp"
@@ -77,23 +81,105 @@ ov::hetero::Plugin::DeviceProperties ov::hetero::Plugin::get_properties_per_device(
     return device_properties;
 }

+void ov::hetero::Plugin::get_device_memory_map(const std::vector<std::string>& device_names,
+                                               std::map<std::string, size_t>& available_device_mem_map) const {
+    // TODO: add a unified API to get device memory.
+    // There is no unified API to get device memory, so this feature retrieves the memory of each specific
+    // device with a device-specific method. Devices whose memory size cannot be obtained are skipped.
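+    // (Illustrative note, not from the original patch: assuming the map holds size_t values, the -1 stored
+    // for CPU below wraps to the maximum size_t, i.e. the CPU is treated as having effectively unlimited
+    // memory, e.g. {"CPU": SIZE_MAX, "GPU.0": <bytes from ov::intel_gpu::device_total_mem_size>}.)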
+    for (const auto& device_name : device_names) {
+        if (device_name.find("CPU") != std::string::npos) {
+            // Assuming the CPU has enough memory
+            available_device_mem_map["CPU"] = -1;
+        } else if (device_name.find("GPU") != std::string::npos) {
+            try {
+                size_t device_mem = get_core()->get_property(device_name, ov::intel_gpu::device_total_mem_size);
+                available_device_mem_map[device_name] = device_mem;
+            } catch (const ov::Exception&) {
+            }
+        }
+    }
+}
+
 std::pair<ov::SupportedOpsMap, ov::hetero::SubgraphsMappingInfo> ov::hetero::Plugin::query_model_update(
     std::shared_ptr<ov::Model>& model,
     const ov::AnyMap& properties,
     bool allow_exception) const {
+    std::map<std::string, size_t> available_device_mem_map;
     Configuration full_config{properties, m_cfg};
     DeviceProperties properties_per_device =
         get_properties_per_device(full_config.device_priorities, full_config.get_device_properties());

     // WARNING: Here is devices with user set priority
     auto device_names = ov::DeviceIDParser::get_hetero_devices(full_config.device_priorities);
+    bool hetero_query_model_by_device = false;
+    if (full_config.modelDistributionPolicy.count(ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL) != 0) {
+        get_device_memory_map(device_names, available_device_mem_map);
+        // Disable per-device hetero query model if no device's available memory could be obtained.
+        if (available_device_mem_map.size() != 0) {
+            hetero_query_model_by_device = true;
+        }
+    }

     auto update_supported_ops = [](ov::SupportedOpsMap& final_results, const ov::SupportedOpsMap& device_results) {
         for (const auto& layer_query_result : device_results)
             final_results.emplace(layer_query_result);
     };

+    auto has_subgraph_ops = [](std::shared_ptr<ov::Model>& model) {
+        for (auto& op : model->get_ordered_ops()) {
+            if (ov::as_type_ptr<ov::hetero::op::DeviceSubgraph>(op)) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    auto update_config = [&](ov::AnyMap& device_config,
+                             const std::shared_ptr<ov::Model>& model,
+                             std::string device_name,
+                             bool fallback_device) {
+        auto internal_supported_properties = get_core()->get_property(device_name, ov::internal::supported_properties);
+        if (ov::util::contains(internal_supported_properties, ov::internal::query_model_ratio)) {
+            if (fallback_device) {
+                device_config[ov::internal::query_model_ratio.name()] = 1.0f;
+            } else if (available_device_mem_map.count(device_name)) {
+                size_t total_ops_size = 0;
+                size_t available_discrete_device_memory = 0;
+                for (auto&& op : model->get_ordered_ops()) {
+                    if (ov::op::util::is_constant(op)) {
+                        total_ops_size += op->get_element_type().size() * shape_size(op->get_shape());
+                    }
+                }
+                for (auto& device_mem_info : available_device_mem_map) {
+                    if (device_mem_info.first.find("CPU") != 0)
+                        available_discrete_device_memory += device_mem_info.second;
+                }
+                // Estimate the memory required for the model as 1.2 * total_ops_size
+                // 1. Check if the current device can take the entire model
+                // 2.
+ ov::SupportedOpsMap supported_ops_temp; + ov::SupportedOpsMap supported_ops_temp_1; ov::SupportedOpsMap supported_ops_final; std::map<std::string, ov::SupportedOpsMap> query_results; ov::hetero::SubgraphsMappingInfo mapping_info; @@ -109,15 +195,46 @@ std::pair<ov::SupportedOpsMap, ov::hetero::SubgraphsMappingInfo> ov::hetero::Plu for (const auto& device_name : device_names) { // If there are some unsupported operations and it is the last device // an exception should be raised when allowed - const auto& default_device = (!allow_exception || device_name != device_names.back()) ? get_device_name() : ""; - const auto& device_config = properties_per_device.at(device_name); - query_results[device_name] = get_core()->query_model(model, device_name, device_config); - // Update supported operations map which includes new operations - update_supported_ops(supported_ops_temp, query_results[device_name]); - // Update supported operations map which includes original operations only - update_supported_ops(supported_ops_final, query_results[device_name]); - mapping_info = - ov::hetero::mask_model_subgraphs_by_ops(model, supported_ops_temp, m_cfg.dump_dot_files(), default_device); + bool fallback_device = (device_name == device_names.back()); + const auto& default_device = (!allow_exception || !fallback_device) ?
get_device_name() : ""; + auto& device_config = properties_per_device.at(device_name); + if (!has_subgraph_ops(model)) { + if (hetero_query_model_by_device) + update_config(device_config, model, device_name, fallback_device); + query_results[device_name] = get_core()->query_model(model, device_name, device_config); + update_supported_ops(supported_ops_temp, query_results[device_name]); + update_supported_ops(supported_ops_final, query_results[device_name]); + mapping_info = ov::hetero::mask_model_subgraphs_by_ops(model, + supported_ops_temp, + m_cfg.dump_dot_files(), + default_device); + } else { + // Mask supported nodes and left nodes to Subgraph in graph, and query model use subgraph, keep the + // model in query_model same as compile + auto temp_model = model->clone(); + update_supported_ops(supported_ops_temp_1, supported_ops_temp); + for (auto&& node : temp_model->get_ops()) { + supported_ops_temp_1.emplace(node->get_friendly_name(), "HETERO-TEMP"); + } + auto mapping_info_temp = + ov::hetero::mask_model_subgraphs_by_ops(temp_model, supported_ops_temp_1, false, default_device); + for (const auto& op : temp_model->get_ordered_ops()) { + if (const auto& subgraph = ov::as_type_ptr(op)) { + if (subgraph->get_affinity() == "HETERO-TEMP") { + if (hetero_query_model_by_device) + update_config(device_config, subgraph->get_function(), device_name, fallback_device); + query_results[device_name] = + get_core()->query_model(subgraph->get_function(), device_name, device_config); + update_supported_ops(supported_ops_temp, query_results[device_name]); + update_supported_ops(supported_ops_final, query_results[device_name]); + } + } + } + mapping_info = ov::hetero::mask_model_subgraphs_by_ops(model, + supported_ops_temp, + m_cfg.dump_dot_files(), + default_device); + } } return {supported_ops_final, mapping_info}; } @@ -145,7 +262,7 @@ ov::Any ov::hetero::Plugin::get_property(const std::string& name, const ov::AnyM return ro_properties; }; const auto& default_rw_properties = []() { - std::vector rw_properties{ov::device::priorities}; + std::vector rw_properties{ov::device::priorities, ov::hint::model_distribution_policy}; return rw_properties; }; diff --git a/src/plugins/hetero/src/plugin.hpp b/src/plugins/hetero/src/plugin.hpp index bd9b897c1c2af7..d3038642d56c76 100644 --- a/src/plugins/hetero/src/plugin.hpp +++ b/src/plugins/hetero/src/plugin.hpp @@ -60,6 +60,9 @@ class Plugin : public ov::IPlugin { DeviceProperties get_properties_per_device(const std::string& device_priorities, const ov::AnyMap& properties) const; + void get_device_memory_map(const std::vector& device_names, + std::map& device_mem_map) const; + std::pair query_model_update( std::shared_ptr& model, const ov::AnyMap& properties, diff --git a/src/plugins/hetero/src/properties.hpp b/src/plugins/hetero/src/properties.hpp index f5f2b2a1c7693e..c008bf1155f09f 100644 --- a/src/plugins/hetero/src/properties.hpp +++ b/src/plugins/hetero/src/properties.hpp @@ -17,6 +17,5 @@ static constexpr Property caching_device_pr * @brief Read-only property showing number of compiled submodels */ static constexpr Property number_of_submodels{"HETERO_NUMBER_OF_SUBMODELS"}; - } // namespace hetero } // namespace ov diff --git a/src/plugins/hetero/src/sync_infer_request.cpp b/src/plugins/hetero/src/sync_infer_request.cpp index cdd1825b86cf23..556b1749755df7 100644 --- a/src/plugins/hetero/src/sync_infer_request.cpp +++ b/src/plugins/hetero/src/sync_infer_request.cpp @@ -13,6 +13,7 @@ #include "compiled_model.hpp" #include "itt.hpp" #include 
"openvino/core/except.hpp" +#include "openvino/runtime/make_tensor.hpp" #include "plugin.hpp" ov::hetero::InferRequest::InferRequest(const std::shared_ptr& compiled_model) @@ -33,6 +34,7 @@ ov::hetero::InferRequest::InferRequest(const std::shared_ptr, ov::SoPtr> temp_tensor_map; for (const auto& kvp : compiled_model->m_mapping_info._submodels_input_to_prev_output) { const auto& submodel_idx_in = kvp.first.first; const auto& port_idx_in = kvp.first.second; @@ -41,8 +43,14 @@ ov::hetero::InferRequest::InferRequest(const std::shared_ptrget_compiled_model()->outputs()[port_idx_out]; const auto& output_tensor = m_subrequests[submodel_idx_out]->get_tensor(output_port); + if (temp_tensor_map.find(output_port) == temp_tensor_map.end()) { + temp_tensor_map[output_port] = { + ov::make_tensor(output_tensor->get_element_type(), output_tensor->get_shape()), + nullptr}; + } + m_subrequests[submodel_idx_out]->set_tensor(output_port, temp_tensor_map[output_port]); const auto& input_port = m_subrequests[submodel_idx_in]->get_compiled_model()->inputs()[port_idx_in]; - m_subrequests[submodel_idx_in]->set_tensor(input_port, output_tensor); + m_subrequests[submodel_idx_in]->set_tensor(input_port, temp_tensor_map[output_port]); } } diff --git a/src/plugins/hetero/tests/functional/CMakeLists.txt b/src/plugins/hetero/tests/functional/CMakeLists.txt index 678e7cface695b..196a8269080664 100644 --- a/src/plugins/hetero/tests/functional/CMakeLists.txt +++ b/src/plugins/hetero/tests/functional/CMakeLists.txt @@ -15,6 +15,9 @@ ov_add_test_target( gtest gtest_main common_test_utils + INCLUDES + PUBLIC + $/src ADD_CLANG_FORMAT LABELS OV UNIT HETERO diff --git a/src/plugins/hetero/tests/functional/hetero_tests.cpp b/src/plugins/hetero/tests/functional/hetero_tests.cpp index a7e33605ad8dc6..d375c9a70e6a29 100644 --- a/src/plugins/hetero/tests/functional/hetero_tests.cpp +++ b/src/plugins/hetero/tests/functional/hetero_tests.cpp @@ -15,6 +15,7 @@ #include "openvino/pass/manager.hpp" #include "openvino/pass/serialize.hpp" #include "openvino/runtime/exec_model_info.hpp" +#include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/iremote_context.hpp" @@ -177,6 +178,29 @@ std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_ind return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param1, param2}); } +std::shared_ptr ov::hetero::tests::HeteroTests::create_model_with_multi_add() { + auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 1, 1}); + param->set_friendly_name("input"); + auto const_value1 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value1->set_friendly_name("const_val1"); + auto add1 = std::make_shared(param, const_value1); + add1->set_friendly_name("add1"); + auto const_value2 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value2->set_friendly_name("const_val2"); + auto add2 = std::make_shared(add1, const_value2); + add2->set_friendly_name("add2"); + auto const_value3 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value3->set_friendly_name("const_val3"); + auto add3 = std::make_shared(add2, const_value3); + add3->set_friendly_name("add3"); + auto const_value4 = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 3, 1, 1}, {1}); + const_value4->set_friendly_name("const_val4"); + auto add4 = std::make_shared(add3, const_value4); + 
add4->set_friendly_name("add4"); + auto result = std::make_shared(add4); + result->set_friendly_name("res"); + return std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); +} // Mock plugins class MockCompiledModel : public ov::ICompiledModel { @@ -546,7 +570,9 @@ class MockPluginBase : public ov::IPlugin { auto device_id = properties.count(ov::device::id.name()) ? properties.at(ov::device::id.name()).as() : m_default_device_id; - + float query_model_ratio = properties.count(ov::internal::query_model_ratio.name()) + ? properties.at(ov::internal::query_model_ratio.name()).as() + : 1.0f; auto supported = ov::get_supported_nodes( model, [&](std::shared_ptr& model) { @@ -561,7 +587,8 @@ class MockPluginBase : public ov::IPlugin { if (m_supported_ops.find(op->get_type_info().name) == m_supported_ops.end()) return false; return true; - }); + }, + query_model_ratio); for (auto&& op_name : supported) { res.emplace(op_name, get_device_name() + "." + device_id); } @@ -743,6 +770,117 @@ class MockPluginSubtract : public MockPluginBase { } }; +class MockPluginGPU : public MockPluginBase { +public: + MockPluginGPU(const std::string& name) + : MockPluginBase(name, {"Parameter", "Result", "Add", "Constant", "Reshape"}, true) {} + + const ov::Version& get_const_version() override { + static const ov::Version version = {CI_BUILD_NUMBER, "openvino_mock_reshape_plugin"}; + return version; + } + void set_property(const ov::AnyMap& properties) override { + for (const auto& it : properties) { + if (it.first == ov::num_streams.name()) + num_streams = it.second.as(); + else if (it.first == ov::enable_profiling.name()) + m_profiling = it.second.as(); + else if (it.first == ov::internal::exclusive_async_requests.name()) + exclusive_async_requests = it.second.as(); + else if (it.first == ov::device::id.name()) + continue; + else + OPENVINO_THROW(get_device_name(), " set config: " + it.first); + } + } + + ov::Any get_property(const std::string& name, const ov::AnyMap& arguments) const override { + const static std::vector device_ids = {"0", "1", "2"}; + const std::vector roProperties{RO_property(ov::supported_properties.name()), + RO_property(ov::optimal_batch_size.name()), + RO_property(ov::device::capabilities.name()), + RO_property(ov::device::type.name()), + RO_property(ov::device::uuid.name()), + RO_property(ov::device::id.name()), + RO_property(ov::intel_gpu::memory_statistics.name()), + RO_property(ov::intel_gpu::device_total_mem_size.name())}; + // the whole config is RW before network is loaded. 
+ const std::vector rwProperties{RW_property(ov::num_streams.name()), + RW_property(ov::enable_profiling.name()), + RW_property(ov::compilation_num_threads.name()), + RW_property(ov::hint::performance_mode.name()), + RW_property(ov::hint::num_requests.name())}; + std::string device_id; + if (arguments.find(ov::device::id.name()) != arguments.end()) { + device_id = arguments.find(ov::device::id.name())->second.as(); + } + if (name == ov::supported_properties) { + std::vector supportedProperties; + supportedProperties.reserve(roProperties.size() + rwProperties.size()); + supportedProperties.insert(supportedProperties.end(), roProperties.begin(), roProperties.end()); + supportedProperties.insert(supportedProperties.end(), rwProperties.begin(), rwProperties.end()); + + return decltype(ov::supported_properties)::value_type(supportedProperties); + } else if (name == ov::internal::supported_properties) { + return decltype(ov::internal::supported_properties)::value_type( + {ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::query_model_ratio.name(), ov::PropertyMutability::RW}}); + } else if (name == ov::internal::exclusive_async_requests) { + return decltype(ov::internal::exclusive_async_requests)::value_type{exclusive_async_requests}; + } else if (name == ov::device::uuid) { + ov::device::UUID uuid; + for (size_t i = 0; i < uuid.MAX_UUID_SIZE; i++) { + if (device_id == device_ids[0]) + uuid.uuid[i] = static_cast(i); + else if (device_id == device_ids[1]) + uuid.uuid[i] = static_cast(i * 2); + else if (device_id == device_ids[2]) + uuid.uuid[i] = static_cast(i * 3); + } + return decltype(ov::device::uuid)::value_type{uuid}; + } else if (name == ov::available_devices) { + return decltype(ov::available_devices)::value_type(device_ids); + } else if (name == ov::device::capabilities) { + std::vector capabilities; + capabilities.push_back(ov::device::capability::EXPORT_IMPORT); + return decltype(ov::device::capabilities)::value_type(capabilities); + } else if (ov::internal::caching_properties == name) { + std::vector caching_properties = {ov::device::uuid}; + return decltype(ov::internal::caching_properties)::value_type(caching_properties); + } else if (name == ov::loaded_from_cache.name()) { + return m_loaded_from_cache; + } else if (name == ov::enable_profiling.name()) { + return decltype(ov::enable_profiling)::value_type{m_profiling}; + } else if (name == ov::streams::num.name()) { + return decltype(ov::streams::num)::value_type{num_streams}; + } else if (name == ov::intel_gpu::device_total_mem_size.name()) { + size_t mem_size = 0; + if (device_id == "0") + mem_size = 64; + else if (device_id == "1") + mem_size = 16; + else if (device_id == "2") + mem_size = 32; + return decltype(ov::intel_gpu::device_total_mem_size)::value_type{mem_size}; + } else if (name == ov::device::type.name()) { + ov::device::Type device_type = ov::device::Type::INTEGRATED; + if (device_id == "0") + device_type = ov::device::Type::INTEGRATED; + else if (device_id == "1") + device_type = ov::device::Type::DISCRETE; + else if (device_id == "2") + device_type = ov::device::Type::DISCRETE; + return decltype(ov::device::type)::value_type(device_type); + } + OPENVINO_THROW("Unsupported property: ", name); + } + +private: + int32_t num_streams{0}; + bool exclusive_async_requests = false; +}; + void ov::hetero::tests::HeteroTests::reg_plugin(std::shared_ptr& plugin) { 
std::string library_path = get_mock_engine_path(); if (!m_so) @@ -766,5 +904,6 @@ void ov::hetero::tests::HeteroTests::SetUp() { if (m_mock_plugins.empty()) { reg_plugin_type("MOCK0"); reg_plugin_type("MOCK1"); + reg_plugin_type("MOCKGPU"); } } \ No newline at end of file diff --git a/src/plugins/hetero/tests/functional/hetero_tests.hpp b/src/plugins/hetero/tests/functional/hetero_tests.hpp index f25d9d0feedcce..3890a91d21495a 100644 --- a/src/plugins/hetero/tests/functional/hetero_tests.hpp +++ b/src/plugins/hetero/tests/functional/hetero_tests.hpp @@ -26,6 +26,7 @@ class HeteroTests : public ::testing::Test { std::shared_ptr create_model_with_reshape(bool dynamic = false); std::shared_ptr create_model_with_subtract_shapeof_reshape(bool dynamic = false); std::shared_ptr create_model_with_independent_parameter(bool dynamic = false); + std::shared_ptr create_model_with_multi_add(); ov::Tensor create_and_fill_tensor(const ov::element::Type& type, const ov::Shape& shape); private: diff --git a/src/plugins/hetero/tests/functional/properties_tests.cpp b/src/plugins/hetero/tests/functional/properties_tests.cpp index a9e596181a3076..474ada15ca69c6 100644 --- a/src/plugins/hetero/tests/functional/properties_tests.cpp +++ b/src/plugins/hetero/tests/functional/properties_tests.cpp @@ -3,6 +3,7 @@ // #include "hetero_tests.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "properties.hpp" using namespace ov::hetero::tests; @@ -10,7 +11,8 @@ TEST_F(HeteroTests, get_property_supported_properties) { const std::vector supported_properties = {ov::supported_properties, ov::device::full_name, ov::device::capabilities, - ov::device::priorities}; + ov::device::priorities, + ov::hint::model_distribution_policy}; auto actual_supported_properties = core.get_property("HETERO", ov::supported_properties); EXPECT_EQ(supported_properties.size(), actual_supported_properties.size()); for (auto& supported_property : supported_properties) { @@ -41,4 +43,19 @@ TEST_F(HeteroTests, set_property_device_priorities) { EXPECT_EQ("", core.get_property("HETERO", ov::device::priorities)); core.set_property("HETERO", ov::device::priorities("MOCK0,MOCK1")); EXPECT_EQ("MOCK0,MOCK1", core.get_property("HETERO", ov::device::priorities)); +} + +TEST_F(HeteroTests, set_property_ModelDistributionPolicy) { + std::set value = {}; + std::set model_policy = {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}; + + ASSERT_NO_THROW(core.set_property("HETERO", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = core.get_property("HETERO", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); + + model_policy = {}; + + ASSERT_NO_THROW(core.set_property("HETERO", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = core.get_property("HETERO", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); } \ No newline at end of file diff --git a/src/plugins/hetero/tests/functional/query_model_tests.cpp b/src/plugins/hetero/tests/functional/query_model_tests.cpp index 6ec4f17f053803..fab5e78220a82f 100644 --- a/src/plugins/hetero/tests/functional/query_model_tests.cpp +++ b/src/plugins/hetero/tests/functional/query_model_tests.cpp @@ -114,3 +114,61 @@ TEST_F(HeteroTests, query_model_on_independent_parameter) { } EXPECT_EQ(0, names.size()); } + +TEST_F(HeteroTests, query_model_by_three_device) { + const std::string dev_name0 = "MOCKGPU.2"; + const std::string dev_name1 = "MOCKGPU.1"; + const std::string dev_name2 = "MOCKGPU.0"; + std::set 
model_policy = {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}; + // This workaround is needed because mock plugins are loaded one by one + EXPECT_NO_THROW(core.get_available_devices()); + const auto model = create_model_with_multi_add(); + const auto supported_ops = core.query_model(model, + "HETERO", + {ov::device::priorities(dev_name0 + "," + dev_name1 + "," + dev_name2), + ov::hint::model_distribution_policy(model_policy)}); + std::map<std::string, std::string> expect_result = {{"input", "MOCKGPU.2"}, + {"const_val1", "MOCKGPU.2"}, + {"const_val2", "MOCKGPU.2"}, + {"add1", "MOCKGPU.2"}, + {"add2", "MOCKGPU.2"}, + {"const_val3", "MOCKGPU.1"}, + {"add3", "MOCKGPU.1"}, + {"const_val4", "MOCKGPU.0"}, + {"add4", "MOCKGPU.0"}, + {"res", "MOCKGPU.0"}}; + for (const auto& op : supported_ops) { + if (expect_result.find(op.first) != expect_result.end()) { + EXPECT_EQ(op.second, expect_result[op.first]); + } + } +} + +TEST_F(HeteroTests, query_model_by_two_device) { + const std::string dev_name0 = "MOCKGPU.2"; + const std::string dev_name1 = "MOCKGPU.0"; + std::set<ov::hint::ModelDistributionPolicy> model_policy = {ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}; + + // This workaround is needed because mock plugins are loaded one by one + EXPECT_NO_THROW(core.get_available_devices()); + const auto model = create_model_with_multi_add(); + const auto supported_ops = core.query_model( + model, + "HETERO", + {ov::device::priorities(dev_name0 + "," + dev_name1), ov::hint::model_distribution_policy(model_policy)}); + std::map<std::string, std::string> expect_result = {{"input", "MOCKGPU.2"}, + {"const_val1", "MOCKGPU.2"}, + {"const_val2", "MOCKGPU.2"}, + {"add1", "MOCKGPU.2"}, + {"add2", "MOCKGPU.2"}, + {"const_val3", "MOCKGPU.0"}, + {"add3", "MOCKGPU.0"}, + {"const_val4", "MOCKGPU.0"}, + {"add4", "MOCKGPU.0"}, + {"res", "MOCKGPU.0"}}; + for (const auto& op : supported_ops) { + if (expect_result.find(op.first) != expect_result.end()) { + EXPECT_EQ(op.second, expect_result[op.first]); + } + } +} \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index cf992e4a678127..323e848c8bc96e 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -587,6 +587,26 @@ static void attn_reduce(T* dst, float* temp, size_t M, size_t S, size_t temp_str } } +template <typename T0, typename T1, typename T2, typename F> +void parallel_for3d_dynamic(const T0& D0, const T1& D1, const T2& D2, const F& func) { +#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) + tbb::parallel_for(tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2), + [=](const tbb::blocked_range3d<T0, T1, T2>& r) { + for (T0 d0 = r.pages().begin(); d0 < r.pages().end(); d0++) { + for (T1 d1 = r.rows().begin(); d1 < r.rows().end(); d1++) { + for (T2 d2 = r.cols().begin(); d2 < r.cols().end(); d2++) { + func(d0, d1, d2); + } + } + } + }); +#else + parallel_for3d(D0, D1, D2, [&](size_t d0, size_t d1, size_t d2) { + func(d0, d1, d2); + }); +#endif +} +
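parallel_for3d_dynamic above replaces the static splitter with TBB's recursive range splitting, so threads steal work when per-batch context lengths make iteration costs uneven. The same pattern in standalone form (illustrative sizes; requires linking against TBB):

#include <tbb/blocked_range3d.h>
#include <tbb/parallel_for.h>

int main() {
    const size_t B = 4, H = 8, L = 1024;
    // TBB splits the 3D range into blocks and rebalances them across threads,
    // unlike a static splitter that fixes each thread's share up front.
    tbb::parallel_for(tbb::blocked_range3d<size_t>(0, B, 0, H, 0, L),
                      [](const tbb::blocked_range3d<size_t>& r) {
                          for (size_t b = r.pages().begin(); b < r.pages().end(); b++)
                              for (size_t h = r.rows().begin(); h < r.rows().end(); h++)
                                  for (size_t i = r.cols().begin(); i < r.cols().end(); i++) {
                                      // per-element work goes here
                                  }
                      });
    return 0;
}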
template static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, const ov::intel_cpu::PlainTensor& present_key, @@ -641,32 +661,32 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, }); } #endif - parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { - size_t start{0}, end{0}; - splitter(B * h_group_num * kv_len, nthr, ithr, start, end); - - size_t b, h_group, pk; - if (start < end) { - parallel_it_init(start, b, B, h_group, h_group_num, pk, kv_len); - if (is_pagedattn) { - for (size_t iwork = start; iwork < end; ++iwork) { - auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); - // kv_len must be valid - if (pk < context_len) { - auto block_idx = beams.ptr<int32_t>(b)[pk]; - OPENVINO_ASSERT(block_idx >= 0, "block idx must be greater or equal than 0"); - for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { - buf_attn_w.ptr<float>(b, h, pq)[pk] = - dot_product(query.ptr<T>(b, h, pq), present_key.ptr<T2>(block_idx, h_group), - S, nullptr, nullptr, nullptr); - } - } + if (is_pagedattn) { + parallel_for3d_dynamic(B, h_group_num, kv_len, [&](size_t b, size_t h_group, size_t pk) { + auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); + // only positions within this batch's context length are valid + if (pk < context_len) { + auto block_idx = beams.ptr<int32_t>(b)[pk]; + OPENVINO_ASSERT(block_idx >= 0, "block idx must be greater than or equal to 0"); + + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + buf_attn_w.ptr<float>(b, h, pq)[pk] = + dot_product(query.ptr<T>(b, h, pq), present_key.ptr<T2>(block_idx, h_group), + S, nullptr, nullptr, nullptr); } - parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); } - } else { + } + }); + } else { + parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { + size_t start{0}, end{0}; + splitter(B * h_group_num * kv_len, nthr, ithr, start, end); + + size_t b, h_group, pk; + if (start < end) { + parallel_it_init(start, b, B, h_group, h_group_num, pk, kv_len); if (q_len == 1 && h_each_group_len == 1) { if (B == 1) { // the memory will be contiguous when B == 1 @@ -705,70 +725,96 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, } } } - } - }); + }); + } - parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { - auto cur_kv_len = kv_len; - auto ncausal = auto_causal ? (cur_kv_len - q_len + pq + 1) : cur_kv_len; - if (is_pagedattn) { - cur_kv_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); - ncausal = cur_kv_len; - } - // apply attention mask & sofmax - float* alibi_ptr = alibi_mask ? &alibi_mask.at<float>({b, h, pq, 0}, true) : nullptr; - uint8_t* attn_mask_ptr = nullptr; - auto attn_mask_prec = attention_mask.get_precision(); - if (attention_mask) - attn_mask_ptr = reinterpret_cast<uint8_t*>(&attention_mask.at<T>({b, h, pq, 0}, true)); - uint8_t* cmask_ptr = causal_mask ? &causal_mask.at<uint8_t>({b, h, pq, 0}, true) : nullptr; - attn_softmax_kernel(buf_attn_w.ptr<float>(b, h, pq), - buf_attn_w.ptr<float>(b, h, pq), - d_scale, - alibi_ptr, - attn_mask_ptr, - cmask_ptr, - select_nfltmax_at_0, - ncausal, - cur_kv_len, - attn_mask_prec, - ov::element::f32); - }); + if (is_pagedattn) { + parallel_for3d_dynamic(B, H, q_len, [&](size_t b, size_t h, size_t pq) { + auto cur_kv_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); + auto ncausal = cur_kv_len; + // apply attention mask & softmax + float* alibi_ptr = alibi_mask ? &alibi_mask.at<float>({b, h, pq, 0}, true) : nullptr; + uint8_t* attn_mask_ptr = nullptr; + auto attn_mask_prec = attention_mask.get_precision(); + if (attention_mask) + attn_mask_ptr = reinterpret_cast<uint8_t*>(&attention_mask.at<T>({b, h, pq, 0}, true)); + uint8_t* cmask_ptr = causal_mask ? &causal_mask.at<uint8_t>({b, h, pq, 0}, true) : nullptr; + attn_softmax_kernel(buf_attn_w.ptr<float>(b, h, pq), + buf_attn_w.ptr<float>(b, h, pq), + d_scale, + alibi_ptr, + attn_mask_ptr, + cmask_ptr, + select_nfltmax_at_0, + ncausal, + cur_kv_len, + attn_mask_prec, + ov::element::f32); + }); + } else { + parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { + auto cur_kv_len = kv_len; + auto ncausal = auto_causal ? (cur_kv_len - q_len + pq + 1) : cur_kv_len; + // apply attention mask & softmax + float* alibi_ptr = alibi_mask ? &alibi_mask.at<float>({b, h, pq, 0}, true) : nullptr; + uint8_t* attn_mask_ptr = nullptr; + auto attn_mask_prec = attention_mask.get_precision(); + if (attention_mask) + attn_mask_ptr = reinterpret_cast<uint8_t*>(&attention_mask.at<T>({b, h, pq, 0}, true)); + uint8_t* cmask_ptr = causal_mask ? &causal_mask.at<uint8_t>({b, h, pq, 0}, true) : nullptr; + attn_softmax_kernel(buf_attn_w.ptr<float>(b, h, pq), + buf_attn_w.ptr<float>(b, h, pq), + d_scale, + alibi_ptr, + attn_mask_ptr, + cmask_ptr, + select_nfltmax_at_0, + ncausal, + cur_kv_len, + attn_mask_prec, + ov::element::f32); + }); + } // attn_w * V buf_attn_score.resize({static_cast<size_t>(nthr), B, q_len, H, S}); // buf_attn_w {B, H, q_len, kv_len} - parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { - size_t start{0}, end{0}; - splitter(B * h_group_num * kv_len, nthr, ithr, start, end); - - memset(buf_attn_score.ptr<float>(ithr, 0, 0, 0, 0), 0, buf_attn_score.stride(0) * sizeof(float)); - - size_t b, h_group, pv; - if (start < end) { - parallel_it_init(start, b, B, h_group, h_group_num, pv, kv_len); - if (is_pagedattn) { - for (size_t iwork = start; iwork < end; ++iwork) { - auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); - // kv_len must be valid - if (pv < context_len) { - auto block_idx = beams.ptr<int32_t>(b)[pv]; - OPENVINO_ASSERT(block_idx >= 0, "block idx in vcache must be greater or equal than 0"); - auto* v = present_value.ptr<T2>(block_idx, h_group); - for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { - attn_acc_value(buf_attn_score.ptr<float>(ithr, b, pq, h), - buf_attn_w.ptr<float>(b, h, pq)[pv], - v, - S, - nullptr, - nullptr); - } - } + + if (is_pagedattn) { + parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { + memset(buf_attn_score.ptr<float>(ithr, 0, 0, 0, 0), 0, buf_attn_score.stride(0) * sizeof(float)); + }); + + parallel_for3d_dynamic(B, h_group_num, kv_len, [&](size_t b, size_t h_group, size_t pv) { + auto ithr = parallel_get_thread_num(); + auto context_len = static_cast<size_t>(context_lens.ptr<int32_t>()[b]); + // only positions within this batch's context length are valid + if (pv < context_len) { + auto block_idx = beams.ptr<int32_t>(b)[pv]; + OPENVINO_ASSERT(block_idx >= 0, "block idx in vcache must be greater than or equal to 0"); + auto* v = present_value.ptr<T2>(block_idx, h_group); + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + attn_acc_value(buf_attn_score.ptr<float>(ithr, b, pq, h), + buf_attn_w.ptr<float>(b, h, pq)[pv], + v, + S, + nullptr, + nullptr); } - parallel_it_step(b, B, h_group, h_group_num, pv, kv_len); } - } else { + } + }); + } else { + parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { + size_t start{0}, end{0}; + splitter(B * h_group_num * kv_len, nthr, ithr, start, end); + + memset(buf_attn_score.ptr<float>(ithr, 0, 0, 0, 0), 0, buf_attn_score.stride(0) * sizeof(float)); + + size_t b, h_group, pv; + if (start < end) { + parallel_it_init(start, b, B, h_group, h_group_num, pv, kv_len); if (q_len == 1 && h_each_group_len == 1) { for
(size_t iwork = start; iwork < end; ++iwork) { auto b_kv = beams ? beams.ptr(b)[pv] : b; @@ -801,8 +847,8 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, } } } - } - }); + }); + } parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { auto* temp = buf_attn_score.ptr(0, b, pq, h); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl index 189b2ea62736d4..90dc0f07e0ffb0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl @@ -375,7 +375,7 @@ KERNEL(gemm_tiled_opt)( #endif // TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST // Loading A tile and tile C calculation -#if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING && !HAS_DYNAMIC_N_PADDING && TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST +#if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING && TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST #if TILE_K_NOT_DIVISIBLE A_FLOATN a_read = TILE_K_NOT_DIVISIBLE_CALC ? a_ptr[sglid] : BLOCK_READ_A(a_ptr, 0); #else @@ -413,7 +413,6 @@ KERNEL(gemm_tiled_opt)( b_tile[subtile_k_id * SIMD_WIDTH + simd_local_id], c_tile[dot_id]); #else // TILE_K > SIMD_WIDTH #if IS_DYNAMIC && B_VEC_SIZE > 1 - A_FLOATN a_read_tmp = sub_group_broadcast(a_read, simd_local_id); #if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE) b_tile_tmp; unroll_for (uint b_elem = 0; b_elem < B_VEC_SIZE; ++b_elem) { @@ -429,7 +428,7 @@ KERNEL(gemm_tiled_opt)( #endif // TILE_K > SIMD_WIDTH } } - #if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING && !HAS_DYNAMIC_N_PADDING + #if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING // Read A for next dot_id #if TILE_K_NOT_DIVISIBLE a_read = (dot_id + 1 < tile_m_iterations) ? TILE_K_NOT_DIVISIBLE_CALC ? 
a_ptr[sglid] : BLOCK_READ_A(a_ptr, 0) : 0; @@ -592,12 +591,11 @@ KERNEL(gemm_tiled_opt)( unroll_for (uint simd_id = 0; simd_id < TILE_K_LEFTOVER; simd_id++) { #if B_VEC_SIZE > 1 - A_FLOATN a_read_tmp = sub_group_broadcast(a_read, simd_id); #if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE) b_tile_tmp = {b_tile[0][simd_id], b_tile[1][simd_id]}; - c_tile[dot_id] = mad((MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE))(a_read_tmp), b_tile_tmp, c_tile[dot_id]); + c_tile[dot_id] = mad((INPUT0_TYPE)sub_group_broadcast(a_read, simd_id), b_tile_tmp, c_tile[dot_id]); #else - c_tile[dot_id] = mad((MAKE_VECTOR_TYPE(INPUT1_TYPE, B_VEC_SIZE))(a_read_tmp), b_tile[simd_id], c_tile[dot_id]); + c_tile[dot_id] = mad((INPUT0_TYPE)sub_group_broadcast(a_read, simd_id), b_tile[simd_id], c_tile[dot_id]); #endif #else c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_id)), b_tile[simd_id], c_tile[dot_id]); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 3f26c86a241644..1e47ebecab8512 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -256,6 +256,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& ProgramBuilder prog(ctx->get_engine(), config); + float query_model_ratio = config.get_property(ov::internal::query_model_ratio.name()).as(); + auto supported = ov::get_supported_nodes(model, [&config,this](std::shared_ptr& model) { std::map shapes; @@ -264,7 +266,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& }, [&prog](std::shared_ptr node) { return prog.is_op_supported(node); - }); + }, + query_model_ratio); for (auto&& op_name : supported) { res.emplace(op_name, ctx->get_device_name()); @@ -562,7 +565,8 @@ std::vector Plugin::get_supported_internal_properties() const ov::PropertyName{ov::internal::config_device_id.name(), ov::PropertyMutability::WO}, ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}}; + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::query_model_ratio.name(), PropertyMutability::RW}}; return supported_internal_properties; } diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 2b55f9d93d4449..cfc60af6663293 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -536,7 +536,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->enable(); pass_config->set_callback( - [](const_node_ptr &node) -> bool { + [&](const_node_ptr &node) -> bool { + OPENVINO_ASSERT(node->input_value(0).get_partial_shape().rank().is_static(), + node->get_friendly_name() + " has dynamic rank!"); return node->input_value(0).get_partial_shape().rank().get_length() <= 5; }); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index f3569d0cb2a2ee..7d80eddcde66fa 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -53,6 +53,7 @@ void 
ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), std::make_tuple(ov::internal::exclusive_async_requests, false), + std::make_tuple(ov::internal::query_model_ratio, 1.0f), std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), // Legacy API properties diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 70c7c20c0b7555..9220fad47269b4 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -361,7 +361,7 @@ class gemm_gpu_tests: public ::testing::Test { } } - void test_dynamic_padding(bool is_caching_test) { + void test_dynamic_padding(bool is_caching_test, bool n_dim_only) { tests::random_generator rg; rg.set_seed(GET_SUITE_NAME); @@ -414,10 +414,19 @@ class gemm_gpu_tests: public ::testing::Test { ov::Shape in2_shape_aligned = { aligned_batch1_size, aligned_batch2_size, aligned_k_size, aligned_n_size }; // Use dynamic padding for all BFYX dimensions - tensor dyn_pad_dims_input({1, 1, 1, 1}, 0); + tensor dyn_pad_dims_input1({0, 0, 0, 0}, 0); + tensor dyn_pad_dims_input2({0, 0, 0, 0}, 0); - auto in1_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input)}; - auto in2_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input)}; + if (n_dim_only) { + dyn_pad_dims_input1 = tensor({0, 0, 0, 0}, 0); + dyn_pad_dims_input2 = tensor({0, 0, 1, 0}, 0); + } else { + dyn_pad_dims_input1 = tensor({1, 1, 1, 1}, 0); + dyn_pad_dims_input2 = tensor({1, 1, 1, 1}, 0); + } + + auto in1_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input1)}; + auto in2_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input2)}; auto aligned_input1_mem = engine.allocate_memory({ov::PartialShape(in1_shape_aligned), data_types::f16, format::bfyx}); auto aligned_input2_mem = engine.allocate_memory({ov::PartialShape(in2_shape_aligned), data_types::f16, format::bfyx}); @@ -425,14 +434,14 @@ class gemm_gpu_tests: public ::testing::Test { auto input1_mem = engine.reinterpret_buffer(*aligned_input1_mem, layout{ov::PartialShape(in1_shape), data_types::f16, format::bfyx, - padding({padding_size_batch1, 0, 0, 0}, - {0, padding_size_batch2, padding_size_k, padding_size_m}, 0.0f, dyn_pad_dims_input)}); + n_dim_only ? padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input1) : + padding({padding_size_batch1, 0, 0, 0}, {0, padding_size_batch2, padding_size_k, padding_size_m}, 0.0f, dyn_pad_dims_input1)}); auto input2_mem = engine.reinterpret_buffer(*aligned_input2_mem, layout{ov::PartialShape(in2_shape), data_types::f16, format::bfyx, - padding({0, padding_size_batch2, 0, 0}, - {padding_size_batch1, 0, padding_size_n, padding_size_k}, 0.0f, dyn_pad_dims_input)}); + n_dim_only ? 
padding({0, 0, 0, 0}, {0, 0, padding_size_n, 0}, 0.0f, dyn_pad_dims_input2) : + padding({0, padding_size_batch2, 0, 0}, {padding_size_batch1, 0, padding_size_n, padding_size_k}, 0.0f, dyn_pad_dims_input2)}); auto input_1_data = rg.generate_random_1d(ov::shape_size(in1_shape), -2, 2); auto input_2_data = rg.generate_random_1d(ov::shape_size(in2_shape), -2, 2); @@ -1574,10 +1583,15 @@ TEST_F(gemm_gpu_tests, dynamic) { this->test_dynamic(false); } -TEST_F(gemm_gpu_tests, dynamic_padding) { - this->test_dynamic_padding(false); +TEST_F(gemm_gpu_tests, dynamic_padding_all_dim) { + this->test_dynamic_padding(false, false); } +TEST_F(gemm_gpu_tests, dynamic_padding_n_dim_only) { + this->test_dynamic_padding(false, true); +} + + TEST_F(gemm_gpu_tests, dynamic_multi_inference_same_shape) { this->test_dynamic_multi_inference_same_shape(false); }
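The functional tests above show the user-visible outcome of this PR: with PIPELINE_PARALLEL set, query_model reports the device each operation was assigned to. A minimal sketch of the same flow outside the test fixture, assuming two real GPUs are present; the device names and model path are placeholders:

#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    auto supported = core.query_model(
        model,
        "HETERO",
        {ov::device::priorities("GPU.0,GPU.1"),
         ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL})});
    for (const auto& kv : supported)  // op friendly name -> assigned device
        std::cout << kv.first << " -> " << kv.second << "\n";
    return 0;
}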