diff --git a/cmake/developer_package/ncc_naming_style/openvino.style b/cmake/developer_package/ncc_naming_style/openvino.style index 6608795381e4a1..141f02b9ab5808 100644 --- a/cmake/developer_package/ncc_naming_style/openvino.style +++ b/cmake/developer_package/ncc_naming_style/openvino.style @@ -18,7 +18,7 @@ VariableReference: '^\w+$' EnumName: '^[A-Z][\w]+$' # excepts element_type -EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|nf4|f8e4m3|f8e5m2|string|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$' +EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u2|u3|u4|u6|u8|u16|u32|u64|nf4|f8e4m3|f8e5m2|string|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$' # TODO: align UsingDeclaration: '^.*$' TypedefName: '^.*$' diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst index ffccba1adf3181..1cea2c755f1505 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst @@ -30,7 +30,7 @@ Table of Contents * :doc:`Assign <../operation-specs/infrastructure/assign-3>` * :doc:`Atan <../operation-specs/arithmetic/atan-1>` * :doc:`Atanh <../operation-specs/arithmetic/atanh-3>` -* :doc:`AvgPool <../operation-specs/pooling/avg-pool-1>` +* :doc:`AvgPool <../operation-specs/pooling/avg-pool-14>` * :doc:`BatchNormInference <../operation-specs/normalization/batch-norm-inference-5>` * :doc:`BatchToSpace 
<../operation-specs/movement/batch-to-space-2>` * :doc:`BinaryConvolution <../operation-specs/convolution/binary-convolution-1>` @@ -120,7 +120,7 @@ Table of Contents * :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` -* :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` +* :doc:`MaxPool <../operation-specs/pooling/max-pool-14>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` * :doc:`Minimum <../operation-specs/arithmetic/minimum-1>` * :doc:`Mish <../operation-specs/activation/mish-4>` diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst index 251fb60a45cd28..07242d20b85327 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst @@ -23,6 +23,7 @@ Operation Specifications Atan-1 Atanh-3 AvgPool-1 + AvgPool-14 BatchNormInference-1 BatchNormInference-5 BatchToSpace-2 @@ -127,6 +128,7 @@ Operation Specifications MatrixNms-8 MaxPool-1 MaxPool-8 + MaxPool-14 Maximum-1 Minimum-1 Mish-4 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/avg-pool-14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/avg-pool-14.rst new file mode 100644 index 00000000000000..080bcadf05850c --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/avg-pool-14.rst @@ -0,0 +1,199 @@ +.. {#openvino_docs_ops_pooling_AvgPool_14} + +AvgPool +======= + + +.. meta:: + :description: Learn about AvgPool-14 - a pooling operation, which can + be performed on a 3D, 4D or 5D input tensor. 
+ +**Versioned name**: *AvgPool-14* + +**Category**: *Pooling* + +**Short description**: Performs the average pooling operation on input. + +**Detailed description**: `Reference `__. Average Pool is a pooling operation that performs down-sampling by dividing the input into pooling regions of size specified by kernel attribute and computing the average values of each region. + +**Attributes**: *Pooling* attributes are specified in the ``data`` node, which is a child of the layer node. + +* *strides* + + * **Description**: *strides* is a distance (in pixels) to slide the window on the feature map over the (z, y, x) axes for 3D poolings and (y, x) axes for 2D poolings. For example, *strides* equal "4,2,1" means sliding the window 4 pixel at a time over depth dimension, 2 over height dimension and 1 over width dimension. + * **Range of values**: integer values starting from 0 + * **Type**: int[] + * **Required**: *yes* + +* *pads_begin* + + * **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal "1,2" means adding 1 pixel to the top of the input and 2 to the left of the input. + * **Range of values**: integer values starting from 0 + * **Type**: int[] + * **Required**: *yes* + * **Note**: the attribute is ignored when *auto_pad* attribute is specified. + +* *pads_end* + + * **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal "1,2" means adding 1 pixel to the bottom of the input and 2 to the right of the input. + * **Range of values**: integer values starting from 0 + * **Type**: int[] + * **Required**: *yes* + * **Note**: the attribute is ignored when *auto_pad* attribute is specified. + +* *kernel* + + * **Description**: *kernel* is a size of each filter. For example, *kernel* equal (2, 3) means that each filter has height equal to 2 and width equal to 3. 
+ * **Range of values**: integer values starting from 1 + * **Type**: int[] + * **Required**: *yes* + +* *exclude-pad* + + * **Description**: *exclude-pad* is a type of pooling strategy for values in the padding area. For example, if *exclude-pad* is "true", then zero-values that came from padding are not included in averaging calculation. + * **Range of values**: true or false + * **Type**: boolean + * **Required**: *yes* + +* *rounding_type* + + * **Description**: *rounding_type* is a type of rounding to be applied. *ceil_torch* does not allow the last pooling to start in the padding area. + * **Range of values**: + * *floor* + * *ceil* + * *ceil_torch* + * **Type**: string + * **Default value**: *floor* + * **Required**: *no* + +* *auto_pad* + + * **Description**: *auto_pad* how the padding is calculated. Possible values: + + * *explicit*: use explicit padding values from `pads_begin` and `pads_end`. + * *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value an extra padding is added at the end (at the beginning). + * *valid* - do not use padding. + * **Type**: string + * **Default value**: *explicit* + * **Required**: *no* + * **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified. + +**Input**: + +* **1**: 3D, 4D or 5D input tensor. Input shape can be either ``[N, C, H]``, ``[N, C, H, W]`` or ``[N, C, H, W, D]``. **Required.** + +**Output**: + +* **1**: The output shape is ``[N, C, H_out]``, ``[N, C, H_out, W_out]`` or ``[N, C, H_out, W_out, D_out]``. Output shape calculation rules and examples can be found in :doc:`Pooling Operators shape inference rules `. + +**Types** + +* *T*: floating point or integer type. + +* *T_IND*: ``int64`` or ``int32``. + + +**Examples** + +.. 
code-block:: xml + :force: + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 32 + 32 + + + + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 32 + 32 + + + + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 10 + 10 + + + + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 15 + 15 + + + + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 14 + 14 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/max-pool-14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/max-pool-14.rst new file mode 100644 index 00000000000000..4c52ce24ca43e3 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/max-pool-14.rst @@ -0,0 +1,207 @@ +.. {#openvino_docs_ops_pooling_MaxPool_14} + +MaxPool +======= + + +.. meta:: + :description: Learn about MaxPool-14 - a pooling operation, which can + be performed on a 3D, 4D or 5D input tensor. + +**Versioned name**: *MaxPool-14* + +**Category**: *Pooling* + +**Short description**: Performs the max pooling operation on input. + +**Detailed description**: Input shape can be either 3D, 4D, or 5D. The max pooling operation is performed with respect to input shape from the third dimension to the last dimension. If paddings are used, during the pooling calculation their values are ``-inf``. The max pooling operation involves sliding a filter over each channel of a feature map and downsampling by choosing the largest value within the region covered by the filter. + +**Attributes**: *Pooling* attributes are specified in the ``data`` node, which is a child of the layer node. + +* *strides* + + * **Description**: *strides* is a distance (in pixels) to slide the window on the feature map over the (z, y, x) axes for 3D poolings and (y, x) axes for 2D poolings. For example, *strides* equal to "4,2,1" means sliding the window 4 pixels at a time over depth dimension, 2 over height dimension, and 1 over width dimension. 
+ * **Range of values**: integer values starting from 0 + * **Type**: int[] + * **Required**: *yes* + +* *dilations* + + * **Description**: *dilations* specify the index of the next pixel to select when pooling. If not present, the dilation defaults to 1, meaning the adjacent pixel is chosen. A value of 2 indicates that one pixel is skipped and every other pixel is considered. Dilations specify one value for each spatial axis of the kernel: ``(z, y, x)`` for 3D poolings and ``(y, x)`` for 2D poolings. + * **Range of values**: integer values starting from 0 + * **Type**: int[] + * **Default value**: ``[1, 1, ...]`` + * **Required**: *no* + +* *pads_begin* + + * **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal to "1,2" means adding 1 pixel to the top of the input and 2 to the left of the input. All added padding values are equal to negative infinity. + * **Range of values**: integer values starting from 0 + * **Type**: int[] + * **Required**: *yes* + * **Note**: the attribute is ignored when *auto_pad* attribute is specified. + +* *pads_end* + + * **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal to "1,2" means adding 1 pixel to the bottom of the input and 2 to the right of the input. All added padding values are equal to negative infinity. + * **Range of values**: integer values starting from 0 + * **Type**: int[] + * **Required**: *yes* + * **Note**: the attribute is ignored when the *auto_pad* attribute is specified. + +* *kernel* + + * **Description**: *kernel* is a size of each filter. For example, *kernel* equal to (2, 3) means that each filter has height equal to 2 and width equal to 3. + * **Range of values**: integer values starting from 1 + * **Type**: int[] + * **Required**: *yes* + +* *rounding_type* + + * **Description**: *rounding_type* is a type of rounding to be used to compute output shape. 
*ceil_torch* does not allow the last pooling to start in the padding area. + * **Range of values**: + * *floor* + * *ceil* + * *ceil_torch* + * **Type**: string + * **Default value**: *floor* + * **Required**: *no* + +* *auto_pad* + + * **Description**: *auto_pad* how the padding is calculated. Possible values: + + * *explicit*: explicit padding values from ``pads_begin`` and ``pads_end`` are used. + * *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value, an extra padding is added at the end (at the beginning). + * *valid* padding is not used. + + * **Type**: string + * **Default value**: *explicit* + * **Required**: *no* + * **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is not equal to explicit. + +* *index_element_type* + + * **Description**: the type of output tensor with indices + * **Range of values**: "i64" or "i32" + * **Type**: string + * **Default value**: "i64" + * **Required**: *No* + +* *axis* + + * **Description**: indicator of the first dimension in the input shape that should be used to calculate the upper bound of allowed index output values. The upper bound is the product of dimensions starting from the one pointed by the 'axis' attribute until the end of the input shape. + * **Range of values**: integer number. Negative value means counting dimension from the end. The range is ``[-R, R - 1]``, where ``R`` is the rank of the input tensor. + * **Type**: int + * **Default value**: 0 + * **Required**: *No* + +**Inputs**: + +* **1**: 3D, 4D, or 5D input tensor of type T. Required. + +**Outputs**: + + * **1**: Input shape can be either ``[N, C, H]``, ``[N, C, H, W]``, or ``[N, C, H, W, D]``. The corresponding output shape is ``[N, C, H_out]``, ``[N, C, H_out, W_out]`` or ``[N, C, H_out, W_out, D_out]``. Output tensor has the same data type as the input tensor. Output shape calculation rules and examples can be found in :doc:`Pooling Operators shape inference rules `. 
+ + * **2**: Output tensor of type *T_IND* with indices of values selected by the pooling operation. + Shape of this output matches the first output. The type of this output can be specified using the ``index_element_type`` attribute. + Values are computed as indices in a tensor flattened to 1D, not considering padding. Examples for a 5D input tensor: + + * When ``axis == 0``, the values are in the range ``[0, N * C * H * W * D)``. + * When ``axis == 2``, the values are in the range ``[0, H * W * D)``. + + .. note:: + + The values of this output can only be calculated correctly if ``pads_value`` is set to ``-infinity``. + + +**Types** + +* *T*: floating point or integer type. + +* *T_IND*: ``int64`` or ``int32``. + + +**Examples** + +.. code-block:: xml + :force: + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 32 + 32 + + + 1 + 3 + 32 + 32 + + + + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 17 + 17 + + + 1 + 3 + 17 + 17 + + + + + + + + + 1 + 3 + 32 + 32 + + + + + 1 + 3 + 16 + 16 + + + 1 + 3 + 16 + 16 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/pooling_shape_rules.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/pooling_shape_rules.rst new file mode 100644 index 00000000000000..cc5105a4f11697 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/pooling/pooling_shape_rules.rst @@ -0,0 +1,208 @@ +.. {#openvino_docs_pooling_shape_rules} + +Shape calculation rules for Pooling Operators +============================================= + +.. meta:: + :description: Learn about output shape calculation rules for OpenVINO Pooling Operators. 
**Mathematical Formulation** + +Output shape calculation based on ``auto_pad`` and ``rounding_type``: + +* ``auto_pad = explicit`` and ``rounding_type = floor`` + ``H_out = floor((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`` + ``W_out = floor((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`` + ``D_out = floor((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`` + +* ``auto_pad = explicit`` and ``rounding_type = ceil`` + ``H_out = ceil((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`` + ``W_out = ceil((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`` + ``D_out = ceil((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`` + +* ``auto_pad = valid`` + Please note that AvgPool does not support the ``dilations`` attribute, in which case its value should be replaced with ``1``. + ``H_out = ceil((H - ((kernel[0] - 1) * dilations[0] + 1) + 1) / strides[0])`` + ``W_out = ceil((W - ((kernel[1] - 1) * dilations[1] + 1) + 1) / strides[1])`` + ``D_out = ceil((D - ((kernel[2] - 1) * dilations[2] + 1) + 1) / strides[2])`` + +* ``auto_pad = same_upper / same_lower`` + ``H_out = H`` + ``W_out = W`` + ``D_out = D`` + + +If ``H + pads_begin[i] + pads_end[i] - kernel[i]`` is not divisible by ``strides[i]`` evenly, the result is rounded with respect to the ``rounding_type`` attribute. +If ``rounding_type`` is set to ``ceil_torch``, the last pooling operation within a dimension cannot start in the padding area. If this is the case, the respective dimension is reduced by ``1``. More context can be found in the `PyTorch issue discussion `__. + +**Examples** + +1. Example 1 shows how *MaxPool* operates with 4D input using 2D kernel and ``auto_pad = explicit``. + + .. 
code-block:: sh + + input = [[[[-1, 2, 3], + [4, 5, -6], + [-7, 8, 9]]]] # shape: (1, 1, 3, 3) + strides = [1, 1] + pads_begin = [1, 1] + pads_end = [1, 1] + kernel = [2, 2] + rounding_type = "floor" + auto_pad = "explicit" + output0 = [[[[-1, 2, 3, 3], + [4, 5, 5, -6], + [4, 8, 9, 9], + [-7, 8, 9, 9]]]] # shape: (1, 1, 4, 4) + output1 = [[[[0, 1, 2, 2], + [3, 4, 4, 5], + [3, 7, 8, 8], + [6, 7, 8, 8]]]] # shape: (1, 1, 4, 4) + + +2. Example 2 shows how *MaxPool* operates with 3D input using 1D kernel and ``auto_pad = valid``. + + .. code-block:: sh + + input = [[[-1, 2, 3, 5, -7, 9, 1]]] # shape: (1, 1, 7) + strides = [1] + kernel = [3] + rounding_type = "floor" + auto_pad = "valid" + output0 = [[[3, 5, 5, 9, 9]]] # shape: (1, 1, 5) + output1 = [[[2, 3, 3, 5, 5]]] # shape: (1, 1, 5) + + +3. Example 3 shows how *MaxPool* operates with 4D input using 2D kernel and ``auto_pad = same_lower``. + + .. code-block:: sh + + input = [[[[-1, 2, 3], + [4, 5, -6], + [-7, 8, 9]]]] # shape: (1, 1, 3, 3) + strides = [1, 1] + kernel = [2, 2] + rounding_type = "floor" + auto_pad = "same_lower" + output0 = [[[[-1, 2, 3], + [4, 5, 5] + [4, 8, 9]]]] # shape: (1, 1, 3, 3) + output1 = [[[[0, 1, 2], + [3, 4, 4], + [3, 7, 8]]]] # shape: (1, 1, 3, 3) + + +4. Example 4 shows how *MaxPool* operates with 4D input using 2D kernel and ``auto_pad = same_upper``. + + .. code-block:: sh + + input = [[[[-1, 2, 3], + [4, 5, -6], + [-7, 8, 9]], + [[2, -1, 5], + [6, -7, 1], + [8, 2, -3]]]] # shape: (1, 2, 3, 3) + strides = [1, 1] + kernel = [2, 2] + rounding_type = "floor" + auto_pad = "same_upper" + output0 = [[[[5, 5, 3], + [8, 9, 9] + [8, 9, 9]], + [[6, 5, 5], + [8, 2, 1], + [8, 2, -3]]]] # shape: (1, 2, 3, 3) + output1 = [[[[4, 4, 2], + [7, 8, 8], + [7, 8, 8]], + [[12, 11, 11], + [15, 16, 14], + [15, 16, 17]]]] # shape: (1, 2, 3, 3) + + +5. Example 5 shows how *MaxPool* operates with 4D input using 2D kernel and ``rounding_type = ceil_torch``. + + .. 
code-block:: sh + + input = [[[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]]] # shape: (1, 1, 3, 3) + strides = [2, 2] + kernel = [2, 2] + pads_begin = [1, 1] + pads_end = [1, 1] + rounding_type = "ceil_torch" + output0 = [[[[1, 3], + [7, 9]]]] # shape: (1, 1, 2, 2) + output1 = [[[[0, 2], + [6, 8]]]] # shape: (1, 1, 2, 2) + + +6. Example 6 shows how *MaxPool* operates with 4D input using 2D kernel, ``auto_pad = valid`` and ``rounding_type = ceil``. + + .. code-block:: sh + + input = [[[[-1, 2, 3], + [4, 5, -6], + [-7, 8, 9]]]] # shape: (1, 1, 3, 3) + strides = [2, 2] + kernel = [2, 2] + rounding_type = "ceil" + auto_pad = "valid" + output0 = [[[[5, 3], + [8, 9]]]] # shape: (1, 1, 2, 2) + output1 = [[[[4, 2], + [7, 8]]]] # shape: (1, 1, 2, 2) + + +7. Example 7 shows how *MaxPool* operates on 4D input using dilated 2D kernel, ``auto_pad = explicit`` and ``rounding_type = floor``. + + .. code-block:: sh + + input = [[[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]]] # shape: (1, 1, 3, 3) + strides = [1, 1] + kernel = [2, 2] + dilations = [2, 2] + rounding_type = "floor" + auto_pad = "explicit" + pads_begin = [1, 1] + pads_end = [1, 1] + output0 = [[[[5, 6, 5], + [8, 9, 8], + [5, 6, 5]]]] # shape: (1, 1, 3, 3) + output1 = [[[[4, 5, 4], + [7, 8, 7], + [4, 5, 4]]]] # shape: (1, 1, 3, 3) + + +8. Example 8 shows how *MaxPool* operates on 4D input using 2D kernel, with non-default ``axis`` value. + +Input shape: (1, 2, 3, 3) +Output shape: (1, 2, 2, 2) + + .. 
code-block:: sh + + input = [[[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]], + [[10, 11, 12], + [13, 14, 15], + [16, 17, 18]]]] # shape: (1, 2, 3, 3) + strides = [1, 1] + kernel = [2, 2] + dilations = [1, 1] + rounding_type = "floor" + auto_pad = "explicit" + pads_begin = [0, 0] + pads_end = [0, 0] + axis = 2 + output0 = [[[[5, 6], + [8, 9]], + [[14, 15], + [17, 18]]]] # shape: (1, 2, 2, 2) + output1 = [[[[4, 5], + [7, 8]], + [[4, 5], + [7, 8]]]] # shape: (1, 2, 2, 2) diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst index 6865869071f10b..de0b0f96cc0e1d 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst @@ -193,6 +193,15 @@ Tune quantization parameters regex = '.*layer_.*' nncf.quantize(model, dataset, ignored_scope=nncf.IgnoredScope(patterns=regex)) + * Exclude by subgraphs: + + .. code-block:: sh + + subgraph = nncf.Subgraph(inputs=['layer_1', 'layer_2'], outputs=['layer_3']) + nncf.quantize(model, dataset, ignored_scope=nncf.IgnoredScope(subgraphs=[subgraph])) + + In this case, all nodes along all simple paths in the graph from input to output nodes will be excluded from the quantization process. + * ``target_device`` - defines the target device, the specificity of which will be taken into account during optimization. The following values are supported: ``ANY`` (default), ``CPU``, ``CPU_SPR``, ``GPU``, and ``NPU``. .. 
code-block:: sh diff --git a/docs/sphinx_setup/_static/js/custom.js b/docs/sphinx_setup/_static/js/custom.js index 3b9af96ec7314f..4ad51222bd1da9 100644 --- a/docs/sphinx_setup/_static/js/custom.js +++ b/docs/sphinx_setup/_static/js/custom.js @@ -25,14 +25,6 @@ var wapSection = 'openvinotoolkit'; s.appendChild(po); })(); -// legal notice for benchmarks -function addLegalNotice() { - if (window.location.href.indexOf('openvino_docs_performance_') !== -1) { - var legalNotice = $('

Results may vary. For workloads visit: workloads and for configurations visit: configurations. See also Legal Information.

'); - $('body').append(legalNotice); - } -} - $(document).ready(function () { addFooter(); createVersions(); @@ -42,7 +34,6 @@ $(document).ready(function () { init_switchers(); handleSwitcherParam(); initViewerJS(); - addLegalNotice(); updateSearchForm(); initBenchmarkPickers(); // included with the new benchmarks page initCollapsibleHeaders(); // included with the new benchmarks page diff --git a/src/bindings/js/node/include/compiled_model.hpp b/src/bindings/js/node/include/compiled_model.hpp index cc3a86eda2c624..f9c1927a3a90fb 100644 --- a/src/bindings/js/node/include/compiled_model.hpp +++ b/src/bindings/js/node/include/compiled_model.hpp @@ -68,6 +68,9 @@ class CompiledModelWrap : public Napi::ObjectWrap { */ Napi::Value get_inputs(const Napi::CallbackInfo& info); + /** @brief Exports the compiled model to bytes/output stream. */ + Napi::Value export_model(const Napi::CallbackInfo& info); + private: ov::CompiledModel _compiled_model; }; diff --git a/src/bindings/js/node/include/core_wrap.hpp b/src/bindings/js/node/include/core_wrap.hpp index ac4637c5faf620..c1db1c45c3e930 100644 --- a/src/bindings/js/node/include/core_wrap.hpp +++ b/src/bindings/js/node/include/core_wrap.hpp @@ -84,6 +84,9 @@ class CoreWrap : public Napi::ObjectWrap { const Napi::String& device, const std::map& config); + /** @brief Imports a compiled model from the previously exported one. */ + Napi::Value import_model(const Napi::CallbackInfo& info); + /** @brief Returns devices available for inference. 
*/ Napi::Value get_available_devices(const Napi::CallbackInfo& info); diff --git a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts index 7f4f9fd917cd13..eccb5c38e114f4 100644 --- a/src/bindings/js/node/lib/addon.ts +++ b/src/bindings/js/node/lib/addon.ts @@ -36,6 +36,7 @@ interface Core { modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Promise; readModelSync(modelPath: string, weightsPath?: string): Model; readModelSync(modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Model; + importModelSync(modelStream: Buffer, device: string): CompiledModel; getAvailableDevices(): string[]; } interface CoreConstructor { @@ -56,6 +57,7 @@ interface CompiledModel { output(nameOrId?: string | number): Output; input(nameOrId?: string | number): Output; createInferRequest(): InferRequest; + exportModelSync(): Buffer; } interface Tensor { diff --git a/src/bindings/js/node/src/compiled_model.cpp b/src/bindings/js/node/src/compiled_model.cpp index d02f14a82bae1c..304ac9d299a3f2 100644 --- a/src/bindings/js/node/src/compiled_model.cpp +++ b/src/bindings/js/node/src/compiled_model.cpp @@ -19,7 +19,8 @@ Napi::Function CompiledModelWrap::get_class(Napi::Env env) { InstanceMethod("input", &CompiledModelWrap::get_input), InstanceAccessor<&CompiledModelWrap::get_inputs>("inputs"), InstanceMethod("output", &CompiledModelWrap::get_output), - InstanceAccessor<&CompiledModelWrap::get_outputs>("outputs")}); + InstanceAccessor<&CompiledModelWrap::get_outputs>("outputs"), + InstanceMethod("exportModelSync", &CompiledModelWrap::export_model)}); } Napi::Object CompiledModelWrap::wrap(Napi::Env env, ov::CompiledModel compiled_model) { @@ -110,3 +111,10 @@ Napi::Value CompiledModelWrap::get_inputs(const Napi::CallbackInfo& info) { return js_inputs; } + +Napi::Value CompiledModelWrap::export_model(const Napi::CallbackInfo& info) { + std::stringstream _stream; + _compiled_model.export_model(_stream); + const auto& exported = _stream.str(); + return 
Napi::Buffer::Copy(info.Env(), exported.c_str(), exported.size()); +} diff --git a/src/bindings/js/node/src/core_wrap.cpp b/src/bindings/js/node/src/core_wrap.cpp index d5d5d68ed6b1a6..96d827e77431cf 100644 --- a/src/bindings/js/node/src/core_wrap.cpp +++ b/src/bindings/js/node/src/core_wrap.cpp @@ -20,6 +20,7 @@ Napi::Function CoreWrap::get_class(Napi::Env env) { InstanceMethod("readModel", &CoreWrap::read_model_async), InstanceMethod("compileModelSync", &CoreWrap::compile_model_sync_dispatch), InstanceMethod("compileModel", &CoreWrap::compile_model_async), + InstanceMethod("importModelSync", &CoreWrap::import_model), InstanceMethod("getAvailableDevices", &CoreWrap::get_available_devices)}); } @@ -230,3 +231,25 @@ Napi::Value CoreWrap::get_available_devices(const Napi::CallbackInfo& info) { return js_devices; } + +Napi::Value CoreWrap::import_model(const Napi::CallbackInfo& info) { + if (info.Length() != 2) { + reportError(info.Env(), "Invalid number of arguments -> " + std::to_string(info.Length())); + return info.Env().Undefined(); + } + if (!info[0].IsBuffer()) { + reportError(info.Env(), "The first argument must be of type Buffer."); + return info.Env().Undefined(); + } + if (!info[1].IsString()) { + reportError(info.Env(), "The second argument must be of type String."); + return info.Env().Undefined(); + } + const auto& model_data = info[0].As>(); + const auto model_stream = std::string(reinterpret_cast(model_data.Data()), model_data.Length()); + std::stringstream _stream; + _stream << model_stream; + + const auto& compiled = _core.import_model(_stream, std::string(info[1].ToString())); + return CompiledModelWrap::wrap(info.Env(), compiled); +} diff --git a/src/bindings/js/node/tests/basic.test.js b/src/bindings/js/node/tests/basic.test.js index 8711ba3d33e3cc..07e0502053edd1 100644 --- a/src/bindings/js/node/tests/basic.test.js +++ b/src/bindings/js/node/tests/basic.test.js @@ -186,3 +186,17 @@ describe('Input class for ov::Input', () => { }); }); + 
+it('Test exportModel()/importModel()', () => { + const userStream = compiledModel.exportModelSync(); + const newCompiled = core.importModelSync(userStream, 'CPU'); + const epsilon = 0.5; + const tensor = Float32Array.from({ length: 3072 }, () => (Math.random() + epsilon)); + + const inferRequest = compiledModel.createInferRequest(); + const res1 = inferRequest.infer([tensor]); + const newInferRequest = newCompiled.createInferRequest(); + const res2 = newInferRequest.infer([tensor]); + + assert.deepStrictEqual(res1['fc_out'].data[0], res2['fc_out'].data[0]); +}); diff --git a/src/core/dev_api/openvino/core/type/element_iterator.hpp b/src/core/dev_api/openvino/core/type/element_iterator.hpp new file mode 100644 index 00000000000000..331bd3684e576e --- /dev/null +++ b/src/core/dev_api/openvino/core/type/element_iterator.hpp @@ -0,0 +1,502 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/type/element_type_traits.hpp" + +namespace ov { +namespace util { + +/** + * @brief Make bit mask by setting N less significant bits. + * + * @tparam T Type of value. + * @param n Number of bits to set. + * @return Bit-mask value with N bits set. + */ +template +constexpr T make_n_bit_mask(const T n) { + return (1ULL << n) - 1ULL; +} +} // namespace util + +namespace element { + +/** + * @brief Checks if element type is N in-raw bits type. + * + * @param et Element type to check + * @return True if element type is bit type otherwise false. + */ +constexpr bool is_bit_type(Type_t et) { + return et == u1 || et == u2; +} + +/** + * @brief Checks if element type is 4-bits type. + * + * @param et Element type to check + * @return True if element type is nibble type otherwise false. + */ +constexpr bool is_nibble_type(Type_t et) { + return et == u4 || et == i4 || et == nf4; +} + +/** + * @brief Checks if element type is split bit type. 
+ * + * The value is stored in byte(s) like [b0, b1, x, .., x, b2, b3]. + * + * @param et Element type to check + * @return True if element type is split bit type otherwise false. + */ +constexpr bool is_split_bit_type(Type_t et) { + return et == u3 || et == u6; +} + +/** + * @brief Checks element type is using only N bytes as value. + * + * @param et Element type to check. + * @return True if element type use byte(s) for its value, false otherwise. + */ +constexpr bool is_byte_type(Type_t et) { + return !is_bit_type(et) && !is_split_bit_type(et) && !is_nibble_type(et) && et != string; +} + +/** + * @brief Gets bit width of ov::element::Type_t. + * + * @return Number of bits representing the Type_t. + */ +template +constexpr size_t bit_width() { + return sizeof(typename ov::fundamental_type_for()); +} + +template <> +constexpr size_t bit_width() { + return 1; +} + +template <> +constexpr size_t bit_width() { + return 2; +} + +template <> +constexpr size_t bit_width() { + return 3; +} + +template <> +constexpr size_t bit_width() { + return 4; +} + +template <> +constexpr size_t bit_width() { + return 4; +} + +template <> +constexpr size_t bit_width() { + return 6; +} + +/** + * @brief The BitProxy value class used by ov::element::Iterator to access values which has no standard byte(s) layout. + * + * It used by iterator to access values represented by precisions like u2, i4, u6 etc. in the way like stored + * on bytes. + * The R/W access is done via conversion and copy assignment operators. + * The public members are used to work on sub-byte value like on its fundamental type defined by T. + * + * @tparam T Fundamental type of sub-byte value which must be same as fundamental type of element::Type_t. + * @tparam ET OpenVINO element type. + * @tparam Enable Type to enable/disable this class. + */ +template +class BitProxy {}; + +/** + * @brief The BitProxy specialization for types which are represented by N in-raw bits in byte. 
+ * + * @tparam T Fundamental type of sub-byte value which must be same as fundamental type of element::Type_t. + * @tparam ET OpenVINO element type. + */ +template +class BitProxy::type> { +private: + template + friend class Iterator; //!< Iterator class is friend to access private members to manipulate pointer. + + static constexpr size_t m_bits = bit_width(); //!< Number of bit for single value. + static constexpr size_t m_num_values = 8 / m_bits; //!< Number values in byte. + static constexpr size_t m_shift_init = is_nibble_type(ET) ? 0 : 8 - m_bits; //!< Initial value for bit shift. + + T* m_ptr; //!< Pointer to T used to get value. + size_t m_bit_shift; //!< Current bit shift to get value. + + constexpr BitProxy(T* ptr) noexcept : m_ptr{ptr}, m_bit_shift{m_shift_init} {} + + uint8_t get_bit_value() const { + constexpr auto value_mask = util::make_n_bit_mask(m_bits); + return (*m_ptr >> m_bit_shift) & value_mask; + } + +public: + using value_type = typename std::decay::type; //!< Fundamental type of bound to BitProxy. + + /** + * @brief Compare proxy value with other provided value. + * @param rhs Value to compare. + * @return True if equal otherwise false. + */ + template + constexpr bool operator==(const U& rhs) const { + return static_cast(*this) == rhs; + } + + /** + * @brief Compare proxy value is less than rhs. + * + * @tparam U Type of value to compare. + * @param rhs Value to compare. + * @return True if less otherwise false. + */ + template + constexpr bool operator<(const U& rhs) const { + return static_cast(*this) < rhs; + } + + /** + * @brief Converts to fundamental type. + * + * @return Value of BitProxy. + */ + template ::type* = nullptr> + operator value_type() const { + return static_cast(get_bit_value()); + } + + /** + * @brief Converts to fundamental type. + * + * @return Value of BitProxy. 
+ */ + template ::type* = nullptr> + operator value_type() const { + constexpr auto value_mask = util::make_n_bit_mask(m_bits); + constexpr auto value_msb_mask = (1U << (m_bits - 1U)); + + auto v = get_bit_value(); + if (v & value_msb_mask) { + // If N bit value MSB bit is set then value is negative. + // As v is byte then all bits above N must be set to be two's complement. + v |= ~value_mask; + } + return static_cast(v); + } + + /** + * @brief Sets current ProxyBit to value. + * @param v Value to be set. + */ + BitProxy& operator=(const value_type v) { + constexpr auto value_mask = util::make_n_bit_mask(m_bits); + *m_ptr &= ~(value_mask << m_bit_shift); + *m_ptr |= (static_cast(v) & value_mask) << m_bit_shift; + return *this; + } +}; + +/** + * @brief The BitProxy specialization for u3, u6 precisions. + * + * @note The input pointer must point on buffer which has got 3 * n bytes. + * + * @tparam T Fundamental type of sub-byte value which must be same as fundamental type of element::Type_t. + * @tparam ET OpenVINO element type. + */ +template +class BitProxy::type> { +private: + template + friend class Iterator; //!< Iterator class is friend to access private members to manipulate pointer. + + static constexpr size_t m_bits = bit_width(); //!< Number of bit for single value. + static constexpr size_t m_num_values = (3 * 8) / m_bits; //!< Number values in byte. + static constexpr size_t m_shift_init = m_num_values - 1; //!< Initial value for bit shift. + + struct ByteValue { + uint8_t b0; + uint8_t b1; + uint8_t b2; + }; + + union { + T* m_ptr; //!< Pointer to T buffer. + ByteValue* m_bytes; //!< Pointer to buffer as 3 bytes representation. + }; + + size_t m_bit_shift; //!< Current bit shift to get value. + + constexpr BitProxy(T* ptr) noexcept : m_ptr{ptr}, m_bit_shift{m_shift_init} {} + +public: + using value_type = typename std::decay::type; //!< Fundamental type of sub-byte. + + /** + * @brief Compare proxy value is equal than rhs. 
+ * + * @tparam U Type of value to compare. + * @param rhs Value to compare. + * @return True if equal, false otherwise. + */ + template + constexpr bool operator==(const U& rhs) const { + return static_cast(*this) == rhs; + } + + /** + * @brief Compare proxy value is less than rhs. + * + * @tparam U Type of value to compare. + * @param rhs Value to compare. + * @return True if less otherwise false. + */ + template + constexpr bool operator<(const U& rhs) const { + return static_cast(*this) < rhs; + } + + /** + * @brief Converts to fundamental type. + * + * @return Value of BitProxy. + */ + operator value_type() const { + constexpr uint16_t lower_mask_bits = 16 / m_num_values; + constexpr uint16_t upper_mask_bits = 8 / m_num_values; + constexpr uint16_t mask_lower = util::make_n_bit_mask(lower_mask_bits); + constexpr uint16_t mask_upper = util::make_n_bit_mask(upper_mask_bits) << lower_mask_bits; + + // get lower part of value + uint16_t v = ((m_bytes->b0 << 8U) | m_bytes->b1) >> (lower_mask_bits * m_bit_shift); + v &= mask_lower; + // get upper part of value + v |= ((m_bytes->b2 << lower_mask_bits) >> (upper_mask_bits * m_bit_shift)) & mask_upper; + return static_cast(v); + } + + /** + * @brief Sets current ProxyBit to value. + * @param v Value to be set. 
+ */ + BitProxy& operator=(const value_type v) { + constexpr uint16_t lower_mask_bits = 16 / m_num_values; + constexpr uint16_t upper_mask_bits = 8 / m_num_values; + constexpr uint16_t mask_lower = util::make_n_bit_mask(lower_mask_bits); + constexpr uint16_t mask_upper = util::make_n_bit_mask(upper_mask_bits) << lower_mask_bits; + + uint16_t tmp = (m_bytes->b0 << 8U) | m_bytes->b1; + tmp &= ~(mask_lower << (lower_mask_bits * m_bit_shift)); + tmp |= (v & mask_lower) << (lower_mask_bits * m_bit_shift); + m_bytes->b0 = tmp >> 8U; + m_bytes->b1 = tmp & 0x00ff; + + tmp = m_bytes->b2 & ~((mask_upper >> lower_mask_bits) << (upper_mask_bits * m_bit_shift)); + tmp |= (((v & mask_upper) >> lower_mask_bits) << (upper_mask_bits * m_bit_shift)); + m_bytes->b2 = tmp & 0x00ff; + return *this; + } +}; + +/** + * @brief Put BitProxy value to output stream. + * + * @param os Reference to output stream. + * @param value Value to print. + * @return return output stream. + */ +template +std::ostream& operator<<(std::ostream& os, const BitProxy& value) { + os << +static_cast(value); + return os; +} + +/** + * @brief Bidirectional iterator of specified precision. + * + * The iterator supports low precisions using BitProxy to access values via conversion. + * + * @tparam ET Type of OpenVINO element type (ov::element::Type_t). + * @tparam T Must be fundamental type for specified ET. 
+ */ +template +class Iterator { + using proxy_type = BitProxy; + +public: + using iterator_category = std::bidirectional_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = T; + using reference = typename std::conditional::value, const proxy_type&, proxy_type&>::type; + using pointer = typename std::conditional::value, const proxy_type*, proxy_type*>::type; + + static_assert(std::is_same::type, ov::fundamental_type_for>::value, + "Iterator value_type must be same as fundamental type of ET"); + + constexpr Iterator(T* ptr) noexcept : m_et_ptr{ptr} {} + + // Iteration operators + template + typename std::enable_if>::type& operator++() { + m_et_ptr.m_bit_shift -= m_et_ptr.m_bits; + m_et_ptr.m_bit_shift = m_et_ptr.m_bit_shift % (m_et_ptr.m_num_values * m_et_ptr.m_bits); + m_et_ptr.m_ptr += static_cast(m_et_ptr.m_bit_shift == m_et_ptr.m_shift_init); + return *this; + } + + template + typename std::enable_if>::type& operator++() { + m_et_ptr.m_bit_shift ^= m_et_ptr.m_bits; + m_et_ptr.m_ptr += static_cast(m_et_ptr.m_bit_shift == m_et_ptr.m_shift_init); + return *this; + } + + template + typename std::enable_if>::type& operator++() { + --m_et_ptr.m_bit_shift; + m_et_ptr.m_bit_shift = m_et_ptr.m_bit_shift % m_et_ptr.m_num_values; + m_et_ptr.m_ptr += (m_et_ptr.m_bit_shift == m_et_ptr.m_shift_init) ? 3 : 0; + return *this; + } + + Iterator operator++(int) { + auto old = *this; + ++(*this); + return old; + } + + template + typename std::enable_if>::type& operator+=(const difference_type& n) { + const auto advance = n + (m_et_ptr.m_shift_init - m_et_ptr.m_bit_shift) / m_et_ptr.m_bits; + m_et_ptr.m_bit_shift = m_et_ptr.m_shift_init - (advance % m_et_ptr.m_num_values) * m_et_ptr.m_bits; + m_et_ptr.m_ptr += advance / m_et_ptr.m_num_values; + return *this; + } + + template + typename std::enable_if>::type& operator+=(const difference_type& n) { + m_et_ptr.m_ptr += n / m_et_ptr.m_num_values; + return (n % m_et_ptr.m_num_values) ? 
++*this : *this; + } + + template + typename std::enable_if>::type& operator+=(const difference_type& n) { + const auto advance = n + m_et_ptr.m_shift_init - m_et_ptr.m_bit_shift; + m_et_ptr.m_bit_shift = m_et_ptr.m_shift_init - (advance % m_et_ptr.m_num_values); + m_et_ptr.m_ptr += 3 * (advance / m_et_ptr.m_num_values); + return *this; + } + + Iterator operator+(const difference_type& n) { + auto tmp(*this); + tmp += n; + return tmp; + } + + template + typename std::enable_if>::type& operator--() { + m_et_ptr.m_bit_shift += m_et_ptr.m_bits; + m_et_ptr.m_bit_shift = m_et_ptr.m_bit_shift % (m_et_ptr.m_num_values * m_et_ptr.m_bits); + m_et_ptr.m_ptr -= static_cast(m_et_ptr.m_bit_shift == 0); + return *this; + } + + template + typename std::enable_if>::type& operator--() { + m_et_ptr.m_bit_shift ^= m_et_ptr.m_bits; + m_et_ptr.m_ptr -= static_cast(m_et_ptr.m_bit_shift == 4); + return *this; + } + + template + typename std::enable_if>::type& operator--() { + ++m_et_ptr.m_bit_shift; + m_et_ptr.m_bit_shift = m_et_ptr.m_bit_shift % m_et_ptr.m_num_values; + m_et_ptr.m_ptr -= m_et_ptr.m_bit_shift == 0 ? 3 : 0; + return *this; + } + + Iterator operator--(int) { + auto old = *this; + --(*this); + return old; + } + + template + typename std::enable_if>::type& operator-=(const difference_type& n) { + const auto advance = m_et_ptr.m_bit_shift / m_et_ptr.m_bits + n; + m_et_ptr.m_bit_shift = (advance % m_et_ptr.m_num_values) * m_et_ptr.m_bits; + m_et_ptr.m_ptr -= advance / m_et_ptr.m_num_values; + return *this; + } + + template + typename std::enable_if>::type& operator-=(const difference_type& n) { + m_et_ptr.m_ptr -= n / m_et_ptr.m_num_values; + return (n % m_et_ptr.m_num_values) ? 
--*this : *this; + } + + template + typename std::enable_if>::type& operator-=(const difference_type& n) { + const auto advance = m_et_ptr.m_bit_shift + n; + m_et_ptr.m_bit_shift = advance % m_et_ptr.m_num_values; + m_et_ptr.m_ptr -= 3 * (advance / m_et_ptr.m_num_values); + return *this; + } + + Iterator operator-(const difference_type& n) { + auto tmp(*this); + tmp -= n; + return tmp; + } + + // compare operators + constexpr bool operator!=(const Iterator& rhs) const { + return (m_et_ptr.m_ptr != rhs.m_et_ptr.m_ptr) || (m_et_ptr.m_bit_shift != rhs.m_et_ptr.m_bit_shift); + } + + // dereference operators + constexpr const proxy_type& operator*() const { + return m_et_ptr; + } + + reference operator*() { + return m_et_ptr; + } + +private: + proxy_type m_et_ptr; +}; + +/** + * @brief Make element iterator from pointer. + * + * @tparam ET Type of ov::element::Type_t. + * @tparam T Type of pointer data. Must be fundamental type of ET. + + * @param ptr Pointer to data. + * @return Element iterator for type ET. 
+ */ +template ::type* = nullptr> +constexpr Iterator iterator(T* ptr) { + return {ptr}; +} +} // namespace element +} // namespace ov diff --git a/src/core/include/openvino/core/core.hpp b/src/core/include/openvino/core/core.hpp index 258d66d9b91820..6a54548397dd7e 100644 --- a/src/core/include/openvino/core/core.hpp +++ b/src/core/include/openvino/core/core.hpp @@ -49,6 +49,7 @@ #include "openvino/core/preprocess/output_info.hpp" #include "openvino/core/preprocess/output_model_info.hpp" #include "openvino/core/preprocess/output_tensor_info.hpp" +#include "openvino/core/preprocess/padding_mode.hpp" #include "openvino/core/preprocess/postprocess_steps.hpp" #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/core/preprocess/preprocess_steps.hpp" diff --git a/src/core/include/openvino/core/preprocess/padding_mode.hpp b/src/core/include/openvino/core/preprocess/padding_mode.hpp new file mode 100644 index 00000000000000..c1391628e8f50b --- /dev/null +++ b/src/core/include/openvino/core/preprocess/padding_mode.hpp @@ -0,0 +1,13 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +namespace ov { +namespace preprocess { + +using PaddingMode = ov::op::PadMode; + +} // namespace preprocess +} // namespace ov diff --git a/src/core/include/openvino/core/preprocess/preprocess_steps.hpp b/src/core/include/openvino/core/preprocess/preprocess_steps.hpp index ae41e447551671..8b8288814fe53b 100644 --- a/src/core/include/openvino/core/preprocess/preprocess_steps.hpp +++ b/src/core/include/openvino/core/preprocess/preprocess_steps.hpp @@ -6,6 +6,7 @@ #include "openvino/core/core_visibility.hpp" #include "openvino/core/preprocess/color_format.hpp" +#include "openvino/core/preprocess/padding_mode.hpp" #include "openvino/core/preprocess/resize_algorithm.hpp" #include "openvino/core/type/element_type.hpp" @@ -80,6 +81,32 @@ class OPENVINO_API PreProcessSteps final { /// \return Reference to 'this' to allow 
chaining with other calls in a builder-like manner PreProcessSteps& mean(const std::vector& values); + /// \brief Add pad preprocess operation + /// Extends an input tensor on edges with constants + /// + /// \param pads_begin Number of padding elements to add at the beginning of each axis. + /// \param pads_end Number of padding elements to add at the end of each axis. + /// \param value Value to be populated in the padded area + /// + /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner + PreProcessSteps& pad(const std::vector& pads_begin, + const std::vector& pads_end, + float value, + PaddingMode mode); + + /// \brief Add pad preprocess operation + /// Extends an input tensor on edges with constants + /// + /// \param pads_begin Number of padding elements to add at the beginning of each axis. + /// \param pads_end Number of padding elements to add at the end of each axis. + /// \param values Values to be populated in the padded area + /// + /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner + PreProcessSteps& pad(const std::vector& pads_begin, + const std::vector& pads_end, + const std::vector& values, + PaddingMode mode); + /// \brief Signature for custom preprocessing operation. Custom preprocessing operation takes one input node and /// produces one output node. 
For more advanced cases, client's code can use transformation passes over ov::Model /// directly diff --git a/src/core/include/openvino/core/type/element_type.hpp b/src/core/include/openvino/core/type/element_type.hpp index 39833797c3a663..531a8ccfcb5b6c 100644 --- a/src/core/include/openvino/core/type/element_type.hpp +++ b/src/core/include/openvino/core/type/element_type.hpp @@ -48,7 +48,10 @@ enum class Type_t { i32, //!< i32 element type i64, //!< i64 element type u1, //!< binary element type + u2, //!< u2 element type + u3, //!< u3 element type u4, //!< u4 element type + u6, //!< u6 element type u8, //!< u8 element type u16, //!< u16 element type u32, //!< u32 element type @@ -168,9 +171,18 @@ constexpr Type i64(Type_t::i64); /// \brief binary element type /// \ingroup ov_element_cpp_api constexpr Type u1(Type_t::u1); +/// \brief u2 element type +/// \ingroup ov_element_cpp_api +constexpr Type u2(Type_t::u2); +/// \brief u3 element type +/// \ingroup ov_element_cpp_api +constexpr Type u3(Type_t::u3); /// \brief u4 element type /// \ingroup ov_element_cpp_api constexpr Type u4(Type_t::u4); +/// \brief u6 element type +/// \ingroup ov_element_cpp_api +constexpr Type u6(Type_t::u6); /// \brief u8 element type /// \ingroup ov_element_cpp_api constexpr Type u8(Type_t::u8); diff --git a/src/core/include/openvino/core/type/element_type_traits.hpp b/src/core/include/openvino/core/type/element_type_traits.hpp index c47bae8a13914c..94f3c25372eb5e 100644 --- a/src/core/include/openvino/core/type/element_type_traits.hpp +++ b/src/core/include/openvino/core/type/element_type_traits.hpp @@ -68,11 +68,26 @@ struct element_type_traits { using value_type = int8_t; }; +template <> +struct element_type_traits { + using value_type = int8_t; +}; + +template <> +struct element_type_traits { + using value_type = int8_t; +}; + template <> struct element_type_traits { using value_type = int8_t; }; +template <> +struct element_type_traits { + using value_type = int8_t; +}; + template 
<> struct element_type_traits { using value_type = uint8_t; diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index ae539351058574..97d56098b89e4c 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -146,6 +146,9 @@ class OPENVINO_API Constant : public Op { case Type_t::string: fill_data(value); break; + case Type_t::u2: + case Type_t::u3: + case Type_t::u6: case Type_t::undefined: case Type_t::dynamic: OPENVINO_THROW("unsupported type"); @@ -872,6 +875,9 @@ class OPENVINO_API Constant : public Op { case Type_t::string: write_buffer(source); break; + case element::Type_t::u2: + case element::Type_t::u3: + case element::Type_t::u6: case element::Type_t::undefined: case element::Type_t::dynamic: OPENVINO_THROW("unsupported type"); diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp index 7a24338a447f32..bf9e040683e102 100644 --- a/src/core/src/pass/visualize_tree.cpp +++ b/src/core/src/pass/visualize_tree.cpp @@ -373,7 +373,10 @@ static std::string get_value(const std::shared_ptr& consta case ov::element::Type_t::undefined: case ov::element::Type_t::dynamic: case ov::element::Type_t::u1: + case ov::element::Type_t::u2: + case ov::element::Type_t::u3: case ov::element::Type_t::u4: + case ov::element::Type_t::u6: case ov::element::Type_t::nf4: case ov::element::Type_t::i4: case ov::element::Type_t::f8e4m3: diff --git a/src/core/src/pattern/op/optional.cpp b/src/core/src/pattern/op/optional.cpp index 57cc35c547b0b9..8cd96efc44536e 100644 --- a/src/core/src/pattern/op/optional.cpp +++ b/src/core/src/pattern/op/optional.cpp @@ -8,6 +8,31 @@ #include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" +using namespace ov::pass::pattern::op; + +/* + ┌──────────────┐ + │ Relu │ + ┌──────────────┐ └──────┬───────┘ + │ Relu │ │ + └──────┬───────┘ ┌──────┴───────┐ ┌──────────────┐ + │ │WrapType│ │ Relu │ + 
┌──────┴───────┐ └──────┬───────┘ └───────┬──────┘ + │Optional│ Unfolds into │ │ + └──────┬───────┘ └────────┐ ┌────────┘ + │ │ │ + ┌─┴─┐ ┌┴──────┴┐ + │ABS│ │ Or │ + └───┘ └────┬───┘ + │ + ┌─┴─┐ + │ABS│ + └───┘ + + In case there're no inputs to the Optional, there's no second branch hence no need in the + Or node and we may omit it leaving only the WrapType node with the Optional entry inside. +*/ + std::vector ov::pass::pattern::op::Optional::get_optional_types() const { return optional_types; } @@ -15,11 +40,20 @@ std::vector ov::pass::pattern::op::Optional::get_optional_ bool ov::pass::pattern::op::Optional::match_value(Matcher* matcher, const Output& pattern_value, const Output& graph_value) { - ov::OutputVector or_in_values = input_values(); - auto wrap_node = std::make_shared(optional_types, m_predicate, or_in_values); - or_in_values.push_back(wrap_node); + // Turn the Optional node into WrapType node to create a case where the Optional node is present + ov::OutputVector input_values_to_optional = input_values(); + size_t num_input_values_to_optional = input_values_to_optional.size(); + auto wrap_node = std::make_shared(optional_types, m_predicate, input_values_to_optional); + + // Either continue using the WrapType if there're no inputs to it or create an Or node, + // if there're other inputs to Optional creating another "branch" for matching. + // Use only the 0th input as a "data" input. (To be changed or considered when Optional + // starts supporting multiple inputs) + auto pattern = num_input_values_to_optional == 0 ? 
std::static_pointer_cast(wrap_node) + : std::static_pointer_cast(std::make_shared( + OutputVector{wrap_node, input_values_to_optional[0]})); - if (matcher->match_value(std::make_shared(or_in_values), graph_value)) { + if (matcher->match_value(pattern, graph_value) || num_input_values_to_optional == 0) { auto& pattern_map = matcher->get_pattern_value_map(); if (pattern_map.count(wrap_node)) { pattern_map[shared_from_this()] = graph_value; diff --git a/src/core/src/preprocess/pre_post_process.cpp b/src/core/src/preprocess/pre_post_process.cpp index 2d60c3c63108bb..d81d48082cde04 100644 --- a/src/core/src/preprocess/pre_post_process.cpp +++ b/src/core/src/preprocess/pre_post_process.cpp @@ -279,6 +279,22 @@ PreProcessSteps& PreProcessSteps::mean(const std::vector& values) { return *this; } +PreProcessSteps& PreProcessSteps::pad(const std::vector& pads_begin, + const std::vector& pads_end, + float value, + PaddingMode mode) { + m_impl->add_pad_impl(pads_begin, pads_end, std::vector{value}, mode); + return *this; +} + +PreProcessSteps& PreProcessSteps::pad(const std::vector& pads_begin, + const std::vector& pads_end, + const std::vector& values, + PaddingMode mode) { + m_impl->add_pad_impl(pads_begin, pads_end, values, mode); + return *this; +} + PreProcessSteps& PreProcessSteps::convert_element_type(const element::Type& type) { m_impl->add_convert_impl(type); return *this; diff --git a/src/core/src/preprocess/preprocess_steps_impl.cpp b/src/core/src/preprocess/preprocess_steps_impl.cpp index 0d1594d4ffe47a..522e545b714509 100644 --- a/src/core/src/preprocess/preprocess_steps_impl.cpp +++ b/src/core/src/preprocess/preprocess_steps_impl.cpp @@ -130,6 +130,54 @@ void PreStepsList::add_mean_impl(const std::vector& values) { "mean " + vector_to_string(values)); } +void PreStepsList::add_pad_impl(const std::vector& pads_begin, + const std::vector& pads_end, + const std::vector& pad_values, + PaddingMode mode) { + std::string name; + name = "pad(begin " + 
vector_to_string(pads_begin) + ", end " + vector_to_string(pads_end); + switch (mode) { + case PaddingMode::CONSTANT: + name += ", with " + vector_to_string(pad_values) + ")"; + break; + case PaddingMode::EDGE: + name += ", copied from edge)"; + break; + case PaddingMode::REFLECT: + name += ", reflected from tensor)"; + break; + case PaddingMode::SYMMETRIC: + name += ", symmetrically added from tensor)"; + break; + } + + m_actions.emplace_back( + [pads_begin, pads_end, pad_values, mode](const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext& ctxt) { + OPENVINO_ASSERT(nodes.size() == 1, + "Can't pad multi-plane input. Suggesting to convert current image to " + "RGB/BGR color format using 'PreProcessSteps::convert_color'"); + + const auto& node = nodes[0]; + auto element_type = nodes[0].get_element_type(); + OPENVINO_ASSERT(element_type.is_real(), + "Pad preprocessing can be applied to 'float' inputs. Consider using of " + "'convert_element_type' before padding. 
Current type is: ", + element_type); + + auto pad_value = opset8::Constant::create(node.get_element_type(), Shape{}, pad_values); + + auto npads_begin = opset8::Constant::create(element::i64, Shape{pads_begin.size()}, pads_begin); + auto npads_end = opset8::Constant::create(element::i64, Shape{pads_end.size()}, pads_end); + auto npad_value = opset8::Constant::create(element_type, Shape{}, pad_values); + + auto pad = std::make_shared(node, npads_begin, npads_end, npad_value, mode); + return std::make_tuple(std::vector>{pad}, true); + }, + name); +} + void PreStepsList::add_convert_impl(const element::Type& type) { m_actions.emplace_back( [type](const std::vector>& nodes, diff --git a/src/core/src/preprocess/preprocess_steps_impl.hpp b/src/core/src/preprocess/preprocess_steps_impl.hpp index 0cb36b91706f40..3fa74668e8a518 100644 --- a/src/core/src/preprocess/preprocess_steps_impl.hpp +++ b/src/core/src/preprocess/preprocess_steps_impl.hpp @@ -158,6 +158,10 @@ class PreStepsList { public: void add_scale_impl(const std::vector& values); void add_mean_impl(const std::vector& values); + void add_pad_impl(const std::vector& pads_begin, + const std::vector& pads_end, + const std::vector& values, + PaddingMode mode); void add_convert_impl(const element::Type& type); void add_crop_impl(const std::vector& begin, const std::vector& end); void add_resize_impl(ResizeAlgorithm alg, int dst_height, int dst_width); diff --git a/src/core/src/type/element_type.cpp b/src/core/src/type/element_type.cpp index 088a6a2367e1c2..8a529d6d0e678d 100644 --- a/src/core/src/type/element_type.cpp +++ b/src/core/src/type/element_type.cpp @@ -59,8 +59,14 @@ inline TypeInfo get_type_info(ov::element::Type_t type) { return {64, false, true, false, "int64_t", "i64"}; case ov::element::Type_t::u1: return {1, false, false, false, "uint1_t", "u1"}; + case ov::element::Type_t::u2: + return {2, false, false, false, "uint2_t", "u2"}; + case ov::element::Type_t::u3: + return {3, false, false, false, 
"uint3_t", "u3"}; case ov::element::Type_t::u4: return {4, false, false, false, "uint4_t", "u4"}; + case ov::element::Type_t::u6: + return {6, false, false, false, "uint6_t", "u6"}; case ov::element::Type_t::u8: return {8, false, false, true, "uint8_t", "u8"}; case ov::element::Type_t::u16: @@ -103,8 +109,14 @@ ov::element::Type type_from_string(const std::string& type) { return ::ov::element::Type(::ov::element::Type_t::i64); } else if (type == "u1" || type == "U1" || type == "BIN" || type == "bin") { return ::ov::element::Type(::ov::element::Type_t::u1); + } else if (type == "u2" || type == "U2") { + return ::ov::element::Type(::ov::element::Type_t::u2); + } else if (type == "u3" || type == "U3") { + return ::ov::element::Type(::ov::element::Type_t::u3); } else if (type == "u4" || type == "U4") { return ::ov::element::Type(::ov::element::Type_t::u4); + } else if (type == "u6" || type == "U6") { + return ::ov::element::Type(::ov::element::Type_t::u6); } else if (type == "u8" || type == "U8") { return ::ov::element::Type(::ov::element::Type_t::u8); } else if (type == "u16" || type == "U16") { @@ -135,11 +147,11 @@ ov::element::Type type_from_string(const std::string& type) { std::vector ov::element::Type::get_known_types() { std::vector rc = { - &ov::element::dynamic, &ov::element::boolean, &ov::element::bf16, &ov::element::f16, &ov::element::f32, - &ov::element::f64, &ov::element::i4, &ov::element::i8, &ov::element::i16, &ov::element::i32, - &ov::element::i64, &ov::element::u1, &ov::element::u4, &ov::element::u8, &ov::element::u16, - &ov::element::u32, &ov::element::u64, &ov::element::nf4, &ov::element::f8e4m3, &ov::element::f8e5m2, - &ov::element::string}; + &ov::element::dynamic, &ov::element::boolean, &ov::element::bf16, &ov::element::f16, &ov::element::f32, + &ov::element::f64, &ov::element::i4, &ov::element::i8, &ov::element::i16, &ov::element::i32, + &ov::element::i64, &ov::element::u1, &ov::element::u2, &ov::element::u3, &ov::element::u4, + 
&ov::element::u6, &ov::element::u8, &ov::element::u16, &ov::element::u32, &ov::element::u64, + &ov::element::nf4, &ov::element::f8e4m3, &ov::element::f8e5m2, &ov::element::string}; return rc; } @@ -163,7 +175,10 @@ ov::element::Type::Type(size_t bitwidth, {ov::element::Type_t::i32, {32, false, true, true, "int32_t", "i32"}}, {ov::element::Type_t::i64, {64, false, true, false, "int64_t", "i64"}}, {ov::element::Type_t::u1, {1, false, false, false, "uint1_t", "u1"}}, + {ov::element::Type_t::u2, {2, false, false, false, "uint2_t", "u2"}}, + {ov::element::Type_t::u3, {3, false, false, false, "uint3_t", "u3"}}, {ov::element::Type_t::u4, {4, false, false, false, "uint4_t", "u4"}}, + {ov::element::Type_t::u6, {6, false, false, false, "uint6_t", "u6"}}, {ov::element::Type_t::u8, {8, false, false, true, "uint8_t", "u8"}}, {ov::element::Type_t::u16, {16, false, false, false, "uint16_t", "u16"}}, {ov::element::Type_t::u32, {32, false, false, false, "uint32_t", "u32"}}, @@ -304,8 +319,14 @@ Type fundamental_type_for(const Type& type) { return from::value_type>(); case Type_t::u1: return from::value_type>(); + case Type_t::u2: + return from::value_type>(); + case Type_t::u3: + return from::value_type>(); case Type_t::u4: return from::value_type>(); + case Type_t::u6: + return from::value_type>(); case Type_t::u8: return from::value_type>(); case Type_t::u16: @@ -415,7 +436,10 @@ inline size_t compiler_byte_size(ov::element::Type_t et) { ET_CASE(i32); ET_CASE(i64); ET_CASE(u1); + ET_CASE(u2); + ET_CASE(u3); ET_CASE(u4); + ET_CASE(u6); ET_CASE(u8); ET_CASE(u16); ET_CASE(u32); @@ -451,7 +475,10 @@ OPENVINO_API EnumNames& EnumNames::get() { {"i32", element::Type_t::i32}, {"i64", element::Type_t::i64}, {"u1", element::Type_t::u1}, + {"u2", element::Type_t::u2}, + {"u3", element::Type_t::u3}, {"u4", element::Type_t::u4}, + {"u6", element::Type_t::u6}, {"u8", element::Type_t::u8}, {"u16", element::Type_t::u16}, {"u32", element::Type_t::u32}, diff --git 
a/src/core/tests/element_iterator_test.cpp b/src/core/tests/element_iterator_test.cpp new file mode 100644 index 00000000000000..cfe4b164c7c0c8 --- /dev/null +++ b/src/core/tests/element_iterator_test.cpp @@ -0,0 +1,478 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/type/element_iterator.hpp" + +#include + +#include + +#include "openvino/runtime/tensor.hpp" + +namespace ov { +namespace test { + +using testing::ElementsAre; +using testing::ElementsAreArray; + +namespace { +constexpr size_t get_buffer_size(const size_t bit_width, const size_t num_of_elements) { + return (num_of_elements * bit_width + 7) / 8; +} +} // namespace + +// bits number in comments are counted [b7, b6, ..., b0] +// ---- u1 +TEST(ElementIteratorTest, write_u1_data) { + constexpr auto elements_count = 16; + auto input = std::array{0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1}; + auto output = std::array{}; + auto iter = element::iterator(output.data()); + + std::copy(input.begin(), input.end(), iter); + EXPECT_THAT(output, ElementsAre(0x16, 0xB3)); +} + +TEST(ElementIteratorTest, read_const_u1_data) { + constexpr auto elements_count = 16; + constexpr auto input = std::array{0x21, static_cast(0xa3)}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1)); +} + +TEST(ElementIteratorTest, read_non_const_u1_data) { + constexpr auto elements_count = 16; + auto input = std::array{0x21, static_cast(0xa3)}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1)); +} + +TEST(ElementIteratorTest, read_u1_data_increment_decrement_iterator) { + auto input = std::array{0x32, static_cast(0xa3), 0x55}; + auto iter = element::iterator(input.data() + 1); + + EXPECT_EQ(*iter--, 1); // 2nd byte bit7 + 
EXPECT_EQ(*iter++, 0); // 1st byte bit0 + EXPECT_EQ(*++iter, 0); // 2nd byte bit6 + EXPECT_EQ(*iter--, 0); // 2nd byte bit6 + EXPECT_EQ(*iter, 1); // 2nd byte bit7 +} + +TEST(ElementIteratorTest, read_u1_data_iterator_with_offset) { + auto input = std::array{0x32, static_cast(0xa3), 0x41}; + auto iter = element::iterator(input.data() + 1); + + EXPECT_EQ(*iter, 1); // 2nd byte bit7 + EXPECT_EQ(*(iter - 2), 1); // 1st byte bit1 + EXPECT_EQ(*(iter - 5), 1); // 1st byte bit4 + EXPECT_EQ(*(iter + 1), 0); // 2nd byte bit6 + EXPECT_EQ(*(iter + 8), 0); // 3rd byte bit7 + EXPECT_EQ(*(iter + 9), 1); // 3rd byte bit6 + EXPECT_EQ(*std::prev(iter, 1), 0); // 1st byte bit0 + EXPECT_EQ(*std::next(iter, 2), 1); // 2nd byte bit5 +} + +TEST(ElementIteratorTest, read_u1_from_tensor) { + auto input = std::array{0x32, static_cast(0xa3), 0x41, 0x11}; + auto t = ov::Tensor(element::u1, Shape{2, 16}, input.data()); + auto iter = element::iterator(static_cast(t.data(element::u1))); + + EXPECT_THAT( + std::vector(iter, iter + t.get_size()), + ElementsAre(0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1)); +} + +TEST(ElementIteratorTest, u1_value_to_output_stream) { + constexpr auto value = static_cast(0x80); + auto iter = element::iterator(&value); + + std::stringstream s; + s << *iter; + + EXPECT_EQ(s.str(), "1"); +} + +// ---- u2 +TEST(ElementIteratorTest, write_u2_data) { + constexpr auto elements_count = 16; + auto input = std::array{2, 0, 1, 3, 0, 0, 3, 3, 1, 2, 1, 2, 3, 2, 1, 0}; + auto output = std::array{}; + auto iter = element::iterator(output.data()); + + std::copy(input.begin(), input.end(), iter); + + EXPECT_THAT(output, ElementsAre(0x87, 0x0f, 0x66, 0xe4)); +} + +TEST(ElementIteratorTest, read_const_u2_data) { + constexpr auto elements_count = 16; + constexpr auto input = std::array{static_cast(0x87), + 0x0f, + 0x66, + static_cast(0xe4)}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + 
elements_count), + ElementsAre(2, 0, 1, 3, 0, 0, 3, 3, 1, 2, 1, 2, 3, 2, 1, 0)); +} + +TEST(ElementIteratorTest, read_non_const_u2_data) { + constexpr auto elements_count = 16; + auto input = std::array{static_cast(0x87), + 0x0f, + 0x66, + static_cast(0xe4)}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(2, 0, 1, 3, 0, 0, 3, 3, 1, 2, 1, 2, 3, 2, 1, 0)); +} + +TEST(ElementIteratorTest, read_u2_data_increment_decrement_iterator) { + auto input = std::array{0x33, static_cast(0x93)}; + auto iter = element::iterator(input.data() + 1); + + EXPECT_EQ(*iter--, 2); // 2nd byte 1st half-nibble + EXPECT_EQ(*iter++, 3); // 1st byte 4th half-nibble + EXPECT_EQ(*++iter, 1); // 2nd byte 2nd half-nibble + EXPECT_EQ(*iter--, 1); // 2nd byte 2nd half-nibble + EXPECT_EQ(*--iter, 3); // 1st byte 4th half-nibble +} + +TEST(ElementIteratorTest, read_u2_data_iterator_with_offset) { + auto input = std::array{0x43, static_cast(0x93), 0x41}; + auto iter = element::iterator(input.data() + 1); + + EXPECT_EQ(*iter, 2); // 2nd byte 1st half-nibble + EXPECT_EQ(*(iter - 3), 0); // 1st byte 2nd half-nibble + EXPECT_EQ(*(iter - 4), 1); // 1st byte 1st half-nibble + EXPECT_EQ(*(iter + 1), 1); // 2nd byte 2nd half-nibble + EXPECT_EQ(*(iter + 7), 1); // 3rd byte 4th half-nibble + EXPECT_EQ(*std::prev(iter, 1), 3); // 1st byte 4th half-nibble + EXPECT_EQ(*std::next(iter, 2), 0); // 2nd byte 3rd half-nibble +} + +TEST(ElementIteratorTest, u2_value_to_output_stream) { + constexpr auto value = static_cast(0x80); + auto iter = element::iterator(&value); + + std::stringstream s; + s << *iter; + + EXPECT_EQ(s.str(), "2"); +} + +TEST(ElementIteratorTest, read_u2_from_tensor) { + auto input = std::array{0x32, static_cast(0xa3), 0x41, 0x11}; + auto t = ov::Tensor(element::u2, Shape{4, 4}, input.data()); + auto iter = element::iterator(static_cast(t.data(element::u2))); + + EXPECT_THAT(std::vector(iter, iter + t.get_size()), + 
ElementsAre(0, 3, 0, 2, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 0, 1)); +} + +// --- u3 +TEST(ElementIteratorTest, write_u3_data) { + constexpr auto elements_count = 8; + auto input = std::array{2, 3, 0, 1, 4, 5, 6, 7}; + auto output = std::array{}; + auto iter = element::iterator(output.data()); + + std::copy(input.begin(), input.end(), iter); + + EXPECT_THAT(output, ElementsAre(0b10110001, 0b00011011, 0b00001111)); +} + +TEST(ElementIteratorTest, read_non_const_u3_data) { + constexpr auto elements_count = 16; + auto input = std::array{0x7a, 0x6f, 0x55, static_cast(0xb1), 0x1b, 0x0f}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(1, 7, 2, 6, 1, 6, 3, 7, 2, 3, 0, 1, 4, 5, 6, 7)); +} + +TEST(ElementIteratorTest, read_const_u3_data) { + constexpr auto elements_count = 8; + constexpr auto input = std::array{static_cast(0b10110001), 0b00011011, 0b00001111}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), ElementsAre(2, 3, 0, 1, 4, 5, 6, 7)); +} + +TEST(ElementIteratorTest, read_u3_data_iterator_with_offset) { + // Has values {1, 7, 2, 6, 1, 6, 3, 7, [2], 3, 0, 1, 4, 5, 6, 7} + auto input = std::array{0x7a, 0x6f, 0x55, static_cast(0xb1), 0x1b, 0x0f}; + auto iter = element::iterator(input.data() + 3); + + EXPECT_EQ(*iter, 2); + EXPECT_EQ(*(iter - 3), 6); + EXPECT_EQ(*(iter - 4), 1); + EXPECT_EQ(*(iter - 5), 6); + EXPECT_EQ(*(iter + 1), 3); + EXPECT_EQ(*(iter + 5), 5); + EXPECT_EQ(*(iter + 7), 7); + EXPECT_EQ(*std::prev(iter, 1), 7); + EXPECT_EQ(*std::next(iter, 2), 0); +} + +TEST(ElementIteratorTest, read_u3_from_tensor) { + // Has values {1, 7, 2, 6, 1, 6, 3, 7, [2], 3, 0, 1, 4, 5, 6, 7} + auto input = std::array{0x7a, 0x6f, 0x55, static_cast(0xb1), 0x1b, 0x0f}; + auto t = ov::Tensor(element::u3, Shape{4, 2, 2}, input.data()); + auto iter = element::iterator(static_cast(t.data(element::u3))); + + EXPECT_THAT(std::vector(iter, iter + t.get_size()), + 
ElementsAre(1, 7, 2, 6, 1, 6, 3, 7, 2, 3, 0, 1, 4, 5, 6, 7)); +} + +// --- u4 +// nibbles are counted as [n1, n0] +TEST(ElementIteratorTest, write_u4_data) { + constexpr auto elements_count = 16; + auto input = std::array{1, 2, 3, 10, 12, 15, 14, 4, 7, 9, 11, 13, 8, 0, 5, 6}; + auto output = std::array{}; + auto iter = element::iterator(output.data()); + + std::copy(input.begin(), input.end(), iter); + + EXPECT_THAT(output, ElementsAre(0x21, 0xa3, 0xfc, 0x4e, 0x97, 0xdb, 0x08, 0x65)); +} + +TEST(ElementIteratorTest, read_const_u4_data) { + constexpr auto elements_count = 16; + constexpr auto byte_size = get_buffer_size(4, elements_count); + constexpr auto input = std::array{0x12, + 0x3a, + static_cast(0xcf), + static_cast(0xe4), + 0x79, + static_cast(0xbd), + 0x08, + 0x56}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(2, 1, 10, 3, 15, 12, 4, 14, 9, 7, 13, 11, 8, 0, 6, 5)); +} + +TEST(ElementIteratorTest, read_non_const_u4_data) { + constexpr auto elements_count = 16; + constexpr auto byte_size = get_buffer_size(4, elements_count); + auto input = std::array{0x12, + 0x3a, + static_cast(0xcf), + static_cast(0xe4), + 0x79, + static_cast(0xbd), + 0x08, + 0x56}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(2, 1, 10, 3, 15, 12, 4, 14, 9, 7, 13, 11, 8, 0, 6, 5)); +} + +TEST(ElementIteratorTest, read_u4_data_increment_decrement_iterator) { + auto input = std::array{0x12, 0x3a}; + auto iter = element::iterator(input.data() + 1); + + EXPECT_EQ(*iter--, 10); // 2nd byte 1st nibble + EXPECT_EQ(*iter++, 1); // 1st byte 2nd nibble + EXPECT_EQ(*++iter, 3); // 2nd byte 2nd nibble + EXPECT_EQ(*iter--, 3); // 2nd byte 2nd nibble + EXPECT_EQ(*--iter, 1); // 1st byte 2nd nibble +} + +TEST(ElementIteratorTest, read_u4_data_iterator_with_offset) { + auto input = std::array{0x42, 0x3a, 0x61, 0x79, 0x5b}; + auto iter = 
element::iterator(input.data() + 1); + + EXPECT_EQ(*iter, 10); // 2nd byte 1st nibble + EXPECT_EQ(*(iter - 2), 2); // 1st byte 1st nibble + EXPECT_EQ(*(iter + 7), 5); // 5th byte 2nd nibble + EXPECT_EQ(*(iter + 6), 11); // 2nd byte 1st nibble + EXPECT_EQ(*(iter - 1), 4); // 1st byte 2nd nibble + EXPECT_EQ(*std::prev(iter, 1), 4); // 1st byte 2nd nibble + EXPECT_EQ(*std::next(iter, 2), 1); // 3rd byte 1st nibble +} + +TEST(ElementIteratorTest, read_u4_from_tensor) { + auto input = std::array{0x42, 0x3a, 0x61, 0x79, 0x5b}; + auto t = ov::Tensor(element::u4, Shape{5, 2}, input.data()); + auto iter = element::iterator(static_cast(t.data(element::u4))); + + EXPECT_THAT(std::vector(iter, iter + t.get_size()), ElementsAre(2, 4, 10, 3, 1, 6, 9, 7, 11, 5)); +} + +// --- i4 +// nibbles are counted as [n1, n0] +TEST(ElementIteratorTest, write_i4_data) { + constexpr auto elements_count = 16; + auto input = std::array{1, 2, 3, -6, -4, -1, -2, 4, 7, -7, -5, -3, -8, 0, 5, 6}; + auto output = std::array{}; + auto iter = element::iterator(output.data()); + + std::copy(input.begin(), input.end(), iter); + + EXPECT_THAT(output, ElementsAre(0x21, 0xa3, 0xfc, 0x4e, 0x97, 0xdb, 0x08, 0x65)); +} + +TEST(ElementIteratorTest, read_const_i4_data) { + constexpr auto elements_count = 16; + constexpr auto byte_size = get_buffer_size(4, elements_count); + constexpr auto input = std::array{0x12, + 0x3a, + static_cast(0xcf), + static_cast(0xe4), + 0x79, + static_cast(0xbd), + 0x08, + 0x56}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(2, 1, -6, 3, -1, -4, 4, -2, -7, 7, -3, -5, -8, 0, 6, 5)); +} + +TEST(ElementIteratorTest, read_non_const_i4_data) { + constexpr auto elements_count = 16; + constexpr auto byte_size = get_buffer_size(4, elements_count); + auto input = std::array{0x12, + 0x3a, + static_cast(0xcf), + static_cast(0xe4), + 0x79, + static_cast(0xbd), + 0x08, + 0x56}; + auto iter = element::iterator(input.data()); + 
+ EXPECT_THAT(std::vector(iter, iter + elements_count), + ElementsAre(2, 1, -6, 3, -1, -4, 4, -2, -7, 7, -3, -5, -8, 0, 6, 5)); +} + +TEST(ElementIteratorTest, read_i4_data_increment_decrement_iterator) { + auto input = std::array{0x12, 0x3a}; + auto iter = element::iterator(input.data() + 1); + + EXPECT_EQ(*iter--, -6); // 2nd byte 1st nibble + EXPECT_EQ(*iter++, 1); // 1st byte 2nd nibble + EXPECT_EQ(*++iter, 3); // 2nd byte 2nd nibble + EXPECT_EQ(*iter--, 3); // 2nd byte 2nd nibble + EXPECT_EQ(*--iter, 1); // 1st byte 2nd nibble +} + +TEST(ElementIteratorTest, read_i4_data_iterator_with_offset) { + auto input = std::array{0x42, 0x3a, 0x61, 0x79, 0x5b}; + auto iter = element::iterator(input.data() + 1); + + EXPECT_EQ(*iter, -6); // 2nd byte 1st nibble + EXPECT_EQ(*(iter - 2), 2); // 1st byte 1st nibble + EXPECT_EQ(*(iter + 7), 5); // 5th byte 2nd nibble + EXPECT_EQ(*(iter + 6), -5); // 2nd byte 1st nibble + EXPECT_EQ(*(iter - 1), 4); // 1st byte 2nd nibble + EXPECT_EQ(*std::prev(iter, 1), 4); // 1st byte 2nd nibble + EXPECT_EQ(*std::next(iter, 2), 1); // 3rd byte 1st nibble +} + +TEST(ElementIteratorTest, i4_value_to_output_stream) { + constexpr auto value = static_cast(0x19); + auto iter = element::iterator(&value); + + std::stringstream s; + s << *iter; + + EXPECT_EQ(s.str(), "-7"); +} + +TEST(ElementIteratorTest, read_i4_from_tensor) { + auto input = std::array{0x42, 0x3a, 0x61, 0x79, 0x5b}; + auto t = ov::Tensor(element::i4, Shape{10, 1, 1}, input.data()); + auto iter = element::iterator(static_cast(t.data(element::i4))); + + EXPECT_THAT(std::vector(iter, iter + t.get_size()), ElementsAre(2, 4, -6, 3, 1, 6, -7, 7, -5, 5)); +} + +// --- u6 +TEST(ElementIteratorTest, write_u6_data) { + constexpr auto elements_count = 8; + auto input = std::array{2, 1, 0, 3, 18, 49, 35, 16}; + auto output = std::array{}; + auto iter = element::iterator(output.data()); + + std::copy(input.begin(), input.end(), iter); + + EXPECT_THAT(output, ElementsAre(0x21, 0x03, 0x00, 0x21, 
0x30, 0x79)); +} + +TEST(ElementIteratorTest, read_non_const_u6_data) { + constexpr auto elements_count = 8; + auto input = std::array{0x21, 0x03, 0x00, 0x21, 0x30, 0x79}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), ElementsAre(2, 1, 0, 3, 18, 49, 35, 16)); +} + +TEST(ElementIteratorTest, read_const_u6_data) { + constexpr auto elements_count = 8; + constexpr auto input = std::array{0x21, 0x03, 0x00, 0x21, 0x30, 0x79}; + auto iter = element::iterator(input.data()); + + EXPECT_THAT(std::vector(iter, iter + elements_count), ElementsAre(2, 1, 0, 3, 18, 49, 35, 16)); +} + +TEST(ElementIteratorTest, read_u6_data_increment_decrement_iterator) { + // Has values {1, 2, 3, 10, [3], 8, 7, 2} + auto input = std::array{0x12, 0x3a, 0x00, 0x38, 0x72, 0x00}; + auto iter = element::iterator(input.data() + 3); + + EXPECT_EQ(*iter--, 3); + EXPECT_EQ(*iter++, 10); + EXPECT_EQ(*++iter, 8); + EXPECT_EQ(*iter--, 8); + EXPECT_EQ(*--iter, 10); +} + +TEST(ElementIteratorTest, read_u6_data_iterator_with_offset) { + // Has values {1, 2, 3, 10, [3], 8, 7, 2, 1, 42, 4, 20} + auto input = std::array{0x12, 0x3a, 0x00, 0x38, 0x72, 0x00, 0x1a, 0x44, 0x21}; + auto iter = element::iterator(input.data() + 3); + + EXPECT_EQ(*iter, 3); + EXPECT_EQ(*(iter - 3), 2); + EXPECT_EQ(*(iter - 4), 1); + EXPECT_EQ(*(iter - 2), 3); + EXPECT_EQ(*(iter + 1), 8); + EXPECT_EQ(*(iter + 5), 42); + EXPECT_EQ(*(iter + 7), 20); + EXPECT_EQ(*std::prev(iter, 1), 10); + EXPECT_EQ(*std::next(iter, 2), 7); +} + +TEST(ElementIteratorTest, u6_value_to_output_stream) { + auto input = std::array{0x12, 0x3a, 0x00}; + auto iter = element::iterator(input.data()); + + std::stringstream s; + s << *iter; + + EXPECT_EQ(s.str(), "1"); +} + +TEST(ElementIteratorTest, read_u6_from_tensor) { + // Has values {1, 2, 3, 10, 3, 8, 7, 2, 1, 42, 4, 20} + auto input = std::array{0x12, 0x3a, 0x00, 0x38, 0x72, 0x00, 0x1a, 0x44, 0x21}; + auto t = ov::Tensor(element::u6, Shape{4, 1, 3}, 
input.data()); + auto iter = element::iterator(static_cast(t.data(element::u6))); + + EXPECT_THAT(std::vector(iter, iter + t.get_size()), ElementsAre(1, 2, 3, 10, 3, 8, 7, 2, 1, 42, 4, 20)); +} + +} // namespace test +} // namespace ov diff --git a/src/core/tests/pattern.cpp b/src/core/tests/pattern.cpp index fdd9a783f91741..7d794aa4a69350 100644 --- a/src/core/tests/pattern.cpp +++ b/src/core/tests/pattern.cpp @@ -18,7 +18,9 @@ #include "openvino/op/add.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/constant.hpp" +#include "openvino/op/cos.hpp" #include "openvino/op/divide.hpp" +#include "openvino/op/exp.hpp" #include "openvino/op/multiply.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/reduce_sum.hpp" @@ -508,6 +510,68 @@ TEST(pattern, matching_optional) { std::make_shared(c))); } +TEST(pattern, optional_full_match) { + Shape shape{}; + auto model_input1 = std::make_shared(element::i32, shape); + auto model_input2 = std::make_shared(element::i32, shape); + auto model_add = std::make_shared(model_input1->output(0), model_input2->output(0)); + auto model_relu = std::make_shared(model_add->output(0)); + + auto pattern_add = ov::pass::pattern::optional(); + auto pattern_relu = std::make_shared(pattern_add->output(0)); + + TestMatcher tm; + + ASSERT_TRUE(tm.match(pattern_relu, model_relu)); +} + +TEST(pattern, optional_half_match) { + Shape shape{}; + auto model_input1 = std::make_shared(element::i32, shape); + auto model_input2 = std::make_shared(element::i32, shape); + auto model_add = std::make_shared(model_input1->output(0), model_input2->output(0)); + auto model_relu = std::make_shared(model_add->output(0)); + + auto pattern_relu = ov::pass::pattern::optional(); + auto pattern_relu1 = std::make_shared(pattern_relu->output(0)); + + TestMatcher tm; + + ASSERT_TRUE(tm.match(pattern_relu1, model_relu)); +} + +TEST(pattern, optional_testing) { + Shape shape{}; + auto model_input1 = std::make_shared(element::i32, shape); + auto 
model_input2 = std::make_shared(element::i32, shape); + auto model_add = std::make_shared(model_input1->output(0), model_input2->output(0)); + auto model_relu = std::make_shared(model_add->output(0)); + auto model_abs = std::make_shared(model_add->output(0)); + + TestMatcher tm; + + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_add), model_add)); + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_add), model_add)); + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_add), model_add)); + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_add), model_add)); + + ASSERT_TRUE( + tm.match(ov::pass::pattern::optional(model_abs), std::make_shared(model_abs))); + ASSERT_FALSE( + tm.match(ov::pass::pattern::optional(model_abs), std::make_shared(model_abs))); + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_abs), + std::make_shared(model_abs))); + + ASSERT_FALSE(tm.match(ov::pass::pattern::optional(model_add), model_abs)); + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_add), model_abs)); + + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_relu), + std::make_shared(std::make_shared(model_add)))); + + ASSERT_TRUE(tm.match(ov::pass::pattern::optional(model_relu), + std::make_shared(std::make_shared(model_add)))); +} + TEST(pattern, mean) { // construct mean TestMatcher n; diff --git a/src/core/tests/preprocess.cpp b/src/core/tests/preprocess.cpp index cb76dc59f3c1f8..d7357ad598f925 100644 --- a/src/core/tests/preprocess.cpp +++ b/src/core/tests/preprocess.cpp @@ -914,6 +914,31 @@ TEST(pre_post_process, mean_vector_dynamic_channels_shape) { EXPECT_EQ(f->get_output_element_type(0), element::f32); } +TEST(pre_post_process, pad_vector_constant_layout) { + auto f = create_simple_function(element::f32, Shape{1, 3, 200, 200}); + auto p = PrePostProcessor(f); + + p.input().tensor().set_shape({1, 3, 199, 199}); + p.input().preprocess().pad({0, 0, 0, 0}, {0, 0, 1, 1}, 0, PaddingMode::CONSTANT); + EXPECT_NO_THROW(p.build()); +} + 
+TEST(pre_post_process, pad_vector_out_of_range) { + auto f = create_simple_function(element::f32, Shape{1, 3, 5, 5}); + auto p = PrePostProcessor(f); + + ASSERT_THROW(p.input().preprocess().pad({0, 0, -2, 0}, {0, 0, -4, 1}, 0, PaddingMode::CONSTANT); + p.build(), ov::AssertFailure); +} + +TEST(pre_post_process, pad_vector_dim_mismatch) { + auto f = create_simple_function(element::f32, Shape{1, 3, 5, 5}); + auto p = PrePostProcessor(f); + + ASSERT_THROW(p.input().preprocess().pad({0, 0, 2, 0, 1}, {0, 0, 4, 1, 1}, 0, PaddingMode::CONSTANT); + p.build(), ov::AssertFailure); +} + TEST(pre_post_process, resize_no_model_layout) { auto f = create_simple_function(element::f32, Shape{1, 3, 224, 224}); auto p = PrePostProcessor(f); diff --git a/src/frontends/paddle/src/op/round.cpp b/src/frontends/paddle/src/op/round.cpp new file mode 100644 index 00000000000000..f981fa1e841843 --- /dev/null +++ b/src/frontends/paddle/src/op/round.cpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "default_opset.hpp" +#include "openvino/frontend/paddle/node_context.hpp" + +namespace ov { +namespace frontend { +namespace paddle { +namespace op { +NamedOutputs round(const NodeContext& node) { + return node.default_single_output_mapping( + {std::make_shared(node.get_input("X"), + ov::op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO)}, + {"Out"}); +} + +} // namespace op +} // namespace paddle +} // namespace frontend +} // namespace ov diff --git a/src/frontends/paddle/src/op/set_value.cpp b/src/frontends/paddle/src/op/set_value.cpp index 63260b60da45c4..94c851479595ed 100644 --- a/src/frontends/paddle/src/op/set_value.cpp +++ b/src/frontends/paddle/src/op/set_value.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "default_opset.hpp" #include "openvino/frontend/paddle/node_context.hpp" @@ -20,11 +22,11 @@ std::shared_ptr handle_minus_index(const std::vector& node, const return new_node; } -// 
std::shared_ptr handle_maximum_index(Output& node, const Output& update_node) { -// const auto maximum_node = default_opset::Constant::create(element::i64, {1}, {INT_MAX}); -// const auto mask = std::make_shared(node, maximum_node); -// return std::make_shared(mask, update_node, node); -// } +std::shared_ptr handle_maximum_index(Output& node, const Output& update_node) { + const auto maximum_node = default_opset::Constant::create(element::i64, {1}, {std::numeric_limits::max()}); + const auto mask = std::make_shared(node, maximum_node); + return std::make_shared(mask, update_node, node); +} bool is_contain_minus(const std::vector vec) { for (int64_t i : vec) { @@ -52,14 +54,14 @@ NamedOutputs set_value(const NodeContext& node) { // Given: // input_data: shape(5, 6, 7, 8, 9) // update_value: shape(1, 6, 3, 3) - // operation: input_data[:, :, 2: 7: 2, -4: -1] = update_value + // operation: input_data[:, :, 2: 7: 2, -4: -1, :] = update_value // axes = [2, 3] // starts = [2, -4] // ends = [7, -1] // steps = [2, 1] // Our process is: // 1. Get axes [2, 3], get shape of input [5, 6, 7, 8, 9], select dimension from shape by axes: [7, 8]. - // 2. Get starts [2, -4] and ends [3, -1]. Process minus starts and ends. starts: [2, 4], ends: [7, 7]. + // 2. Get starts [2, -4] and ends [7, -1]. Process minus starts and ends. starts: [2, 4], ends: [7, 7]. // 3. Calculate starts_node, ends_node and steps_node // 1. Create `starts node` filled with 0. Update `starts` to `starts_node` according to axes. 
// starts_node[axes[i]] = starts[i] for i in axes.size @@ -92,25 +94,41 @@ NamedOutputs set_value(const NodeContext& node) { const auto slice_shape = default_opset::Constant::create(ov::element::i64, {1, 1}, {-1}); // get positive starts ends and steps - if (node.has_input("StartsTensorList") && node.has_input("StepsTensorList") && node.has_input("EndsTensorList")) { + if (node.has_input("StartsTensorList")) { starts = handle_minus_index(node.get_ng_inputs("StartsTensorList"), spec_dim_node); - ends = handle_minus_index(node.get_ng_inputs("EndsTensorList"), spec_dim_node); - steps = std::make_shared(node.get_ng_inputs("StepsTensorList"), 0); - } else if (node.has_attribute("starts") && node.has_attribute("steps") && node.has_attribute("ends")) { - const auto start_vec = node.get_attribute>("starts"); - const auto ends_vec = node.get_attribute>("ends"); - const auto step_vec = node.get_attribute>("steps"); - if (is_contain_minus(start_vec) || is_contain_minus(ends_vec) || is_contain_minus(step_vec)) { - PADDLE_OP_CHECK(node, (false), "Currently not support minus start, ends and steps!"); + } else if (node.has_attribute("starts")) { + auto start_vec = node.get_attribute>("starts"); + if (is_contain_minus(start_vec)) { + PADDLE_OP_CHECK(node, (false), "Currently not support minus start!"); } starts = handle_minus_index(start_vec, spec_dim_node); + } else + PADDLE_OP_CHECK(node, (false), "Invalid arguments!"); + + if (node.has_input("EndsTensorList")) { + ends = handle_minus_index(node.get_ng_inputs("EndsTensorList"), spec_dim_node); + } else if (node.has_attribute("ends")) { + auto ends_vec = node.get_attribute>("ends"); + if (is_contain_minus(ends_vec)) { + PADDLE_OP_CHECK(node, (false), "Currently not support minus ends!"); + } ends = handle_minus_index(ends_vec, spec_dim_node); - steps = default_opset::Constant::create(element::i64, {step_vec.size()}, step_vec); + } else + PADDLE_OP_CHECK(node, (false), "Invalid arguments!"); + + if 
(node.has_input("StepsTensorList")) { + steps = handle_minus_index(node.get_ng_inputs("StepsTensorList"), spec_dim_node); + } else if (node.has_attribute("steps")) { + auto step_vec = node.get_attribute>("steps"); + if (is_contain_minus(step_vec)) { + PADDLE_OP_CHECK(node, (false), "Currently not support minus steps!"); + } + steps = handle_minus_index(step_vec, spec_dim_node); } else PADDLE_OP_CHECK(node, (false), "Invalid arguments!"); // for unsepcified end: x[::2], end will be 2147483647 - // ends = handle_maximum_index(ends, spec_dim_node); + ends = handle_maximum_index(ends, spec_dim_node); // 3.1 get starts node starts_node = @@ -142,7 +160,12 @@ NamedOutputs set_value(const NodeContext& node) { std::make_shared(input_shape, axes_node, value_shape_update_node); // 4.5 broadcast - value_node = std::make_shared(value_node, value_target_shape); + auto value_shape = std::make_shared(value_node); + auto value_rank = std::make_shared(value_shape); + auto value_rank_scalar = std::make_shared(value_rank); + Output broadcast_axes = + std::make_shared(zero_node, value_rank_scalar, one_node, element::i64); + value_node = std::make_shared(value_node, value_target_shape, broadcast_axes); // get total number of elements const auto numel_node = std::make_shared(input_shape, zero_node); diff --git a/src/frontends/paddle/src/op_table.cpp b/src/frontends/paddle/src/op_table.cpp index c22441c72d85cd..3030f140996de2 100644 --- a/src/frontends/paddle/src/op_table.cpp +++ b/src/frontends/paddle/src/op_table.cpp @@ -98,6 +98,7 @@ OP_CONVERTER(reshape2); OP_CONVERTER(reverse); OP_CONVERTER(rnn); OP_CONVERTER(roi_align); +OP_CONVERTER(round); OP_CONVERTER(scale); OP_CONVERTER(select_input); OP_CONVERTER(set_value); @@ -233,6 +234,7 @@ std::map get_supported_ops() { {"reverse", op::reverse}, {"rnn", op::rnn}, {"roi_align", op::roi_align}, + {"round", op::round}, {"scale", op::scale}, {"select_input", op::select_input}, {"set_value", op::set_value}, diff --git 
a/src/frontends/paddle/tests/op_fuzzy.cpp b/src/frontends/paddle/tests/op_fuzzy.cpp index d1d9d913004a3a..ecc7fe619195b6 100644 --- a/src/frontends/paddle/tests/op_fuzzy.cpp +++ b/src/frontends/paddle/tests/op_fuzzy.cpp @@ -490,6 +490,7 @@ static const std::vector models{ std::string("rnn_lstm_layer_2_bidirectional_seq_len_4/rnn_lstm_layer_2_bidirectional_seq_len_4.pdmodel"), std::string("roi_align_test"), std::string("roi_align_test2"), + std::string("round"), std::string("scale_bias_after_float32"), std::string("scale_bias_after_int32"), std::string("scale_bias_after_int64"), @@ -505,6 +506,7 @@ static const std::vector models{ std::string("set_value5"), // std::string("set_value6"), // std::string("set_value7"), + // std::string("set_value8"), // std::string("set_value_dynamic1"), std::string("set_value_dynamic2"), std::string("shape"), diff --git a/src/frontends/paddle/tests/test_models/gen_scripts/generate_round.py b/src/frontends/paddle/tests/test_models/gen_scripts/generate_round.py new file mode 100644 index 00000000000000..18d93ded10d7bf --- /dev/null +++ b/src/frontends/paddle/tests/test_models/gen_scripts/generate_round.py @@ -0,0 +1,40 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# +# round paddle model generator +# +import numpy as np +from save_model import saveModel +import paddle +import sys + +data_type = 'float32' + +def paddle_round(name:str, x): + paddle.enable_static() + + with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): + data = paddle.static.data(name='x', shape=x.shape, dtype = data_type) + out = paddle.round(data) + + cpu = paddle.static.cpu_places(1) + exe = paddle.static.Executor(cpu[0]) + # startup program will call initializer to initialize the parameters. 
+ exe.run(paddle.static.default_startup_program()) + + outs = exe.run( + feed={'x': x}, + fetch_list=[out]) + + saveModel(name, exe, feedkeys=['x'], fetchlist=[out], inputs=[x], outputs=[outs[0]], target_dir=sys.argv[1]) + + return outs[0] + +def main(): + x = np.random.uniform(-1000,1000, (8, 24, 32)).astype(data_type) + + paddle_round("round", x) + +if __name__ == "__main__": + main() diff --git a/src/frontends/paddle/tests/test_models/gen_scripts/generate_set_value.py b/src/frontends/paddle/tests/test_models/gen_scripts/generate_set_value.py index 4a9be9b3017e78..3230b09dffc3d3 100644 --- a/src/frontends/paddle/tests/test_models/gen_scripts/generate_set_value.py +++ b/src/frontends/paddle/tests/test_models/gen_scripts/generate_set_value.py @@ -10,6 +10,7 @@ import paddle from save_model import saveModel +maxint32 = np.iinfo(np.int32).max def concat(data): data = [np.expand_dims(d, 0) for d in data] @@ -141,6 +142,21 @@ def set_value5(x, value, *slice): # paddle_set_value("set_value7", data, value, set_value_step1, dtype) + shape = (7, 9) + dtype = "int32" + data = np.random.randint(0, 5, shape).astype(dtype) + value = np.random.randint(-100, -1, (3, 1)).astype(dtype) + + starts = generate_data([4], np.int64) + ends = generate_data([maxint32], np.int64) + steps = generate_data([1], np.int64) + + def set_value8(x, value, *slice): + x[build_slice(*slice)] = value + return x + + paddle_set_value("set_value8", data, value, set_value8, dtype, starts, ends, steps) + # shape = (10, 5) # dtype = "float32" # data = np.random.randint(0, 5, shape).astype(dtype) @@ -167,4 +183,4 @@ def set_value7(x, value, *slice): paddle_set_value("set_value_dynamic2", data, value, set_value7, dtype, starts, ends, steps, is_dynamic=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index e4315fc5c4edff..f4b06fafa06283 100644 --- 
a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -57,7 +57,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | ApplyProximalGradientDescent | NO | | | ApplyRMSProp | NO | | | ApproxTopK | NO | | -| ApproximateEqual | NO | | +| ApproximateEqual | YES | | | ArgMax | YES | | | ArgMin | YES | | | AsString | NO | | diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index e3d8db8f0512ab..fb1597c926e6c8 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -422,6 +422,7 @@ const std::map get_supported_ops() { {"AssignVariableOp", CreatorFunction(translate_assignvariable_op)}, {"AssignAddVariableOp", CreatorFunction(translate_add_variable_op)}, {"AssignSubVariableOp", CreatorFunction(translate_sub_variable_op)}, + {"ApproximateEqual", CreatorFunction(translate_approximate_equal_op)}, {"IsVariableInitialized", CreatorFunction(translate_varisinitialized_op)}, {"MergeV2Checkpoints", CreatorFunction(translate_identity_op)}, {"ReadVariableOp", CreatorFunction(translate_readvariable_op)}, diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 905d437ec07f6e..6d4e4a971c2f98 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -33,6 +33,7 @@ OP_T_CONVERTER(translate_binary_op); OP_T_CONVERTER(translate_direct_reduce_op); OP_CONVERTER(translate_addv2_op); OP_CONVERTER(translate_add_n_op); +OP_CONVERTER(translate_approximate_equal_op); OP_CONVERTER(translate_adjust_contrast_op); OP_CONVERTER(translate_arg_max_op); OP_CONVERTER(translate_arg_min_op); diff --git a/src/frontends/tensorflow_common/src/op/approximate_equal_op.cpp b/src/frontends/tensorflow_common/src/op/approximate_equal_op.cpp new file mode 100644 index 
00000000000000..7a2cb0f803b392 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/approximate_equal_op.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/abs.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/less.hpp" +#include "openvino/op/subtract.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_approximate_equal_op(const NodeContext& node) { + default_op_checks(node, 2, {"ApproximateEqual"}); + auto x = node.get_input(0); + auto y = node.get_input(1); + auto tolerance_value = node.get_attribute("tolerance", 1e-5f); + auto tolerance = create_same_type_const_scalar(x, tolerance_value); + // Implement the logic for ApproximateEqual + auto difference = make_shared(x, y); + auto absolute = make_shared(difference); + auto is_less = make_shared(absolute, tolerance); + + // Create and return the corresponding OpenVINO operation + set_node_name(node.get_name(), is_less); + return {is_less}; +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto/src/auto_compiled_model.cpp b/src/plugins/auto/src/auto_compiled_model.cpp index 8c0448e3d420d1..90ce91797307ff 100644 --- a/src/plugins/auto/src/auto_compiled_model.cpp +++ b/src/plugins/auto/src/auto_compiled_model.cpp @@ -47,7 +47,8 @@ ov::Any AutoCompiledModel::get_property(const std::string& name) const { ov::device::priorities, ov::device::properties, ov::hint::model_priority, - ov::loaded_from_cache}; + ov::loaded_from_cache, + ov::enable_profiling}; return ro_properties; }; const auto& default_rw_properties = []() { @@ -63,6 +64,8 @@ ov::Any AutoCompiledModel::get_property(const std::string& name) const { supported_properties.insert(supported_properties.end(), ro_properties.begin(), 
ro_properties.end()); supported_properties.insert(supported_properties.end(), rw_properties.begin(), rw_properties.end()); return decltype(ov::supported_properties)::value_type(supported_properties); + } else if (name == ov::enable_profiling) { + return m_context->m_need_perf_counters; } else if (name == ov::hint::performance_mode) { return m_context->m_performance_hint; } else if (name == ov::device::priorities) { diff --git a/src/plugins/auto/src/cumulative_compiled_model.cpp b/src/plugins/auto/src/cumulative_compiled_model.cpp index a822b872c978f1..f2ab6ee62820ca 100644 --- a/src/plugins/auto/src/cumulative_compiled_model.cpp +++ b/src/plugins/auto/src/cumulative_compiled_model.cpp @@ -47,7 +47,8 @@ ov::Any AutoCumuCompiledModel::get_property(const std::string& name) const { ov::device::properties, ov::hint::model_priority, ov::loaded_from_cache, - ov::intel_auto::schedule_policy}; + ov::intel_auto::schedule_policy, + ov::enable_profiling}; return ro_properties; }; const auto& default_rw_properties = []() { @@ -63,6 +64,8 @@ ov::Any AutoCumuCompiledModel::get_property(const std::string& name) const { supported_properties.insert(supported_properties.end(), ro_properties.begin(), ro_properties.end()); supported_properties.insert(supported_properties.end(), rw_properties.begin(), rw_properties.end()); return decltype(ov::supported_properties)::value_type(supported_properties); + } else if (name == ov::enable_profiling) { + return m_context->m_need_perf_counters; } else if (name == ov::hint::performance_mode) { return m_context->m_performance_hint; } else if (name == ov::intel_auto::schedule_policy) { diff --git a/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp b/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp index 5b4c714e72a962..d9e82cec60dfd2 100644 --- 
a/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp @@ -4,6 +4,7 @@ #include "behavior/compiled_model/properties.hpp" +#include "openvino/runtime/auto/properties.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/system_conf.hpp" @@ -138,4 +139,24 @@ INSTANTIATE_TEST_SUITE_P(smoke_OVClassCompiledModelGetPropertyTest, OVClassCompiledModelGetPropertyTest_MODEL_PRIORITY, ::testing::Combine(::testing::Values("AUTO:TEMPLATE"), ::testing::ValuesIn(multiModelPriorityConfigs))); + +const std::vector auto_default_properties = { + {ov::enable_profiling(false)}, + {ov::hint::model_priority(ov::hint::Priority::MEDIUM)}, + {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}}; + +INSTANTIATE_TEST_SUITE_P(smoke_Auto_Default_test, + OVClassCompiledModelPropertiesDefaultTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_AUTO), + ::testing::ValuesIn(auto_default_properties)), + OVClassCompiledModelPropertiesDefaultTests::getTestCaseName); + +const std::vector multi_default_properties = {{ov::enable_profiling(false)}}; + +INSTANTIATE_TEST_SUITE_P(smoke_Multi_Default_test, + OVClassCompiledModelPropertiesDefaultTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_TEMPLATE), + ::testing::ValuesIn(multi_default_properties)), + OVClassCompiledModelPropertiesDefaultTests::getTestCaseName); + } // namespace diff --git a/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp index e662c5e9416fad..d2a839720228a2 100644 --- a/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp +++ b/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp 
@@ -24,7 +24,8 @@ const std::vector multi_Auto_properties = { {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::intel_auto::device_bind_buffer("NO")}, {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::intel_auto::enable_startup_fallback("YES")}, {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::intel_auto::enable_startup_fallback("NO")}, -}; + {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::enable_profiling(true)}, + {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::enable_profiling(false)}}; INSTANTIATE_TEST_SUITE_P(smoke_AutoMultiBehaviorTests, OVPropertiesTests, @@ -41,7 +42,9 @@ const std::vector multi_setcore_properties = { const std::vector multi_compileModel_properties = { {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), - ov::hint::model_priority(ov::hint::Priority::MEDIUM)}}; + ov::hint::model_priority(ov::hint::Priority::MEDIUM)}, + {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::enable_profiling(true)}, + {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::enable_profiling(false)}}; INSTANTIATE_TEST_SUITE_P(smoke_MultiCompileModelBehaviorTests, OVSetPropComplieModleGetPropTests, @@ -70,7 +73,9 @@ const std::vector auto_compileModel_properties = { ov::hint::model_priority(ov::hint::Priority::MEDIUM)}, {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), - ov::hint::model_priority(ov::hint::Priority::MEDIUM)}}; + ov::hint::model_priority(ov::hint::Priority::MEDIUM)}, + {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::enable_profiling(true)}, + {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::enable_profiling(false)}}; INSTANTIATE_TEST_SUITE_P(smoke_AutoCompileModelBehaviorTests, OVSetPropComplieModleGetPropTests, diff --git a/src/plugins/intel_cpu/src/nodes/executors/type_mask.hpp 
b/src/plugins/intel_cpu/src/nodes/executors/type_mask.hpp index 366026070e0cb9..199a44f9c28a71 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/type_mask.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/type_mask.hpp @@ -81,9 +81,10 @@ struct TypeMask { CASE(f8e4m3) CASE(f8e5m2) CASE(string) + default: + return _undefined; } #undef CASE - return _undefined; } }; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index d616a55dc456ed..21a84781f178a9 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -503,6 +503,8 @@ void prepare_buffer_fusing::run(program& p) { return; if (user->is_type() || user->is_type()) return; + } + for (auto user : node.get_users()) { if (user->is_type()) { auto& reshape_node = user->as(); if (can_reshape_be_optimized(reshape_node)) diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index dd80737f908f56..50c1a02eee0016 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -375,17 +375,22 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map( if (extern_mem_ptr != nullptr) { layout sliced_layout = intern_prim->get_output_layout(internal_id.idx); auto inter_mem_ptr = intern_prim->output_memory_ptr(internal_id.idx); - if (inter_mem_ptr == nullptr) { + if (inter_mem_ptr == nullptr || shape_changed()) { // if inner body intern_prim has no output memory because it has dynamic shape, // calculate inner body intern_prim layout using concat_mem's layout. 
auto updated_sliced_layout = sliced_layout.get_partial_shape(); OPENVINO_ASSERT(updated_sliced_layout[io_prim_map.axis].is_static() || num_iterations > 0, "Not allowed dynamic dimension for axis when num_iteraiont is negative"); + + auto origin_input_layout = body_network->get_primitive(internal_id.pid)->get_node_output_layout(); auto concat_pshape = extern_prim->get_output_layout().get_partial_shape(); const auto shape_size = concat_pshape.size(); - for (size_t i = 0; i < shape_size; i++) { - if (updated_sliced_layout[i].is_dynamic()) { - updated_sliced_layout[i] = concat_pshape[i]; + if (origin_input_layout.is_dynamic()) { + auto origin_input_pshape = origin_input_layout.get_partial_shape(); + for (size_t i = 0; i < shape_size; i++) { + if (origin_input_pshape[i].is_dynamic()) { + updated_sliced_layout[i] = concat_pshape[i]; + } } } GPU_DEBUG_LOG << "output pshape for [" << intern_prim->id() << "] is changed from " diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index 46f1b560fad027..b5268a7c056b16 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -203,7 +203,8 @@ void reshape_inst::update_output_memory() { if (!can_be_optimized()) return; - if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) + if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()) && + output_memory().get_layout() == _impl_params->get_output_layout()) return; build_deps(); // reshape need deps diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp index 48d596ab3ba0e9..a4837187b29a6a 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp @@ -255,7 +255,7 @@ class check_hash_value: public ::testing::Test { const auto 
primitive_hash = primitve->hash(); const auto params_hash = prim_inst->get_impl_params()->hash(); ASSERT_EQ(primitive_hash, 4135863035456568493UL); - ASSERT_EQ(params_hash, 5990757629995899044UL); + ASSERT_EQ(params_hash, 11563701278302723583UL); } }; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index fb4e54b1980c58..26ca489c5a8115 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -11,6 +11,9 @@ #include "intel_gpu/primitives/eltwise.hpp" #include #include +#include +#include +#include #include #include #include @@ -601,3 +604,186 @@ TEST(loop_gpu, support_dynamic_tensoriterator_outer_axis) { test_loop_gpu_wo_trip_count({ 2, 1, 1, 2}, { 2, 5, 1, 2}, input_data_5_4, output_data_5_4, 1, 4); } + +static void test_loop_gpu_wo_trip_count_w_multiple_shapes(ov::PartialShape body_input_layout, + std::vector whole_layouts, + std::vector> input_data_list, + std::vector expected_output_data, + size_t axis, + size_t exit_value, + bool is_caching_test = false) { + auto& engine = get_test_engine(); + + auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx }; + + ov::PartialShape sliced_input_shape = body_input_layout; + sliced_input_shape[axis] = 1; + auto sliced_input_layout = cldnn::layout{ sliced_input_shape, data_types::f32, format::bfyx }; + + auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx }; + + auto e_initial_condition_mem = engine.allocate_memory(const_layout); + auto e_num_iteration_mem = engine.allocate_memory(const_layout); + auto b_exit_value_mem = engine.allocate_memory(const_layout); + auto b_index_inc_mem = engine.allocate_memory(const_layout); + + // initialize input buffers + set_values(e_initial_condition_mem, {1}); + set_values(b_exit_value_mem, {exit_value}); + set_values(b_index_inc_mem, {1}); + 
set_values(e_num_iteration_mem, {0}); + + primitive_id body_current_iteration_id = "b_index"; + primitive_id body_execution_condition_id = "b_cond_exit_value"; + + cldnn::topology body( + input_layout(body_current_iteration_id, const_layout), + input_layout("b_add_data", sliced_input_layout), + input_layout("b_mul_data", sliced_input_layout), + data("b_exit_value", b_exit_value_mem), + data("b_index_inc", b_index_inc_mem), + eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum), + reorder("b_index_cast", input_info("b_index_update"), + cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding(), true), + eltwise(body_execution_condition_id, input_info("b_index"), input_info("b_exit_value"), eltwise_mode::lt), + eltwise("b_add", input_info("b_add_data"), input_info("b_index_cast"), eltwise_mode::sum), + eltwise("b_mul", input_info("b_mul_data"), input_info("b_index_cast"), eltwise_mode::prod)); + + primitive_id trip_count_id = ""; + primitive_id actual_iteration_count_id = "actual_iteration_count"; + primitive_id initial_condition_id = "initial_condition"; + int64_t num_iterations = -1; + + std::vector input_primitive_maps { + loop::io_primitive_map("input", "b_add_data", axis), + loop::io_primitive_map("input", "b_mul_data", axis), + loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) }; + std::vector output_primitive_maps { + loop::io_primitive_map(cldnn::input_info("loop", 0), cldnn::input_info("b_add", 0), axis), + loop::io_primitive_map(cldnn::input_info("loop", 1), cldnn::input_info("b_mul", 0), axis) }; + std::vector back_edges { + loop::backedge_mapping("b_index_update", body_current_iteration_id) }; + + auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); + + auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); + std::vector 
body_input_layouts; + for (size_t i = 0; i < body_input_layout.size(); i++) { + if (body_input_layout[i].is_dynamic()) + body_input_layouts.push_back(-1); + else + body_input_layouts.push_back(body_input_layout[i].get_length()); + } + set_values(const_shape, body_input_layouts); + + cldnn::topology topology( + input_layout("input_origin", b_input_layout), + input_layout(initial_condition_id, e_initial_condition_mem->get_layout()), + mutable_data(actual_iteration_count_id, e_num_iteration_mem), + + shape_of("shape_of_input", input_info("input_origin"), data_types::i32), + reduce("reduced_shape", input_info("shape_of_input"), reduce_mode::prod, {0}, true), + reshape("reshape1", input_info("input_origin"), input_info("reduced_shape"), false, ov::PartialShape::dynamic(1)), + data("const", const_shape), + reshape("input", input_info("reshape1"), input_info("const"), false, ov::PartialShape::dynamic(4)), + + loop("loop", { input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input") }, body_program, + trip_count_id, initial_condition_id, actual_iteration_count_id, + input_primitive_maps, output_primitive_maps, back_edges, + num_iterations, body_current_iteration_id, body_execution_condition_id, 2), + eltwise("out_sum", input_info("loop", 0), input_info("loop", 1), eltwise_mode::sum)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + + for (size_t i = 0 ; i < whole_layouts.size(); i++) { + auto whole_layout = whole_layouts[i]; + auto input_data = input_data_list[i]; + + // initialize input buffers + set_values(e_initial_condition_mem, {1}); + set_values(b_exit_value_mem, {exit_value}); + set_values(b_index_inc_mem, {1}); + set_values(e_num_iteration_mem, {0}); + + auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx }; + 
auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y + auto expected_output_layout = whole_layout; + set_values(e_input_mem, input_data); + network->set_input_data("input_origin", e_input_mem); + + network->set_input_data(initial_condition_id, e_initial_condition_mem); + + auto outputs = network->execute(); + ASSERT_EQ(outputs.size(), 1); + + auto expected_num_iterations = (exit_value + 1); + expected_output_layout[axis] = expected_num_iterations; + auto e_output_layout = cldnn::layout{ expected_output_layout, data_types::f32, format::bfyx }; + + auto num_iter_mem = network->get_output_memory(actual_iteration_count_id); + if (num_iter_mem != nullptr) { + mem_lock num_iter_ptr{ num_iter_mem, get_test_stream() }; + ASSERT_EQ(num_iter_ptr.data()[0], expected_num_iterations); + } + + std::vector expected(input_data.size()); + if (expected_output_data.size() == 0) { + size_t unit = 1; + for (size_t k = axis; k < whole_layout.size(); k++) { + unit *= whole_layout[k].get_length(); + } + + for (size_t j = 0; j < input_data.size(); j++) { + auto val = static_cast((j % unit) / 4) + 1; + expected[j] = static_cast(input_data[j] + val) + static_cast(input_data[j] * val); + } + } else { + expected = expected_output_data; + } + + auto output_mem = outputs.begin()->second.get_memory(); + auto output_layout = output_mem->get_layout(); + ASSERT_EQ(output_layout.batch(), e_output_layout.batch()); + ASSERT_EQ(output_layout.feature(), e_output_layout.feature()); + ASSERT_EQ(output_layout.spatial(0), e_output_layout.spatial(0)); + ASSERT_EQ(output_layout.spatial(1), e_output_layout.spatial(1)); + // value check + { + mem_lock output_ptr{ output_mem, get_test_stream() }; + for (size_t i = 0, iend = output_layout.count(); i < iend; ++i) { + ASSERT_FLOAT_EQ(output_ptr[i], expected.at(i)); + } + } + } +} + +std::vector input_data_4_4{ + 1.0f, 2.0f, -15.f, 3.0f, + 4.0f, -15.f, 5.0f, 6.0f, + -15.f, 7.0f, -15.f, 0.0f, + 0.0f, -15.f, 0.5f, -0.5f, +}; + +std::vector 
input_data_2_4_4{ + 1.0f, 2.0f, -15.f, 3.0f, + 4.0f, -15.f, 5.0f, 6.0f, + -15.f, 7.0f, -15.f, 0.0f, + 0.0f, -15.f, 0.5f, -0.5f, + + 1.0f, 2.0f, -15.f, 3.0f, + 4.0f, -15.f, 5.0f, 6.0f, + -15.f, 7.0f, -15.f, 0.0f, + 0.0f, -15.f, 0.5f, -0.5f, +}; + +TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes) { + test_loop_gpu_wo_trip_count_w_multiple_shapes( + { 1, -1, 4, 4 }, + {{ 1, 1, 4, 4 }, { 1, 2, 4, 4 }}, // axis value should be iter_num = (exit_value + 1) + {input_data_4_4, input_data_2_4_4}, + std::vector(), + 2, 3); +} diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/subgraph_builders/preprocess_builders.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/subgraph_builders/preprocess_builders.hpp index d4921393b75b1b..fd4ed7528cf5eb 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/subgraph_builders/preprocess_builders.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/subgraph_builders/preprocess_builders.hpp @@ -443,6 +443,26 @@ inline std::shared_ptr crop_dynamic() { return function; } +inline std::shared_ptr pad_constant() { + using namespace ov::preprocess; + auto function = create_preprocess_1input(ov::element::f32, PartialShape{1, 3, 10, 10}); + auto p = PrePostProcessor(function); + p.input().tensor().set_shape({1, 3, 9, 5}); + p.input().preprocess().pad({0, 0, 2, 2}, {0, 0, -1, 3}, 0, PaddingMode::CONSTANT); + function = p.build(); + return function; +} + +inline std::shared_ptr pad_edge() { + using namespace ov::preprocess; + auto function = create_preprocess_1input(ov::element::f32, PartialShape{1, 3, 10, 10}); + auto p = PrePostProcessor(function); + p.input().tensor().set_shape({1, 3, 9, 5}); + p.input().preprocess().pad({0, 0, 2, 2}, {0, 0, -1, 3}, 0, PaddingMode::EDGE); + function = p.build(); + return function; +} + inline std::vector generic_preprocess_functions() { return std::vector{ preprocess_func(mean_only, "mean_only", 0.01f), @@ 
-477,7 +497,8 @@ inline std::vector generic_preprocess_functions() { preprocess_func(cvt_color_i420_to_rgb_single_plane, "cvt_color_i420_to_rgb_single_plane", 1.f), preprocess_func(cvt_color_i420_to_bgr_three_planes, "cvt_color_i420_to_bgr_three_planes", 1.f), preprocess_func(cvt_color_bgrx_to_bgr, "cvt_color_bgrx_to_bgr", 0.01f), - }; + preprocess_func(pad_constant, "pad_constant", 0.01f), + preprocess_func(pad_edge, "pad_edge", 0.01f)}; } inline std::shared_ptr cvt_color_rgb_to_bgr() { diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py b/tests/layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py new file mode 100644 index 00000000000000..c01c85591c73a4 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py @@ -0,0 +1,45 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import numpy as np +import tensorflow as tf +import pytest +from common.tf_layer_test_class import CommonTFLayerTest + +class TestApproximateEqual(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'tensor1:0' in inputs_info + assert 'tensor2:0' in inputs_info + tensor1_shape = inputs_info['tensor1:0'] + tensor2_shape = inputs_info['tensor2:0'] + inputs_data = {} + inputs_data['tensor1:0'] = 4 * rng.random(tensor1_shape).astype(np.float32) - 2 + inputs_data['tensor2:0'] = 4 * rng.random(tensor2_shape).astype(np.float32) - 2 + return inputs_data + + def create_approximate_equal_net(self, input1_shape, input2_shape): + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + tensor1 = tf.compat.v1.placeholder(tf.float32, input1_shape, 'tensor1') + tensor2 = tf.compat.v1.placeholder(tf.float32, input2_shape, 'tensor2') + approx_equal_op = tf.raw_ops.ApproximateEqual(x=tensor1, y=tensor2, tolerance=0.01) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + 
test_data_basic = [ + dict(input1_shape=[2, 3], input2_shape=[2, 3]), + dict(input1_shape=[3, 4, 5], input2_shape=[3, 4, 5]), + dict(input1_shape=[1, 2, 3, 4], input2_shape=[1, 2, 3, 4]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_approximate_equal_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_legacy_frontend): + self._test(*self.create_approximate_equal_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) \ No newline at end of file