From 772686b86db494fad0ec1b8de338c1d62fe46545 Mon Sep 17 00:00:00 2001
From: Chen Peter <peter.chen@intel.com>
Date: Mon, 26 Aug 2024 21:42:04 +0800
Subject: [PATCH 1/8] Remove OVClassModelTestP

https://github.com/openvinotoolkit/openvino/pull/26197/files

Signed-off-by: Chen Peter <peter.chen@intel.com>
---
 .../behavior/ov_plugin/core_integration.cpp               | 8 --------
 .../nvidia_plugin/tests/functional/skip_tests_config.cpp  | 2 --
 2 files changed, 10 deletions(-)
diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/core_integration.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/core_integration.cpp
index 93096ddd6..b392e7bd5 100644
--- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/core_integration.cpp
+++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/core_integration.cpp
@@ -14,14 +14,6 @@ using namespace ov::test::behavior;
 
 namespace {
 
-//
-// OV Class Common tests with <pluginName, device_name params>
-//
-
-INSTANTIATE_TEST_SUITE_P(smoke_OVClassNetworkTestP,
-                         OVClassModelTestP,
-                         ::testing::Values(ov::test::utils::DEVICE_NVIDIA));
-
 //
 // OV Class GetMetric
 //
diff --git a/modules/nvidia_plugin/tests/functional/skip_tests_config.cpp b/modules/nvidia_plugin/tests/functional/skip_tests_config.cpp
index d413f5e06..296b5c367 100644
--- a/modules/nvidia_plugin/tests/functional/skip_tests_config.cpp
+++ b/modules/nvidia_plugin/tests/functional/skip_tests_config.cpp
@@ -39,8 +39,6 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*InferRequestIOBBlobTest.*canProcessDeallocatedOutputBlobAfterGetAndSetBlob.*)",
         // 119703
         R"(.*smoke_GroupConvolutionBias(Add|AddAdd)_2D_ExplicitPaddingSymmetric2.*FP16*.*)",
-        // Issue: 128924
-        R"(.*smoke_OVClassNetworkTestP/OVClassModelTestP.ImportModelWithNullContextThrows.*)",
     };
 
 #ifdef _WIN32

From f91a08b73017165993693767d789637ab4d74b90 Mon Sep 17 00:00:00 2001
From: Chen Peter <peter.chen@intel.com>
Date: Tue, 27 Aug 2024 17:29:50 +0800
Subject: [PATCH 2/8] Update OPENVINO_DEBUG

Signed-off-by: Chen Peter <peter.chen@intel.com>
---
 modules/llama_cpp_plugin/src/compiled_model.cpp | 6 +++---
 modules/llama_cpp_plugin/src/infer_request.cpp  | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index b53b11363..7f5834ac4 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname),
       m_num_threads(num_threads) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: loading llama model directly from GGUF... " << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... \n");
     llama_model_params mparams = llama_model_default_params();
     mparams.n_gpu_layers = 99;
     m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams);
-    OPENVINO_DEBUG << "llama_cpp_plugin: llama model loaded successfully from GGUF..." << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...\n");
 
     auto input_ids = std::make_shared<ov::opset13::Parameter>(ov::element::Type_t::i64, ov::PartialShape({-1, -1}));
     auto fake_convert = std::make_shared<ov::opset13::Convert>(input_ids->output(0), ov::element::Type_t::f32);
@@ -71,7 +71,7 @@ std::shared_ptr<const ov::Model> LlamaCppModel::get_runtime_model() const {
 }
 
 void LlamaCppModel::set_property(const ov::AnyMap& properties) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: attempted to set_property (did nothing)";
+    OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)";
 }
 
 ov::Any LlamaCppModel::get_property(const std::string& name) const {
diff --git a/modules/llama_cpp_plugin/src/infer_request.cpp b/modules/llama_cpp_plugin/src/infer_request.cpp
index 3eefd56d9..cf172617b 100644
--- a/modules/llama_cpp_plugin/src/infer_request.cpp
+++ b/modules/llama_cpp_plugin/src/infer_request.cpp
@@ -28,7 +28,7 @@ void allocate_tensor_impl(ov::SoPtr<ov::ITensor>& tensor,
 LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
                                                    size_t num_threads)
     : ov::ISyncInferRequest(compiled_model) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: infer request ctor called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called\n");
     llama_context_params cparams = llama_context_default_params();
     cparams.n_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
     cparams.n_ctx = 0;  // this means that the actual n_ctx will be taken equal to the model's train-time value
@@ -51,7 +51,7 @@ LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const L
 }
 void LlamaCppSyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
                                                 const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: set_tensors_impl called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: set_tensors_impl called\n");
 }
 
 void llama_batch_add_reimpl(struct llama_batch& batch,
@@ -131,12 +131,12 @@ void LlamaCppSyncInferRequest::infer() {
     llama_batch_free(batch);
 };
 std::vector<ov::ProfilingInfo> LlamaCppSyncInferRequest::get_profiling_info() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: get_profiling_info() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: get_profiling_info() called\n");
     return std::vector<ov::ProfilingInfo>{};
 };
 
 std::vector<ov::SoPtr<ov::IVariableState>> LlamaCppSyncInferRequest::query_state() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: query_state() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: query_state() called\n");
     return {std::static_pointer_cast<ov::IVariableState>(std::make_shared<LlamaCppState>(m_llama_ctx))};
 }
 

From 0de9efb08349d1713df017100cb011c67e9e1f3f Mon Sep 17 00:00:00 2001
From: Chen Peter <peter.chen@intel.com>
Date: Wed, 28 Aug 2024 11:04:47 +0800
Subject: [PATCH 3/8] Fix missing right parenthesis in OPENVINO_DEBUG call

---
 modules/llama_cpp_plugin/src/compiled_model.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index 7f5834ac4..aa706e00c 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -71,7 +71,7 @@ std::shared_ptr<const ov::Model> LlamaCppModel::get_runtime_model() const {
 }
 
 void LlamaCppModel::set_property(const ov::AnyMap& properties) {
-    OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)";
+    OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)");
 }
 
 ov::Any LlamaCppModel::get_property(const std::string& name) const {

From b0dd44051896f0a377a25cf9bd224beb740e9f3c Mon Sep 17 00:00:00 2001
From: Chen Peter <peter.chen@intel.com>
Date: Wed, 28 Aug 2024 15:27:50 +0800
Subject: [PATCH 4/8] Update to requirements-convert_hf_to_gguf.txt

Signed-off-by: Chen Peter <peter.chen@intel.com>
---
 .github/workflows/llama_cpp_plugin_build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llama_cpp_plugin_build_and_test.yml b/.github/workflows/llama_cpp_plugin_build_and_test.yml
index ae239085d..f2b252d95 100644
--- a/.github/workflows/llama_cpp_plugin_build_and_test.yml
+++ b/.github/workflows/llama_cpp_plugin_build_and_test.yml
@@ -60,7 +60,7 @@ jobs:
 
       - name: Prepare test data - convert test model files
         run: |
-          pip install -r llama.cpp/requirements/requirements-convert-hf-to-gguf.txt
+          pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
           huggingface-cli download gpt2 model.safetensors tokenizer.json tokenizer_config.json vocab.json config.json merges.txt --local-dir hf_gpt2
           mkdir -p ${{ github.workspace }}/test_data
           python3 llama.cpp/convert-hf-to-gguf.py hf_gpt2 --outtype f32 --outfile ${{ github.workspace }}/test_data/gpt2.gguf

From d22339d2543d6b8795d24991d4bf6186c2dd397b Mon Sep 17 00:00:00 2001
From: Chen Peter <peter.chen@intel.com>
Date: Wed, 28 Aug 2024 17:04:13 +0800
Subject: [PATCH 5/8] Constrain numpy version

Signed-off-by: Chen Peter <peter.chen@intel.com>
---
 modules/custom_operations/tests/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/custom_operations/tests/requirements.txt b/modules/custom_operations/tests/requirements.txt
index f115e7945..7574118a7 100644
--- a/modules/custom_operations/tests/requirements.txt
+++ b/modules/custom_operations/tests/requirements.txt
@@ -2,4 +2,6 @@ torch
 onnx
 tensorboard
 pytest
+# WA CVS-150813
+numpy<2.0.0
 # open3d==0.16.0 - need to update with new release

From c05a99306f72425b831d4333b871cc7441a80945 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Wed, 28 Aug 2024 11:39:29 +0200
Subject: [PATCH 6/8] Use python 3.9 for llama.cpp plugin test step

---
 .github/workflows/llama_cpp_plugin_build_and_test.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/llama_cpp_plugin_build_and_test.yml b/.github/workflows/llama_cpp_plugin_build_and_test.yml
index f2b252d95..17caad6d5 100644
--- a/.github/workflows/llama_cpp_plugin_build_and_test.yml
+++ b/.github/workflows/llama_cpp_plugin_build_and_test.yml
@@ -4,6 +4,7 @@ on:
   pull_request:
     paths:
       - 'modules/llama_cpp_plugin/**'
+      - '.github/workflows/llama_cpp_plugin_build_and_test.yml'
 
 permissions: read-all
 
@@ -46,6 +47,11 @@ jobs:
     needs: build_ubuntu20
     runs-on: ubuntu-20.04
     steps:
+      - name: Set up Python 3.9
+        uses: actions/setup-python@39cd149  # v5.1.1
+        with:
+          python-version: "3.9"
+
       - name: Download build artifacts
         uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
         with:

From 4162ff9650bb7ec7779953291bbc8e94efffe077 Mon Sep 17 00:00:00 2001
From: Chen Peter <peter.chen@intel.com>
Date: Mon, 2 Sep 2024 09:14:44 +0800
Subject: [PATCH 7/8] Use full commit ID

---
 .github/workflows/llama_cpp_plugin_build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llama_cpp_plugin_build_and_test.yml b/.github/workflows/llama_cpp_plugin_build_and_test.yml
index 17caad6d5..639ac7dc8 100644
--- a/.github/workflows/llama_cpp_plugin_build_and_test.yml
+++ b/.github/workflows/llama_cpp_plugin_build_and_test.yml
@@ -48,7 +48,7 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - name: Set up Python 3.9
-        uses: actions/setup-python@39cd149  # v5.1.1
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f  # v5.1.1
         with:
           python-version: "3.9"
 

From 656ffb0b1dd934b68604ad419826ceb6ccc5fd1e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@intel.com>
Date: Mon, 2 Sep 2024 10:47:57 +0400
Subject: [PATCH 8/8] Switch workflow to llama.cpp/convert_hf_to_gguf.py

---
 .github/workflows/llama_cpp_plugin_build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llama_cpp_plugin_build_and_test.yml b/.github/workflows/llama_cpp_plugin_build_and_test.yml
index 639ac7dc8..fdce29a31 100644
--- a/.github/workflows/llama_cpp_plugin_build_and_test.yml
+++ b/.github/workflows/llama_cpp_plugin_build_and_test.yml
@@ -69,7 +69,7 @@ jobs:
           pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
           huggingface-cli download gpt2 model.safetensors tokenizer.json tokenizer_config.json vocab.json config.json merges.txt --local-dir hf_gpt2
           mkdir -p ${{ github.workspace }}/test_data
-          python3 llama.cpp/convert-hf-to-gguf.py hf_gpt2 --outtype f32 --outfile ${{ github.workspace }}/test_data/gpt2.gguf
+          python3 llama.cpp/convert_hf_to_gguf.py hf_gpt2 --outtype f32 --outfile ${{ github.workspace }}/test_data/gpt2.gguf
 
       - name: Install libtbb2
         run: |