diff --git a/.github/workflows/llama_cpp_plugin_build_and_test.yml b/.github/workflows/llama_cpp_plugin_build_and_test.yml
index ae239085d..fdce29a31 100644
--- a/.github/workflows/llama_cpp_plugin_build_and_test.yml
+++ b/.github/workflows/llama_cpp_plugin_build_and_test.yml
@@ -4,6 +4,7 @@ on:
   pull_request:
     paths:
       - 'modules/llama_cpp_plugin/**'
+      - '.github/workflows/llama_cpp_plugin_build_and_test.yml'
 
 permissions: read-all
 
@@ -46,6 +47,11 @@ jobs:
     needs: build_ubuntu20
     runs-on: ubuntu-20.04
     steps:
+      - name: Set up Python 3.9
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        with:
+          python-version: "3.9"
+
       - name: Download build artifacts
         uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
         with:
@@ -60,10 +66,10 @@
       - name: Prepare test data - convert test model files
         run: |
-          pip install -r llama.cpp/requirements/requirements-convert-hf-to-gguf.txt
+          pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
           huggingface-cli download gpt2 model.safetensors tokenizer.json tokenizer_config.json vocab.json config.json merges.txt --local-dir hf_gpt2
           mkdir -p ${{ github.workspace }}/test_data
-          python3 llama.cpp/convert-hf-to-gguf.py hf_gpt2 --outtype f32 --outfile ${{ github.workspace }}/test_data/gpt2.gguf
+          python3 llama.cpp/convert_hf_to_gguf.py hf_gpt2 --outtype f32 --outfile ${{ github.workspace }}/test_data/gpt2.gguf
 
       - name: Install libtbb2
         run: |
diff --git a/modules/custom_operations/tests/requirements.txt b/modules/custom_operations/tests/requirements.txt
index f115e7945..7574118a7 100644
--- a/modules/custom_operations/tests/requirements.txt
+++ b/modules/custom_operations/tests/requirements.txt
@@ -2,4 +2,6 @@ torch
 onnx
 tensorboard
 pytest
+# WA CVS-150813
+numpy<2.0.0
 # open3d==0.16.0 - need to update with new release
diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index b53b11363..aa706e00c 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname),
       m_num_threads(num_threads) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: loading llama model directly from GGUF... " << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... \n");
     llama_model_params mparams = llama_model_default_params();
     mparams.n_gpu_layers = 99;
     m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams);
-    OPENVINO_DEBUG << "llama_cpp_plugin: llama model loaded successfully from GGUF..." << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...\n");
     auto input_ids = std::make_shared<ov::op::v0::Parameter>(ov::element::Type_t::i64, ov::PartialShape({-1, -1}));
     auto fake_convert = std::make_shared<ov::op::v0::Convert>(input_ids->output(0), ov::element::Type_t::f32);
@@ -71,7 +71,7 @@ std::shared_ptr<const ov::Model> LlamaCppModel::get_runtime_model() const {
 }
 
 void LlamaCppModel::set_property(const ov::AnyMap& properties) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: attempted to set_property (did nothing)";
+    OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)");
 }
 
 ov::Any LlamaCppModel::get_property(const std::string& name) const {
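Note: the OPENVINO_DEBUG edits in compiled_model.cpp above (and in infer_request.cpp below) track an upstream OpenVINO change that turned the macro from an ostream-style object into a function-style, variadic one, so call sites switch from "<<" chains to a single call. The sketch below only approximates the two call shapes under that assumption; the real macro is defined in OpenVINO core, not in this patch, and the name of the enabling switch here is also an assumption:

    #include <iostream>

    // Hypothetical stand-in for the new function-style macro; the real
    // OpenVINO definition may differ.
    #ifdef ENABLE_OPENVINO_DEBUG
    #define OPENVINO_DEBUG(...) debug_print(__VA_ARGS__)
    #else
    #define OPENVINO_DEBUG(...) (void)0  // compiled out when debug is off
    #endif

    // Streams every argument to stderr via a C++17 fold expression.
    template <typename... Args>
    void debug_print(Args&&... args) {
        (std::cerr << ... << args);
    }

    int main() {
        // Old call shape: OPENVINO_DEBUG << "loading model" << std::endl;
        // New call shape, as used throughout this patch:
        OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... \n");
    }
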
diff --git a/modules/llama_cpp_plugin/src/infer_request.cpp b/modules/llama_cpp_plugin/src/infer_request.cpp
index 3eefd56d9..cf172617b 100644
--- a/modules/llama_cpp_plugin/src/infer_request.cpp
+++ b/modules/llama_cpp_plugin/src/infer_request.cpp
@@ -28,7 +28,7 @@ void allocate_tensor_impl(ov::SoPtr<ov::ITensor>& tensor,
 LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
                                                    size_t num_threads)
     : ov::ISyncInferRequest(compiled_model) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: infer request ctor called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called\n");
     llama_context_params cparams = llama_context_default_params();
     cparams.n_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
     cparams.n_ctx = 0;  // this means that the actual n_ctx will be taken equal to the model's train-time value
@@ -51,7 +51,7 @@ LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
 void LlamaCppSyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
                                                 const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: set_tensors_impl called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: set_tensors_impl called\n");
 }
 
 void llama_batch_add_reimpl(struct llama_batch& batch,
@@ -131,12 +131,12 @@ void LlamaCppSyncInferRequest::infer() {
     llama_batch_free(batch);
 };
 
 std::vector<ov::ProfilingInfo> LlamaCppSyncInferRequest::get_profiling_info() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: get_profiling_info() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: get_profiling_info() called\n");
     return std::vector<ov::ProfilingInfo>{};
 };
 
 std::vector<ov::SoPtr<ov::IVariableState>> LlamaCppSyncInferRequest::query_state() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: query_state() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: query_state() called\n");
     return {std::static_pointer_cast<ov::IVariableState>(std::make_shared<LlamaCppState>(m_llama_ctx))};
 }
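The query_state() override above is what exposes the llama.cpp context to callers: it wraps m_llama_ctx in a LlamaCppState, so the KV cache can be cleared through OpenVINO's standard variable-state API between unrelated prompts. A minimal usage sketch; the "LLAMA_CPP" device name and the model path are assumptions for illustration, taken from the plugin's intended use rather than from this patch:

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // Compile the GGUF file directly with the llama.cpp plugin
        // (device name assumed to be "LLAMA_CPP").
        ov::CompiledModel compiled = core.compile_model("test_data/gpt2.gguf", "LLAMA_CPP");
        ov::InferRequest request = compiled.create_infer_request();

        // ... feed input_ids and call request.infer() token by token ...

        // Between independent sequences, reset the variable state surfaced
        // by query_state(); in this plugin that clears the KV cache.
        auto states = request.query_state();
        for (auto& state : states) {
            state.reset();
        }
        return 0;
    }
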
R"(.*InferRequestIOBBlobTest.*canProcessDeallocatedOutputBlobAfterGetAndSetBlob.*)", // 119703 R"(.*smoke_GroupConvolutionBias(Add|AddAdd)_2D_ExplicitPaddingSymmetric2.*FP16*.*)", - // Issue: 128924 - R"(.*smoke_OVClassNetworkTestP/OVClassModelTestP.ImportModelWithNullContextThrows.*)", }; #ifdef _WIN32