Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into ci/linux-workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
akashchi committed May 16, 2024
2 parents 8400f50 + e2dd6b9 commit 87889c3
Show file tree
Hide file tree
Showing 40 changed files with 964 additions and 81 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/llama_cpp_plugin_build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DOPENVINO_EXTRA_MODULES=${{ github.workspace }}/openvino_contrib/modules/llama_cpp_plugin -DENABLE_TESTS=ON -DENABLE_FUNCTIONAL_TESTS=ON -DENABLE_PLUGINS_XML=ON -DENABLE_LLAMA_CPP_PLUGIN_REGISTRATION=ON openvino

- name: CMake - build
run: cmake --build build -j`nproc` -- llama_cpp_plugin llama_cpp_e2e_tests
run: cmake --build build -j`nproc` -- llama_cpp_plugin llama_cpp_e2e_tests llama_cpp_func_tests


- name: Upload build artifacts
Expand Down Expand Up @@ -69,6 +69,12 @@ jobs:
mkdir -p tbb
tar xvzf oneapi-tbb-2021.2.4-lin.tgz
- name: Run functional tests
run: |
chmod +x ${{ github.workspace }}/binaries/llama_cpp_func_tests
export LD_LIBRARY_PATH=${{ github.workspace }}/binaries:${{ github.workspace }}/tbb/lib
${{ github.workspace }}/binaries/llama_cpp_func_tests
- name: Run E2E tests
run: |
chmod +x ${{ github.workspace }}/binaries/llama_cpp_e2e_tests
Expand Down
2 changes: 1 addition & 1 deletion modules/java_api/src/main/cpp/input_tensor_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ JNIEXPORT void JNICALL Java_org_intel_openvino_InputTensorInfo_SetElementType(JN
{
JNI_METHOD("SetElementType",
preprocess::InputTensorInfo *info = (preprocess::InputTensorInfo *)addr;
auto t_type = element::Type_t(type);
auto t_type = get_ov_type(type);

info->set_element_type(t_type);
)
Expand Down
34 changes: 34 additions & 0 deletions modules/java_api/src/main/cpp/jni_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,37 @@ static jobject vectorToJavaList(JNIEnv *env, std::vector<std::string> items)

return nullptr;
}

// Maps a Java-side ElementType ordinal to the corresponding OpenVINO element type.
// The table must stay in sync with the ordering of the Java ElementType enum;
// an out-of-range ordinal raises std::out_of_range via std::vector::at().
static const ov::element::Type_t& get_ov_type(int type)
{
    using T = ov::element::Type_t;
    static const std::vector<T> kJavaOrdinalToOvType{
        T::undefined, T::dynamic, T::boolean,
        T::bf16,      T::f16,     T::f32,    T::f64,
        T::i4,        T::i8,      T::i16,    T::i32,   T::i64,
        T::u1,        T::u2,      T::u3,     T::u4,    T::u6,
        T::u8,        T::u16,     T::u32,    T::u64,
        T::nf4,       T::f8e4m3,  T::f8e5m2, T::string};

    return kJavaOrdinalToOvType.at(type);
}
9 changes: 9 additions & 0 deletions modules/java_api/src/main/cpp/openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,12 @@ JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_serialize(JNIEnv *env, j
serialize(*model, xml_path, bin_path);
)
}

// JNI entry point backing Openvino.save_model(): saves the model held at
// modelAddr to the given output path, with optional FP16 weight compression.
// modelAddr is assumed to hold a pointer to a std::shared_ptr<const Model>
// owned by the Java-side Model wrapper — TODO confirm against Model.java.
// JNI_METHOD supplies the shared exception-to-Java error handling.
JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_SaveModel(JNIEnv *env, jobject obj, jlong modelAddr, jstring outputModel, jboolean compressToFp16)
{
JNI_METHOD("SaveModel",
// Convert the Java path string to std::string before calling into OpenVINO.
std::string n_output_model = jstringToString(env, outputModel);
std::shared_ptr<const Model> *model = reinterpret_cast<std::shared_ptr<const Model> *>(modelAddr);
save_model(*model, n_output_model, (bool) compressToFp16);
)
}
1 change: 1 addition & 0 deletions modules/java_api/src/main/cpp/openvino_java.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern "C"
#endif
//ov
JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_serialize(JNIEnv *, jobject, jlong, jstring, jstring);
JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_SaveModel(JNIEnv *, jobject, jlong, jstring, jboolean);

// ov::Core
JNIEXPORT jlong JNICALL Java_org_intel_openvino_Core_GetCore(JNIEnv *, jobject);
Expand Down
2 changes: 1 addition & 1 deletion modules/java_api/src/main/cpp/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ JNIEXPORT jlong JNICALL Java_org_intel_openvino_Tensor_TensorCArray(JNIEnv *env,
{
JNI_METHOD(
"TensorCArray",
auto input_type = element::Type_t(type);
auto input_type = get_ov_type(type);
Shape input_shape = jintArrayToVector(env, shape);
Tensor *ov_tensor = new Tensor();

Expand Down
17 changes: 12 additions & 5 deletions modules/java_api/src/main/java/org/intel/openvino/ElementType.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,18 @@ public enum ElementType {
i32(10),
i64(11),
u1(12),
u4(13),
u8(14),
u16(15),
u32(16),
u64(17);
u2(13),
u3(14),
u4(15),
u6(16),
u8(17),
u16(18),
u32(19),
u64(20),
nf4(21),
f8e4m3(22),
f8e5m2(23),
string(24);

private int value;
private static Map<Integer, ElementType> map = new HashMap<Integer, ElementType>();
Expand Down
19 changes: 19 additions & 0 deletions modules/java_api/src/main/java/org/intel/openvino/Openvino.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,27 @@ public static void serialize(Model model, final String xmlPath, final String bin
serialize(model.nativeObj, xmlPath, binPath);
}

/**
 * Save model into IR files (xml and bin).
 *
 * <p>This method saves a model to IR applying all necessary transformations that are usually
 * applied in model conversion flow provided by mo tool. Debug information in model nodes is
 * cleaned up, and, when {@code compressToFp16} is {@code true}, floating point weights are
 * compressed to FP16.
 *
 * @param model Model which will be converted to IR representation.
 * @param outputModel Path to output model file.
 * @param compressToFp16 Whether to compress floating point weights to FP16.
 */
public static void save_model(
Model model, final String outputModel, final boolean compressToFp16) {
SaveModel(model.nativeObj, outputModel, compressToFp16);
}

/*----------------------------------- native methods -----------------------------------*/

private static native void serialize(
long modelAddr, final String xmlPath, final String binPath);

private static native void SaveModel(
long modelAddr, final String outputModel, final boolean compressToFp16);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
public enum ResizeAlgorithm {
RESIZE_LINEAR(0),
RESIZE_CUBIC(1),
RESIZE_NEAREST(2);
RESIZE_NEAREST(2),
RESIZE_BILINEAR_PILLOW(3),
RESIZE_BICUBIC_PILLOW(4);

private int value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,17 @@ public void testSerialize() throws IOException {
Openvino.serialize(model, xmlPath.getAbsolutePath(), binPath.getAbsolutePath());
assertTrue(xmlPath.exists() && binPath.exists());
}

@Test
public void testSaveModel() throws IOException {
// save_model is expected to produce an .xml plus a .bin with the same stem,
// so pre-compute both paths in a throwaway temp directory.
File tmp = Files.createTempDirectory("ovSaveModelTest").toFile();
File xmlPath = Paths.get(tmp.getAbsolutePath(), "saved_model.xml").toFile();
File binPath = Paths.get(tmp.getAbsolutePath(), "saved_model.bin").toFile();
xmlPath.deleteOnExit();
binPath.deleteOnExit();
tmp.deleteOnExit();

// Save without FP16 compression and verify both IR artifacts were written.
Openvino.save_model(model, xmlPath.getAbsolutePath(), false);
assertTrue(xmlPath.exists() && binPath.exists());
}
}
2 changes: 2 additions & 0 deletions modules/llama_cpp_plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ FetchContent_MakeAvailable(llama_cpp)
# Build the plugin test targets (shared helpers, e2e and functional suites)
# only when tests are explicitly enabled in the OpenVINO build.
if(ENABLE_TESTS)
include(CTest)
enable_testing()
add_subdirectory(tests/common)
add_subdirectory(tests/e2e)
add_subdirectory(tests/functional)
endif()

# install
Expand Down
3 changes: 2 additions & 1 deletion modules/llama_cpp_plugin/include/compiled_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class LlamaCppPlugin;
class LlamaCppState;
class LlamaCppModel : public ICompiledModel {
public:
LlamaCppModel(const std::string& gguf_fname, const std::shared_ptr<const IPlugin>& plugin);
LlamaCppModel(const std::string& gguf_fname, const std::shared_ptr<const IPlugin>& plugin, size_t num_threads = 0);
/**
* @brief Export compiled model to stream
*
Expand Down Expand Up @@ -61,6 +61,7 @@ class LlamaCppModel : public ICompiledModel {
private:
gguf_context* m_gguf_ctx = nullptr;
std::string m_gguf_fname;
size_t m_num_threads;

llama_model* m_llama_model_ptr = nullptr;
llama_context* m_llama_ctx = nullptr;
Expand Down
5 changes: 3 additions & 2 deletions modules/llama_cpp_plugin/include/infer_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ namespace llama_cpp_plugin {

class LlamaCppSyncInferRequest : public ISyncInferRequest {
public:
explicit LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model);
virtual ~LlamaCppSyncInferRequest(){};
explicit LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model, size_t num_threads);
virtual ~LlamaCppSyncInferRequest() override;

virtual void set_tensors_impl(const ov::Output<const ov::Node> port,
const std::vector<ov::SoPtr<ov::ITensor>>& tensors) override;
Expand All @@ -24,6 +24,7 @@ class LlamaCppSyncInferRequest : public ISyncInferRequest {

private:
std::shared_ptr<const LlamaCppModel> m_compiled_model_ptr;
llama_context* m_llama_ctx;
};

} // namespace llama_cpp_plugin
Expand Down
3 changes: 3 additions & 0 deletions modules/llama_cpp_plugin/include/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ class LlamaCppPlugin : public IPlugin {

virtual ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties) const override;

private:
size_t m_num_threads = 0;
};
} // namespace llama_cpp_plugin
} // namespace ov
Expand Down
9 changes: 5 additions & 4 deletions modules/llama_cpp_plugin/include/state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@ namespace llama_cpp_plugin {
class LlamaCppState : public IVariableState {
public:
LlamaCppState() = delete;
LlamaCppState(const std::shared_ptr<const LlamaCppModel>& model_ptr)
: m_model_ptr(model_ptr),
LlamaCppState(llama_context* llama_context_ptr)
: m_llama_ctx_ptr(llama_context_ptr),
IVariableState("llama_cpp_state") {}
void reset() override {
llama_kv_cache_clear(m_model_ptr->m_llama_ctx);
OPENVINO_ASSERT(m_llama_ctx_ptr != nullptr);
llama_kv_cache_clear(m_llama_ctx_ptr);
}

private:
const std::shared_ptr<const LlamaCppModel>& m_model_ptr;
llama_context* m_llama_ctx_ptr;
};
} // namespace llama_cpp_plugin
} // namespace ov
Expand Down
Loading

0 comments on commit 87889c3

Please sign in to comment.