Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into ci/linux-workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
akashchi committed May 16, 2024
2 parents 8400f50 + e2dd6b9 commit 87889c3
Show file tree
Hide file tree
Showing 40 changed files with 964 additions and 81 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/llama_cpp_plugin_build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DOPENVINO_EXTRA_MODULES=${{ github.workspace }}/openvino_contrib/modules/llama_cpp_plugin -DENABLE_TESTS=ON -DENABLE_FUNCTIONAL_TESTS=ON -DENABLE_PLUGINS_XML=ON -DENABLE_LLAMA_CPP_PLUGIN_REGISTRATION=ON openvino

- name: CMake - build
run: cmake --build build -j`nproc` -- llama_cpp_plugin llama_cpp_e2e_tests
run: cmake --build build -j`nproc` -- llama_cpp_plugin llama_cpp_e2e_tests llama_cpp_func_tests


- name: Upload build artifacts
Expand Down Expand Up @@ -69,6 +69,12 @@ jobs:
mkdir -p tbb
tar xvzf oneapi-tbb-2021.2.4-lin.tgz
- name: Run functional tests
run: |
chmod +x ${{ github.workspace }}/binaries/llama_cpp_func_tests
export LD_LIBRARY_PATH=${{ github.workspace }}/binaries:${{ github.workspace }}/tbb/lib
${{ github.workspace }}/binaries/llama_cpp_func_tests
- name: Run E2E tests
run: |
chmod +x ${{ github.workspace }}/binaries/llama_cpp_e2e_tests
Expand Down
2 changes: 1 addition & 1 deletion modules/java_api/src/main/cpp/input_tensor_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ JNIEXPORT void JNICALL Java_org_intel_openvino_InputTensorInfo_SetElementType(JN
{
JNI_METHOD("SetElementType",
preprocess::InputTensorInfo *info = (preprocess::InputTensorInfo *)addr;
auto t_type = element::Type_t(type);
auto t_type = get_ov_type(type);

info->set_element_type(t_type);
)
Expand Down
34 changes: 34 additions & 0 deletions modules/java_api/src/main/cpp/jni_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,37 @@ static jobject vectorToJavaList(JNIEnv *env, std::vector<std::string> items)

return nullptr;
}

// Maps a Java-side ElementType ordinal to the corresponding OpenVINO element type.
// The table must stay in sync with the ordering of the Java ElementType enum;
// an out-of-range ordinal raises std::out_of_range via std::vector::at().
static const ov::element::Type_t& get_ov_type(int type)
{
    using T = ov::element::Type_t;
    static const std::vector<T> kJavaOrdinalToOvType{
        T::undefined, T::dynamic, T::boolean,
        T::bf16,      T::f16,     T::f32,    T::f64,
        T::i4,        T::i8,      T::i16,    T::i32,   T::i64,
        T::u1,        T::u2,      T::u3,     T::u4,    T::u6,
        T::u8,        T::u16,     T::u32,    T::u64,
        T::nf4,       T::f8e4m3,  T::f8e5m2, T::string};

    return kJavaOrdinalToOvType.at(type);
}
9 changes: 9 additions & 0 deletions modules/java_api/src/main/cpp/openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,12 @@ JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_serialize(JNIEnv *env, j
serialize(*model, xml_path, bin_path);
)
}

// JNI entry point backing Openvino.save_model(): saves the model held at
// modelAddr to the given output path, with optional FP16 weight compression.
// modelAddr is assumed to hold a pointer to a std::shared_ptr<const Model>
// owned by the Java-side Model wrapper — TODO confirm against Model.java.
// JNI_METHOD supplies the shared exception-to-Java error handling.
JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_SaveModel(JNIEnv *env, jobject obj, jlong modelAddr, jstring outputModel, jboolean compressToFp16)
{
JNI_METHOD("SaveModel",
// Convert the Java path string to std::string before calling into OpenVINO.
std::string n_output_model = jstringToString(env, outputModel);
std::shared_ptr<const Model> *model = reinterpret_cast<std::shared_ptr<const Model> *>(modelAddr);
save_model(*model, n_output_model, (bool) compressToFp16);
)
}
1 change: 1 addition & 0 deletions modules/java_api/src/main/cpp/openvino_java.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern "C"
#endif
//ov
JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_serialize(JNIEnv *, jobject, jlong, jstring, jstring);
JNIEXPORT void JNICALL Java_org_intel_openvino_Openvino_SaveModel(JNIEnv *, jobject, jlong, jstring, jboolean);

// ov::Core
JNIEXPORT jlong JNICALL Java_org_intel_openvino_Core_GetCore(JNIEnv *, jobject);
Expand Down
2 changes: 1 addition & 1 deletion modules/java_api/src/main/cpp/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ JNIEXPORT jlong JNICALL Java_org_intel_openvino_Tensor_TensorCArray(JNIEnv *env,
{
JNI_METHOD(
"TensorCArray",
auto input_type = element::Type_t(type);
auto input_type = get_ov_type(type);
Shape input_shape = jintArrayToVector(env, shape);
Tensor *ov_tensor = new Tensor();

Expand Down
17 changes: 12 additions & 5 deletions modules/java_api/src/main/java/org/intel/openvino/ElementType.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,18 @@ public enum ElementType {
i32(10),
i64(11),
u1(12),
u4(13),
u8(14),
u16(15),
u32(16),
u64(17);
u2(13),
u3(14),
u4(15),
u6(16),
u8(17),
u16(18),
u32(19),
u64(20),
nf4(21),
f8e4m3(22),
f8e5m2(23),
string(24);

private int value;
private static Map<Integer, ElementType> map = new HashMap<Integer, ElementType>();
Expand Down
19 changes: 19 additions & 0 deletions modules/java_api/src/main/java/org/intel/openvino/Openvino.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,27 @@ public static void serialize(Model model, final String xmlPath, final String bin
serialize(model.nativeObj, xmlPath, binPath);
}

/**
 * Save model into IR files (xml and bin).
 *
 * <p>This method saves a model to IR applying all necessary transformations that are usually
 * applied in model conversion flow provided by mo tool. Debug information in model nodes is
 * cleaned up, and, when {@code compressToFp16} is {@code true}, floating point weights are
 * compressed to FP16.
 *
 * @param model Model which will be converted to IR representation.
 * @param outputModel Path to output model file.
 * @param compressToFp16 Whether to compress floating point weights to FP16.
 */
public static void save_model(
Model model, final String outputModel, final boolean compressToFp16) {
SaveModel(model.nativeObj, outputModel, compressToFp16);
}

/*----------------------------------- native methods -----------------------------------*/

private static native void serialize(
long modelAddr, final String xmlPath, final String binPath);

private static native void SaveModel(
long modelAddr, final String outputModel, final boolean compressToFp16);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
public enum ResizeAlgorithm {
RESIZE_LINEAR(0),
RESIZE_CUBIC(1),
RESIZE_NEAREST(2);
RESIZE_NEAREST(2),
RESIZE_BILINEAR_PILLOW(3),
RESIZE_BICUBIC_PILLOW(4);

private int value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,17 @@ public void testSerialize() throws IOException {
Openvino.serialize(model, xmlPath.getAbsolutePath(), binPath.getAbsolutePath());
assertTrue(xmlPath.exists() && binPath.exists());
}

@Test
public void testSaveModel() throws IOException {
// save_model is expected to produce an .xml plus a .bin with the same stem,
// so pre-compute both paths in a throwaway temp directory.
File tmp = Files.createTempDirectory("ovSaveModelTest").toFile();
File xmlPath = Paths.get(tmp.getAbsolutePath(), "saved_model.xml").toFile();
File binPath = Paths.get(tmp.getAbsolutePath(), "saved_model.bin").toFile();
xmlPath.deleteOnExit();
binPath.deleteOnExit();
tmp.deleteOnExit();

// Save without FP16 compression and verify both IR artifacts were written.
Openvino.save_model(model, xmlPath.getAbsolutePath(), false);
assertTrue(xmlPath.exists() && binPath.exists());
}
}
2 changes: 2 additions & 0 deletions modules/llama_cpp_plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ FetchContent_MakeAvailable(llama_cpp)
# Build the plugin test targets (shared helpers, e2e and functional suites)
# only when tests are explicitly enabled in the OpenVINO build.
if(ENABLE_TESTS)
include(CTest)
enable_testing()
add_subdirectory(tests/common)
add_subdirectory(tests/e2e)
add_subdirectory(tests/functional)
endif()

# install
Expand Down
3 changes: 2 additions & 1 deletion modules/llama_cpp_plugin/include/compiled_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class LlamaCppPlugin;
class LlamaCppState;
class LlamaCppModel : public ICompiledModel {
public:
LlamaCppModel(const std::string& gguf_fname, const std::shared_ptr<const IPlugin>& plugin);
LlamaCppModel(const std::string& gguf_fname, const std::shared_ptr<const IPlugin>& plugin, size_t num_threads = 0);
/**
* @brief Export compiled model to stream
*
Expand Down Expand Up @@ -61,6 +61,7 @@ class LlamaCppModel : public ICompiledModel {
private:
gguf_context* m_gguf_ctx = nullptr;
std::string m_gguf_fname;
size_t m_num_threads;

llama_model* m_llama_model_ptr = nullptr;
llama_context* m_llama_ctx = nullptr;
Expand Down
5 changes: 3 additions & 2 deletions modules/llama_cpp_plugin/include/infer_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ namespace llama_cpp_plugin {

class LlamaCppSyncInferRequest : public ISyncInferRequest {
public:
explicit LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model);
virtual ~LlamaCppSyncInferRequest(){};
explicit LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model, size_t num_threads);
virtual ~LlamaCppSyncInferRequest() override;

virtual void set_tensors_impl(const ov::Output<const ov::Node> port,
const std::vector<ov::SoPtr<ov::ITensor>>& tensors) override;
Expand All @@ -24,6 +24,7 @@ class LlamaCppSyncInferRequest : public ISyncInferRequest {

private:
std::shared_ptr<const LlamaCppModel> m_compiled_model_ptr;
llama_context* m_llama_ctx;
};

} // namespace llama_cpp_plugin
Expand Down
3 changes: 3 additions & 0 deletions modules/llama_cpp_plugin/include/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ class LlamaCppPlugin : public IPlugin {

virtual ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties) const override;

private:
size_t m_num_threads = 0;
};
} // namespace llama_cpp_plugin
} // namespace ov
Expand Down
9 changes: 5 additions & 4 deletions modules/llama_cpp_plugin/include/state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@ namespace llama_cpp_plugin {
class LlamaCppState : public IVariableState {
public:
LlamaCppState() = delete;
LlamaCppState(const std::shared_ptr<const LlamaCppModel>& model_ptr)
: m_model_ptr(model_ptr),
LlamaCppState(llama_context* llama_context_ptr)
: m_llama_ctx_ptr(llama_context_ptr),
IVariableState("llama_cpp_state") {}
void reset() override {
llama_kv_cache_clear(m_model_ptr->m_llama_ctx);
OPENVINO_ASSERT(m_llama_ctx_ptr != nullptr);
llama_kv_cache_clear(m_llama_ctx_ptr);
}

private:
const std::shared_ptr<const LlamaCppModel>& m_model_ptr;
llama_context* m_llama_ctx_ptr;
};
} // namespace llama_cpp_plugin
} // namespace ov
Expand Down
Loading

0 comments on commit 87889c3

Please sign in to comment.