-
Notifications
You must be signed in to change notification settings - Fork 157
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[onert/3rdparty] Introduce ggml (#13995)
This commit introduces ggml in `runtime/3rdparty`. To reduce binary size, it does not include all of the ggml code, and it disables the unused parts of ggml. ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
- Loading branch information
Showing
12 changed files
with
46,420 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,264 @@ | ||
# Top-level build script of the vendored ggml library: declares the project, its | ||
# build options and its install rules, and delegates the target definition to src/. | ||
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. | ||
project("ggml" C CXX) | ||
include(CheckIncludeFileCXX) | ||
|
||
# [FIX] Force build option: static build, disable OpenMP | ||
# These are plain (non-cache) variables assigned before the option() calls of the | ||
# same names further down in this file. | ||
# NOTE(review): with the policy level requested above, option() is expected to leave | ||
# an already-set normal variable untouched (CMP0077), so these values should win over | ||
# the option defaults - confirm against the CMake version in use. | ||
set(GGML_STATIC ON) | ||
set(GGML_OPENMP OFF) | ||
set(BUILD_SHARED_LIBS OFF) | ||
|
||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||
|
||
# [FIX] Default to a Release build, but only for single-config generators. | ||
# The generator itself is queried instead of using XCODE/MSVC as a proxy: MSVC driven | ||
# by a single-config generator (e.g. Ninja) now also receives the default, while | ||
# multi-config generators, which do not use CMAKE_BUILD_TYPE, are left alone. | ||
# A build type already chosen by the user or a parent project is never overridden. | ||
get_property(GGML_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) | ||
if (NOT GGML_GENERATOR_IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE) | ||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) | ||
# Offer the standard configurations as a drop-down in cmake-gui / ccmake. | ||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") | ||
endif() | ||
|
||
# Detect whether ggml is the top-level project or was pulled in by a parent build. | ||
# GGML_STANDALONE later supplies the default for the test/example options and gates | ||
# the generation of the pkg-config file at the end of this script. | ||
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | ||
set(GGML_STANDALONE ON) | ||
|
||
# Only a standalone build redirects runtime outputs to <build dir>/bin. | ||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) | ||
|
||
# configure project version | ||
# TODO | ||
else() | ||
set(GGML_STANDALONE OFF) | ||
endif() | ||
|
||
# Choose the default for BUILD_SHARED_LIBS: OFF for Emscripten and MinGW, ON elsewhere. | ||
if (EMSCRIPTEN) | ||
set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||
|
||
# This option is only declared for Emscripten builds. | ||
option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON) | ||
else() | ||
if (MINGW) | ||
set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||
else() | ||
set(BUILD_SHARED_LIBS_DEFAULT ON) | ||
endif() | ||
endif() | ||
|
||
# NOTE(review): BUILD_SHARED_LIBS is already set to OFF near the top of this file, | ||
# so the default computed above is presumably never applied - confirm. | ||
option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) | ||
|
||
# | ||
# option list | ||
# | ||
|
||
# TODO: mark all options as advanced when not GGML_STANDALONE | ||
|
||
# Platform-dependent defaults consumed by the option() declarations below. | ||
# On Apple platforms Metal and BLAS default to ON with the "Apple" BLAS vendor; | ||
# everywhere else both default to OFF and the BLAS vendor is "Generic". | ||
if (APPLE) | ||
set(GGML_METAL_DEFAULT ON) | ||
set(GGML_BLAS_DEFAULT ON) | ||
set(GGML_BLAS_VENDOR_DEFAULT "Apple") | ||
else() | ||
set(GGML_METAL_DEFAULT OFF) | ||
set(GGML_BLAS_DEFAULT OFF) | ||
set(GGML_BLAS_VENDOR_DEFAULT "Generic") | ||
endif() | ||
|
||
# GGML_NATIVE (the -march=native switch declared below) defaults to ON unless | ||
# the build is a cross-compilation. | ||
if (CMAKE_CROSSCOMPILING) | ||
set(GGML_NATIVE_DEFAULT OFF) | ||
else() | ||
set(GGML_NATIVE_DEFAULT ON) | ||
endif() | ||
|
||
# general | ||
# NOTE(review): GGML_STATIC is pre-set to ON at the top of this file, so the OFF | ||
# default declared here presumably does not take effect - confirm. | ||
option(GGML_STATIC "ggml: static link libraries" OFF) | ||
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT}) | ||
option(GGML_LTO "ggml: enable link time optimization" OFF) | ||
option(GGML_CCACHE "ggml: use ccache if available" ON) | ||
|
||
# debug | ||
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON) | ||
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF) | ||
option(GGML_GPROF "ggml: enable gprof" OFF) | ||
|
||
# build | ||
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF) | ||
|
||
# sanitizers (all disabled by default) | ||
option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF) | ||
option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF) | ||
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF) | ||
|
||
# instruction set specific | ||
# INS_ENB is the shared default for the AVX, AVX2, FMA and F16C options below. | ||
# It is ON only when GGML_NATIVE is off while GGML_NATIVE_DEFAULT is on, i.e. a | ||
# non-cross build in which -march=native was explicitly disabled; otherwise OFF. | ||
if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT) | ||
set(INS_ENB OFF) | ||
else() | ||
set(INS_ENB ON) | ||
endif() | ||
|
||
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF) | ||
|
||
option(GGML_AVX "ggml: enable AVX" ${INS_ENB}) | ||
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB}) | ||
option(GGML_AVX512 "ggml: enable AVX512" OFF) | ||
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF) | ||
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF) | ||
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF) | ||
option(GGML_FMA "ggml: enable FMA" ${INS_ENB}) | ||
# GGML_F16C is not declared as an option at all when building with MSVC. | ||
if (NOT MSVC) | ||
option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512 | ||
endif() | ||
option(GGML_LASX "ggml: enable lasx" ON) | ||
option(GGML_LSX "ggml: enable lsx" ON) | ||
option(GGML_SVE "ggml: enable SVE" OFF) | ||
|
||
# Cache entry holding a Windows version value; only created on Windows hosts. | ||
if (WIN32) | ||
set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version") | ||
endif() | ||
|
||
# ggml core | ||
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism") | ||
|
||
# 3rd party libs / backends | ||
# Boolean switches use option(); string-valued tunables are cache STRING entries. | ||
# BLAS and Metal take their defaults from the platform-dependent *_DEFAULT variables. | ||
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON) | ||
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT}) | ||
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING | ||
"ggml: BLAS library vendor") | ||
option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF) | ||
|
||
option(GGML_CUDA "ggml: use CUDA" OFF) | ||
option(GGML_MUSA "ggml: use MUSA" OFF) | ||
option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF) | ||
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF) | ||
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF) | ||
set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels") | ||
set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels") | ||
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF) | ||
set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING | ||
"ggml: iters./thread per block for Q2_K/Q6_K") | ||
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING | ||
"ggml: max. batch size for using peer access") | ||
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF) | ||
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF) | ||
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF) | ||
option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF) | ||
|
||
option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF) | ||
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF) | ||
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF) | ||
option(GGML_VULKAN "ggml: use Vulkan" OFF) | ||
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF) | ||
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF) | ||
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF) | ||
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF) | ||
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) | ||
option(GGML_KOMPUTE "ggml: use Kompute" OFF) | ||
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) | ||
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) | ||
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) | ||
# The embed default follows whatever GGML_METAL evaluates to at this point. | ||
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL}) | ||
set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING | ||
"ggml: metal minimum macOS version") | ||
set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)") | ||
# NOTE(review): GGML_OPENMP is pre-set to OFF at the top of this file, so the ON | ||
# default declared here presumably does not take effect - confirm. | ||
option(GGML_OPENMP "ggml: use OpenMP" ON) | ||
option(GGML_RPC "ggml: use RPC" OFF) | ||
option(GGML_SYCL "ggml: use SYCL" OFF) | ||
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF) | ||
set (GGML_SYCL_TARGET "INTEL" CACHE STRING | ||
"ggml: sycl target device") | ||
|
||
# extra artifacts | ||
# Tests and examples default to ON only when ggml is the top-level project | ||
# (GGML_STANDALONE); they can still be switched explicitly either way. | ||
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) | ||
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) | ||
|
||
# | ||
# dependencies | ||
# | ||
|
||
# C sources are built as C11, and the standard is mandatory (no silent fallback). | ||
set(CMAKE_C_STANDARD 11) | ||
set(CMAKE_C_STANDARD_REQUIRED true) | ||
|
||
# C++ sources use C++17 when the SYCL backend is enabled and C++11 otherwise; | ||
# the chosen standard is likewise mandatory. | ||
if (GGML_SYCL) | ||
set(CMAKE_CXX_STANDARD 17) | ||
else() | ||
set(CMAKE_CXX_STANDARD 11) | ||
endif() | ||
set(CMAKE_CXX_STANDARD_REQUIRED true) | ||
|
||
# Ask the Threads find module to prefer the -pthread flag where available. | ||
set(THREADS_PREFER_PTHREAD_FLAG ON) | ||
|
||
# Configuration fails here if no usable thread library is found. | ||
find_package(Threads REQUIRED) | ||
|
||
# | ||
# build the library | ||
# | ||
|
||
# NOTE(review): src/ is not shown here; it presumably defines the `ggml` target | ||
# that the install section below refers to - confirm. | ||
add_subdirectory(src) | ||
|
||
# | ||
# tests and examples | ||
# | ||
|
||
# Both directories are optional and controlled by the GGML_BUILD_* options above. | ||
if (GGML_BUILD_TESTS) | ||
enable_testing() | ||
add_subdirectory(tests) | ||
endif () | ||
|
||
if (GGML_BUILD_EXAMPLES) | ||
add_subdirectory(examples) | ||
endif () | ||
|
||
# | ||
# install | ||
# | ||
|
||
include(GNUInstallDirs) | ||
include(CMakePackageConfigHelpers) | ||
|
||
# all public headers | ||
# [FIX] comment out public header installation | ||
#set(GGML_PUBLIC_HEADERS | ||
# include/ggml.h) | ||
# include/ggml-alloc.h | ||
# include/ggml-backend.h | ||
# include/ggml-blas.h | ||
# include/ggml-cann.h | ||
# include/ggml-cuda.h | ||
# include/ggml.h | ||
# include/ggml-kompute.h | ||
# include/ggml-metal.h | ||
# include/ggml-rpc.h | ||
# include/ggml-sycl.h | ||
# include/ggml-vulkan.h) | ||
# | ||
#set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") | ||
#if (GGML_METAL) | ||
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") | ||
#endif() | ||
# | ||
#install(TARGETS ggml PUBLIC_HEADER) | ||
|
||
# [FIX] Add -fPIC option for static | ||
# The target property is used rather than a raw compiler flag, so CMake picks the | ||
# appropriate position-independent-code flag for the toolchain. | ||
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) | ||
|
||
# The library artifact is installed only for shared builds. | ||
# NOTE(review): BUILD_SHARED_LIBS is set to OFF at the top of this file, so this | ||
# branch is presumably inactive in this vendored copy - confirm. | ||
if (BUILD_SHARED_LIBS) | ||
install(TARGETS ggml LIBRARY) | ||
endif() | ||
|
||
# When Metal is enabled, install the shader source into the binary directory | ||
# with owner read/write and group/world read permissions. | ||
if (GGML_METAL) | ||
install( | ||
FILES src/ggml-metal.metal | ||
PERMISSIONS | ||
OWNER_READ | ||
OWNER_WRITE | ||
GROUP_READ | ||
WORLD_READ | ||
DESTINATION ${CMAKE_INSTALL_BINDIR}) | ||
|
||
# If the Metal library is not embedded, default.metallib is installed as well. | ||
# NOTE(review): this file assigns CMAKE_RUNTIME_OUTPUT_DIRECTORY only when ggml is | ||
# the top-level project; confirm the variable is defined for subproject builds. | ||
if (NOT GGML_METAL_EMBED_LIBRARY) | ||
install( | ||
FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib | ||
DESTINATION ${CMAKE_INSTALL_BINDIR} | ||
) | ||
endif() | ||
endif() | ||
|
||
# Standalone builds only: generate ggml.pc from the ggml.pc.in template and install it. | ||
# @ONLY restricts substitution to @VAR@ references, leaving any ${...} text intact. | ||
if (GGML_STANDALONE) | ||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in | ||
${CMAKE_CURRENT_BINARY_DIR}/ggml.pc | ||
@ONLY) | ||
|
||
# The relative destination is resolved against the install prefix. | ||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc | ||
DESTINATION share/pkgconfig) | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2023-2024 The ggml authors | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Origin of source code | ||
|
||
This is the ggml part of llama.cpp: https://github.com/ggerganov/llama.cpp/ | ||
|
||
# Version | ||
|
||
b3542: https://github.com/ggerganov/llama.cpp/tree/b3542 | ||
|
||
# Background | ||
|
||
Only part of ggml is included, not all of its code, in order to reduce the binary size. | ||
|
||
C code marking | ||
|
||
- `#if 0 // [FIX] disable` & `#endif // [FIX] end` pair: Manually disabled unused code | ||
|
||
CMake marking | ||
- `# [FIX] comment~ `: Manual fix for the build |
Oops, something went wrong.