Skip to content

Commit

Permalink
[onert/3rdparty] Introduce ggml (#13995)
Browse files Browse the repository at this point in the history
This commit introduces ggml in `runtime/3rdparty`.
It does not include all code to reduce binary size, and it disable unused part of ggml.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
  • Loading branch information
hseok-oh authored Sep 13, 2024
1 parent 5054c29 commit 59754fa
Show file tree
Hide file tree
Showing 12 changed files with 46,420 additions and 0 deletions.
264 changes: 264 additions & 0 deletions runtime/3rdparty/ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
project("ggml" C CXX)
include(CheckIncludeFileCXX)

# [FIX] Force build option: static build, disable OpenMP
set(GGML_STATIC ON)
set(GGML_OPENMP OFF)
set(BUILD_SHARED_LIBS OFF)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(GGML_STANDALONE ON)

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

# configure project version
# TODO
else()
set(GGML_STANDALONE OFF)
endif()

if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)

option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
else()
set(BUILD_SHARED_LIBS_DEFAULT ON)
endif()
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})

#
# option list
#

# TODO: mark all options as advanced when not GGML_STANDALONE

if (APPLE)
set(GGML_METAL_DEFAULT ON)
set(GGML_BLAS_DEFAULT ON)
set(GGML_BLAS_VENDOR_DEFAULT "Apple")
else()
set(GGML_METAL_DEFAULT OFF)
set(GGML_BLAS_DEFAULT OFF)
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
endif()

if (CMAKE_CROSSCOMPILING)
set(GGML_NATIVE_DEFAULT OFF)
else()
set(GGML_NATIVE_DEFAULT ON)
endif()

# general
option(GGML_STATIC "ggml: static link libraries" OFF)
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
option(GGML_LTO "ggml: enable link time optimization" OFF)
option(GGML_CCACHE "ggml: use ccache if available" ON)

# debug
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
option(GGML_GPROF "ggml: enable gprof" OFF)

# build
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)

# sanitizers
option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)
option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)

# instruction set specific
if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT)
set(INS_ENB OFF)
else()
set(INS_ENB ON)
endif()

option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)

option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
option(GGML_AVX512 "ggml: enable AVX512" OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
if (NOT MSVC)
option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
endif()
option(GGML_LASX "ggml: enable lasx" ON)
option(GGML_LSX "ggml: enable lsx" ON)
option(GGML_SVE "ggml: enable SVE" OFF)

if (WIN32)
set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")
endif()

# ggml core
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")

# 3rd party libs / backends
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
"ggml: BLAS library vendor")
option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF)

option(GGML_CUDA "ggml: use CUDA" OFF)
option(GGML_MUSA "ggml: use MUSA" OFF)
option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING
"ggml: iters./thread per block for Q2_K/Q6_K")
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
"ggml: max. batch size for using peer access")
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)

option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})
set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
"ggml: metal minimum macOS version")
set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
option(GGML_OPENMP "ggml: use OpenMP" ON)
option(GGML_RPC "ggml: use RPC" OFF)
option(GGML_SYCL "ggml: use SYCL" OFF)
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
"ggml: sycl target device")

# extra artifacts
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})

#
# dependencies
#

set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)

if (GGML_SYCL)
set(CMAKE_CXX_STANDARD 17)
else()
set(CMAKE_CXX_STANDARD 11)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED true)

set(THREADS_PREFER_PTHREAD_FLAG ON)

find_package(Threads REQUIRED)

#
# build the library
#

add_subdirectory(src)

#
# tests and examples
#

if (GGML_BUILD_TESTS)
enable_testing()
add_subdirectory(tests)
endif ()

if (GGML_BUILD_EXAMPLES)
add_subdirectory(examples)
endif ()

#
# install
#

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# all public headers
# [FIX] comment out public header installation
#set(GGML_PUBLIC_HEADERS
# include/ggml.h)
# include/ggml-alloc.h
# include/ggml-backend.h
# include/ggml-blas.h
# include/ggml-cann.h
# include/ggml-cuda.h
# include/ggml.h
# include/ggml-kompute.h
# include/ggml-metal.h
# include/ggml-rpc.h
# include/ggml-sycl.h
# include/ggml-vulkan.h)
#
#set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
#endif()
#
#install(TARGETS ggml PUBLIC_HEADER)

# [FIX] Add -fPIC option for static
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)

if (BUILD_SHARED_LIBS)
install(TARGETS ggml LIBRARY)
endif()

if (GGML_METAL)
install(
FILES src/ggml-metal.metal
PERMISSIONS
OWNER_READ
OWNER_WRITE
GROUP_READ
WORLD_READ
DESTINATION ${CMAKE_INSTALL_BINDIR})

if (NOT GGML_METAL_EMBED_LIBRARY)
install(
FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
DESTINATION ${CMAKE_INSTALL_BINDIR}
)
endif()
endif()

if (GGML_STANDALONE)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
@ONLY)

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
DESTINATION share/pkgconfig)
endif()
21 changes: 21 additions & 0 deletions runtime/3rdparty/ggml/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023-2024 The ggml authors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
18 changes: 18 additions & 0 deletions runtime/3rdparty/ggml/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Origin of source code

It is ggml part in llama.cpp https://github.com/ggerganov/llama.cpp/

# Version

b3542: https://github.com/ggerganov/llama.cpp/tree/b3542

# Background

It is part of ggml, not all code to reduce the binary size.

C code marking

- `#if 0 // [FIX] disable` & `#endif // [FIX] end` pair: Manually disable unused code

CMake marking
- `# [FIX] comment~ `: Manually fix for build
Loading

0 comments on commit 59754fa

Please sign in to comment.