Merge pull request NixOS#289108 from abysssol/update-ollama-0.1.24

ollama: 0.1.17 -> 0.1.24
tomodachi94 · Feb 19, 2024 · 36c6317 · 36c6317
2 parents 7fda771 + 30a7446
commit 36c6317
Show file tree

Hide file tree

Showing 7 changed files with 209 additions and 69 deletions.
diff --git a/maintainers/maintainer-list.nix b/maintainers/maintainer-list.nix
@@ -385,6 +385,13 @@
     githubId = 2526296;
     name = "Adrien Bustany";
   };
+  abysssol = {
+    name = "abysssol";
+    email = "[email protected]";
+    matrix = "@abysssol:tchncs.de";
+    github = "abysssol";
+    githubId = 76763323;
+  };
   acairncross = {
     email = "[email protected]";
     github = "acairncross";

diff --git a/pkgs/tools/misc/ollama/cmake-include.patch b/pkgs/tools/misc/ollama/cmake-include.patch
@@ -0,0 +1,7 @@
+--- a/llm/llama.cpp/examples/server/CMakeLists.txt
++++ b/llm/llama.cpp/examples/server/CMakeLists.txt
+@@ -11,3 +11,4 @@
+     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
+ endif()
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)
++include (../../../ext_server/CMakeLists.txt) # ollama
diff --git a/pkgs/tools/misc/ollama/default.nix b/pkgs/tools/misc/ollama/default.nix
@@ -1,50 +1,182 @@
 { lib
 , buildGoModule
 , fetchFromGitHub
-, llama-cpp
+, fetchpatch
+, buildEnv
+, linkFarm
+, overrideCC
+, makeWrapper
+, stdenv
+
+, cmake
+, gcc12
+, clblast
+, libdrm
+, rocmPackages
+, cudaPackages
+, linuxPackages
+, darwin
+
+, enableRocm ? false
+, enableCuda ? false
 }:
 
-buildGoModule rec {
+let
   pname = "ollama";
-  version = "0.1.17";
+  version = "0.1.24";
+
+  warnIfNotLinux = warning: (lib.warnIfNot stdenv.isLinux warning stdenv.isLinux);
+  gpuWarning = api: "building ollama with ${api} is only supported on linux; falling back to cpu";
+  rocmIsEnabled = enableRocm && (warnIfNotLinux (gpuWarning "rocm"));
+  cudaIsEnabled = enableCuda && (warnIfNotLinux (gpuWarning "cuda"));
+  enableLinuxGpu = rocmIsEnabled || cudaIsEnabled;
+
+  appleFrameworks = darwin.apple_sdk_11_0.frameworks;
+  metalFrameworks = [
+    appleFrameworks.Accelerate
+    appleFrameworks.Metal
+    appleFrameworks.MetalKit
+    appleFrameworks.MetalPerformanceShaders
+  ];
 
   src = fetchFromGitHub {
     owner = "jmorganca";
     repo = "ollama";
     rev = "v${version}";
-    hash = "sha256-eXukNn9Lu1hF19GEi7S7a96qktsjnmXCUp38gw+3MzY=";
+    hash = "sha256-GwZA1QUH8I8m2bGToIcMMaB5MBnioQP4+n1SauUJYP8=";
+    fetchSubmodules = true;
+  };
+  preparePatch = patch: hash: fetchpatch {
+    url = "file://${src}/llm/patches/${patch}";
+    inherit hash;
+    stripLen = 1;
+    extraPrefix = "llm/llama.cpp/";
   };
+  inherit (lib) licenses platforms maintainers;
+  ollama = {
+    inherit pname version src;
+    vendorHash = "sha256-wXRbfnkbeXPTOalm7SFLvHQ9j46S/yLNbFy+OWNSamQ=";
 
-  patches = [
-    # disable passing the deprecated gqa flag to llama-cpp-server
-    # see https://github.com/ggerganov/llama.cpp/issues/2975
-    ./disable-gqa.patch
+    nativeBuildInputs = [
+      cmake
+    ] ++ lib.optionals enableLinuxGpu [
+      makeWrapper
+    ] ++ lib.optionals stdenv.isDarwin
+      metalFrameworks;
 
-    # replace the call to the bundled llama-cpp-server with the one in the llama-cpp package
-    ./set-llamacpp-path.patch
-  ];
+    patches = [
+      # remove uses of `git` in the `go generate` script
+      # instead use `patch` where necessary
+      ./remove-git.patch
+      # replace a hardcoded use of `g++` with `$CXX`
+      ./replace-gcc.patch
 
-  postPatch = ''
-    substituteInPlace llm/llama.go \
-      --subst-var-by llamaCppServer "${llama-cpp}/bin/llama-cpp-server"
-    substituteInPlace server/routes_test.go --replace "0.0.0" "${version}"
-  '';
+      # ollama's patches of llama.cpp's example server
+      # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
+      (preparePatch "01-cache.diff" "sha256-PC4yN98hFvK+PEITiDihL8ki3bJuLVXrAm0CGf8GPJE=")
+      (preparePatch "02-shutdown.diff" "sha256-cElAp9Z9exxN964vB/YFuBhZoEcoAwGSMCnbh+l/V4Q=")
+    ];
+    postPatch = ''
+      # use a patch from the nix store in the `go generate` script
+      substituteInPlace llm/generate/gen_common.sh \
+        --subst-var-by cmakeIncludePatch '${./cmake-include.patch}'
+      # `ollama/llm/generate/gen_common.sh` -> "avoid duplicate main symbols when we link into the cgo binary"
+      substituteInPlace llm/llama.cpp/examples/server/server.cpp \
+        --replace-fail 'int main(' 'int __main('
+      # replace inaccurate version number with actual release version
+      substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
+    '';
+    preBuild = ''
+      export OLLAMA_SKIP_PATCHING=true
+      # build llama.cpp libraries for ollama
+      go generate ./...
+    '';
 
-  vendorHash = "sha256-yGdCsTJtvdwHw21v0Ot6I8gxtccAvNzZyRu1T0vaius=";
+    ldflags = [
+      "-s"
+      "-w"
+      "-X=github.com/jmorganca/ollama/version.Version=${version}"
+      "-X=github.com/jmorganca/ollama/server.mode=release"
+    ];
 
-  ldflags = [
-    "-s"
-    "-w"
-    "-X=github.com/jmorganca/ollama/version.Version=${version}"
-    "-X=github.com/jmorganca/ollama/server.mode=release"
-  ];
+    meta = {
+      description = "Get up and running with large language models locally";
+      homepage = "https://github.com/jmorganca/ollama";
+      license = licenses.mit;
+      platforms = platforms.unix;
+      mainProgram = "ollama";
+      maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
+    };
+  };
 
-  meta = with lib; {
-    description = "Get up and running with large language models locally";
-    homepage = "https://github.com/jmorganca/ollama";
-    license = licenses.mit;
-    mainProgram = "ollama";
-    maintainers = with maintainers; [ dit7ya elohmeier ];
-    platforms = platforms.unix;
+
+  rocmClang = linkFarm "rocm-clang" {
+    llvm = rocmPackages.llvm.clang;
+  };
+  rocmPath = buildEnv {
+    name = "rocm-path";
+    paths = [
+      rocmPackages.rocm-device-libs
+      rocmClang
+    ];
+  };
+  rocmVars = {
+    ROCM_PATH = rocmPath;
+    CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
   };
-}
+
+  cudaToolkit = buildEnv {
+    name = "cuda-toolkit";
+    ignoreCollisions = true; # FIXME: find a cleaner way to do this without ignoring collisions
+    paths = [
+      cudaPackages.cudatoolkit
+      cudaPackages.cuda_cudart
+    ];
+  };
+  cudaVars = {
+    CUDA_LIB_DIR = "${cudaToolkit}/lib";
+    CUDACXX = "${cudaToolkit}/bin/nvcc";
+    CUDAToolkit_ROOT = cudaToolkit;
+  };
+
+  linuxGpuLibs = {
+    buildInputs = lib.optionals rocmIsEnabled [
+      rocmPackages.clr
+      rocmPackages.hipblas
+      rocmPackages.rocblas
+      rocmPackages.rocsolver
+      rocmPackages.rocsparse
+      libdrm
+    ] ++ lib.optionals cudaIsEnabled [
+      cudaPackages.cuda_cudart
+    ];
+  };
+
+  appleGpuLibs = { buildInputs = metalFrameworks; };
+
+  runtimeLibs = lib.optionals rocmIsEnabled [
+    rocmPackages.rocm-smi
+  ] ++ lib.optionals cudaIsEnabled [
+    linuxPackages.nvidia_x11
+  ];
+  runtimeLibWrapper = {
+    postFixup = ''
+      mv "$out/bin/${pname}" "$out/bin/.${pname}-unwrapped"
+      makeWrapper "$out/bin/.${pname}-unwrapped" "$out/bin/${pname}" \
+        --suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath runtimeLibs}'
+    '';
+  };
+
+  goBuild =
+    if cudaIsEnabled then
+      buildGoModule.override { stdenv = overrideCC stdenv gcc12; }
+    else
+      buildGoModule;
+in
+goBuild (ollama
+  // (lib.optionalAttrs rocmIsEnabled rocmVars)
+  // (lib.optionalAttrs cudaIsEnabled cudaVars)
+  // (lib.optionalAttrs enableLinuxGpu linuxGpuLibs)
+  // (lib.optionalAttrs enableLinuxGpu runtimeLibWrapper)
+
+  // (lib.optionalAttrs stdenv.isDarwin appleGpuLibs))
diff --git a/pkgs/tools/misc/ollama/disable-gqa.patch b/pkgs/tools/misc/ollama/disable-gqa.patch
diff --git a/pkgs/tools/misc/ollama/remove-git.patch b/pkgs/tools/misc/ollama/remove-git.patch
@@ -0,0 +1,21 @@
+--- a/llm/generate/gen_common.sh
++++ b/llm/generate/gen_common.sh
+@@ -60,6 +60,9 @@
+ }
+
+ apply_patches() {
++    patch -i '@cmakeIncludePatch@' "${LLAMACPP_DIR}/examples/server/CMakeLists.txt"
++    return
++    
+     # Wire up our CMakefile
+     if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
+         echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
+@@ -113,6 +116,8 @@
+
+ # Keep the local tree clean after we're done with the build
+ cleanup() {
++    return
++
+     (cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
+
+     if [ -n "$(ls -A ../patches/*.diff)" ]; then
diff --git a/pkgs/tools/misc/ollama/replace-gcc.patch b/pkgs/tools/misc/ollama/replace-gcc.patch
@@ -0,0 +1,11 @@
+--- a/llm/generate/gen_common.sh
++++ b/llm/generate/gen_common.sh
+@@ -86,7 +89,7 @@
+     cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
+     cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
+     mkdir -p ${BUILD_DIR}/lib/
+-    g++ -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
++    $CXX -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
+         ${GCC_ARCH} \
+         ${WHOLE_ARCHIVE} ${BUILD_DIR}/examples/server/libext_server.a ${NO_WHOLE_ARCHIVE} \
+         ${BUILD_DIR}/common/libcommon.a \
diff --git a/pkgs/tools/misc/ollama/set-llamacpp-path.patch b/pkgs/tools/misc/ollama/set-llamacpp-path.patch