Ported from https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix. Also see the NixOS module: https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/services/misc/ollama.nix
Commit 0557ae9 (1 parent: 3e68d66), showing 5 changed files with 208 additions and 0 deletions.
@@ -0,0 +1,40 @@
# Ollama

[Ollama](https://github.com/ollama/ollama) enables you to get up and running with Llama 3, Mistral, Gemma, and other large language models.

## Getting Started

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1".enable = true;
}
```

## Acceleration

By default, Ollama uses the CPU for inference. To enable GPU acceleration:

### CUDA

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    acceleration = "cuda";
  };
}
```

### ROCm

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    acceleration = "rocm";
  };
}
```
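Beyond `enable` and `acceleration`, the module below exposes options for the listen address, data directory, keep-alive duration, models to pull after startup, and extra environment variables. A fuller sketch combining them; the model name `llama3` is illustrative, pick any model from https://ollama.com/library:

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    host = "127.0.0.1";             # together with `port`, sets OLLAMA_HOST
    port = 11434;
    dataDir = "./data/ollama";      # sets OLLAMA_MODELS
    keepAlive = "10m";              # sets OLLAMA_KEEP_ALIVE
    models = [ "llama3" ];          # illustrative; pulled once the server is healthy
    environment.OLLAMA_DEBUG = "1"; # extra variables for the server process
  };
}
```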
@@ -9,6 +9,7 @@ in
    ./elasticsearch.nix
    ./mysql
    ./nginx
    ./ollama.nix
    ./postgres
    ./redis-cluster.nix
    ./redis.nix
@@ -0,0 +1,150 @@
# Based on https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix
{ pkgs, lib, name, config, ... }:
let
  inherit (lib) types;
  ollamaPackage = pkgs.ollama.override {
    inherit (config) acceleration;
    linuxPackages = config.boot.kernelPackages // {
      nvidia_x11 = config.hardware.nvidia.package;
    };
  };
in
{
  options = {
    enable = lib.mkEnableOption "Enable the Ollama service";
    package = lib.mkOption {
      type = types.package;
      default = ollamaPackage;
      description = "The Ollama package to use";
    };
    port = lib.mkOption {
      type = types.port;
      default = 11434;
      description = "The port on which the Ollama service's REST API will listen";
    };
    host = lib.mkOption {
      type = types.str;
      default = "127.0.0.1";
      example = "0.0.0.0";
      description = "The host on which the Ollama service's REST API will listen";
    };
    dataDir = lib.mkOption {
      type = types.str;
      default = "./data/ollama";
      description = ''
        The directory containing the Ollama models.
        Sets the `OLLAMA_MODELS` environment variable.
      '';
    };
    keepAlive = lib.mkOption {
      type = types.str;
      default = "5m";
      description = ''
        The duration that models stay loaded in memory.
        Sets the `OLLAMA_KEEP_ALIVE` environment variable.
        Note: use a duration string like "5m" for 5 minutes, or "70" for 70 seconds.
      '';
      example = "70";
    };
    models = lib.mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = ''
        The models to load after startup.
        Search for models of your choice from: https://ollama.com/library
      '';
    };
    acceleration = lib.mkOption {
      type = types.nullOr (types.enum [ false "rocm" "cuda" ]);
      default = null;
      example = "rocm";
      description = ''
        What interface to use for hardware acceleration.
        - `null`: default behavior
          - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
          - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
          - otherwise defaults to `false`
        - `false`: disable GPU, only use CPU
        - `"rocm"`: supported by most modern AMD GPUs
        - `"cuda"`: supported by most modern NVIDIA GPUs
      '';
    };
    environment = lib.mkOption {
      type = types.attrsOf types.str;
      default = { };
      example = {
        OLLAMA_DEBUG = "1";
      };
      description = ''
        Extra environment variables passed to the `ollama-server` process.
      '';
    };

    outputs.settings = lib.mkOption {
      type = types.deferredModule;
      internal = true;
      readOnly = true;
      default = {
        processes = {
          "${name}" =
            let
              startScript = pkgs.writeShellApplication {
                name = "ollama-server";
                text = ''
                  if [ ! -d ${config.dataDir} ]; then
                    echo "Creating directory ${config.dataDir}"
                    mkdir -p ${config.dataDir}
                  fi
                  ${lib.getExe config.package} serve
                '';
              };
            in
            {
              command = startScript;

              environment = {
                OLLAMA_MODELS = config.dataDir;
                OLLAMA_HOST = "${config.host}:${toString config.port}";
                OLLAMA_KEEP_ALIVE = config.keepAlive;
              } // config.environment;

              readiness_probe = {
                http_get = {
                  host = config.host;
                  port = config.port;
                };
                initial_delay_seconds = 2;
                period_seconds = 10;
                timeout_seconds = 4;
                success_threshold = 1;
                failure_threshold = 5;
              };
              namespace = name;
              availability.restart = "on_failure";
            };

          "${name}-models" = {
            command = pkgs.writeShellApplication {
              name = "ollama-models";
              text = ''
                set -x
                OLLAMA_HOST=${config.host}:${toString config.port}
                export OLLAMA_HOST
                models="${lib.concatStringsSep " " config.models}"
                for model in $models
                do
                  ${lib.getExe config.package} pull "$model"
                done
              '';
            };
            namespace = name;
            depends_on."${name}".condition = "process_healthy";
          };
        };
      };
    };
  };
}
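The `${name}-models` process above also illustrates the intended consumption pattern: point a client at the configured address via `OLLAMA_HOST` and declare a `process_healthy` dependency on the server, which is backed by the HTTP readiness probe. A minimal consumer-side sketch under the default host/port; the process name `ollama-list` and the use of `pkgs.ollama` for the CLI are illustrative:

```nix
# In `perSystem.process-compose.<name>`
{ pkgs, ... }: {
  services.ollama."ollama1".enable = true;

  # Illustrative client process: starts only after the server's readiness
  # probe passes, then lists the locally available models.
  settings.processes.ollama-list = {
    command = pkgs.writeShellApplication {
      name = "ollama-list";
      text = ''
        export OLLAMA_HOST=127.0.0.1:11434
        ${pkgs.lib.getExe pkgs.ollama} list
      '';
    };
    depends_on."ollama1".condition = "process_healthy";
  };
}
```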
@@ -0,0 +1,16 @@
{ pkgs, ... }: {
  services.ollama."ollama1".enable = true;

  # Cannot test auto-loading models yet because that requires an internet connection.
  settings.processes.test = {
    command = pkgs.writeShellApplication {
      runtimeInputs = [ pkgs.curl ];
      text = ''
        curl http://127.0.0.1:11434
      '';
      name = "ollama-test";
    };
    depends_on."ollama1".condition = "process_healthy";
  };
}