diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc index 3c261aeb340b..2a762b5d1298 100644 --- a/crates/llama-cpp-bindings/src/engine.cc +++ b/crates/llama-cpp-bindings/src/engine.cc @@ -316,8 +316,12 @@ std::unique_ptr create_engine(bool use_gpu, rust::Str model llama_context_params ctx_params = llama_context_default_params(); ctx_params.n_ctx = N_CTX * parallelism; ctx_params.n_batch = N_BATCH; + if (const char* n_thread_str = std::getenv("LLAMA_CPP_N_THREADS")) { + int n_threads = std::stoi(n_thread_str); + ctx_params.n_threads = n_threads; + ctx_params.n_threads_batch = n_threads; + } llama_context* ctx = llama_new_context_with_model(model, ctx_params); - return std::make_unique( owned(model, llama_free_model), owned(ctx, llama_free),