From 2a35f356204b2971dc5dbe798fb1b3843a66368a Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Thu, 2 Nov 2023 16:00:15 -0700
Subject: [PATCH] feat: add model warmup logic

---
 crates/llama-cpp-bindings/src/engine.cc | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc
index 5a1f9f755dae..a314a6a9c665 100644
--- a/crates/llama-cpp-bindings/src/engine.cc
+++ b/crates/llama-cpp-bindings/src/engine.cc
@@ -88,6 +88,26 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
     model_(std::move(model)),
     ctx_(std::move(ctx)) {
     batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1);
+    // Warm up: run one small decode so the first real request doesn't pay
+    // first-inference setup costs; the KV cache is cleared afterwards.
+    {
+      constexpr int N_WARMUP_TOKENS = 16;
+      batch_.n_tokens = N_WARMUP_TOKENS;  // llama_decode reads this count
+      for (int i = 0; i < N_WARMUP_TOKENS; ++i) {
+        batch_.token[i] = 0;
+        batch_.pos[i] = i;
+        batch_.n_seq_id[i] = 1;
+        batch_.seq_id[i][0] = 0;
+        batch_.logits[i] = false;
+      }
+
+      // llama_decode returns 0 on success, non-zero on failure.
+      if (llama_decode(ctx_.get(), batch_) != 0) {
+        fprintf(stderr, "%s: warmup failed\n", __func__);
+      }
+
+      llama_kv_cache_clear(ctx_.get());
+    }
   }
 
   ~TextInferenceEngineImpl() {