From 2ea15356d2a95c539ce7c6a5e7a2eb4e8de8f6db Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Mon, 5 Feb 2024 15:43:32 -0800
Subject: [PATCH] refactor: fix warnings when comparing between llama_token
 and size_t in engine.cc

---
 crates/llama-cpp-bindings/src/engine.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc
index 815e32d2c996..12e3d6ac0b8d 100644
--- a/crates/llama-cpp-bindings/src/engine.cc
+++ b/crates/llama-cpp-bindings/src/engine.cc
@@ -248,7 +248,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
       throw std::runtime_error(string_format("llama_decode failed with code: %d", ret));
     }
-    const auto eos_id = llama_token_eos(llama_get_model(ctx));
+    const llama_token eos_id = llama_token_eos(llama_get_model(ctx));
     for (auto& request : requests_) {
       if ((request.i_batch < i) || (request.i_batch >= (i + n_tokens))) {
         continue;
       }
@@ -257,7 +257,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
       int32_t i_batch = request.i_batch - i;
       float* logits = llama_get_logits_ith(ctx, i_batch);
       compute_softmax_inplace(logits, n_vocab, request.temperature);
-      auto next_token = weighted_random(logits, n_vocab, request.seed);
+      const llama_token next_token = weighted_random(logits, n_vocab, request.seed);
       request.n_past += request.tokens.size();
 
       request.tokens.clear();
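
Note (reviewer sketch, not part of the patch): the warning class being fixed
here arises when `auto` deduces an unsigned type at a signed/unsigned
boundary. The standalone C++ sketch below illustrates it; `sample_index` is a
hypothetical stand-in for `weighted_random`, whose real return type is
assumed, not taken from engine.cc, and the EOS id is made up for the demo.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using llama_token = std::int32_t;  // llama.cpp defines llama_token as int32_t

    // Hypothetical sampler: returns an unsigned index into the vocabulary,
    // which is what sets up the mixed-sign comparison downstream.
    std::size_t sample_index(const std::vector<float>& probs) {
      std::size_t best = 0;
      for (std::size_t i = 1; i < probs.size(); ++i) {
        if (probs[i] > probs[best]) best = i;
      }
      return best;
    }

    int main() {
      const std::vector<float> probs = {0.1f, 0.7f, 0.2f};
      const llama_token eos_id = 2;  // made-up EOS id for illustration

      // With `auto`, the deduced type follows the initializer (size_t here),
      // so a later comparison against a signed llama_token mixes signed and
      // unsigned operands, and gcc/clang warn under -Wsign-compare:
      //
      //   auto next_token = sample_index(probs);  // deduces std::size_t
      //   if (next_token == eos_id) { ... }       // warning: sign-compare
      //
      // Pinning the declared type, as the patch does, converts once at
      // initialization; every later comparison is signed-vs-signed.
      const llama_token next_token = sample_index(probs);
      if (next_token == eos_id) {
        std::puts("hit EOS");
      } else {
        std::printf("sampled token %d\n", next_token);
      }
      return 0;
    }

The same reasoning applies to the eos_id hunk: spelling out `llama_token`
documents the intended type at the comparison site instead of leaving it to
deduction.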