Skip to content

Commit

Permalink
refactor: fix warnings when comparing between llama_token and size_t in engine.cc
Browse files Browse the repository at this point in the history
  • Loading branch information
wsxiaoys committed Feb 5, 2024
1 parent 518bc48 commit 2ea1535
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions crates/llama-cpp-bindings/src/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
throw std::runtime_error(string_format("llama_decode failed with code: %d", ret));
}

const auto eos_id = llama_token_eos(llama_get_model(ctx));
const llama_token eos_id = llama_token_eos(llama_get_model(ctx));
for (auto& request : requests_) {
if ((request.i_batch < i) || (request.i_batch >= (i + n_tokens))) {
continue;
Expand All @@ -257,7 +257,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
int32_t i_batch = request.i_batch - i;
float* logits = llama_get_logits_ith(ctx, i_batch);
compute_softmax_inplace(logits, n_vocab, request.temperature);
auto next_token = weighted_random(logits, n_vocab, request.seed);
const llama_token next_token = weighted_random(logits, n_vocab, request.seed);
request.n_past += request.tokens.size();

request.tokens.clear();
Expand Down

0 comments on commit 2ea1535

Please sign in to comment.