From 2ea15356d2a95c539ce7c6a5e7a2eb4e8de8f6db Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Mon, 5 Feb 2024 15:43:32 -0800
Subject: [PATCH] refactor: fix warnings when comparing between llama_token
 and size_t in engine.cc

---
 crates/llama-cpp-bindings/src/engine.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc
index 815e32d2c996..12e3d6ac0b8d 100644
--- a/crates/llama-cpp-bindings/src/engine.cc
+++ b/crates/llama-cpp-bindings/src/engine.cc
@@ -248,7 +248,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
       throw std::runtime_error(string_format("llama_decode failed with code: %d", ret));
     }
-    const auto eos_id = llama_token_eos(llama_get_model(ctx));
+    const llama_token eos_id = llama_token_eos(llama_get_model(ctx));
     for (auto& request : requests_) {
       if ((request.i_batch < i) || (request.i_batch >= (i + n_tokens))) {
         continue;
       }
@@ -257,7 +257,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
       int32_t i_batch = request.i_batch - i;
       float* logits = llama_get_logits_ith(ctx, i_batch);
       compute_softmax_inplace(logits, n_vocab, request.temperature);
-      auto next_token = weighted_random(logits, n_vocab, request.seed);
+      const llama_token next_token = weighted_random(logits, n_vocab, request.seed);
       request.n_past += request.tokens.size();
 
       request.tokens.clear();
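
Note (reviewer sketch, not part of the patch): the warning class being fixed
here arises when `auto` deduces an unsigned type at a signed/unsigned
boundary. The standalone C++ sketch below illustrates it; `sample_index` is a
hypothetical stand-in for `weighted_random`, whose real return type is
assumed, not taken from engine.cc, and the EOS id is made up for the demo.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using llama_token = std::int32_t;  // llama.cpp defines llama_token as int32_t

    // Hypothetical sampler: returns an unsigned index into the vocabulary,
    // which is what sets up the mixed-sign comparison downstream.
    std::size_t sample_index(const std::vector<float>& probs) {
      std::size_t best = 0;
      for (std::size_t i = 1; i < probs.size(); ++i) {
        if (probs[i] > probs[best]) best = i;
      }
      return best;
    }

    int main() {
      const std::vector<float> probs = {0.1f, 0.7f, 0.2f};
      const llama_token eos_id = 2;  // made-up EOS id for illustration

      // With `auto`, the deduced type follows the initializer (size_t here),
      // so a later comparison against a signed llama_token mixes signed and
      // unsigned operands, and gcc/clang warn under -Wsign-compare:
      //
      //   auto next_token = sample_index(probs);  // deduces std::size_t
      //   if (next_token == eos_id) { ... }       // warning: sign-compare
      //
      // Pinning the declared type, as the patch does, converts once at
      // initialization; every later comparison is signed-vs-signed.
      const llama_token next_token = sample_index(probs);
      if (next_token == eos_id) {
        std::puts("hit EOS");
      } else {
        std::printf("sampled token %d\n", next_token);
      }
      return 0;
    }

The same reasoning applies to the eos_id hunk: spelling out `llama_token`
documents the intended type at the comparison site instead of leaving it to
deduction.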