diff --git a/example/openai_server/fastllm_completion.py b/example/openai_server/fastllm_completion.py index 875261e2..7b1099f4 100644 --- a/example/openai_server/fastllm_completion.py +++ b/example/openai_server/fastllm_completion.py @@ -55,7 +55,7 @@ def init_fast_llm_model(self): llm.set_cpu_threads(self.cpu_thds) llm.set_cpu_low_mem(self.low_mem_mode) llm.set_cuda_embedding(self.cuda_embedding) - self.model = llm.model(self.model_path, dtype = self.dtype, tokenizer_type = "fastllm") + self.model = llm.model(self.model_path, dtype = self.dtype, tokenizer_type = "auto") self.model.set_atype(self.atype) def create_error_response(