
Commit 3d5652e
Fix errors, update llamacpp
Josh-XT committed Jan 17, 2024
1 parent b4607d7 commit 3d5652e
Showing 3 changed files with 7 additions and 8 deletions.
11 changes: 5 additions & 6 deletions local_llm/__init__.py
@@ -77,7 +77,7 @@ def get_model_url(model_name="Mistral-7B-OpenOrca"):
 def get_tokens(text: str) -> int:
     encoding = tiktoken.get_encoding("cl100k_base")
     num_tokens = len(encoding.encode(text))
-    return num_tokens
+    return int(num_tokens)
 
 
 def get_model_name(model_url="TheBloke/Mistral-7B-OpenOrca-GGUF"):
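
The int() cast is purely defensive, since len() already returns an int; the counting itself is just tiktoken's cl100k_base encoding applied to the text. A minimal standalone check of the same logic (assumes tiktoken is installed; the printed count is illustrative):

import tiktoken

def get_tokens(text: str) -> int:
    # Count BPE tokens under the cl100k_base vocabulary.
    encoding = tiktoken.get_encoding("cl100k_base")
    return int(len(encoding.encode(text)))

print(get_tokens("Hello, world!"))  # small positive integer, e.g. 4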
@@ -204,7 +204,7 @@ def replace(match):
     return result
 
 
-def format_prompt(prompt, prompt_template, system_message=""):
+def custom_format_prompt(prompt, prompt_template, system_message=""):
     formatted_prompt = custom_format(
         string=prompt_template, prompt=prompt, system_message=system_message
     )
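
This rename is the actual error being fixed: in generate() further down, the method's boolean keyword argument is also named format_prompt, so inside the method that name resolved to the bool, and the old call format_prompt(...) raised TypeError: 'bool' object is not callable. A stripped-down reproduction of the shadowing, with hypothetical names mirroring the shape of the code in this diff:

def format_prompt(prompt):
    return f"System message\n\n{prompt}"

def generate(prompt, format_prompt=True):
    if format_prompt:
        # The parameter shadows the module-level function,
        # so this calls the bool, not the helper:
        return format_prompt(prompt)  # TypeError: 'bool' object is not callable

Renaming the helper to custom_format_prompt sidesteps the collision without changing the public generate() signature.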
@@ -268,7 +268,7 @@ def __init__(
             model_max_tokens = 8192
         self.prompt_template = "{system_message}\n\n{prompt}"
         self.max_tokens = model_max_tokens
-        self.params["n_ctx"] = self.max_tokens
+        self.params["n_ctx"] = 0
         self.params["verbose"] = False
         self.system_message = system_message
         self.params["mirostat_mode"] = 2
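
Per the llama-cpp-python docs, n_ctx=0 tells the backend to read the context length from the model file itself (the training context stored in the GGUF metadata) instead of pinning it to max_tokens. A sketch of the resulting construction, assuming a hypothetical local model path:

from llama_cpp import Llama

params = {
    "model_path": "models/mistral-7b-openorca.Q4_K_M.gguf",  # hypothetical path
    "n_ctx": 0,          # 0 = use the context size stored in the model
    "verbose": False,
    "mirostat_mode": 2,  # mirostat v2 sampling, as set above
}
llm = Llama(**params)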
@@ -307,14 +307,13 @@ def __init__(
 
     def generate(self, prompt, format_prompt: bool = True):
         if format_prompt:
-            formatted_prompt = format_prompt(
+            formatted_prompt = custom_format_prompt(
                 prompt=prompt,
                 prompt_template=self.prompt_template,
                 system_message=self.system_message,
             )
         tokens = get_tokens(formatted_prompt if format_prompt else prompt)
-        self.params["n_predict"] = int(self.max_tokens) - tokens
-        self.params["n_ctx"] = int(self.max_tokens) - tokens
+        self.params["max_tokens"] = int(self.max_tokens) - int(tokens)
         llm = Llama(**self.params)
         data = llm(prompt=formatted_prompt if format_prompt else prompt)
         data["model"] = self.model_name
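
This hunk also swaps llama.cpp's native flag name (n_predict) for the binding's completion parameter (max_tokens), keeping the same prompt-aware budget: total window minus prompt tokens. In llama-cpp-python the completion budget is normally passed to the generation call rather than the constructor, so an equivalent sketch applying it at call time (reusing get_tokens and the 8192-token window from above; the model path and prompt string are hypothetical) would be:

from llama_cpp import Llama

llm = Llama(model_path="models/mistral-7b-openorca.Q4_K_M.gguf", n_ctx=0)
formatted_prompt = "You are a helpful assistant.\n\nHello!"  # output of custom_format_prompt
budget = 8192 - get_tokens(formatted_prompt)  # leave the rest of the window for output
data = llm(prompt=formatted_prompt, max_tokens=budget)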
2 changes: 1 addition & 1 deletion requirements.txt
@@ -4,7 +4,7 @@ requests
 uvicorn
 pyjwt
 tiktoken
-llama-cpp-python==0.2.28
+llama-cpp-python==0.2.29
 python-dotenv
 GPUtil
 psutil
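
To pick up the new pin in an existing environment, a plain reinstall of the pinned package is enough (standard pip usage, nothing repo-specific): pip install llama-cpp-python==0.2.29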
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name="local-llm",
-    version="0.0.41",
+    version="0.0.42",
     description="Local-LLM is a llama.cpp server in Docker with OpenAI Style Endpoints.",
     long_description=long_description,
     long_description_content_type="text/markdown",
