From 34d204b5e30ce1f2262c412ced80508d961fc17f Mon Sep 17 00:00:00 2001
From: adbenitez
Date: Wed, 4 Sep 2024 18:05:58 +0200
Subject: [PATCH] add fake_reply parameter to GPT4All.generate()

---
 gpt4all-bindings/python/gpt4all/_pyllmodel.py | 3 ++-
 gpt4all-bindings/python/gpt4all/gpt4all.py    | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gpt4all-bindings/python/gpt4all/_pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
index 208d834c580c..2313729aa398 100644
--- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
@@ -493,6 +493,7 @@ def prompt_model(
         context_erase: float = 0.75,
         reset_context: bool = False,
         special: bool = False,
+        fake_reply: str = "",
     ):
         """
         Generate response from model from a prompt.
@@ -537,7 +538,7 @@ def prompt_model(
             True,
             self.context,
             special,
-            ctypes.c_char_p(),
+            ctypes.c_char_p(fake_reply.encode()) if fake_reply else ctypes.c_char_p(),
         )
diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py
index 027f28df357c..d4fd76751151 100644
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -496,6 +496,7 @@ def generate(
         n_batch: int = 8,
         n_predict: int | None = None,
         streaming: bool = False,
+        fake_reply: str = "",
         callback: ResponseCallbackType = empty_response_callback,
     ) -> Any:
         """
@@ -513,6 +514,7 @@ def generate(
             n_batch: Number of prompt tokens processed in parallel. Larger values decrease latency but increase resource requirements.
             n_predict: Equivalent to max_tokens, exists for backwards compatibility.
             streaming: If True, this method will instead return a generator that yields tokens as the model generates them.
+            fake_reply: A spoofed reply for the given prompt, used as a way to load chat history.
             callback: A function with arguments token_id:int and response:str, which receives the tokens from the model as they are generated and stops the generation by returning False.

         Returns:
@@ -529,6 +531,7 @@ def generate(
             repeat_last_n=repeat_last_n,
             n_batch=n_batch,
             n_predict=n_predict if n_predict is not None else max_tokens,
+            fake_reply=fake_reply,
         )

         if self._history is not None:
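
Usage note (not part of the patch): a minimal sketch of how the new fake_reply parameter could be used to preload a chat session with a prior exchange, as described by the docstring added above. The model file name is only an example, and how a spoofed reply is reflected in the session depends on the backend's handling of fake replies.

    from gpt4all import GPT4All

    # Model file name is illustrative; use any model file available locally.
    model = GPT4All("Meta-Llama-3-8B-Instruct.Q4_0.gguf")

    with model.chat_session():
        # Replay an earlier exchange: fake_reply is recorded as the model's
        # answer to this prompt instead of generating a fresh response.
        model.generate("What is the capital of France?",
                       fake_reply="The capital of France is Paris.")

        # Follow-up prompts then see the spoofed exchange as loaded chat history.
        print(model.generate("What is its population?", max_tokens=60))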