Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add memory #48

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ ipython_config.py
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
Expand Down
21 changes: 9 additions & 12 deletions devon_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
system_prompt_template_v3,
)
from devon_agent.tools.utils import get_cwd
from devon_agent.tools.memory import VLiteMemoryTool

from devon_agent.udiff import Hallucination
from devon_agent.utils import LOGGER_NAME, DotDict
Expand All @@ -34,7 +35,7 @@ class Agent:
name: str
model: str
temperature: float = 0.0
chat_history: list[dict[str, str]] = field(default_factory=list)
chat_history: VLiteMemoryTool()
interrupt: str = ""
api_key: Optional[str] = None
scratchpad = None
Expand Down Expand Up @@ -97,9 +98,7 @@ def predict(
session.state.editor.files, session.state.editor.PAGE_SIZE
)

self.chat_history.append(
{"role": "user", "content": observation, "agent": self.name}
)
self.chat_history.add(observation, metadata={"role": "user", "content": observation, "agent": self.name})

commands = (
"Avaliable Custom Commands:\n"
Expand All @@ -122,15 +121,15 @@ def predict(
last_observation = None
second_last_observation = None
if len(self.chat_history) > 2:
last_observation = self.chat_history[-1]["content"]
second_last_observation = self.chat_history[-3]["content"]
last_observation = self.chat_history.get_last_item(1)["text"]
second_last_observation = self.chat_history.get_last_item(3)["text"]
if (
last_observation
and second_last_observation
and "Failed to edit file" in last_observation
and "Failed to edit file" in second_last_observation
):
self.chat_history = self.chat_history[:-6]
self.chat_history = self.chat_history.get_last_item(6)
history = history_to_bash_history(self.chat_history)
self.current_model.args.temperature += (
0.2 if self.current_model.args.temperature < 0.8 else 0
Expand Down Expand Up @@ -175,16 +174,14 @@ def predict(

if not thought or not action:
raise Hallucination("Agent failed to follow response format instructions")

self.chat_history.append(
{

self.chat_history.add(output, metadata={
"role": "assistant",
"content": output,
"thought": thought,
"action": action,
"agent": self.name,
}
)
})

logger.info(f"""
\n\n\n\n****************\n\n
Expand Down
13 changes: 7 additions & 6 deletions devon_agent/agents/default/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from devon_agent.agents.default.llama3_prompts import llama3_commands_to_command_docs, llama3_history_to_bash_history, llama3_last_user_prompt_template_v1, llama3_parse_response, llama3_system_prompt_template_v1

from devon_agent.tools.utils import get_cwd
from devon_agent.tools.memory import VLiteMemoryTool

from devon_agent.udiff import Hallucination
from devon_agent.utils import LOGGER_NAME, DotDict
Expand All @@ -33,7 +34,7 @@ class Agent:
name: str
model: str
temperature: float = 0.0
chat_history: list[dict[str, str]] = field(default_factory=list)
chat_history: VLiteMemoryTool()
interrupt: str = ""
api_key: Optional[str] = None
scratchpad = None
Expand Down Expand Up @@ -104,7 +105,7 @@ def predict(
session.state.editor.files, session.state.editor.PAGE_SIZE
)

self.chat_history.append(
self.chat_history.add(observation, metadata=
{"role": "user", "content": observation, "agent": self.name}
)

Expand All @@ -116,15 +117,15 @@ def predict(
last_observation = None
second_last_observation = None
if len(self.chat_history) > 2:
last_observation = self.chat_history[-1]["content"]
second_last_observation = self.chat_history[-3]["content"]
last_observation = self.chat_history.get_last_item(1)["text"]
second_last_observation = self.chat_history.get_last_item(3)["text"]
if (
last_observation
and second_last_observation
and "Failed to edit file" in last_observation
and "Failed to edit file" in second_last_observation
):
self.chat_history = self.chat_history[:-6]
self.chat_history = self.chat_history.get_last_item(6)
self.current_model.args.temperature += (
0.2 if self.current_model.args.temperature < 0.8 else 0
)
Expand Down Expand Up @@ -164,7 +165,7 @@ def predict(
+ "\n"
)

history = [entry for entry in self.chat_history if entry["role"] == "user" or entry["role"] == "assistant"]
history = self.chat_history.get_entries_by_metadata({"role": ["user", "assistant"]})
system_prompt = openai_system_prompt_template_v3(command_docs)
last_user_prompt = openai_last_user_prompt_template_v3(
task,
Expand Down
14 changes: 7 additions & 7 deletions devon_agent/agents/default/anthropic_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def editor_repr(editor):
return editor


def anthropic_history_to_bash_history(history):
def anthropic_history_to_bash_history(memory_tool):
    """Render a VLite-backed chat history as an XML-ish bash transcript.

    Walks every stored item in the memory tool's dump order and formats
    user entries as <RESULT> blocks and assistant entries as
    <YOU><THOUGHT>/<COMMAND> blocks; entries with any other (or no) role
    are skipped.

    Args:
        memory_tool: object exposing ``get_all_items()`` that returns
            dicts with ``"text"`` and ``"metadata"`` keys
            (see VLiteMemoryTool).

    Returns:
        str: concatenated transcript; empty string for empty history.
    """
    parts = []
    for entry in memory_tool.get_all_items():
        # Metadata may be missing or None for items added without it;
        # treat those as role-less instead of raising AttributeError.
        metadata = entry.get("metadata") or {}
        role = metadata.get("role")
        if role == "user":
            result = entry["text"].strip() + "\n"
            parts.append(f"<RESULT>\n{result}\n</RESULT>")
        elif role == "assistant":
            # action[1:] drops the leading character (presumably the
            # newline left by the response parser) — TODO confirm.
            parts.append(f"""
<YOU>
<THOUGHT>{metadata.get('thought', '')}</THOUGHT>
<COMMAND>
{metadata.get('action', '')[1:]}
</COMMAND>
</YOU>
""")
    # Join once instead of repeated += to avoid quadratic string builds.
    return "".join(parts)


def object_to_xml(data: Union[dict, bool], root="object"):
Expand Down
15 changes: 7 additions & 8 deletions devon_agent/agents/default/llama3_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def llama3_commands_to_command_docs(commands: List[Dict]):
def editor_repr(editor):
return "\n\n".join(f"{file}:\n{editor[file]}" for file in editor)

def llama3_history_to_bash_history(history):
def llama3_history_to_bash_history(memory_tool):
    """Render a VLite-backed chat history as an XML-ish bash transcript.

    Walks every stored item in the memory tool's dump order and formats
    user entries as <RESULT> blocks and assistant entries as
    <YOU><THOUGHT>/<COMMAND> blocks; entries with any other (or no) role
    are skipped.

    Args:
        memory_tool: object exposing ``get_all_items()`` that returns
            dicts with ``"text"`` and ``"metadata"`` keys
            (see VLiteMemoryTool).

    Returns:
        str: concatenated transcript; empty string for empty history.
    """
    parts = []
    for entry in memory_tool.get_all_items():
        # Metadata may be missing or None for items added without it;
        # treat those as role-less instead of raising AttributeError.
        metadata = entry.get("metadata") or {}
        role = metadata.get("role")
        if role == "user":
            result = entry["text"].strip() + "\n"
            parts.append(f"<RESULT>\n{result}\n</RESULT>")
        elif role == "assistant":
            # action[1:] drops the leading character (presumably the
            # newline left by the response parser) — TODO confirm.
            parts.append(f"""
<YOU>
<THOUGHT>{metadata.get('thought', '')}</THOUGHT>
<COMMAND>
{metadata.get('action', '')[1:]}
</COMMAND>
</YOU>
""")
    # Join once instead of repeated += to avoid quadratic string builds.
    return "".join(parts)

def object_to_xml(data: Union[dict, bool], root="object"):
xml = f"<{root}>"
if isinstance(data, dict):
Expand Down
16 changes: 8 additions & 8 deletions devon_agent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Action

from typing import Dict, List, Union

from devon_agent.tools.memory import VLiteMemoryTool

def commands_to_command_docs(commands: List[Dict]):
doc = """"""
Expand Down Expand Up @@ -247,7 +247,7 @@ def system_prompt_template_v2(command_docs: str):
"""


def history_to_bash_history(history):
def history_to_bash_history(memory_tool):
    """Render a VLite-backed chat history as an XML-ish bash transcript.

    Walks every stored item in the memory tool's dump order and formats
    user entries as <RESULT> blocks and assistant entries as
    <YOU><THOUGHT>/<COMMAND> blocks; entries with any other (or no) role
    are skipped.

    Args:
        memory_tool: object exposing ``get_all_items()`` that returns
            dicts with ``"text"`` and ``"metadata"`` keys
            (see VLiteMemoryTool).

    Returns:
        str: concatenated transcript; empty string for empty history.
    """
    parts = []
    for entry in memory_tool.get_all_items():
        # Metadata may be missing or None for items added without it;
        # treat those as role-less instead of raising AttributeError.
        metadata = entry.get("metadata") or {}
        role = metadata.get("role")
        if role == "user":
            result = entry["text"].strip() + "\n"
            parts.append(f"<RESULT>\n{result}\n</RESULT>")
        elif role == "assistant":
            # action[1:] drops the leading character (presumably the
            # newline left by the response parser) — TODO confirm.
            parts.append(f"""
<YOU>
<THOUGHT>{metadata.get('thought', '')}</THOUGHT>
<COMMAND>
{metadata.get('action', '')[1:]}
</COMMAND>
</YOU>
""")
    # Join once instead of repeated += to avoid quadratic string builds.
    return "".join(parts)


def object_to_xml(data: Union[dict, bool], root="object"):
Expand Down
56 changes: 56 additions & 0 deletions devon_agent/tools/memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from vlite import VLite
from devon_agent.tool import Tool

class VLiteMemoryTool(Tool):
    """Persistent chat-memory store backed by a named VLite collection.

    Thin delegation wrapper: every method forwards to the underlying
    ``VLite`` instance held in ``self.memory_manager``.
    """

    def __init__(self, collection_name="devon_collection"):
        # Each tool instance owns exactly one named VLite collection.
        self.memory_manager = VLite(collection=collection_name)

    def __len__(self):
        """Number of items currently stored in the collection."""
        return self.memory_manager.count()

    def add(self, text, metadata=None):
        """Store *text* (with optional metadata) and persist immediately."""
        outcome = self.memory_manager.add(data=text, metadata=metadata)
        # Persist right away so memory survives an abrupt shutdown.
        self.memory_manager.save()
        return outcome

    def retrieve(self, query_text, top_k=5, metadata=None, return_scores=False):
        """Semantic search: up to *top_k* matches for *query_text*."""
        return self.memory_manager.retrieve(
            text=query_text,
            top_k=top_k,
            metadata=metadata,
            return_scores=return_scores,
        )

    def update(self, item_id, new_text=None, metadata=None, vector=None):
        """Modify an existing item's text, metadata, and/or vector."""
        return self.memory_manager.update(
            id=item_id, text=new_text, metadata=metadata, vector=vector
        )

    def get_last_item(self, n=1):
        """Return the n-th most recent item, or ``None`` when out of range.

        ``n`` counts backwards from the newest entry (``n=1`` is newest).
        """
        index = self.memory_manager.index
        if not index or not (0 < n <= len(index)):
            return None
        # NOTE(review): relies on VLite's index dict preserving insertion
        # order — confirm against the vlite version in use.
        chunk_id = list(index)[-n]
        item = index[chunk_id]
        return {
            "id": chunk_id,
            "text": item["text"],
            "metadata": item["metadata"],
            "binary_vector": item["binary_vector"],
        }

    def get_all_items(self):
        """All stored items, in the collection's dump order."""
        return list(self.memory_manager.dump().values())

    def get_entries_by_metadata(self, metadata):
        """Items whose metadata matches the *metadata* filter dict."""
        return self.memory_manager.get(where=metadata)

    def delete(self, item_ids):
        """Remove the items with the given ids."""
        return self.memory_manager.delete(ids=item_ids)

    def save(self):
        """Flush the collection to disk."""
        self.memory_manager.save()

    def clear(self):
        """Delete every item in the collection."""
        self.memory_manager.clear()

    def info(self):
        """Print collection diagnostics via VLite (returns None)."""
        self.memory_manager.info()
15 changes: 7 additions & 8 deletions devon_swe_bench_experimental/agent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def system_prompt_template_v2(command_docs: str):
"""


def history_to_bash_history(history):
def history_to_bash_history(memory_tool):
    """Render a VLite-backed chat history as an XML-ish bash transcript.

    Walks every stored item in the memory tool's dump order and formats
    user entries as <RESULT> blocks and assistant entries as
    <YOU><THOUGHT>/<COMMAND> blocks; entries with any other (or no) role
    are skipped.

    Args:
        memory_tool: object exposing ``get_all_items()`` that returns
            dicts with ``"text"`` and ``"metadata"`` keys
            (see VLiteMemoryTool).

    Returns:
        str: concatenated transcript; empty string for empty history.
    """
    parts = []
    for entry in memory_tool.get_all_items():
        # Metadata may be missing or None for items added without it;
        # treat those as role-less instead of raising AttributeError.
        metadata = entry.get("metadata") or {}
        role = metadata.get("role")
        if role == "user":
            result = entry["text"].strip() + "\n"
            parts.append(f"<RESULT>\n{result}\n</RESULT>")
        elif role == "assistant":
            # action[1:] drops the leading character (presumably the
            # newline left by the response parser) — TODO confirm.
            parts.append(f"""
<YOU>
<THOUGHT>{metadata.get('thought', '')}</THOUGHT>
<COMMAND>
{metadata.get('action', '')[1:]}
</COMMAND>
</YOU>
""")
    # Join once instead of repeated += to avoid quadratic string builds.
    return "".join(parts)


def object_to_xml(data: Union[dict, bool], root="object"):
xml = f"<{root}>"
Expand Down
13 changes: 7 additions & 6 deletions devon_swe_bench_experimental/environment/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)

from devon_swe_bench_experimental.environment.utils import LOGGER_NAME, Event
from devon_agent.tools.memory import VLiteMemoryTool
from tenacity import RetryError

from typing import TYPE_CHECKING
Expand All @@ -31,7 +32,7 @@ class Agent:
name: str
model: str
temperature: float = 0.0
chat_history: list[dict[str, str]] = field(default_factory=list)
chat_history: VLiteMemoryTool()
interrupt: str = ""

def run(self, session: "Session", observation: str = None): ...
Expand Down Expand Up @@ -85,7 +86,7 @@ def predict(
session.state.editor, session.state.PAGE_SIZE
)

self.chat_history.append(
self.chat_history.add(observation,metadata=
{"role": "user", "content": observation, "agent": self.name}
)

Expand All @@ -110,15 +111,15 @@ def predict(
last_observation = None
second_last_observation = None
if len(self.chat_history) > 2:
last_observation = self.chat_history[-1]["content"]
second_last_observation = self.chat_history[-3]["content"]
last_observation = self.chat_history.get_last_item(1)["text"]
second_last_observation = self.chat_history.get_last_item(3)["text"]
if (
last_observation
and second_last_observation
and "Failed to edit file" in last_observation
and "Failed to edit file" in second_last_observation
):
self.chat_history = self.chat_history[:-6]
self.chat_history = self.chat_history.get_last_item(6)
history = history_to_bash_history(self.chat_history)
self.current_model.args.temperature += (
0.2 if self.current_model.args.temperature < 0.8 else 0
Expand All @@ -142,7 +143,7 @@ def predict(

thought, action = parse_response(output)

self.chat_history.append(
self.chat_history.add(output,metadata=
{
"role": "assistant",
"content": output,
Expand Down
Loading