From 8f5cb6e7799e4a721d3994d38568d0cceeafb51f Mon Sep 17 00:00:00 2001
From: pancake
Date: Wed, 15 May 2024 00:59:10 +0200
Subject: [PATCH] Remove large code from the interpreter

---
 Makefile            |  2 +-
 r2ai/interpreter.py | 99 +--------------------------------------------
 r2ai/large.py       | 25 ++++++------
 r2ai/main.py        |  3 +-
 4 files changed, 17 insertions(+), 112 deletions(-)

diff --git a/Makefile b/Makefile
index f1a4618d..a54b9b1d 100644
--- a/Makefile
+++ b/Makefile
@@ -7,11 +7,11 @@ PIP=$(PYTHON) -m pip
 LINTED=r2ai/code_block.py
 LINTED+=r2ai/bubble.py
 LINTED+=r2ai/const.py
-LINTED+=r2ai/voice.py
 LINTED+=setup.py
 LINTED+=main.py
 LINTED+=r2ai/backend/kobaldcpp.py
 # LINTED+=r2ai/index.py
+# LINTED+=r2ai/voice.py
 # LINTED+=r2ai/anthropic.py
 
 ifeq ($(R2PM_BINDIR),)
diff --git a/r2ai/interpreter.py b/r2ai/interpreter.py
index 63625b1f..d7c13f11 100644
--- a/r2ai/interpreter.py
+++ b/r2ai/interpreter.py
@@ -612,6 +612,7 @@ def __init__(self):
         # gpt-4 is faster, smarter, can call functions, and is all-around easier to use.
         # This makes gpt-4 better aligned with Open Interpreters priority to be easy to use.
         self.llama_instance = None
+        self.large = Large(self)
 
     def get_info_for_system_message(self):
         """Gets relevent information for the system message."""
@@ -763,102 +764,6 @@ def clear_hints(self):
             res.append(msg)
         self.messages = res
 
-    def trimsource(self, msg):
-        msg = msg.replace("public ", "")
-        msg = re.sub(r'import.*\;', "", msg)
-        msg = msg.replace("const ", "")
-        msg = msg.replace("new ", "")
-        msg = msg.replace("undefined", "0")
-        msg = msg.replace("null", "0")
-        msg = msg.replace("false", "0")
-        msg = msg.replace("true", "1")
-        msg = msg.replace("let ", "")
-        msg = msg.replace("var ", "")
-        msg = msg.replace("class ", "")
-        msg = msg.replace("interface ", "")
-        msg = msg.replace("function ", "fn ")
-        msg = msg.replace("substring", "")
-        msg = msg.replace("this.", "")
-        msg = msg.replace("while (", "while(")
-        msg = msg.replace("if (", "if(")
-        msg = msg.replace("!== 0", "")
-        msg = msg.replace("=== true", "")
-        msg = msg.replace(" = ", "=")
-        msg = msg.replace(" === ", "==")
-        msg = msg.replace("\t", " ")
-        msg = msg.replace("\n", "")
-        msg = re.sub(r"/\*.*?\*/", '', msg, flags=re.DOTALL)
-        # msg = re.sub(r"\n+", "\n", msg)
-        msg = re.sub(r"\t+", ' ', msg)
-        msg = re.sub(r"\s+", " ", msg)
-        # msg = msg.replace(";", "")
-        return msg.strip()
-
-    def trimsource_ai(self, msg):
-        words = []
-        if self.mistral == None:
-            mmname = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
-            mmname = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
-            ctxwindow = int(self.env["llm.window"])
-            self.mistral = new_get_hf_llm(self, mmname, False, ctxwindow)
-        # q = f"Rewrite this code into shorter pseudocode (less than 500 tokens). keep the comments and essential logic:\n```\n{msg}\n```\n"
-        q = f"Rewrite this code into shorter pseudocode (less than 200 tokens). keep the relevant comments and essential logic:\n```\n{msg}\n```\n"
-        response = self.mistral(q, stream=False, temperature=0.1, stop="", max_tokens=4096)
-        text0 = response["choices"][0]["text"]
-        if "```" in text0:
-            return text0.split("```")[1].strip()
-        return text0.strip().replace("```", "")
-
-    def compress_code_ai(self, code):
-        piecesize = 1024 * 8 # mistral2 supports 32k vs 4096
-        codelen = len(code)
-        pieces = int(codelen / piecesize)
-        if pieces < 1:
-            pieces = 1
-        plen = int(codelen / pieces)
-        off = 0
-        res = []
-        for i in range(pieces):
-            piece = i + 1
-            print(f"Processing {piece} / {pieces} ...")
-            if piece == pieces:
-                r = self.trimsource_ai(code[off:])
-            else:
-                r = self.trimsource_ai(code[off:off+plen])
-            res.append(r)
-            off += plen
-        return "\n".join(res)
-
-    def compress_messages(self, messages):
-        # TODO: implement a better logic in here asking the lm to summarize the context
-        olen = 0
-        msglen = 0
-        for msg in messages:
-            if self.env["chat.reply"] == "false":
-                if msg["role"] != "user":
-                    continue
-            if "content" in msg:
-                amsg = msg["content"]
-                olen += len(amsg)
-                if len(amsg) > int(self.env["llm.maxmsglen"]):
-                    if "while" in amsg and "```" in amsg:
-                        que = re.search(r"^(.*?)```", amsg, re.DOTALL).group(0).replace("```", "")
-                        cod = re.search(r"```(.*?)$", amsg, re.DOTALL).group(0).replace("```", "")
-                        shortcode = cod
-                        while len(shortcode) > 4000:
-                            olen = len(shortcode)
-                            shortcode = self.compress_code_ai(shortcode)
-                            nlen = len(shortcode)
-                            print(f"Went from {olen} to {nlen}")
-                        msg["content"] = f"{que}\n```\n{shortcode}\n```\n"
-                else:
-                    print(f"total length {msglen} (original length was {olen})")
-            msglen += len(msg["content"])
-        # print(f"total length {msglen} (original length was {olen})")
-        # if msglen > 4096:
-        #     ¡print("Query is too large.. you should consider triming old messages")
-        return messages
-
     def respond(self):
         global Ginterrupted
         maxtokens = int(self.env["llm.maxtokens"])
@@ -880,7 +785,7 @@ def respond(self):
             import tokentrim
             messages = tokentrim.trim(self.messages, max_tokens=maxtokens, system_message=system_message)
         else:
-            messages = self.compress_messages(self.messages)
+            messages = self.large.compress_messages(self.messages)
 
         if self.env["debug"] == "true":
             print(messages)
diff --git a/r2ai/large.py b/r2ai/large.py
index 6245ee28..18bc8362 100644
--- a/r2ai/large.py
+++ b/r2ai/large.py
@@ -10,15 +10,16 @@ def __init__(self, ai = None):
         # self.model = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
         self.model = "FaradayDotDev/llama-3-8b-Instruct-GGUF"
         if ai is not None:
-            self.env = ai.env
+            self.ai = ai
         else:
-            self.env = {}
-            self.env["llm.gpu"] = "true"
+            self.ai = {}
+            self.ai.env = {}
+            self.ai.env["llm.gpu"] = "true"
 
     def slice_text(self, amsg):
         slices = []
         pos = self.maxlen
-        while(len(amsg) > self.maxlen):
+        while len(amsg) > self.maxlen:
             s = amsg[:pos]
             amsg = amsg[pos:]
             slices.append(s)
@@ -27,7 +28,7 @@ def slice_text(self, amsg):
 
     def compress_text(self, msg):
         if self.mistral == None:
-            self.mistral = new_get_hf_llm(self, self.model, False, self.window)
+            self.mistral = new_get_hf_llm(self.ai, self.model, False, self.window)
         # q = f"Rewrite this code into shorter pseudocode (less than 500 tokens). keep the comments and essential logic:\n```\n{msg}\n```\n"
         #q = f"Rewrite this code into shorter pseudocode (less than 200 tokens). keep the relevant comments and essential logic:\n```\n{msg}\n```\n"
         q = f"Resumen y responde SOLO la información relevante del siguiente texto:\n{msg}"
@@ -55,12 +56,12 @@ def summarize_text(self, amsg):
     def keywords_ai(self, text):
         # kws = self.keywords_ai("who is the author of radare?") => "author,radare2"
         words = []
-        ctxwindow = int(self.env["llm.window"])
-        mm = new_get_hf_llm(self, self.model, False, ctxwindow)
+        ctxwindow = int(self.ai.env["llm.window"])
+        mm = new_get_hf_llm(self.ai, self.model, False, ctxwindow)
         msg = f"Considering the sentence \"{text}\" as input, Take the KEYWORDS or combination of TWO words from the given text and respond ONLY a comma separated list of the most relevant words. DO NOT introduce your response, ONLY show the words"
         msg = f"Take \"{text}\" as input, and extract the keywords and combination of keywords to make a search online, the output must be a comma separated list" #Take the KEYWORDS or combination of TWO words from the given text and respond ONLY a comma separated list of the most relevant words. DO NOT introduce your response, ONLY show the words"
         response = mm(msg, stream=False, temperature=0.001, stop="", max_tokens=1750)
-        if self.env["debug"] == "true":
+        if self.ai.env["debug"] == "true":
             print("KWSPLITRESPONSE", response)
         text0 = response["choices"][0]["text"]
         text0 = text0.replace('"', ",")
@@ -108,8 +109,8 @@ def trimsource(self, msg):
     def trimsource_ai(self, msg):
         words = []
         if self.mistral == None:
-            ctxwindow = int(self.env["llm.window"])
-            self.mistral = new_get_hf_llm(self, self.model, False, ctxwindow)
+            ctxwindow = int(self.ai.env["llm.window"])
+            self.mistral = new_get_hf_llm(self.ai, self.model, False, ctxwindow)
         # q = f"Rewrite this code into shorter pseudocode (less than 500 tokens). keep the comments and essential logic:\n```\n{msg}\n```\n"
         q = f"Rewrite this code into shorter pseudocode (less than 200 tokens). keep the relevant comments and essential logic:\n```\n{msg}\n```\n"
         response = self.mistral(q, stream=False, temperature=0.1, stop="", max_tokens=4096)
@@ -143,13 +144,13 @@ def compress_messages(self, messages):
         olen = 0
         msglen = 0
         for msg in messages:
-            if self.env["chat.reply"] == "false":
+            if self.ai.env["chat.reply"] == "false":
                 if msg["role"] != "user":
                     continue
             if "content" in msg:
                 amsg = msg["content"]
                 olen += len(amsg)
-                if len(amsg) > int(self.env["llm.maxmsglen"]):
+                if len(amsg) > int(self.ai.env["llm.maxmsglen"]):
                     if "while" in amsg and "```" in amsg:
                         que = re.search(r"^(.*?)```", amsg, re.DOTALL).group(0).replace("```", "")
                         cod = re.search(r"```(.*?)$", amsg, re.DOTALL).group(0).replace("```", "")
diff --git a/r2ai/main.py b/r2ai/main.py
index b5947423..7e73774a 100755
--- a/r2ai/main.py
+++ b/r2ai/main.py
@@ -6,8 +6,6 @@
 import builtins
 import traceback
 
-def __main__():
-    print("Hello maiN")
 os.environ["TOKENIZERS_PARALLELISM"]="false"
 
 try:
@@ -33,6 +31,7 @@
     pass
 
 OPENAI_KEY = ""
+print("MAIN")
 try:
     if "HOME" in os.environ:
         from r2ai.utils import slurp
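
The net effect of the patch is that callers no longer reach the compression helpers on the interpreter itself; they go through the Large instance built in __init__. A minimal usage sketch, with a hypothetical SimpleNamespace standing in for the interpreter object (the real code passes the interpreter, whose env carries the llm.* and chat.* settings shown in the diff; the key values below are illustrative):

    from types import SimpleNamespace
    from r2ai.large import Large

    # Hypothetical stand-in: compress_messages only reads ai.env here,
    # but loading a model via new_get_hf_llm would need the real object.
    fake_ai = SimpleNamespace(env={
        "chat.reply": "true",
        "llm.maxmsglen": "8096",   # illustrative threshold
        "llm.window": "4096",
        "debug": "false",
    })

    large = Large(fake_ai)
    messages = [{"role": "user", "content": "short message"}]
    messages = large.compress_messages(messages)

With a short message nothing is rewritten; only contents longer than llm.maxmsglen that look like code (they contain "while" and a ``` fence) get compressed.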
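
The piecewise strategy that moved into large.py is easy to see in isolation: compress_code_ai cuts the input into roughly equal pieces of at most about 8 KiB and runs the model over each piece. A self-contained sketch of that loop, with a trivial summarize callback standing in for the model-backed trimsource_ai call:

    def compress_chunked(code, summarize, piecesize=1024 * 8):
        # Split into roughly equal pieces, compress each independently,
        # then join the shortened pieces back together.
        pieces = max(1, len(code) // piecesize)
        plen = len(code) // pieces
        out, off = [], 0
        for i in range(pieces):
            # The final piece also absorbs any division remainder.
            chunk = code[off:] if i == pieces - 1 else code[off:off + plen]
            out.append(summarize(chunk))
            off += plen
        return "\n".join(out)

    print(compress_chunked("x" * 20000, lambda s: s[:10]))  # two pieces -> two lines

Because the original computes int(codelen / piecesize), an input only slightly over one piece still rounds down to a single model call; compress_messages then loops, re-compressing the result until it fits under its 4000-character bound.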
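
One caveat in the new constructor: the fallback branch assigns self.ai = {} and then sets self.ai.env, which raises AttributeError because a plain dict has no attributes. A minimal sketch of a fallback that keeps the self.ai.env[...] lookups used by the other methods working (SimpleNamespace is a suggested repair here, not what the patch does):

    from types import SimpleNamespace

    def fallback_ai():
        # Attribute-style container, so self.ai.env["llm.gpu"] still resolves.
        return SimpleNamespace(env={"llm.gpu": "true"})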