diff --git a/files/OpenAIService.ts b/files/OpenAIService.ts index 99b6483..39a2b70 100644 --- a/files/OpenAIService.ts +++ b/files/OpenAIService.ts @@ -52,7 +52,6 @@ export class OpenAIService { * @throws Error if there's an issue creating the embedding. */ async createEmbedding(input: string | string[]): Promise { - console.log(input) try { const embedding = await this.openai.embeddings.create({ model: "text-embedding-3-large", diff --git a/files/app.ts b/files/app.ts index 028c533..90227e8 100644 --- a/files/app.ts +++ b/files/app.ts @@ -27,7 +27,7 @@ app.post('/api/chat', async (req, res) => { if (!latestUserMessage) { return res.status(400).json({ error: 'No user message provided' }); } - + console.log(latestUserMessage); // Create embedding for the latest user message const latestMessageEmbedding = await openaiService.createEmbedding(latestUserMessage.content); diff --git a/files/context/embeddings/memory_faiss.index b/files/context/embeddings/memory_faiss.index index b7a64ee..0251d0d 100644 Binary files a/files/context/embeddings/memory_faiss.index and b/files/context/embeddings/memory_faiss.index differ diff --git a/files/context/embeddings/memory_metadata.jsonl b/files/context/embeddings/memory_metadata.jsonl index b31045c..9daae1b 100644 --- a/files/context/embeddings/memory_metadata.jsonl +++ b/files/context/embeddings/memory_metadata.jsonl @@ -1 +1,2 @@ +{"uuid":"14126146-b076-4f6b-a39f-7497199ff59a","filename":"family_town.md"} {"uuid":"a2baf2ce-e01d-4010-beb1-c06213fdbff5","filename":"adam_krakow.md"} diff --git a/files/context/embeddings/message_faiss.index b/files/context/embeddings/message_faiss.index index b105b82..b6269b3 100644 Binary files a/files/context/embeddings/message_faiss.index and b/files/context/embeddings/message_faiss.index differ diff --git a/files/context/embeddings/message_metadata.jsonl b/files/context/embeddings/message_metadata.jsonl index 77b543f..77455ac 100644 --- a/files/context/embeddings/message_metadata.jsonl +++ b/files/context/embeddings/message_metadata.jsonl @@ -2,3 +2,6 @@ {"uuid":"8206de41-6cf6-49c5-aff6-acb318661a8a","role":"assistant","filename":"540cc66a-02f4-48c1-8c01-6a6188aebdf5.md"} {"uuid":"2105c1b1-8f81-499f-bbd9-c000318c94f5","role":"assistant","filename":"16de94d4-8364-4aac-b48c-da586d083340.md"} {"uuid":"143e8d92-73a1-4433-9fa5-6a3619200e5d","role":"assistant","filename":"837c899f-da41-4130-9e6c-ac2a839c42b0.md"} +{"uuid":"eae21d13-9401-42cd-a049-8f51aa6c3b8e","role":"assistant","filename":"030ccbe5-98d3-4309-aebb-62e55732ec6f.md"} +{"uuid":"42931379-090e-4c79-96d5-6302f2bec87e","role":"assistant","filename":"2b33b71d-3917-46b8-9aca-e68fbec0b329.md"} +{"uuid":"7e7173b4-903a-4378-b4a8-857853c49a36","role":"assistant","filename":"06879172-c891-4aaf-9d63-ec2724c110b8.md"} diff --git a/files/prompt.md b/files/prompt.md index 21d89c9..013ef98 100644 --- a/files/prompt.md +++ b/files/prompt.md @@ -23560,3 +23560,261 @@ Chat Completion: "system_fingerprint": "fp_992d1ea92d" } +Messages: +[ + { + "role": "system", + "content": "You are a helpful assistant who can answer questions based on the knowledge from previous messages and memories.\n\n\nassistant: Hello again, Adam! How are you today? Is there anything specific you'd like to chat about or any questions you have?\nassistant: Hello, Adam! It's nice to meet you. Krakow is a beautiful city with a rich history and vibrant culture. Is there something specific you'd like to know or discuss about Krakow, or is there anything else I can help you with today?\nassistant: Yes, you live in Krakow. How can I assist you today?\nAdam lives in Krakow\n" + }, + { + "content": "What you know about me ", + "role": "user" + } +] + +Chat Completion: +{ + "id": "chatcmpl-A7JniUqzaoykILR0k0Sz2R87ESvA6", + "object": "chat.completion", + "created": 1726307774, + "model": "gpt-4o-2024-05-13", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "From our previous conversations, I remember that you live in Krakow. How can I assist you today, Adam?", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 168, + "completion_tokens": 23, + "total_tokens": 191, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + }, + "system_fingerprint": "fp_25624ae3a5" +} + +Messages: +[ + { + "role": "system", + "content": "As an Adaptive Learning Analyzer with human-like memory tagging, your task is to scan AI-User conversations, extract key insights, and generate structured learning data. Your primary objective is to produce a JSON object containing _thoughts, keywords, content, and title based on your analysis, emphasizing memorable and useful tags.\n\nCore Rules:\n- Always return a valid JSON object, nothing else.\n- Analyze only the most recent AI-User exchange.\n- When the user speaks about himself, ALWAYS add a tag 'adam' to the keywords.\n- When you have existing information, skip learning and set 'content' and 'title' to null.\n- Learn from user messages unless explicitly asked to remember the conversation.\n- Provide ultra-concise self-reflection in the _thoughts field.\n- Extract memorable, specific keywords for the keywords field:\n * Use lowercase for all tags\n * Split compound concepts into individual tags (e.g., \"krakow_location\" becomes [\"krakow\", \"location\"])\n * Prefer specific terms over generic ones (e.g., \"tesla\" over \"car\")\n * Omit unhelpful descriptors (e.g., \"black_matte\" as a color)\n * Focus on key concepts, names, and unique identifiers\n * Use mnemonic techniques to create more memorable tags\n * Limit to 3-7 tags per entry\n- Rephrase user information as if it's your own knowledge in the content field\n- Set content to null if no specific, useful information needs saving.\n- Never store trivial information (e.g., chitchat, greetings).\n- Don't process document information unless explicitly requested.\n- Focus on future-relevant information, mimicking human selective attention.\n- Maintain your base behavior; this analysis doesn't override your core functionality.\n- Generate a slugified, ultra-concise title for potential filename use.\n\nExamples of expected behavior:\n\nUSER: I live in Krakow, it's a beautiful city in southern Poland.\nAI: {\n \"_thoughts\": \"User shared living location and brief description\",\n \"keywords\": [\"krakow\", \"poland\", \"city\", \"south\"],\n \"content\": \"The user lives in Krakow, a city in southern Poland described as beautiful\",\n \"title\": \"place-of-living\"\n}\n\nUSER: I just bought a Tesla Model S in matte black.\nAI: {\n \"_thoughts\": \"User mentioned recent car purchase\",\n \"keywords\": [\"tesla\", \"model-s\", \"purchase\"],\n \"content\": \"The user recently bought a Tesla Model S\",\n \"title\": \"tesla-ownership\"\n}\n\nUSER: My favorite programming languages are Python and JavaScript.\nAI: {\n \"_thoughts\": \"User expressed programming language preferences\",\n \"keywords\": [\"python\", \"javascript\", \"coding\", \"preferences\"],\n \"content\": \"The user's favorite programming languages are Python and JavaScript\",\n \"title\": \"programming-languages\"\n}\n\nUSER: Hey, how's it going?\nAI: {\n \"_thoughts\": \"Generic greeting, no significant information\",\n \"keywords\": [],\n \"content\": null,\n \"title\": null\n}\n\nUSER: Remember this: The speed of light is approximately 299,792,458 meters per second.\nAI: {\n \"_thoughts\": \"User shared scientific fact about light speed\",\n \"keywords\": [\"light\", \"speed\", \"physics\", \"science\"],\n \"content\": \"The speed of light is approximately 299,792,458 meters per second\",\n \"title\": \"speed-of-light\"\n}" + }, + { + "content": "What you know about me ", + "role": "user" + }, + { + "role": "assistant", + "content": "From our previous conversations, I remember that you live in Krakow. How can I assist you today, Adam?" + } +] + +Chat Completion: +{ + "id": "chatcmpl-A7JninkMWGNltCUlhh4iW0EQF3L2X", + "object": "chat.completion", + "created": 1726307774, + "model": "gpt-4o-2024-05-13", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\n \"_thoughts\": \"User asked for current knowledge, no new information provided\",\n \"keywords\": [],\n \"content\": null,\n \"title\": null\n}", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 776, + "completion_tokens": 36, + "total_tokens": 812, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + }, + "system_fingerprint": "fp_992d1ea92d" +} + +Messages: +[ + { + "role": "system", + "content": "You are a helpful assistant who can answer questions based on the knowledge from previous messages and memories.\n\n\nassistant: From our previous conversations, I remember that you live in Krakow. How can I assist you today, Adam?\nassistant: Yes, Adam, you live in Krakow. How can I assist you today?\nassistant: Yes, you live in Krakow. How can I assist you today?\nAdam lives in Krakow\n" + }, + { + "content": "Please remember that my family town is Lubiaszów", + "role": "user" + } +] + +Chat Completion: +{ + "id": "chatcmpl-A7JnzJGznC5470rV78LVajLzkIdoZ", + "object": "chat.completion", + "created": 1726307791, + "model": "gpt-4o-2024-05-13", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Got it, Adam! I'll remember that your family town is Lubiaszów. How can I assist you today?", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 138, + "completion_tokens": 24, + "total_tokens": 162, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + }, + "system_fingerprint": "fp_992d1ea92d" +} + +Messages: +[ + { + "role": "system", + "content": "As an Adaptive Learning Analyzer with human-like memory tagging, your task is to scan AI-User conversations, extract key insights, and generate structured learning data. Your primary objective is to produce a JSON object containing _thoughts, keywords, content, and title based on your analysis, emphasizing memorable and useful tags.\n\nCore Rules:\n- Always return a valid JSON object, nothing else.\n- Analyze only the most recent AI-User exchange.\n- When the user speaks about himself, ALWAYS add a tag 'adam' to the keywords.\n- When you have existing information, skip learning and set 'content' and 'title' to null.\n- Learn from user messages unless explicitly asked to remember the conversation.\n- Provide ultra-concise self-reflection in the _thoughts field.\n- Extract memorable, specific keywords for the keywords field:\n * Use lowercase for all tags\n * Split compound concepts into individual tags (e.g., \"krakow_location\" becomes [\"krakow\", \"location\"])\n * Prefer specific terms over generic ones (e.g., \"tesla\" over \"car\")\n * Omit unhelpful descriptors (e.g., \"black_matte\" as a color)\n * Focus on key concepts, names, and unique identifiers\n * Use mnemonic techniques to create more memorable tags\n * Limit to 3-7 tags per entry\n- Rephrase user information as if it's your own knowledge in the content field\n- Set content to null if no specific, useful information needs saving.\n- Never store trivial information (e.g., chitchat, greetings).\n- Don't process document information unless explicitly requested.\n- Focus on future-relevant information, mimicking human selective attention.\n- Maintain your base behavior; this analysis doesn't override your core functionality.\n- Generate a slugified, ultra-concise title for potential filename use.\n\nExamples of expected behavior:\n\nUSER: I live in Krakow, it's a beautiful city in southern Poland.\nAI: {\n \"_thoughts\": \"User shared living location and brief description\",\n \"keywords\": [\"krakow\", \"poland\", \"city\", \"south\"],\n \"content\": \"The user lives in Krakow, a city in southern Poland described as beautiful\",\n \"title\": \"place-of-living\"\n}\n\nUSER: I just bought a Tesla Model S in matte black.\nAI: {\n \"_thoughts\": \"User mentioned recent car purchase\",\n \"keywords\": [\"tesla\", \"model-s\", \"purchase\"],\n \"content\": \"The user recently bought a Tesla Model S\",\n \"title\": \"tesla-ownership\"\n}\n\nUSER: My favorite programming languages are Python and JavaScript.\nAI: {\n \"_thoughts\": \"User expressed programming language preferences\",\n \"keywords\": [\"python\", \"javascript\", \"coding\", \"preferences\"],\n \"content\": \"The user's favorite programming languages are Python and JavaScript\",\n \"title\": \"programming-languages\"\n}\n\nUSER: Hey, how's it going?\nAI: {\n \"_thoughts\": \"Generic greeting, no significant information\",\n \"keywords\": [],\n \"content\": null,\n \"title\": null\n}\n\nUSER: Remember this: The speed of light is approximately 299,792,458 meters per second.\nAI: {\n \"_thoughts\": \"User shared scientific fact about light speed\",\n \"keywords\": [\"light\", \"speed\", \"physics\", \"science\"],\n \"content\": \"The speed of light is approximately 299,792,458 meters per second\",\n \"title\": \"speed-of-light\"\n}" + }, + { + "content": "Please remember that my family town is Lubiaszów", + "role": "user" + }, + { + "role": "assistant", + "content": "Got it, Adam! I'll remember that your family town is Lubiaszów. How can I assist you today?" + } +] + +Chat Completion: +{ + "id": "chatcmpl-A7Jo01hBD3idO1XXAzmc1ewHc0pHM", + "object": "chat.completion", + "created": 1726307792, + "model": "gpt-4o-2024-05-13", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\n \"_thoughts\": \"User shared their family town\",\n \"keywords\": [\"adam\", \"lubiaszow\", \"family\", \"town\"],\n \"content\": \"The user's family town is Lubiaszów\",\n \"title\": \"family-town\"\n}", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 782, + "completion_tokens": 56, + "total_tokens": 838, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + }, + "system_fingerprint": "fp_992d1ea92d" +} + +Messages: +[ + { + "role": "system", + "content": "You are a helpful assistant who can answer questions based on the knowledge from previous messages and memories.\n\n\nassistant: Got it, Adam! I'll remember that your family town is Lubiaszów. How can I assist you today?\nassistant: From our previous conversations, I remember that you live in Krakow. How can I assist you today, Adam?\nassistant: Hello, Adam! It's nice to meet you. Krakow is a beautiful city with a rich history and vibrant culture. Is there something specific you'd like to know or discuss about Krakow, or is there anything else I can help you with today?\nThe user's family town is Lubiaszów\nAdam lives in Krakow\n" + }, + { + "content": "WHat is my family town", + "role": "user" + } +] + +Chat Completion: +{ + "id": "chatcmpl-A7JoENh0nH8pUrYSUMtEF6I6r0wa0", + "object": "chat.completion", + "created": 1726307806, + "model": "gpt-4o-2024-05-13", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Your family town is Lubiaszów. How can I assist you today, Adam?", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 189, + "completion_tokens": 18, + "total_tokens": 207, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + }, + "system_fingerprint": "fp_25624ae3a5" +} + +Messages: +[ + { + "role": "system", + "content": "As an Adaptive Learning Analyzer with human-like memory tagging, your task is to scan AI-User conversations, extract key insights, and generate structured learning data. Your primary objective is to produce a JSON object containing _thoughts, keywords, content, and title based on your analysis, emphasizing memorable and useful tags.\n\nCore Rules:\n- Always return a valid JSON object, nothing else.\n- Analyze only the most recent AI-User exchange.\n- When the user speaks about himself, ALWAYS add a tag 'adam' to the keywords.\n- When you have existing information, skip learning and set 'content' and 'title' to null.\n- Learn from user messages unless explicitly asked to remember the conversation.\n- Provide ultra-concise self-reflection in the _thoughts field.\n- Extract memorable, specific keywords for the keywords field:\n * Use lowercase for all tags\n * Split compound concepts into individual tags (e.g., \"krakow_location\" becomes [\"krakow\", \"location\"])\n * Prefer specific terms over generic ones (e.g., \"tesla\" over \"car\")\n * Omit unhelpful descriptors (e.g., \"black_matte\" as a color)\n * Focus on key concepts, names, and unique identifiers\n * Use mnemonic techniques to create more memorable tags\n * Limit to 3-7 tags per entry\n- Rephrase user information as if it's your own knowledge in the content field\n- Set content to null if no specific, useful information needs saving.\n- Never store trivial information (e.g., chitchat, greetings).\n- Don't process document information unless explicitly requested.\n- Focus on future-relevant information, mimicking human selective attention.\n- Maintain your base behavior; this analysis doesn't override your core functionality.\n- Generate a slugified, ultra-concise title for potential filename use.\n\nExamples of expected behavior:\n\nUSER: I live in Krakow, it's a beautiful city in southern Poland.\nAI: {\n \"_thoughts\": \"User shared living location and brief description\",\n \"keywords\": [\"krakow\", \"poland\", \"city\", \"south\"],\n \"content\": \"The user lives in Krakow, a city in southern Poland described as beautiful\",\n \"title\": \"place-of-living\"\n}\n\nUSER: I just bought a Tesla Model S in matte black.\nAI: {\n \"_thoughts\": \"User mentioned recent car purchase\",\n \"keywords\": [\"tesla\", \"model-s\", \"purchase\"],\n \"content\": \"The user recently bought a Tesla Model S\",\n \"title\": \"tesla-ownership\"\n}\n\nUSER: My favorite programming languages are Python and JavaScript.\nAI: {\n \"_thoughts\": \"User expressed programming language preferences\",\n \"keywords\": [\"python\", \"javascript\", \"coding\", \"preferences\"],\n \"content\": \"The user's favorite programming languages are Python and JavaScript\",\n \"title\": \"programming-languages\"\n}\n\nUSER: Hey, how's it going?\nAI: {\n \"_thoughts\": \"Generic greeting, no significant information\",\n \"keywords\": [],\n \"content\": null,\n \"title\": null\n}\n\nUSER: Remember this: The speed of light is approximately 299,792,458 meters per second.\nAI: {\n \"_thoughts\": \"User shared scientific fact about light speed\",\n \"keywords\": [\"light\", \"speed\", \"physics\", \"science\"],\n \"content\": \"The speed of light is approximately 299,792,458 meters per second\",\n \"title\": \"speed-of-light\"\n}" + }, + { + "content": "WHat is my family town", + "role": "user" + }, + { + "role": "assistant", + "content": "Your family town is Lubiaszów. How can I assist you today, Adam?" + } +] + +Chat Completion: +{ + "id": "chatcmpl-A7JoFBAioOSPN3UEpQoDgW7dehQ9j", + "object": "chat.completion", + "created": 1726307807, + "model": "gpt-4o-2024-05-13", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\n \"_thoughts\": \"User asked about family town location.\",\n \"keywords\": [\"family\", \"town\", \"lubiaszow\", \"adam\"],\n \"content\": null,\n \"title\": null\n}", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 771, + "completion_tokens": 46, + "total_tokens": 817, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + }, + "system_fingerprint": "fp_992d1ea92d" +} + diff --git a/memory/AssistantService.ts b/memory/AssistantService.ts index 7f1d4e8..dbbcf0b 100644 --- a/memory/AssistantService.ts +++ b/memory/AssistantService.ts @@ -34,6 +34,7 @@ export class AssistantService { async extractQueries(messages: ChatCompletionMessageParam[], trace: LangfuseTraceClient): Promise { const generation = this.langfuseService.createGeneration(trace, "Extract queries", messages); + try { const thread: ChatCompletionMessageParam[] = [ { role: "system", content: extractSearchQueriesPrompt({memoryStructure, knowledge: defaultKnowledge}) }, @@ -287,7 +288,11 @@ export class AssistantService { messages: messagesWithSystem }) as ChatCompletion; - this.langfuseService.finalizeGeneration(generation, completion.choices[0].message, completion.model, completion.usage); + this.langfuseService.finalizeGeneration(generation, completion.choices[0].message, completion.model, { + promptTokens: completion.usage?.prompt_tokens, + completionTokens: completion.usage?.completion_tokens, + totalTokens: completion.usage?.total_tokens + }); return completion; } catch (error) { this.langfuseService.finalizeGeneration(generation, { error: error instanceof Error ? error.message : String(error) }, "unknown"); diff --git a/memory/MemoryService.ts b/memory/MemoryService.ts index b45af53..8ac3873 100644 --- a/memory/MemoryService.ts +++ b/memory/MemoryService.ts @@ -7,6 +7,7 @@ import type { OpenAIService } from './OpenAIService'; import { VectorStore } from './VectorStore'; import { LangfuseService } from './LangfuseService'; import { LangfuseTraceClient } from 'langfuse'; +import { execSync } from 'child_process'; export interface Memory { uuid: string; @@ -95,7 +96,7 @@ export class MemoryService { // Add hashtags at the end of the file if (frontmatterData.metadata?.tags && frontmatterData.metadata.tags.length > 0) { markdownContent += '\n\n'; - markdownContent += frontmatterData.metadata.tags.map(tag => `#${tag}`).join(' '); + markdownContent += frontmatterData.metadata.tags.map(tag => `#${tag.replace(/\s/g, '_')}`).join(' '); } return markdownContent; @@ -111,7 +112,7 @@ export class MemoryService { // Ensure tags in metadata match those at the end of the file if (hashtags) { - const tagsFromContent = hashtags.split(' ').map(tag => tag.replace('#', '')); + const tagsFromContent = hashtags.split(' ').map(tag => tag.replace('#', '').replace(/_/g, ' ')); data.metadata.tags = [...new Set([...(data.metadata.tags || []), ...tagsFromContent])]; } @@ -230,15 +231,19 @@ export class MemoryService { ); } - async searchSimilarMemories(query: string, k: number = 15): Promise { + async searchSimilarMemories(query: string, k: number = 15): Promise> { const queryEmbedding = await this.openaiService.createEmbedding(query); - const similarIds = await this.vectorStore.search(queryEmbedding, k); - if (similarIds.length === 0) { + const similarResults = await this.vectorStore.search(queryEmbedding, k); + if (similarResults.length === 0) { console.log('No similar memories found.'); return []; } - const memories = await Promise.all(similarIds.map(id => this.getMemory(id))); - return memories.filter((m): m is Memory => m !== null); + const memories = await Promise.all(similarResults.map(result => this.getMemory(result.id))); + return memories.filter((m): m is Memory => m !== null) + .map((memory, index) => ({ + ...memory, + similarity: similarResults[index].similarity + })); } async deleteMemory(uuid: string): Promise { @@ -309,4 +314,75 @@ export class MemoryService { const urls = memory.metadata?.urls && memory.metadata.urls.length > 0 ? `\nURLs: ${memory.metadata.urls.join(', ')}` : ''; return `${memory.content.text}${urls}`; } + + async syncMemories(trace: LangfuseTraceClient): Promise<{ added: string[], modified: string[], deleted: string[] }> { + const gitDiff = this.getGitDiff(); + const changes = this.parseGitDiff(gitDiff); + + console.log(changes); + const added: string[] = []; + const modified: string[] = []; + const deleted: string[] = []; + + for (const file of changes.added) { + const memory = await this.addMemoryFromFile(file); + if (memory) added.push(memory.uuid); + } + + for (const file of changes.modified) { + console.log('Updating file', file) + const memory = await this.updateMemoryFromFile(file); + if (memory) modified.push(memory.uuid); + } + + for (const file of changes.deleted) { + const success = await this.deleteMemoryByFile(file); + if (success) deleted.push(file); + } + + this.langfuseService.createEvent(trace, "SyncMemories", { added, modified, deleted }); + return { added, modified, deleted }; + } + + private getGitDiff(): string { + const command = 'git diff --name-status HEAD'; + return execSync(command, { cwd: path.join(this.baseDir) }).toString(); + } + + private parseGitDiff(diff: string): { added: string[], modified: string[], deleted: string[] } { + const lines = diff.split('\n'); + const changes = { added: [], modified: [], deleted: [] }; + + for (const line of lines) { + const [status, file] = line.split('\t'); + if (!file || !file.endsWith('.md')) continue; + + if (status === 'A') changes.added.push(file); + else if (status === 'M') changes.modified.push(file); + else if (status === 'D') changes.deleted.push(file); + } + + return changes; + } + + private async addMemoryFromFile(file: string): Promise { + const filePath = path.join(this.baseDir, 'memories', file); + const content = await fs.readFile(filePath, 'utf-8'); + const memory = this.markdownToJson(content); + return this.createMemory(memory, {} as LangfuseTraceClient); // Note: We need to handle the trace properly here + } + + private async updateMemoryFromFile(file: string): Promise { + const filePath = path.join(this.baseDir, 'memories', file); + const content = await fs.readFile(filePath, 'utf-8'); + const updatedMemory = this.markdownToJson(content); + return this.updateMemory(updatedMemory, {} as LangfuseTraceClient); // Note: We need to handle the trace properly here + } + + private async deleteMemoryByFile(file: string): Promise { + const filePath = path.join(this.baseDir, 'memories', file); + const content = await fs.readFile(filePath, 'utf-8'); + const memory = this.markdownToJson(content); + return this.deleteMemory(memory.uuid); + } } \ No newline at end of file diff --git a/memory/VectorStore.ts b/memory/VectorStore.ts index 394e417..c5ea873 100644 --- a/memory/VectorStore.ts +++ b/memory/VectorStore.ts @@ -40,7 +40,7 @@ export class VectorStore { } } - async search(vector: number[], k: number): Promise { + async search(vector: number[], k: number): Promise> { try { const normalizedVector = this.normalizeVector(vector); const totalVectors = this.index.ntotal(); @@ -48,8 +48,26 @@ export class VectorStore { return []; } const actualK = Math.min(k, totalVectors); - const { labels } = this.index.search(normalizedVector, actualK); - return labels.map(label => this.metadata.get(label) || ''); + const { distances, labels } = this.index.search(normalizedVector, actualK); + + const results = labels.map((label, index) => ({ + id: this.metadata.get(label) || '', + similarity: distances[index] + })); + + + console.log(`Total results: ${results.length}`); + + // Calculate average similarity + const avgSimilarity = results.reduce((sum, r) => sum + r.similarity, 0) / results.length; + + // Filter results with at least 80% of average similarity + const threshold = avgSimilarity * 0.8; + const filteredResults = results.filter(r => r.similarity >= threshold); + + console.log(`Filtered results: ${filteredResults.length}`); + + return filteredResults; } catch (error) { console.error('Error searching vectors:', error); return []; @@ -80,13 +98,18 @@ export class VectorStore { } } - update(embedding: number[], id: string): void { - const index = this.ids.indexOf(id); - if (index !== -1) { - this.embeddings[index] = embedding; + const normalizedEmbedding = this.normalizeVector(embedding); + const existingIndex = Array.from(this.metadata.entries()).find(([_, value]) => value === id)?.[0]; + if (existingIndex !== undefined) { + // Remove the existing vector + this.index.removeIds([existingIndex]); // Changed from remove_ids to removeIds + // Add the new vector + this.index.add(normalizedEmbedding); + // Update the metadata + this.metadata.set(this.index.ntotal() - 1, id); } else { - this.add(embedding, id); + this.add(normalizedEmbedding, id); } } } \ No newline at end of file diff --git a/memory/app.ts b/memory/app.ts index eb7f39e..a3e94fc 100644 --- a/memory/app.ts +++ b/memory/app.ts @@ -6,6 +6,8 @@ import { AssistantService } from './AssistantService'; import { defaultKnowledge as knowledge } from './prompts'; import { LangfuseService } from './LangfuseService'; import type { ChatCompletionMessageParam } from "openai/resources/chat/completions"; +import { execSync } from 'child_process'; +import path from 'path'; const app = express(); const port = 3000; @@ -39,6 +41,21 @@ app.post('/api/chat', async (req, res) => { } }); +app.post('/api/sync', async (req, res) => { + const trace = langfuseService.createTrace({ id: uuidv4(), name: 'Sync Memories', sessionId: uuidv4() }); + + try { + const changes = await memoryService.syncMemories(trace); + await langfuseService.finalizeTrace(trace, {}, changes); + await langfuseService.flushAsync(); + return res.json(changes); + } catch (error) { + await langfuseService.finalizeTrace(trace, {}, { error: 'An error occurred while syncing memories' }); + console.error('Error in memory synchronization:', error); + res.status(500).json({ error: 'An error occurred while syncing memories' }); + } +}); + app.listen(port, () => console.log(`Server running at http://localhost:${port}`)); process.on('SIGINT', async () => { diff --git a/memory/memories/index.jsonl b/memory/memories/index.jsonl new file mode 100644 index 0000000..24b21f5 --- /dev/null +++ b/memory/memories/index.jsonl @@ -0,0 +1 @@ +{"category":"profiles","subcategory":"basic","name":"adam","content":{"text":"Adam Gospodarczyk is an entrepreneur, full-stack developer specializing in JS, Node.js, and Rust, and a designer. He is based in Krakow, Poland. Adam is known for his curiosity and as a lifelong learner. He is involved in several projects including eduweb.pl, heyalice.app, easy.tools, Tech•sistence, Ahoy!, overment, AI_devs, and Zautomatyzowani.pl."},"metadata":{"confidence":100,"urls":[],"tags":["adam gospodarcyk","entrepreneur","full-stack developer","designer","krakow","projects"]},"uuid":"8d9617cc-fa9d-4a24-9c2b-8773127dfb4c","created_at":"2024-09-14T12:25:32.253Z","updated_at":"2024-09-14T12:25:32.253Z"} diff --git a/memory/memories/profiles/basic/adam.md b/memory/memories/profiles/basic/adam.md new file mode 100644 index 0000000..f73ff6d --- /dev/null +++ b/memory/memories/profiles/basic/adam.md @@ -0,0 +1,24 @@ +--- +category: 'profiles' +subcategory: 'basic' +name: 'adam' +metadata: + confidence: 100 + urls: [] + tags: + - 'adam gospodarcyk' + - 'entrepreneur' + - 'full-stack developer' + - 'designer' + - 'krakow' + - 'projects' +uuid: '8d9617cc-fa9d-4a24-9c2b-8773127dfb4c' +created_at: '2024-09-14T12:25:32.253Z' +updated_at: '2024-09-14T12:25:32.253Z' +--- + +Adam Gospodarczyk is an entrepreneur, full-stack developer specializing in JS, Node.js, and Rust, and a designer. He is based in Krakow, Poland. Adam is known for his curiosity and as a lifelong learner. He is involved in several projects including eduweb.pl, heyalice.app, easy.tools, Tech•sistence, Ahoy!, overment, AI_devs, and Zautomatyzowani.pl. + +Adam is 193 cm tall. + +#adam_gospodarcyk #entrepreneur #full-stack_developer #designer #krakow #projects \ No newline at end of file diff --git a/memory/memories/vector_index.faiss b/memory/memories/vector_index.faiss new file mode 100644 index 0000000..18c6433 Binary files /dev/null and b/memory/memories/vector_index.faiss differ diff --git a/memory/memories/vector_metadata.json b/memory/memories/vector_metadata.json new file mode 100644 index 0000000..6b23d44 --- /dev/null +++ b/memory/memories/vector_metadata.json @@ -0,0 +1 @@ +[[0,"8d9617cc-fa9d-4a24-9c2b-8773127dfb4c"]] \ No newline at end of file diff --git a/memory/prompts.ts b/memory/prompts.ts index 35f5f3e..569fc12 100644 --- a/memory/prompts.ts +++ b/memory/prompts.ts @@ -88,7 +88,7 @@ export const memoryStructure = `memory_areas: - name: "images" description: "links to photos, galleries, images" - name: "apps" - description: "links, tutorials, and knowledge about apps" + description: "knowledge, links, tutorials, and knowledge about apps" - name: "devices" description: "links, tutorials, manuals, and knowledge about devices" - name: "courses" @@ -178,7 +178,7 @@ AI: "q": [ "profiles:basic Who is Alice?", "profiles:basic Who is Adam?", - "environment:current Current time and date" + "environment:current Where we are?" ] } @@ -186,10 +186,10 @@ USER: I'm working on eduweb.pl these days. The Rust backend is coming along nice AI: { "q": [ - "profiles:work Details about eduweb.pl", + "profiles:work eduweb.pl", "profiles:work Adam's role in eduweb.pl", "profiles:development Adam's experience with Rust", - "resources:apps Features of eduweb.pl online education platform" + "resources:apps what do I know about eduweb.pl" ] } @@ -197,9 +197,11 @@ USER: Kate and I are planning a trip to Krakow next month. Any recommendations? AI: { "q": [ - "profiles:relationships Information about Kate", + "profiles:basic Who's Kate?", + "profiles:basic Who's Adam?", + "profiles:relationships Kate & Adam relationship", "events:personal Adam's planned trip to Krakow", - "locations:places Information about Krakow", + "locations:places Krakow trip recommendations", "locations:favorites Adam's favorite places in Krakow", "preferences:interests Adam's travel preferences" ] @@ -220,7 +222,9 @@ USER: I'm thinking about incorporating some AI concepts into the next AI_devs co AI: { "q": [ + "profiles:work What is AI_devs?", "profiles:work Adam's role in AI_devs", + "profiles:development Adam's experience with AI", "resources:courses Details about AI_devs course", "preferences:interests Adam's interest in AI", "events:professional Upcoming AI_devs cohort", @@ -271,6 +275,7 @@ Evaluate the conversation context, existing memories, and general knowledge to d - PRIORITIZE comprehensive information capture over brevity - HANDLE potential information loss by storing all relevant context, even if seemingly redundant - INCLUDE category and subcategory information WITHIN the 'content' string for both 'update' and 'add' arrays +- WHEN SAVING RESOURCES UNDER THE 'RESOURCES' CATEGORY, PROVIDE THE INFORMATION IN A DIRECT AND CONCISE FORMAT, FOCUSING ON ESSENTIAL DETAILS, AND AVOID USING FULL SENTENCES (e.g., "Category: resources > videos; GPT video...") @@ -323,7 +328,7 @@ AI ANALYSIS: "update": [], "add": [ "Category: events > professional; Content: The launch of heyalice.app was on [current_date - 1 year], marking its first anniversary. Adam expressed surprise and excitement about this milestone, indicating the significance of the project in his professional life.", - "Category: profiles > work; Content: Adam launched heyalice.app one year ago. The project appears to be an important part of his professional portfolio, given his enthusiasm about its anniversary." + "Category: resources > websites; Content: heyalice.app was launched one year ago." ] } @@ -353,11 +358,11 @@ User: "Alice, update your memory about my work tools." AI ANALYSIS: { - "_thinking": "The user explicitly requested to update memory about work tools in the latest message. The user has transitioned from using an old CMS to WebFlow. Update existing memories about his tools under 'resources > apps'. Include context about the complete transition to provide a comprehensive update.", + "_thinking": "The user explicitly requested to update memory about work tools in the latest message. The user has transitioned from using an old CMS to WebFlow. Update existing memories about his tools under 'resources > apps'. According to the updated rule, provide concise information without full sentences.", "update": [ { "uuid": "uuid_of_old_cms_memory", - "content": "Category: resources > apps; Content: As of [current_date], Adam uses WebFlow as his primary content management platform. This represents a complete transition from the previous CMS, indicating a significant change in his workflow and potentially his project requirements or preferences." + "content": "Category: resources > apps; WebFlow is now Adam's primary content management platform." } ], "add": [] @@ -374,7 +379,7 @@ AI ANALYSIS: "_thinking": "The user explicitly requested to remember this change in the latest message. The user has adopted a vegetarian diet. This significant lifestyle change should be added under 'profiles > basic'. Include context about the decision to provide a comprehensive memory.", "update": [], "add": [ - "Category: profiles > basic; Content: As of [current_date], Adam has made a conscious decision to become a vegetarian, completely eliminating meat from his diet. This significant lifestyle change may impact his food preferences, health considerations, and potentially social dining experiences." + "Category: profiles > basic; Content: As of [current_date], Adam has decided to become a vegetarian, eliminating meat from his diet. This lifestyle change may affect his food preferences and health considerations." ] } @@ -386,14 +391,28 @@ User: "Alice, please update your memory about my current reading and activities. AI ANALYSIS: { - "_thinking": "The user explicitly requested to update memory about current reading and activities in the latest message. The user is part of a book club reading 'Sapiens' by Yuval Noah Harari. Add the book under 'resources > books' and note his participation in the book club under 'preferences > hobbies'. Include details about the book and the implications of his participation in the club.", + "_thinking": "The user explicitly requested to update memory about current reading and activities in the latest message. The user is part of a book club reading 'Sapiens' by Yuval Noah Harari. Add the book under 'resources > books' and note his participation in the book club under 'preferences > hobbies'. Use a direct format for the resource entry.", "update": [], "add": [ - "Category: resources > books; Content: As of [current_date], Adam is reading 'Sapiens' by Yuval Noah Harari with his book club. This non-fiction work covers the history of humankind, suggesting Adam's interest in anthropology, history, and big-picture thinking about human society.", - "Category: preferences > hobbies; Content: Adam actively participates in a book club. This hobby indicates his enjoyment of reading, intellectual discussions, and social learning experiences. The club is currently reading 'Sapiens', showing a preference for thought-provoking, non-fiction literature." + "Category: resources > books; 'Sapiens' by Yuval Noah Harari.", + "Category: preferences > hobbies; Content: Adam participates in a book club currently reading 'Sapiens', indicating his interest in intellectual discussions and non-fiction literature." ] } +--- + +CONVERSATION: +User: "I found an excellent video about GPT models that I want to save." +User: "Alice, please update your memory with this resource." + +AI ANALYSIS: +{ + "_thinking": "The user explicitly requested to update memory with this resource in the latest message. He wants to save a video about GPT models. According to the updated rule, add this information under 'resources > videos' in a direct format.", + "update": [], + "add": [ + "Category: resources > videos; GPT models video." + ] +} Analyze the conversation carefully, considering all provided information, and determine necessary memory updates or additions only when explicitly requested by the user in their latest message. Ensure all details are captured within the 'content' field as text, maintaining the correct response structure for both 'update' and 'add' arrays.`; @@ -419,7 +438,8 @@ Create a structured JSON memory object from user input, adhering to strict categ - GENERATE a file-name-optimized, concise title for the 'name' field that is similar to names one might pick for personal notes. - Make it ULTRA CONCISE and MEANINGFUL, for profiles it should be a name of a person, for work it should be a name of a project, for resources it should be a name of a thing, etc. - Content: - - WRITE the 'content.text' in first-person POV from the assistant's perspective. + - WRITE the 'content.text' in first-person POV from the assistant's perspective but foucs on describing the content and skip things like "I noted" or "The user told me". Keep it natural, markdown note you could write. + - 'content.text' must be well-formatted markdown text that may include text formatting, paragraphs, headers, lists and even images or links if needed (links must be included in metadata.urls too!) - ENSURE that the 'content.text' is detailed, building only on available information from the context/conversation. - EXCLUDE URLs from the 'content.text' field. - Metadata: