Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
iceener committed Sep 14, 2024
1 parent dfe5ed1 commit 036230d
Show file tree
Hide file tree
Showing 16 changed files with 460 additions and 32 deletions.
1 change: 0 additions & 1 deletion files/OpenAIService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ export class OpenAIService {
* @throws Error if there's an issue creating the embedding.
*/
async createEmbedding(input: string | string[]): Promise<number[]> {
console.log(input)
try {
const embedding = await this.openai.embeddings.create({
model: "text-embedding-3-large",
Expand Down
2 changes: 1 addition & 1 deletion files/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ app.post('/api/chat', async (req, res) => {
if (!latestUserMessage) {
return res.status(400).json({ error: 'No user message provided' });
}

console.log(latestUserMessage);
// Create embedding for the latest user message
const latestMessageEmbedding = await openaiService.createEmbedding(latestUserMessage.content);

Expand Down
Binary file modified files/context/embeddings/memory_faiss.index
Binary file not shown.
1 change: 1 addition & 0 deletions files/context/embeddings/memory_metadata.jsonl
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
{"uuid":"14126146-b076-4f6b-a39f-7497199ff59a","filename":"family_town.md"}
{"uuid":"a2baf2ce-e01d-4010-beb1-c06213fdbff5","filename":"adam_krakow.md"}
Binary file modified files/context/embeddings/message_faiss.index
Binary file not shown.
3 changes: 3 additions & 0 deletions files/context/embeddings/message_metadata.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
{"uuid":"8206de41-6cf6-49c5-aff6-acb318661a8a","role":"assistant","filename":"540cc66a-02f4-48c1-8c01-6a6188aebdf5.md"}
{"uuid":"2105c1b1-8f81-499f-bbd9-c000318c94f5","role":"assistant","filename":"16de94d4-8364-4aac-b48c-da586d083340.md"}
{"uuid":"143e8d92-73a1-4433-9fa5-6a3619200e5d","role":"assistant","filename":"837c899f-da41-4130-9e6c-ac2a839c42b0.md"}
{"uuid":"eae21d13-9401-42cd-a049-8f51aa6c3b8e","role":"assistant","filename":"030ccbe5-98d3-4309-aebb-62e55732ec6f.md"}
{"uuid":"42931379-090e-4c79-96d5-6302f2bec87e","role":"assistant","filename":"2b33b71d-3917-46b8-9aca-e68fbec0b329.md"}
{"uuid":"7e7173b4-903a-4378-b4a8-857853c49a36","role":"assistant","filename":"06879172-c891-4aaf-9d63-ec2724c110b8.md"}
258 changes: 258 additions & 0 deletions files/prompt.md

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion memory/AssistantService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export class AssistantService {

async extractQueries(messages: ChatCompletionMessageParam[], trace: LangfuseTraceClient): Promise<string[]> {
const generation = this.langfuseService.createGeneration(trace, "Extract queries", messages);

try {
const thread: ChatCompletionMessageParam[] = [
{ role: "system", content: extractSearchQueriesPrompt({memoryStructure, knowledge: defaultKnowledge}) },
Expand Down Expand Up @@ -287,7 +288,11 @@ export class AssistantService {
messages: messagesWithSystem
}) as ChatCompletion;

this.langfuseService.finalizeGeneration(generation, completion.choices[0].message, completion.model, completion.usage);
this.langfuseService.finalizeGeneration(generation, completion.choices[0].message, completion.model, {
promptTokens: completion.usage?.prompt_tokens,
completionTokens: completion.usage?.completion_tokens,
totalTokens: completion.usage?.total_tokens
});
return completion;
} catch (error) {
this.langfuseService.finalizeGeneration(generation, { error: error instanceof Error ? error.message : String(error) }, "unknown");
Expand Down
90 changes: 83 additions & 7 deletions memory/MemoryService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import type { OpenAIService } from './OpenAIService';
import { VectorStore } from './VectorStore';
import { LangfuseService } from './LangfuseService';
import { LangfuseTraceClient } from 'langfuse';
import { execFileSync, execSync } from 'child_process';

export interface Memory {
uuid: string;
Expand Down Expand Up @@ -95,7 +96,7 @@ export class MemoryService {
// Add hashtags at the end of the file
if (frontmatterData.metadata?.tags && frontmatterData.metadata.tags.length > 0) {
markdownContent += '\n\n';
markdownContent += frontmatterData.metadata.tags.map(tag => `#${tag}`).join(' ');
markdownContent += frontmatterData.metadata.tags.map(tag => `#${tag.replace(/\s/g, '_')}`).join(' ');
}

return markdownContent;
Expand All @@ -111,7 +112,7 @@ export class MemoryService {

// Ensure tags in metadata match those at the end of the file
if (hashtags) {
const tagsFromContent = hashtags.split(' ').map(tag => tag.replace('#', ''));
const tagsFromContent = hashtags.split(' ').map(tag => tag.replace('#', '').replace(/_/g, ' '));
data.metadata.tags = [...new Set([...(data.metadata.tags || []), ...tagsFromContent])];
}

Expand Down Expand Up @@ -230,15 +231,19 @@ export class MemoryService {
);
}

/**
 * Finds the stored memories closest to a free-text query.
 *
 * The query is embedded, the vector store is asked for up to `k` hits, and
 * every hit is loaded through `getMemory`. Hits whose memory resolves to
 * `null` are dropped from the result.
 *
 * @param query - Text to embed and search with.
 * @param k - Maximum number of hits requested from the vector store.
 * @returns The loaded memories, each carrying the similarity score of the
 *          exact hit it was loaded from.
 */
async searchSimilarMemories(query: string, k: number = 15): Promise<Array<Memory & { similarity: number }>> {
  const queryEmbedding = await this.openaiService.createEmbedding(query);
  const similarResults = await this.vectorStore.search(queryEmbedding, k);
  if (similarResults.length === 0) {
    console.log('No similar memories found.');
    return [];
  }

  // Attach the score while the hit and its memory are still paired. Filtering
  // nulls first and then indexing `similarResults` by the filtered position
  // would shift every score after the first missing memory onto the wrong entry.
  const scored = await Promise.all(
    similarResults.map(async ({ id, similarity }) => {
      const memory = await this.getMemory(id);
      return memory === null ? null : { ...memory, similarity };
    })
  );

  return scored.filter((entry): entry is Memory & { similarity: number } => entry !== null);
}

async deleteMemory(uuid: string): Promise<boolean> {
Expand Down Expand Up @@ -309,4 +314,75 @@ export class MemoryService {
const urls = memory.metadata?.urls && memory.metadata.urls.length > 0 ? `\nURLs: ${memory.metadata.urls.join(', ')}` : '';
return `<memory uuid="${memory.uuid}" name="${memory.name}" category="${memory.category}" subcategory="${memory.subcategory}" lastmodified="${memory.updated_at}">${memory.content.text}${urls}</memory>`;
}

/**
 * Applies the Markdown changes reported by `getGitDiff` to the memory store.
 *
 * Added files are created first, then modified files are updated, then
 * deleted files are removed — strictly one file at a time. A "SyncMemories"
 * event summarising the outcome is recorded on the given trace.
 *
 * @param trace - Langfuse trace the summary event is attached to.
 * @returns UUIDs of the added and modified memories, and the file paths
 *          (not UUIDs) of the successfully deleted ones.
 */
async syncMemories(trace: LangfuseTraceClient): Promise<{ added: string[], modified: string[], deleted: string[] }> {
  const changes = this.parseGitDiff(this.getGitDiff());

  console.log(changes);
  const summary: { added: string[]; modified: string[]; deleted: string[] } = {
    added: [],
    modified: [],
    deleted: [],
  };

  for (const file of changes.added) {
    const created = await this.addMemoryFromFile(file);
    if (!created) continue;
    summary.added.push(created.uuid);
  }

  for (const file of changes.modified) {
    console.log('Updating file', file)
    const updated = await this.updateMemoryFromFile(file);
    if (!updated) continue;
    summary.modified.push(updated.uuid);
  }

  for (const file of changes.deleted) {
    const removed = await this.deleteMemoryByFile(file);
    if (removed) summary.deleted.push(file);
  }

  // Hand out separate objects to the event and to the caller.
  this.langfuseService.createEvent(trace, "SyncMemories", { ...summary });
  return { ...summary };
}

/**
 * Runs `git diff --name-status HEAD` with the base directory as working
 * directory and returns the raw command output.
 *
 * NOTE(review): without `--relative`, git normally prints paths relative to
 * the repository root rather than to `cwd`, and untracked files are not
 * listed by `git diff` — confirm both match how callers resolve these paths.
 *
 * @returns The command's standard output as text.
 */
private getGitDiff(): string {
  const workingDir = path.join(this.baseDir);
  return execSync('git diff --name-status HEAD', { cwd: workingDir, encoding: 'utf-8' });
}

/**
 * Groups the Markdown files listed in `git diff --name-status` output by
 * change type.
 *
 * Each relevant line has the form `<status>\t<path>`. Only paths ending in
 * `.md` with status `A`, `M` or `D` are kept; every other line is ignored.
 *
 * NOTE(review): rename and copy entries (`R<score>` / `C<score>` followed by
 * two paths) never match `A`/`M`/`D` and are therefore skipped — decide
 * whether a renamed memory file should be synced.
 *
 * @param diff - Raw output of `git diff --name-status`.
 * @returns The matching paths grouped into added, modified and deleted.
 */
private parseGitDiff(diff: string): { added: string[], modified: string[], deleted: string[] } {
  // Spell out the element types: bare `[]` literals inside an object are
  // inferred as `never[]` under strict settings, which rejects the pushes below.
  const changes: { added: string[]; modified: string[]; deleted: string[] } = {
    added: [],
    modified: [],
    deleted: [],
  };

  // Accept CRLF as well as LF so a trailing "\r" cannot defeat the ".md" check.
  for (const line of diff.split(/\r?\n/)) {
    const [status, file] = line.split('\t');
    if (!file || !file.endsWith('.md')) continue;

    switch (status) {
      case 'A':
        changes.added.push(file);
        break;
      case 'M':
        changes.modified.push(file);
        break;
      case 'D':
        changes.deleted.push(file);
        break;
      default:
        // Any other status letter is intentionally left out.
        break;
    }
  }

  return changes;
}

/**
 * Reads a Markdown memory file, parses it with `markdownToJson` and stores
 * the result through `createMemory`.
 *
 * @param file - Path resolved under `<baseDir>/memories`.
 * @returns Whatever `createMemory` resolves to.
 */
private async addMemoryFromFile(file: string): Promise<Memory | null> {
  const markdown = await fs.readFile(path.join(this.baseDir, 'memories', file), 'utf-8');
  const parsed = this.markdownToJson(markdown);
  // NOTE(review): an empty object stands in for the Langfuse trace here;
  // the real trace still needs to be threaded through from the caller.
  const placeholderTrace = {} as LangfuseTraceClient;
  return this.createMemory(parsed, placeholderTrace);
}

/**
 * Reads a Markdown memory file, parses it with `markdownToJson` and passes
 * the result to `updateMemory`.
 *
 * @param file - Path resolved under `<baseDir>/memories`.
 * @returns Whatever `updateMemory` resolves to.
 */
private async updateMemoryFromFile(file: string): Promise<Memory | null> {
  const markdown = await fs.readFile(path.join(this.baseDir, 'memories', file), 'utf-8');
  const parsed = this.markdownToJson(markdown);
  // NOTE(review): an empty object stands in for the Langfuse trace here;
  // the real trace still needs to be threaded through from the caller.
  const placeholderTrace = {} as LangfuseTraceClient;
  return this.updateMemory(parsed, placeholderTrace);
}

/**
 * Deletes the memory described by a Markdown file that git reports as removed.
 *
 * The memory's UUID is parsed out of the file content. The working-tree copy
 * is tried first; because a deleted file is normally no longer on disk, a
 * missing file falls back to the last committed version via
 * `git show HEAD:<file>`.
 *
 * NOTE(review): `<rev>:<path>` is resolved from the repository root, which is
 * presumably the same base `git diff --name-status` used for `file` — confirm.
 *
 * @param file - Path as listed in the parsed git diff.
 * @returns The result of `deleteMemory` for the parsed UUID.
 * @throws If the content can be read neither from disk nor from `HEAD`.
 */
private async deleteMemoryByFile(file: string): Promise<boolean> {
  const filePath = path.join(this.baseDir, 'memories', file);

  let content: string;
  try {
    content = await fs.readFile(filePath, 'utf-8');
  } catch (error) {
    const isMissing = error instanceof Error && 'code' in error && error.code === 'ENOENT';
    if (!isMissing) throw error;

    // No shell is involved with execFileSync, so the path is passed to git
    // verbatim regardless of spaces or shell metacharacters in the file name.
    content = execFileSync('git', ['show', `HEAD:${file}`], {
      cwd: path.join(this.baseDir),
      encoding: 'utf-8',
    });
  }

  const memory = this.markdownToJson(content);
  return this.deleteMemory(memory.uuid);
}
}
39 changes: 31 additions & 8 deletions memory/VectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,34 @@ export class VectorStore {
}
}

async search(vector: number[], k: number): Promise<string[]> {
async search(vector: number[], k: number): Promise<Array<{ id: string; similarity: number }>> {
try {
const normalizedVector = this.normalizeVector(vector);
const totalVectors = this.index.ntotal();
if (totalVectors === 0) {
return [];
}
const actualK = Math.min(k, totalVectors);
const { labels } = this.index.search(normalizedVector, actualK);
return labels.map(label => this.metadata.get(label) || '');
const { distances, labels } = this.index.search(normalizedVector, actualK);

const results = labels.map((label, index) => ({
id: this.metadata.get(label) || '',
similarity: distances[index]
}));


console.log(`Total results: ${results.length}`);

// Calculate average similarity
const avgSimilarity = results.reduce((sum, r) => sum + r.similarity, 0) / results.length;

// Filter results with at least 80% of average similarity
const threshold = avgSimilarity * 0.8;
const filteredResults = results.filter(r => r.similarity >= threshold);

console.log(`Filtered results: ${filteredResults.length}`);

return filteredResults;
} catch (error) {
console.error('Error searching vectors:', error);
return [];
Expand Down Expand Up @@ -80,13 +98,18 @@ export class VectorStore {
}
}


/**
 * Replaces the stored vector for `id`, or adds it when `id` is unknown.
 *
 * The embedding is normalized, the existing vector (if any) is removed from
 * the index, the new vector is appended, and the label → id map is brought
 * back in line with the index.
 *
 * NOTE(review): the relabelling below assumes the index numbers its vectors
 * sequentially and closes the gap on removal (as FAISS flat indexes do) —
 * confirm against the index type this store creates.
 *
 * @param embedding - New embedding for the entry.
 * @param id - Identifier whose vector should be replaced.
 */
update(embedding: number[], id: string): void {
  const normalizedEmbedding = this.normalizeVector(embedding);
  const existingLabel = Array.from(this.metadata.entries()).find(([, value]) => value === id)?.[0];

  if (existingLabel === undefined) {
    this.add(normalizedEmbedding, id);
    return;
  }

  // Remove the existing vector.
  this.index.removeIds([existingLabel]);

  // Drop the removed label and shift every later label down by one so the map
  // keeps pointing at the right vectors; leaving the old entry in place would
  // make searches resolve a different vector to this id.
  const relabelled: Array<[number, string]> = [];
  for (const [label, value] of this.metadata) {
    if (label === existingLabel) continue;
    relabelled.push([label > existingLabel ? label - 1 : label, value]);
  }
  this.metadata.clear();
  for (const [label, value] of relabelled) {
    this.metadata.set(label, value);
  }

  // Append the new vector; it takes the last position in the index.
  this.index.add(normalizedEmbedding);
  this.metadata.set(this.index.ntotal() - 1, id);
}
}
17 changes: 17 additions & 0 deletions memory/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import { AssistantService } from './AssistantService';
import { defaultKnowledge as knowledge } from './prompts';
import { LangfuseService } from './LangfuseService';
import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";
import { execSync } from 'child_process';
import path from 'path';

const app = express();
const port = 3000;
Expand Down Expand Up @@ -39,6 +41,21 @@ app.post('/api/chat', async (req, res) => {
}
});

// POST /api/sync — runs a memory sync under a fresh Langfuse trace and
// returns the { added, modified, deleted } summary as JSON; on failure the
// trace is finalized with an error payload and a 500 response is sent.
app.post('/api/sync', async (req, res) => {
  const syncFailedMessage = 'An error occurred while syncing memories';
  const trace = langfuseService.createTrace({ id: uuidv4(), name: 'Sync Memories', sessionId: uuidv4() });

  try {
    const changes = await memoryService.syncMemories(trace);
    await langfuseService.finalizeTrace(trace, {}, changes);
    await langfuseService.flushAsync();
    return res.json(changes);
  } catch (error) {
    await langfuseService.finalizeTrace(trace, {}, { error: syncFailedMessage });
    console.error('Error in memory synchronization:', error);
    res.status(500).json({ error: syncFailedMessage });
  }
});

app.listen(port, () => console.log(`Server running at http://localhost:${port}`));

process.on('SIGINT', async () => {
Expand Down
1 change: 1 addition & 0 deletions memory/memories/index.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"category":"profiles","subcategory":"basic","name":"adam","content":{"text":"Adam Gospodarczyk is an entrepreneur, full-stack developer specializing in JS, Node.js, and Rust, and a designer. He is based in Krakow, Poland. Adam is known for his curiosity and as a lifelong learner. He is involved in several projects including eduweb.pl, heyalice.app, easy.tools, Tech•sistence, Ahoy!, overment, AI_devs, and Zautomatyzowani.pl."},"metadata":{"confidence":100,"urls":[],"tags":["adam gospodarcyk","entrepreneur","full-stack developer","designer","krakow","projects"]},"uuid":"8d9617cc-fa9d-4a24-9c2b-8773127dfb4c","created_at":"2024-09-14T12:25:32.253Z","updated_at":"2024-09-14T12:25:32.253Z"}
24 changes: 24 additions & 0 deletions memory/memories/profiles/basic/adam.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
category: 'profiles'
subcategory: 'basic'
name: 'adam'
metadata:
confidence: 100
urls: []
tags:
- 'adam gospodarcyk'
- 'entrepreneur'
- 'full-stack developer'
- 'designer'
- 'krakow'
- 'projects'
uuid: '8d9617cc-fa9d-4a24-9c2b-8773127dfb4c'
created_at: '2024-09-14T12:25:32.253Z'
updated_at: '2024-09-14T12:25:32.253Z'
---

Adam Gospodarczyk is an entrepreneur, full-stack developer specializing in JS, Node.js, and Rust, and a designer. He is based in Krakow, Poland. Adam is known for his curiosity and as a lifelong learner. He is involved in several projects including eduweb.pl, heyalice.app, easy.tools, Tech•sistence, Ahoy!, overment, AI_devs, and Zautomatyzowani.pl.

Adam is 193 cm tall.

#adam_gospodarcyk #entrepreneur #full-stack_developer #designer #krakow #projects
Binary file added memory/memories/vector_index.faiss
Binary file not shown.
1 change: 1 addition & 0 deletions memory/memories/vector_metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[[0,"8d9617cc-fa9d-4a24-9c2b-8773127dfb4c"]]
Loading

0 comments on commit 036230d

Please sign in to comment.