From dade68916002aecae433e33e748dcb8ecb7d77bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 18:54:45 +0100 Subject: [PATCH 1/8] Move existing example into subdirectory --- example/go.mod | 10 ---------- .../rag-wikipedia-ollama}/.gitignore | 0 .../rag-wikipedia-ollama}/README.md | 20 +++++++++---------- .../dbpedia_sample.jsonl | 0 examples/rag-wikipedia-ollama/go.mod | 10 ++++++++++ .../rag-wikipedia-ollama}/go.sum | 0 .../rag-wikipedia-ollama}/llm.go | 0 .../rag-wikipedia-ollama}/main.go | 0 8 files changed, 20 insertions(+), 20 deletions(-) delete mode 100644 example/go.mod rename {example => examples/rag-wikipedia-ollama}/.gitignore (100%) rename {example => examples/rag-wikipedia-ollama}/README.md (93%) rename {example => examples/rag-wikipedia-ollama}/dbpedia_sample.jsonl (100%) create mode 100644 examples/rag-wikipedia-ollama/go.mod rename {example => examples/rag-wikipedia-ollama}/go.sum (100%) rename {example => examples/rag-wikipedia-ollama}/llm.go (100%) rename {example => examples/rag-wikipedia-ollama}/main.go (100%) diff --git a/example/go.mod b/example/go.mod deleted file mode 100644 index e9238e8..0000000 --- a/example/go.mod +++ /dev/null @@ -1,10 +0,0 @@ -module github.com/philippgille/chromem-go/example - -go 1.21 - -require ( - github.com/philippgille/chromem-go v0.0.0 - github.com/sashabaranov/go-openai v1.17.9 -) - -replace github.com/philippgille/chromem-go => ./.. 
diff --git a/example/.gitignore b/examples/rag-wikipedia-ollama/.gitignore similarity index 100% rename from example/.gitignore rename to examples/rag-wikipedia-ollama/.gitignore diff --git a/example/README.md b/examples/rag-wikipedia-ollama/README.md similarity index 93% rename from example/README.md rename to examples/rag-wikipedia-ollama/README.md index 0604e32..5029b09 100644 --- a/example/README.md +++ b/examples/rag-wikipedia-ollama/README.md @@ -1,4 +1,4 @@ -# Example +# RAG Wikipedia Ollama This example shows a retrieval augmented generation (RAG) application, using `chromem-go` as knowledge base for finding relevant info for a question. @@ -48,10 +48,10 @@ Then, if you want to create the embeddings via OpenAI, but still use Gemma 2B as
Apply this patch ```diff -diff --git a/example/main.go b/example/main.go +diff --git a/examples/rag-wikipedia-ollama/main.go b/examples/rag-wikipedia-ollama/main.go index 55b3076..cee9561 100644 ---- a/example/main.go -+++ b/example/main.go +--- a/examples/rag-wikipedia-ollama/main.go ++++ b/examples/rag-wikipedia-ollama/main.go @@ -14,8 +14,6 @@ import ( const ( @@ -88,10 +88,10 @@ Or alternatively, if you want to use OpenAI for everything (embeddings creation
Apply this patch ```diff -diff --git a/example/llm.go b/example/llm.go +diff --git a/examples/rag-wikipedia-ollama/llm.go b/examples/rag-wikipedia-ollama/llm.go index 1fde4ec..7cb81cc 100644 ---- a/example/llm.go -+++ b/example/llm.go +--- a/examples/rag-wikipedia-ollama/llm.go ++++ b/examples/rag-wikipedia-ollama/llm.go @@ -2,23 +2,13 @@ package main import ( @@ -138,10 +138,10 @@ index 1fde4ec..7cb81cc 100644 Messages: messages, }) if err != nil { -diff --git a/example/main.go b/example/main.go +diff --git a/examples/rag-wikipedia-ollama/main.go b/examples/rag-wikipedia-ollama/main.go index 55b3076..044a246 100644 ---- a/example/main.go -+++ b/example/main.go +--- a/examples/rag-wikipedia-ollama/main.go ++++ b/examples/rag-wikipedia-ollama/main.go @@ -12,19 +12,11 @@ import ( "github.com/philippgille/chromem-go" ) diff --git a/example/dbpedia_sample.jsonl b/examples/rag-wikipedia-ollama/dbpedia_sample.jsonl similarity index 100% rename from example/dbpedia_sample.jsonl rename to examples/rag-wikipedia-ollama/dbpedia_sample.jsonl diff --git a/examples/rag-wikipedia-ollama/go.mod b/examples/rag-wikipedia-ollama/go.mod new file mode 100644 index 0000000..fdabe26 --- /dev/null +++ b/examples/rag-wikipedia-ollama/go.mod @@ -0,0 +1,10 @@ +module github.com/philippgille/chromem-go/examples/rag-wikipedia-ollama + +go 1.21 + +require ( + github.com/philippgille/chromem-go v0.0.0 + github.com/sashabaranov/go-openai v1.17.9 +) + +replace github.com/philippgille/chromem-go => ./../.. 
diff --git a/example/go.sum b/examples/rag-wikipedia-ollama/go.sum similarity index 100% rename from example/go.sum rename to examples/rag-wikipedia-ollama/go.sum diff --git a/example/llm.go b/examples/rag-wikipedia-ollama/llm.go similarity index 100% rename from example/llm.go rename to examples/rag-wikipedia-ollama/llm.go diff --git a/example/main.go b/examples/rag-wikipedia-ollama/main.go similarity index 100% rename from example/main.go rename to examples/rag-wikipedia-ollama/main.go From 441186b36f852f67597a5a03c1c48a7fb8da127b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 17:30:30 +0100 Subject: [PATCH 2/8] Add new example for semantic search on arXiv papers --- .../semantic-search-arxiv-openai/.gitignore | 1 + .../semantic-search-arxiv-openai/README.md | 83 +++++++++ examples/semantic-search-arxiv-openai/go.mod | 7 + examples/semantic-search-arxiv-openai/main.go | 168 ++++++++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 examples/semantic-search-arxiv-openai/.gitignore create mode 100644 examples/semantic-search-arxiv-openai/README.md create mode 100644 examples/semantic-search-arxiv-openai/go.mod create mode 100644 examples/semantic-search-arxiv-openai/main.go diff --git a/examples/semantic-search-arxiv-openai/.gitignore b/examples/semantic-search-arxiv-openai/.gitignore new file mode 100644 index 0000000..c2de89b --- /dev/null +++ b/examples/semantic-search-arxiv-openai/.gitignore @@ -0,0 +1 @@ +/db diff --git a/examples/semantic-search-arxiv-openai/README.md b/examples/semantic-search-arxiv-openai/README.md new file mode 100644 index 0000000..632a29e --- /dev/null +++ b/examples/semantic-search-arxiv-openai/README.md @@ -0,0 +1,83 @@ +# Semantic search arXiv OpenAI + +This example shows a semantic search application, using `chromem-go` as vector database for finding semantically relevant search results. 
We load and search across ~5,000 arXiv papers in the "Computer Science - Computation and Language" category, which is the relevant one for Natural Language Processing (NLP) related papers. + +This is not a retrieval augmented generation (RAG) app, because after *retrieving* the semantically relevant results, we don't *augment* any prompt to an LLM. No LLM generates the final output. + +## How to run + +1. Prepare the dataset + 1. Download `arxiv-metadata-oai-snapshot.json` from <https://www.kaggle.com/datasets/Cornell-University/arxiv> + 2. Filter by "Computer Science - Computation and Language" category (see [taxonomy](https://arxiv.org/category_taxonomy)), filter by updates from 2023 + 1. Ensure you have [ripgrep](https://github.com/BurntSushi/ripgrep) installed, or adapt the following commands to use grep + 2. Run `rg '"categories":"cs.CL"' ~/Downloads/arxiv-metadata-oai-snapshot.json | rg '"update_date":"2023' > /tmp/arxiv_cs-cl_2023.jsonl` (adapt input file path if necessary) + 3. Check the data + 1. `wc -l arxiv_cs-cl_2023.jsonl` should show ~5,000 lines + 2. `du -h arxiv_cs-cl_2023.jsonl` should show ~8.8 MB +2. Set the OpenAI API key in your env as `OPENAI_API_KEY` +3. Run the example: `go run .` + +## Output + +The output can differ slightly on each run, but it's along the lines of: + +```log + 2024/03/10 18:23:55 Setting up chromem-go... + 2024/03/10 18:23:55 Reading JSON lines... + 2024/03/10 18:23:55 Read and parsed 5006 documents. + 2024/03/10 18:23:55 Adding documents to chromem-go, including creating their embeddings via OpenAI API... + 2024/03/10 18:28:12 Querying chromem-go... + 2024/03/10 18:28:12 Search results: + 1) Similarity 0.488895: + URL: https://arxiv.org/abs/2209.15469 + Submitter: Christian Buck + Title: Zero-Shot Retrieval with Search Agents and Hybrid Environments + Abstract: Learning to search is the task of building artificial agents that learn to autonomously use a search... + 2) Similarity 0.480713: + URL: https://arxiv.org/abs/2305.11516 + Submitter: Ryo Nagata Dr. 
+ Title: Contextualized Word Vector-based Methods for Discovering Semantic Differences with No Training nor Word Alignment + Abstract: In this paper, we propose methods for discovering semantic differences in words appearing in two cor... + 3) Similarity 0.476079: + URL: https://arxiv.org/abs/2310.14025 + Submitter: Maria Lymperaiou + Title: Large Language Models and Multimodal Retrieval for Visual Word Sense Disambiguation + Abstract: Visual Word Sense Disambiguation (VWSD) is a novel challenging task with the goal of retrieving an i... + 4) Similarity 0.474883: + URL: https://arxiv.org/abs/2302.14785 + Submitter: Teven Le Scao + Title: Joint Representations of Text and Knowledge Graphs for Retrieval and Evaluation + Abstract: A key feature of neural models is that they can produce semantic vector representations of objects (... + 5) Similarity 0.470326: + URL: https://arxiv.org/abs/2309.02403 + Submitter: Dallas Card + Title: Substitution-based Semantic Change Detection using Contextual Embeddings + Abstract: Measuring semantic change has thus far remained a task where methods using contextual embeddings hav... + 6) Similarity 0.466851: + URL: https://arxiv.org/abs/2309.08187 + Submitter: Vu Tran + Title: Encoded Summarization: Summarizing Documents into Continuous Vector Space for Legal Case Retrieval + Abstract: We present our method for tackling a legal case retrieval task by introducing our method of encoding... + 7) Similarity 0.461783: + URL: https://arxiv.org/abs/2307.16638 + Submitter: Maiia Bocharova Bocharova + Title: VacancySBERT: the approach for representation of titles and skills for semantic similarity search in the recruitment domain + Abstract: The paper focuses on deep learning semantic search algorithms applied in the HR domain. The aim of t... 
+ 8) Similarity 0.460481: + URL: https://arxiv.org/abs/2106.07400 + Submitter: Clara Meister + Title: Determinantal Beam Search + Abstract: Beam search is a go-to strategy for decoding neural sequence models. The algorithm can naturally be ... + 9) Similarity 0.460001: + URL: https://arxiv.org/abs/2305.04049 + Submitter: Yuxia Wu + Title: Actively Discovering New Slots for Task-oriented Conversation + Abstract: Existing task-oriented conversational search systems heavily rely on domain ontologies with pre-defi... + 10) Similarity 0.458321: + URL: https://arxiv.org/abs/2305.08654 + Submitter: Taichi Aida + Title: Unsupervised Semantic Variation Prediction using the Distribution of Sibling Embeddings + Abstract: Languages are dynamic entities, where the meanings associated with words constantly change with time... +``` + +The majority of the time here is spent during the embeddings creation, where we are limited by the performance of the OpenAI API. diff --git a/examples/semantic-search-arxiv-openai/go.mod b/examples/semantic-search-arxiv-openai/go.mod new file mode 100644 index 0000000..5298d9a --- /dev/null +++ b/examples/semantic-search-arxiv-openai/go.mod @@ -0,0 +1,7 @@ +module github.com/philippgille/chromem-go/examples/semantic-search-arxiv-openai + +go 1.21 + +require github.com/philippgille/chromem-go v0.0.0 + +replace github.com/philippgille/chromem-go => ./../.. 
diff --git a/examples/semantic-search-arxiv-openai/main.go b/examples/semantic-search-arxiv-openai/main.go new file mode 100644 index 0000000..44f6be2 --- /dev/null +++ b/examples/semantic-search-arxiv-openai/main.go @@ -0,0 +1,168 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "os" + "runtime" + "strings" + + "github.com/philippgille/chromem-go" +) + +const searchTerm = "semantic search with vector databases" + +func main() { + ctx := context.Background() + + // Set up chromem-go with persistence, so that when the program restarts, the + // DB's data is still available. + log.Println("Setting up chromem-go...") + db, err := chromem.NewPersistentDB("./db") + if err != nil { + panic(err) + } + // Create collection if it wasn't loaded from persistent storage yet. + // We pass nil as embedding function to use the default (OpenAI text-embedding-3-small), + // which is very good and cheap. It requires the OPENAI_API_KEY environment + // variable to be set. + collection, err := db.GetOrCreateCollection("arXiv cs.CL 2023", nil, nil) + if err != nil { + panic(err) + } + // Add docs to the collection, if the collection was just created (and not + // loaded from persistent storage). + docs := []chromem.Document{} + if collection.Count() == 0 { + // Here we use an arXiv metadata sample, where each line contains the metadata + // of a paper, including its submitter, title and abstract. 
+ f, err := os.Open("/tmp/arxiv_cs-cl_2023.jsonl") + if err != nil { + panic(err) + } + d := json.NewDecoder(f) + log.Println("Reading JSON lines...") + i := 0 + for { + var paper struct { + ID string `json:"id"` + Submitter string `json:"submitter"` + Title string `json:"title"` + Abstract string `json:"abstract"` + } + err := d.Decode(&paper) + if err == io.EOF { + break // reached end of file + } else if err != nil { + panic(err) + } + + title := strings.ReplaceAll(paper.Title, "\n", " ") + title = strings.ReplaceAll(title, " ", " ") + content := strings.TrimSpace(paper.Abstract) + docs = append(docs, chromem.Document{ + ID: paper.ID, + Metadata: map[string]string{"submitter": paper.Submitter, "title": title}, + Content: content, + }) + i++ + } + log.Println("Read and parsed", i, "documents.") + log.Println("Adding documents to chromem-go, including creating their embeddings via OpenAI API...") + err = collection.AddDocuments(ctx, docs, runtime.NumCPU()) + if err != nil { + panic(err) + } + } else { + log.Println("Not reading JSON lines because collection was loaded from persistent storage.") + } + + // Search for documents that are semantically similar to the search term. + // We ask for the 10 most similar documents, but you can use more or less depending + // on your needs. + // You can limit the search by filtering on content or metadata (like the paper's + // submitter), but we don't do that in this example. + log.Println("Querying chromem-go...") + docRes, err := collection.Query(ctx, searchTerm, 10, nil, nil) + if err != nil { + panic(err) + } + // Here you could filter out any documents whose similarity is below a certain threshold. + // if docRes[...].Similarity < 0.5 { ... + + // Print the retrieved documents and their similarity to the question. + buf := &strings.Builder{} + for i, res := range docRes { + content := strings.ReplaceAll(res.Content, "\n", " ") + content = content[:min(100, len(content))] + "..." 
+ fmt.Fprintf(buf, "\t%d) Similarity %f:\n"+ + "\t\tURL: https://arxiv.org/abs/%s\n"+ + "\t\tSubmitter: %s\n"+ + "\t\tTitle: %s\n"+ + "\t\tAbstract: %s\n", + i+1, res.Similarity, res.ID, res.Metadata["submitter"], res.Metadata["title"], content) + } + log.Printf("Search results:\n%s\n", buf.String()) + + /* Output: + 2024/03/10 18:23:55 Setting up chromem-go... + 2024/03/10 18:23:55 Reading JSON lines... + 2024/03/10 18:23:55 Read and parsed 5006 documents. + 2024/03/10 18:23:55 Adding documents to chromem-go, including creating their embeddings via OpenAI API... + 2024/03/10 18:28:12 Querying chromem-go... + 2024/03/10 18:28:12 Search results: + 1) Similarity 0.488895: + URL: https://arxiv.org/abs/2209.15469 + Submitter: Christian Buck + Title: Zero-Shot Retrieval with Search Agents and Hybrid Environments + Abstract: Learning to search is the task of building artificial agents that learn to autonomously use a search... + 2) Similarity 0.480713: + URL: https://arxiv.org/abs/2305.11516 + Submitter: Ryo Nagata Dr. + Title: Contextualized Word Vector-based Methods for Discovering Semantic Differences with No Training nor Word Alignment + Abstract: In this paper, we propose methods for discovering semantic differences in words appearing in two cor... + 3) Similarity 0.476079: + URL: https://arxiv.org/abs/2310.14025 + Submitter: Maria Lymperaiou + Title: Large Language Models and Multimodal Retrieval for Visual Word Sense Disambiguation + Abstract: Visual Word Sense Disambiguation (VWSD) is a novel challenging task with the goal of retrieving an i... + 4) Similarity 0.474883: + URL: https://arxiv.org/abs/2302.14785 + Submitter: Teven Le Scao + Title: Joint Representations of Text and Knowledge Graphs for Retrieval and Evaluation + Abstract: A key feature of neural models is that they can produce semantic vector representations of objects (... 
+ 5) Similarity 0.470326: + URL: https://arxiv.org/abs/2309.02403 + Submitter: Dallas Card + Title: Substitution-based Semantic Change Detection using Contextual Embeddings + Abstract: Measuring semantic change has thus far remained a task where methods using contextual embeddings hav... + 6) Similarity 0.466851: + URL: https://arxiv.org/abs/2309.08187 + Submitter: Vu Tran + Title: Encoded Summarization: Summarizing Documents into Continuous Vector Space for Legal Case Retrieval + Abstract: We present our method for tackling a legal case retrieval task by introducing our method of encoding... + 7) Similarity 0.461783: + URL: https://arxiv.org/abs/2307.16638 + Submitter: Maiia Bocharova Bocharova + Title: VacancySBERT: the approach for representation of titles and skills for semantic similarity search in the recruitment domain + Abstract: The paper focuses on deep learning semantic search algorithms applied in the HR domain. The aim of t... + 8) Similarity 0.460481: + URL: https://arxiv.org/abs/2106.07400 + Submitter: Clara Meister + Title: Determinantal Beam Search + Abstract: Beam search is a go-to strategy for decoding neural sequence models. The algorithm can naturally be ... + 9) Similarity 0.460001: + URL: https://arxiv.org/abs/2305.04049 + Submitter: Yuxia Wu + Title: Actively Discovering New Slots for Task-oriented Conversation + Abstract: Existing task-oriented conversational search systems heavily rely on domain ontologies with pre-defi... + 10) Similarity 0.458321: + URL: https://arxiv.org/abs/2305.08654 + Submitter: Taichi Aida + Title: Unsupervised Semantic Variation Prediction using the Distribution of Sibling Embeddings + Abstract: Languages are dynamic entities, where the meanings associated with words constantly change with time... 
+ */ +} From 72f8b05d7f17ab1cbb37441c5cd1f3e05a8340b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 18:55:02 +0100 Subject: [PATCH 3/8] Update RAG example To be more consistent with the new semantic search one --- examples/rag-wikipedia-ollama/README.md | 4 ++-- examples/rag-wikipedia-ollama/main.go | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/rag-wikipedia-ollama/README.md b/examples/rag-wikipedia-ollama/README.md index 5029b09..7a30f2e 100644 --- a/examples/rag-wikipedia-ollama/README.md +++ b/examples/rag-wikipedia-ollama/README.md @@ -1,6 +1,6 @@ # RAG Wikipedia Ollama -This example shows a retrieval augmented generation (RAG) application, using `chromem-go` as knowledge base for finding relevant info for a question. +This example shows a retrieval augmented generation (RAG) application, using `chromem-go` as knowledge base for finding relevant info for a question. More specifically the app is doing *question answering*. The underlying data is 200 Wikipedia articles (or rather their lead section / introduction). We run the embeddings model and LLM in [Ollama](https://github.com/ollama/ollama), to showcase how a RAG application can run entirely offline, without relying on OpenAI or other third party APIs. It doesn't require a GPU, and a CPU like an 11th Gen Intel i5-1135G7 (like in the first generation Framework Laptop 13) is fast enough. @@ -35,7 +35,7 @@ The output can differ slightly on each run, but it's along the lines of: 2024/03/02 20:03:32 Reply after augmenting the question with knowledge: "The Monarch Company existed from 1896 to 1985." ``` -The majority of the time here is spent during the embeddings creation as well as the LLM conversation, which are not part of `chromem-go`. +The majority of the time here is spent during the embeddings creation, where we are limited by the performance of the Ollama API, which depends on your CPU/GPU and the embeddings model. 
## OpenAI diff --git a/examples/rag-wikipedia-ollama/main.go b/examples/rag-wikipedia-ollama/main.go index 55b3076..5d32552 100644 --- a/examples/rag-wikipedia-ollama/main.go +++ b/examples/rag-wikipedia-ollama/main.go @@ -91,10 +91,11 @@ func main() { log.Println("Not reading JSON lines because collection was loaded from persistent storage.") } - // Search for documents similar to the one we added just by passing the original - // question. + // Search for documents that are semantically similar to the original question. // We ask for the two most similar documents, but you can use more or less depending // on your needs and the supported context size of the LLM you use. + // You can limit the search by filtering on content or metadata (like the article's + // category), but we don't do that in this example. log.Println("Querying chromem-go...") docRes, err := collection.Query(ctx, question, 2, nil, nil) if err != nil { From 54512c343b74929202f73adfdba155560c156f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 19:15:29 +0100 Subject: [PATCH 4/8] Add examples top level README --- examples/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 examples/README.md diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..b03a432 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,9 @@ +# Examples + +1. [RAG Wikipedia Ollama](rag-wikipedia-ollama) + - This example shows a retrieval augmented generation (RAG) application, using `chromem-go` as knowledge base for finding relevant info for a question. More specifically the app is doing *question answering*. + - The underlying data is 200 Wikipedia articles (or rather their lead section / introduction). + - We run the embeddings model and LLM in [Ollama](https://github.com/ollama/ollama), to showcase how a RAG application can run entirely offline, without relying on OpenAI or other third party APIs. +2. 
[Semantic search arXiv OpenAI](semantic-search-arxiv-openai) + - This example shows a semantic search application, using `chromem-go` as vector database for finding semantically relevant search results. + - We load and search across ~5,000 arXiv papers in the "Computer Science - Computation and Language" category, which is the relevant one for Natural Language Processing (NLP) related papers. From 02234458c214f59b453c408738f624733b7bfd3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 20:54:28 +0100 Subject: [PATCH 5/8] Update main README With changed reference to examples --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff936d7..7239aac 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Fine-tuning an LLM can help a bit, but it's more meant to improve the LLMs reaso 4. In the question to the LLM, you provide this content alongside your question. 5. The LLM can take this up-to-date precise content into account when answering. -Check out the [example code](example) to see it in action! +Check out the [example code](examples) to see it in action! ## Interface @@ -176,7 +176,9 @@ See the Godoc for details: + +For full, working examples, using the vector database for retrieval augmented generation (RAG) and semantic search and using either OpenAI or locally running the embeddings model and LLM (in Ollama), see the [example code](examples). 
## Motivation From c16b7d9c172a565ed43629112b76ae55291f5526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 21:01:13 +0100 Subject: [PATCH 6/8] Add missing file close to both examples --- examples/rag-wikipedia-ollama/main.go | 1 + examples/semantic-search-arxiv-openai/main.go | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/rag-wikipedia-ollama/main.go b/examples/rag-wikipedia-ollama/main.go index 5d32552..263f091 100644 --- a/examples/rag-wikipedia-ollama/main.go +++ b/examples/rag-wikipedia-ollama/main.go @@ -62,6 +62,7 @@ func main() { if err != nil { panic(err) } + defer f.Close() d := json.NewDecoder(f) log.Println("Reading JSON lines...") for i := 1; ; i++ { diff --git a/examples/semantic-search-arxiv-openai/main.go b/examples/semantic-search-arxiv-openai/main.go index 44f6be2..508dbc4 100644 --- a/examples/semantic-search-arxiv-openai/main.go +++ b/examples/semantic-search-arxiv-openai/main.go @@ -43,6 +43,7 @@ func main() { if err != nil { panic(err) } + defer f.Close() d := json.NewDecoder(f) log.Println("Reading JSON lines...") i := 0 From acbe5d22ee898657096b544ebc36f134ca1ddbf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 21:12:16 +0100 Subject: [PATCH 7/8] Add query performance to examples --- examples/rag-wikipedia-ollama/README.md | 1 + examples/rag-wikipedia-ollama/main.go | 4 ++++ examples/semantic-search-arxiv-openai/README.md | 1 + examples/semantic-search-arxiv-openai/main.go | 4 ++++ 4 files changed, 10 insertions(+) diff --git a/examples/rag-wikipedia-ollama/README.md b/examples/rag-wikipedia-ollama/README.md index 7a30f2e..539e90c 100644 --- a/examples/rag-wikipedia-ollama/README.md +++ b/examples/rag-wikipedia-ollama/README.md @@ -29,6 +29,7 @@ The output can differ slightly on each run, but it's along the lines of: 2024/03/02 20:02:34 Reading JSON lines... 
2024/03/02 20:02:34 Adding documents to chromem-go, including creating their embeddings via Ollama API... 2024/03/02 20:03:11 Querying chromem-go... +2024/03/02 20:03:11 Search took 231.672667ms 2024/03/02 20:03:11 Document 1 (similarity: 0.723627): "Malleable Iron Range Company was a company that existed from 1896 to 1985 and primarily produced kitchen ranges made of malleable iron but also produced a variety of other related products. The company's primary trademark was 'Monarch' and was colloquially often referred to as the Monarch Company or just Monarch." 2024/03/02 20:03:11 Document 2 (similarity: 0.550584): "The American Motor Car Company was a short-lived company in the automotive industry founded in 1906 lasting until 1913. It was based in Indianapolis Indiana United States. The American Motor Car Company pioneered the underslung design." 2024/03/02 20:03:11 Asking LLM with augmented question... diff --git a/examples/rag-wikipedia-ollama/main.go b/examples/rag-wikipedia-ollama/main.go index 263f091..4d451ae 100644 --- a/examples/rag-wikipedia-ollama/main.go +++ b/examples/rag-wikipedia-ollama/main.go @@ -8,6 +8,7 @@ import ( "os" "runtime" "strconv" + "time" "github.com/philippgille/chromem-go" ) @@ -97,11 +98,13 @@ func main() { // on your needs and the supported context size of the LLM you use. // You can limit the search by filtering on content or metadata (like the article's // category), but we don't do that in this example. + start := time.Now() log.Println("Querying chromem-go...") docRes, err := collection.Query(ctx, question, 2, nil, nil) if err != nil { panic(err) } + log.Println("Search took", time.Since(start)) // Here you could filter out any documents whose similarity is below a certain threshold. // if docRes[...].Similarity < 0.5 { ... @@ -126,6 +129,7 @@ func main() { 2024/03/02 20:02:34 Reading JSON lines... 2024/03/02 20:02:34 Adding documents to chromem-go, including creating their embeddings via Ollama API... 
2024/03/02 20:03:11 Querying chromem-go... + 2024/03/02 20:03:11 Search took 231.672667ms 2024/03/02 20:03:11 Document 1 (similarity: 0.723627): "Malleable Iron Range Company was a company that existed from 1896 to 1985 and primarily produced kitchen ranges made of malleable iron but also produced a variety of other related products. The company's primary trademark was 'Monarch' and was colloquially often referred to as the Monarch Company or just Monarch." 2024/03/02 20:03:11 Document 2 (similarity: 0.550584): "The American Motor Car Company was a short-lived company in the automotive industry founded in 1906 lasting until 1913. It was based in Indianapolis Indiana United States. The American Motor Car Company pioneered the underslung design." 2024/03/02 20:03:11 Asking LLM with augmented question... diff --git a/examples/semantic-search-arxiv-openai/README.md b/examples/semantic-search-arxiv-openai/README.md index 632a29e..1292fcf 100644 --- a/examples/semantic-search-arxiv-openai/README.md +++ b/examples/semantic-search-arxiv-openai/README.md @@ -27,6 +27,7 @@ The output can differ slightly on each run, but it's along the lines of: 2024/03/10 18:23:55 Read and parsed 5006 documents. 2024/03/10 18:23:55 Adding documents to chromem-go, including creating their embeddings via OpenAI API... 2024/03/10 18:28:12 Querying chromem-go... 
+ 2024/03/10 18:28:12 Search took 529.451163ms 2024/03/10 18:28:12 Search results: 1) Similarity 0.488895: URL: https://arxiv.org/abs/2209.15469 diff --git a/examples/semantic-search-arxiv-openai/main.go b/examples/semantic-search-arxiv-openai/main.go index 508dbc4..e0d341b 100644 --- a/examples/semantic-search-arxiv-openai/main.go +++ b/examples/semantic-search-arxiv-openai/main.go @@ -9,6 +9,7 @@ import ( "os" "runtime" "strings" + "time" "github.com/philippgille/chromem-go" ) @@ -87,10 +88,12 @@ func main() { // You can limit the search by filtering on content or metadata (like the paper's // submitter), but we don't do that in this example. log.Println("Querying chromem-go...") + start := time.Now() docRes, err := collection.Query(ctx, searchTerm, 10, nil, nil) if err != nil { panic(err) } + log.Println("Search took", time.Since(start)) // Here you could filter out any documents whose similarity is below a certain threshold. // if docRes[...].Similarity < 0.5 { ... @@ -114,6 +117,7 @@ func main() { 2024/03/10 18:23:55 Read and parsed 5006 documents. 2024/03/10 18:23:55 Adding documents to chromem-go, including creating their embeddings via OpenAI API... 2024/03/10 18:28:12 Querying chromem-go... + 2024/03/10 18:28:12 Search took 529.451163ms 2024/03/10 18:28:12 Search results: 1) Similarity 0.488895: URL: https://arxiv.org/abs/2209.15469 From 0a07c76ead0451b7c4259fadfd3a9dc34e2f091c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gill=C3=A9?= Date: Sun, 10 Mar 2024 21:12:33 +0100 Subject: [PATCH 8/8] Mention non-optimized performance in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7239aac..c9ca51b 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ Because `chromem-go` is embeddable it enables you to add retrieval augmented gen The focus is not scale or number of features, but simplicity. +Performance has not been a priority yet. 
Without optimizations (except some parallelization with goroutines) querying 5,000 documents takes ~500ms on a mid-range laptop CPU (11th Gen Intel i5-1135G7, like in the first generation Framework Laptop 13). + > ⚠️ The project is in beta, under heavy construction, and may introduce breaking changes in releases before `v1.0.0`. All changes are documented in the [`CHANGELOG`](./CHANGELOG.md). ## Contents