forked from run-llama/llama_index
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Elasticsearch vector store support (run-llama#7543)
- Loading branch information
1 parent
e2be21d
commit 644c034
Showing
9 changed files
with
1,358 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
138 changes: 138 additions & 0 deletions
138
docs/examples/vector_stores/ElasticsearchIndexDemo.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "307804a3-c02b-4a57-ac0d-172c30ddc851", | ||
"metadata": {}, | ||
"source": [ | ||
"# Elasticsearch Vector Store" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "d48af8e1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import logging\n", | ||
"import sys\n", | ||
"import os\n", | ||
"\n", | ||
"logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", | ||
"logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n", | ||
"\n", | ||
"import getpass\n", | ||
"\n", | ||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", | ||
"import openai\n", | ||
"\n", | ||
"openai.api_key = os.environ[\"OPENAI_API_KEY\"]" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "8ee4473a-094f-4d0a-a825-e1213db07240", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Load documents, build VectorStoreIndex with Elasticsearch" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "0a2bcc07", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from llama_index import VectorStoreIndex, SimpleDirectoryReader\n", | ||
"from llama_index.vector_stores import ElasticsearchStore\n", | ||
"from IPython.display import Markdown, display" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "68cbd239-880e-41a3-98d8-dbb3fab55431", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# load documents\n", | ||
"documents = SimpleDirectoryReader(\"../data/paul_graham\").load_data()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "ba1558b3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# initialize without metadata filter\n", | ||
"from llama_index.storage.storage_context import StorageContext\n", | ||
"\n", | ||
"vector_store = ElasticsearchStore(\n", | ||
" es_url=\"http://localhost:9200\", index_name=\"paul_graham\"\n", | ||
")\n", | ||
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", | ||
"index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "04304299-fc3e-40a0-8600-f50c3292767e", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Query Index" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 15, | ||
"id": "35369eda", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"INFO:elastic_transport.transport:POST http://localhost:9200/paul_graham/_search [status:200 duration:0.030s]\n", | ||
"POST http://localhost:9200/paul_graham/_search [status:200 duration:0.030s]\n", | ||
"POST http://localhost:9200/paul_graham/_search [status:200 duration:0.030s]\n", | ||
"The author learned several things during their time at Interleaf. They learned that it's better for technology companies to be run by product people than sales people, that code edited by too many people leads to bugs, that cheap office space can be depressing, that planned meetings are inferior to corridor conversations, that big bureaucratic customers can be a dangerous source of money, and that there's not much overlap between conventional office hours and the optimal time for hacking.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# set the logging level to DEBUG for more detailed output\n", | ||
"query_engine = index.as_query_engine()\n", | ||
"response = query_engine.query(\"What did the author learn?\")\n", | ||
"print(response)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "307804a3-c02b-4a57-ac0d-172c30ddc851", | ||
"metadata": {}, | ||
"source": [ | ||
"# Elasticsearch\n", | ||
"\n", | ||
">[Elasticsearch](http://www.github.com/elastic/elasticsearch) is a search database that supports full-text and vector searches.\n" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "b5331b6b", | ||
"metadata": {}, | ||
"source": [ | ||
"## Basic Example\n", | ||
"\n", | ||
"In this basic example, we take a Paul Graham essay, split it into chunks, embed it using an open-source embedding model, load it into Elasticsearch, and then query it." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "b3df0b97", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# !pip install llama-index elasticsearch --quiet\n", | ||
"# !pip install sentence-transformers\n", | ||
"# !pip install pydantic==1.10.11" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "d48af8e1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# import\n", | ||
"from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n", | ||
"from llama_index.vector_stores import ElasticsearchStore\n", | ||
"from llama_index.storage.storage_context import StorageContext\n", | ||
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", | ||
"from llama_index.embeddings import LangchainEmbedding\n", | ||
"from IPython.display import Markdown, display" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "374a148b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# set up OpenAI\n", | ||
"import os\n", | ||
"import getpass\n", | ||
"\n", | ||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", | ||
"import openai\n", | ||
"\n", | ||
"openai.api_key = os.environ[\"OPENAI_API_KEY\"]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "667f3cb3-ce18-48d5-b9aa-bfc1a1f0f0f6", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/markdown": [ | ||
"<b>The author worked on writing and programming outside of school. They wrote short stories and tried writing programs on an IBM 1401 computer. They also built a microcomputer kit and started programming on it, writing simple games and a word processor.</b>" | ||
], | ||
"text/plain": [ | ||
"<IPython.core.display.Markdown object>" | ||
] | ||
}, | ||
"metadata": {}, | ||
"output_type": "display_data" | ||
} | ||
], | ||
"source": [ | ||
"# define embedding function\n", | ||
"embed_model = LangchainEmbedding(\n", | ||
" HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n", | ||
")\n", | ||
"\n", | ||
"# load documents\n", | ||
"documents = SimpleDirectoryReader(\n", | ||
" \"../../../examples/paul_graham_essay/data\"\n", | ||
").load_data()\n", | ||
"\n", | ||
"vector_store = ElasticsearchStore(\n", | ||
" index_name=\"paul_graham_essay\", es_url=\"http://localhost:9200\"\n", | ||
")\n", | ||
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", | ||
"service_context = ServiceContext.from_defaults(embed_model=embed_model)\n", | ||
"index = VectorStoreIndex.from_documents(\n", | ||
" documents, storage_context=storage_context, service_context=service_context\n", | ||
")\n", | ||
"\n", | ||
"# Query Data\n", | ||
"query_engine = index.as_query_engine()\n", | ||
"response = query_engine.query(\"What did the author do growing up?\")\n", | ||
"display(Markdown(f\"<b>{response}</b>\"))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.3" | ||
}, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "0ac390d292208ca2380c85f5bce7ded36a7a25670a97c40b8009630eb36cb06e" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.