Commit
Cache: Add CrateDBSemanticCache based on CrateDBVectorStore
Showing 6 changed files with 555 additions and 3 deletions.
@@ -0,0 +1,129 @@
# ruff: noqa: T201
"""
Use CrateDB to cache LLM prompts and responses.

The standard / full cache avoids invoking the LLM when the supplied
prompt is exactly the same as one encountered already.

The semantic cache allows users to retrieve cached prompts based on semantic
similarity between the user input and previously cached inputs.

With the cache enabled, redundant LLM conversations don't need
to talk to the LLM (API), so they can also work offline.
"""

import sqlalchemy as sa
from langchain.globals import set_llm_cache
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from langchain_cratedb import CrateDBCache, CrateDBSemanticCache


def standard_cache():
    """
    Demonstrate the LangChain standard cache with CrateDB.
    """

    # Configure cache.
    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
    set_llm_cache(CrateDBCache(engine))

    # Invoke LLM conversation.
    llm = ChatOpenAI(
        # model_name="gpt-3.5-turbo",
        # model_name="gpt-4o-mini",
        model_name="chatgpt-4o-latest",
        temperature=0.7,
    )
    print()
    print("Asking with standard cache:")
    answer = llm.invoke("What is the answer to everything?")
    print(answer.content)

    # Turn off cache.
    set_llm_cache(None)


def semantic_cache():
    """
    Demonstrate the LangChain semantic cache with CrateDB.
    """

    # Configure LLM models.
    # model_name_embedding = "text-embedding-ada-002"
    model_name_embedding = "text-embedding-3-small"
    # model_name_embedding = "text-embedding-3-large"

    # model_name_chat = "gpt-3.5-turbo"
    # model_name_chat = "gpt-4o-mini"
    model_name_chat = "chatgpt-4o-latest"

    # Configure embeddings.
    embeddings = OpenAIEmbeddings(model=model_name_embedding)

    # Configure cache.
    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
    set_llm_cache(
        CrateDBSemanticCache(
            embedding=embeddings,
            connection=engine,
            search_threshold=1.0,
        )
    )

    # Invoke LLM conversation.
    llm = ChatOpenAI(
        model_name=model_name_chat,
    )
    print()
    print("Asking with semantic cache:")
    answer = llm.invoke("What is the answer to everything?")
    print(answer.content)

    # Turn off cache.
    set_llm_cache(None)


if __name__ == "__main__":
    standard_cache()
    semantic_cache()

""" | ||
What is the answer to everything? | ||
Date: 2024-12-23 | ||
## gpt-3.5-turbo | ||
The answer to everything is subjective and may vary depending on individual | ||
beliefs or philosophies. Some may say that love is the answer to everything, | ||
while others may say that knowledge or self-awareness is the key. Ultimately, | ||
the answer to everything may be different for each person and can only be | ||
discovered through personal reflection and introspection. | ||
## gpt-4o-mini | ||
The answer to the ultimate question of life, the universe, and everything, | ||
according to Douglas Adams' "The Hitchhiker's Guide to the Galaxy", is | ||
famously given as the number 42. However, the context and meaning behind | ||
that answer remains a philosophical and humorous mystery. In a broader | ||
sense, different people and cultures may have various interpretations of | ||
what the "answer to everything" truly is, often reflecting their beliefs, | ||
values, and experiences. | ||
## chatgpt-4o-latest, pure | ||
Ah, you're referencing the famous answer from Douglas Adams' | ||
*The Hitchhiker's Guide to the Galaxy*! In the book, the supercomputer | ||
Deep Thought determines that the "Answer to the Ultimate Question of | ||
Life, the Universe, and Everything" is **42**. | ||
Of course, the real kicker is that no one actually knows what the Ultimate | ||
Question is. So, while 42 is the answer, its true meaning remains a cosmic | ||
mystery! 😊 | ||
## chatgpt-4o-latest, with text-embedding-3-small embeddings | ||
Ah, you're referring to the famous answer from Douglas Adams' | ||
*The Hitchhiker's Guide to the Galaxy*! The answer to the ultimate question | ||
of life, the universe, and everything is **42**. However, as the story | ||
humorously points out, the actual *question* remains unknown. 😊 | ||
If you're looking for a deeper or more philosophical answer, feel free to | ||
elaborate! | ||
""" |
@@ -1,9 +1,16 @@
import typing as t

import sqlalchemy as sa
from langchain_community.cache import FullLLMCache, SQLAlchemyCache
from langchain_community.cache import FullLLMCache, SQLAlchemyCache, _hash
from langchain_core.caches import RETURN_VAL_TYPE, BaseCache
from langchain_core.embeddings import Embeddings
from langchain_core.load import dumps, loads
from langchain_core.outputs import Generation
from sqlalchemy_cratedb.support import refresh_after_dml

from langchain_cratedb.vectorstores import CrateDBVectorStore
from langchain_cratedb.vectorstores.main import DBConnection


class CrateDBCache(SQLAlchemyCache):
    """
@@ -16,3 +23,146 @@ def __init__(
    ):
        refresh_after_dml(engine)
        super().__init__(engine, cache_schema)

class CrateDBSemanticCache(BaseCache):
    """
    CrateDB adapter for the LangChain semantic cache subsystem.
    It uses CrateDBVectorStore as a backend.
    """

    def __init__(
        self,
        embedding: Embeddings,
        *,
        connection: t.Union[
            None, DBConnection, sa.Engine, sa.ext.asyncio.AsyncEngine, str
        ] = None,
        cache_table_prefix: str = "cache_",
        search_threshold: float = 0.2,
        **kwargs: t.Any,
    ):
        """Initialize with necessary components.

        Args:
            embedding (Embeddings): A text embedding model.
            connection (DBConnection, sa.Engine, AsyncEngine, or str, optional):
                Database connection object or SQLAlchemy connection string.
                Defaults to None.
            cache_table_prefix (str, optional): Prefix for the cache table name.
                Defaults to "cache_".
            search_threshold (float, optional): The maximum distance score for
                a search result to be considered a match. Defaults to 0.2.

        Examples:
            Basic Usage:

            .. code-block:: python

                from langchain.globals import set_llm_cache
                from langchain_cratedb import CrateDBSemanticCache
                from langchain_openai import OpenAIEmbeddings

                set_llm_cache(CrateDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    connection="crate://user:password@127.0.0.1:4200/?schema=testdrive",
                ))

            Advanced Usage:

            .. code-block:: python

                import sqlalchemy as sa
                from langchain.globals import set_llm_cache
                from langchain_cratedb import CrateDBSemanticCache
                from langchain_openai import OpenAIEmbeddings

                engine = sa.create_engine(
                    "crate://user:password@127.0.0.1:4200/?schema=testdrive"
                )
                set_llm_cache(CrateDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    connection=engine,
                    cache_table_prefix="cache_",
                    search_threshold=0.2,
                ))
        """

        self._cache_dict: t.Dict[str, CrateDBVectorStore] = {}
        self.embedding = embedding
        self.connection = connection
        self.cache_table_prefix = cache_table_prefix
        self.search_threshold = search_threshold

        # Pass the rest of the kwargs to the connection.
        self.connection_kwargs = kwargs

    def _index_name(self, llm_string: str) -> str:
        hashed_index = _hash(llm_string)
        return f"{self.cache_table_prefix}{hashed_index}"

    def _get_llm_cache(self, llm_string: str) -> CrateDBVectorStore:
        index_name = self._index_name(llm_string)

        # Return vectorstore client for the specific llm string.
        if index_name not in self._cache_dict:
            vs = self._cache_dict[index_name] = CrateDBVectorStore(
                embeddings=self.embedding,
                connection=self.connection,
                collection_name=index_name,
                **self.connection_kwargs,
            )
            _embedding = self.embedding.embed_query(text="test")
            vs._init_models(_embedding)
            vs.create_tables_if_not_exists()
        llm_cache = self._cache_dict[index_name]
        llm_cache.create_collection()
        return llm_cache

    def lookup(self, prompt: str, llm_string: str) -> t.Optional[RETURN_VAL_TYPE]:
        """Look up based on prompt and llm_string."""
        llm_cache = self._get_llm_cache(llm_string)
        generations: t.List = []
        # Run a semantic similarity search for the prompt in the vector store.
        results = llm_cache.similarity_search_with_score(
            query=prompt,
            k=1,
        )
        """
        from langchain_postgres.vectorstores import DistanceStrategy
        if llm_cache.distance_strategy != DistanceStrategy.EUCLIDEAN:
            raise NotImplementedError(f"CrateDB's vector store only implements Euclidean distance. "
                                      f"Your selection was: {llm_cache.distance_strategy}")
        """  # noqa: E501
        if results:
            for document_score in results:
                if document_score[1] <= self.search_threshold:
                    generations.extend(loads(document_score[0].metadata["return_val"]))
        return generations if generations else None

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on prompt and llm_string."""
        for gen in return_val:
            if not isinstance(gen, Generation):
                raise ValueError(
                    "CrateDBSemanticCache only supports caching of "
                    f"normal LLM generations, got {type(gen)}"
                )
        llm_cache = self._get_llm_cache(llm_string)
        metadata = {
            "llm_string": llm_string,
            "prompt": prompt,
            "return_val": dumps([g for g in return_val]),
        }
        llm_cache.add_texts(texts=[prompt], metadatas=[metadata])

    def clear(self, **kwargs: t.Any) -> None:
        """Clear semantic cache for a given llm_string."""
        if "llm_string" in kwargs:
            index_name = self._index_name(kwargs["llm_string"])
            if index_name in self._cache_dict:
                vs = self._cache_dict[index_name]
                with vs._make_sync_session() as session:
                    collection = vs.get_collection(session)
                    collection.embeddings.clear()
                    session.commit()
                del self._cache_dict[index_name]
        else:
            raise NotImplementedError(
                "Clearing cache elements without constraints is not implemented yet"
            )
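To complement the diff, here is a minimal sketch (not part of the commit) that drives the new CrateDBSemanticCache through its update/lookup/clear protocol directly, without any LLM call. The CrateDB URL is an assumption, and DeterministicFakeEmbedding from langchain_core stands in for a real embedding model so the snippet runs without an API key:

```python
import sqlalchemy as sa
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.outputs import Generation

from langchain_cratedb import CrateDBSemanticCache

# Assumed connection string; any reachable CrateDB instance works.
engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
cache = CrateDBSemanticCache(
    embedding=DeterministicFakeEmbedding(size=1536),
    connection=engine,
    search_threshold=0.2,
)

# Normally derived by LangChain from the LLM's parameters.
llm_string = "example-llm-config"

# `update` embeds the prompt and stores the serialized generations as metadata.
cache.update("What is the answer to everything?", llm_string, [Generation(text="42")])

# `lookup` embeds the incoming prompt and returns cached generations when the
# nearest stored prompt is within `search_threshold`.
print(cache.lookup("What is the answer to everything?", llm_string))

# `clear` drops the cached entries for this llm_string.
cache.clear(llm_string=llm_string)
```

In normal operation these methods are not called by hand; LangChain invokes them once the cache has been registered via set_llm_cache(...), as the example file in this commit shows.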