Commit 443051a

Cache: Add CrateDBSemanticCache based on CrateDBVectorStore

amotl committed Dec 23, 2024
1 parent c41a9be commit 443051a
Showing 6 changed files with 555 additions and 3 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
@@ -3,7 +3,8 @@
 
 ## Unreleased
 - Added implementation and software tests for `CrateDBCache`,
-  deriving from `SQLAlchemyCache`.
+  deriving from `SQLAlchemyCache`, and `CrateDBSemanticCache`,
+  building upon `CrateDBVectorStore`.
 
 ## v0.0.0 - 2024-12-16
 - Make it work
129 changes: 129 additions & 0 deletions examples/cache.py
@@ -0,0 +1,129 @@
# ruff: noqa: T201
"""
Use CrateDB to cache LLM prompts and responses.
The standard / full cache avoids invoking the LLM when the supplied
prompt is exactly the same as one already encountered.
The semantic cache retrieves cached responses based on semantic
similarity between the user input and previously cached inputs.
With the cache enabled, repeated LLM conversations don't need
to talk to the LLM (API), so they also work offline.
"""

import sqlalchemy as sa
from langchain.globals import set_llm_cache
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from langchain_cratedb import CrateDBCache, CrateDBSemanticCache


def standard_cache():
    """
    Demonstrate LangChain standard cache with CrateDB.
    """

    # Configure cache.
    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
    set_llm_cache(CrateDBCache(engine))

    # Invoke LLM conversation.
    llm = ChatOpenAI(
        # model_name="gpt-3.5-turbo",
        # model_name="gpt-4o-mini",
        model_name="chatgpt-4o-latest",
        temperature=0.7,
    )
    print()
    print("Asking with standard cache:")
    answer = llm.invoke("What is the answer to everything?")
    print(answer.content)

    # Turn off cache.
    set_llm_cache(None)


def semantic_cache():
    """
    Demonstrate LangChain semantic cache with CrateDB.
    """

    # Configure LLM models.
    # model_name_embedding = "text-embedding-ada-002"
    model_name_embedding = "text-embedding-3-small"
    # model_name_embedding = "text-embedding-3-large"

    # model_name_chat = "gpt-3.5-turbo"
    # model_name_chat = "gpt-4o-mini"
    model_name_chat = "chatgpt-4o-latest"

    # Configure embeddings.
    embeddings = OpenAIEmbeddings(model=model_name_embedding)

    # Configure cache.
    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
    set_llm_cache(
        CrateDBSemanticCache(
            embedding=embeddings,
            connection=engine,
            search_threshold=1.0,
        )
    )

    # Invoke LLM conversation.
    llm = ChatOpenAI(
        model_name=model_name_chat,
    )
    print()
    print("Asking with semantic cache:")
    answer = llm.invoke("What is the answer to everything?")
    print(answer.content)

    # Turn off cache.
    set_llm_cache(None)


if __name__ == "__main__":
    standard_cache()
    semantic_cache()


"""
What is the answer to everything?
Date: 2024-12-23
## gpt-3.5-turbo
The answer to everything is subjective and may vary depending on individual
beliefs or philosophies. Some may say that love is the answer to everything,
while others may say that knowledge or self-awareness is the key. Ultimately,
the answer to everything may be different for each person and can only be
discovered through personal reflection and introspection.
## gpt-4o-mini
The answer to the ultimate question of life, the universe, and everything,
according to Douglas Adams' "The Hitchhiker's Guide to the Galaxy", is
famously given as the number 42. However, the context and meaning behind
that answer remains a philosophical and humorous mystery. In a broader
sense, different people and cultures may have various interpretations of
what the "answer to everything" truly is, often reflecting their beliefs,
values, and experiences.
## chatgpt-4o-latest, pure
Ah, you're referencing the famous answer from Douglas Adams'
*The Hitchhiker's Guide to the Galaxy*! In the book, the supercomputer
Deep Thought determines that the "Answer to the Ultimate Question of
Life, the Universe, and Everything" is **42**.
Of course, the real kicker is that no one actually knows what the Ultimate
Question is. So, while 42 is the answer, its true meaning remains a cosmic
mystery! 😊
## chatgpt-4o-latest, with text-embedding-3-small embeddings
Ah, you're referring to the famous answer from Douglas Adams'
*The Hitchhiker's Guide to the Galaxy*! The answer to the ultimate question
of life, the universe, and everything is **42**. However, as the story
humorously points out, the actual *question* remains unknown. 😊
If you're looking for a deeper or more philosophical answer, feel free to
elaborate!
"""
3 changes: 2 additions & 1 deletion langchain_cratedb/__init__.py
@@ -5,7 +5,7 @@
 
 patch_sqlalchemy_dialect()
 
-from langchain_cratedb.cache import CrateDBCache
+from langchain_cratedb.cache import CrateDBCache, CrateDBSemanticCache
 from langchain_cratedb.chat_history import CrateDBChatMessageHistory
 from langchain_cratedb.loaders import CrateDBLoader
 from langchain_cratedb.vectorstores import (
@@ -24,6 +24,7 @@
     "CrateDBCache",
     "CrateDBChatMessageHistory",
     "CrateDBLoader",
+    "CrateDBSemanticCache",
     "CrateDBVectorStore",
     "CrateDBVectorStoreMultiCollection",
     "__version__",
152 changes: 151 additions & 1 deletion langchain_cratedb/cache.py
@@ -1,9 +1,16 @@
 import typing as t
 
 import sqlalchemy as sa
-from langchain_community.cache import FullLLMCache, SQLAlchemyCache
+from langchain_community.cache import FullLLMCache, SQLAlchemyCache, _hash
+from langchain_core.caches import RETURN_VAL_TYPE, BaseCache
+from langchain_core.embeddings import Embeddings
+from langchain_core.load import dumps, loads
+from langchain_core.outputs import Generation
 from sqlalchemy_cratedb.support import refresh_after_dml
 
+from langchain_cratedb.vectorstores import CrateDBVectorStore
+from langchain_cratedb.vectorstores.main import DBConnection
+
 
 class CrateDBCache(SQLAlchemyCache):
     """
@@ -16,3 +23,146 @@ def __init__(
    ):
        refresh_after_dml(engine)
        super().__init__(engine, cache_schema)


class CrateDBSemanticCache(BaseCache):
    """
    CrateDB adapter for the LangChain semantic cache subsystem.
    It uses CrateDBVectorStore as a backend.
    """

    def __init__(
        self,
        embedding: Embeddings,
        *,
        connection: t.Union[
            None, DBConnection, sa.Engine, sa.ext.asyncio.AsyncEngine, str
        ] = None,
        cache_table_prefix: str = "cache_",
        search_threshold: float = 0.2,
        **kwargs: t.Any,
    ):
        """Initialize with necessary components.

        Args:
            embedding (Embeddings): A text embedding model.
            cache_table_prefix (str, optional): Prefix for the cache table name.
                Defaults to "cache_".
            search_threshold (float, optional): Score threshold; a search result
                is considered a match when its score is at or below this value.
                Defaults to 0.2.

        Examples:

            Basic usage:

            .. code-block:: python

                import langchain
                from langchain_cratedb import CrateDBSemanticCache
                from langchain_openai import OpenAIEmbeddings

                langchain.llm_cache = CrateDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    connection="crate://user:password@localhost:4200/?schema=testdrive",
                )

            Advanced usage:

            .. code-block:: python

                import langchain
                import sqlalchemy as sa
                from langchain_cratedb import CrateDBSemanticCache
                from langchain_openai import OpenAIEmbeddings

                engine = sa.create_engine(
                    "crate://user:password@localhost:4200/?schema=testdrive"
                )
                langchain.llm_cache = CrateDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    connection=engine,
                    search_threshold=1.0,
                )
        """

        self._cache_dict: t.Dict[str, CrateDBVectorStore] = {}
        self.embedding = embedding
        self.connection = connection
        self.cache_table_prefix = cache_table_prefix
        self.search_threshold = search_threshold

        # Pass the rest of the kwargs to the connection.
        self.connection_kwargs = kwargs

    def _index_name(self, llm_string: str) -> str:
        hashed_index = _hash(llm_string)
        return f"{self.cache_table_prefix}{hashed_index}"

    def _get_llm_cache(self, llm_string: str) -> CrateDBVectorStore:
        index_name = self._index_name(llm_string)

        # Return the vector store client for the specific llm string.
        if index_name not in self._cache_dict:
            vs = self._cache_dict[index_name] = CrateDBVectorStore(
                embeddings=self.embedding,
                connection=self.connection,
                collection_name=index_name,
                **self.connection_kwargs,
            )
            # Probe the embedding model once to determine the vector dimensionality.
            _embedding = self.embedding.embed_query(text="test")
            vs._init_models(_embedding)
            vs.create_tables_if_not_exists()
        llm_cache = self._cache_dict[index_name]
        llm_cache.create_collection()
        return llm_cache

    def lookup(self, prompt: str, llm_string: str) -> t.Optional[RETURN_VAL_TYPE]:
        """Look up based on prompt and llm_string."""
        llm_cache = self._get_llm_cache(llm_string)
        generations: t.List = []
        # Find the cached entry closest to the prompt in embedding space.
        results = llm_cache.similarity_search_with_score(
            query=prompt,
            k=1,
        )
        """
        from langchain_postgres.vectorstores import DistanceStrategy
        if llm_cache.distance_strategy != DistanceStrategy.EUCLIDEAN:
            raise NotImplementedError(f"CrateDB's vector store only implements Euclidean distance. "
                                      f"Your selection was: {llm_cache.distance_strategy}")
        """  # noqa: E501
        if results:
            for document_score in results:
                if document_score[1] <= self.search_threshold:
                    generations.extend(loads(document_score[0].metadata["return_val"]))
        return generations if generations else None

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on prompt and llm_string."""
        for gen in return_val:
            if not isinstance(gen, Generation):
                raise ValueError(
                    "CrateDBSemanticCache only supports caching of "
                    f"normal LLM generations, got {type(gen)}"
                )
        llm_cache = self._get_llm_cache(llm_string)
        metadata = {
            "llm_string": llm_string,
            "prompt": prompt,
            "return_val": dumps([g for g in return_val]),
        }
        llm_cache.add_texts(texts=[prompt], metadatas=[metadata])

    def clear(self, **kwargs: t.Any) -> None:
        """Clear semantic cache for a given llm_string."""
        if "llm_string" in kwargs:
            index_name = self._index_name(kwargs["llm_string"])
            if index_name in self._cache_dict:
                vs = self._cache_dict[index_name]
                with vs._make_sync_session() as session:
                    collection = vs.get_collection(session)
                    collection.embeddings.clear()
                    session.commit()
                del self._cache_dict[index_name]
        else:
            raise NotImplementedError(
                "Clearing cache elements without constraints is not implemented yet"
            )
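For illustration (not part of the commit), here is a minimal sketch of the BaseCache contract implemented above. It assumes a CrateDB instance on localhost:4200, uses DeterministicFakeEmbeddings so it runs without an OpenAI key, and mirrors the search_threshold=1.0 setting from examples/cache.py; the llm_string value is a hypothetical stand-in for a serialized LLM configuration.

from langchain_core.embeddings import DeterministicFakeEmbeddings
from langchain_core.outputs import Generation

from langchain_cratedb import CrateDBSemanticCache

cache = CrateDBSemanticCache(
    embedding=DeterministicFakeEmbeddings(size=1536),
    connection="crate://crate@localhost:4200/?schema=testdrive",
    search_threshold=1.0,
)

# The serialized LLM configuration acts as the cache partition key.
llm_string = "my-llm-config"

# `update` stores the prompt alongside its generations; `lookup` embeds the
# incoming prompt and returns the cached generations when the similarity
# score passes the threshold.
cache.update("What is 2 + 2?", llm_string, [Generation(text="4")])
print(cache.lookup("What is 2 + 2?", llm_string))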
