Commit 443051a

Cache: Add CrateDBSemanticCache based on CrateDBVectorStore

amotl committed Dec 23, 2024
1 parent c41a9be commit 443051a
Showing 6 changed files with 555 additions and 3 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
@@ -3,7 +3,8 @@
 
 ## Unreleased
 - Added implementation and software tests for `CrateDBCache`,
-  deriving from `SQLAlchemyCache`.
+  deriving from `SQLAlchemyCache`, and `CrateDBSemanticCache`,
+  building upon `CrateDBVectorStore`.
 
 ## v0.0.0 - 2024-12-16
 - Make it work
129 changes: 129 additions & 0 deletions examples/cache.py
@@ -0,0 +1,129 @@
# ruff: noqa: T201
"""
Use CrateDB to cache LLM prompts and responses.
The standard / full cache avoids invoking the LLM when the supplied
prompt is exactly the same as one already encountered.
The semantic cache retrieves cached responses based on semantic
similarity between the user input and previously cached inputs.
With the cache enabled, repeated LLM conversations don't need
to talk to the LLM (API), so they also work offline.
"""

import sqlalchemy as sa
from langchain.globals import set_llm_cache
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from langchain_cratedb import CrateDBCache, CrateDBSemanticCache


def standard_cache():
    """
    Demonstrate LangChain standard cache with CrateDB.
    """

    # Configure cache.
    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
    set_llm_cache(CrateDBCache(engine))

    # Invoke LLM conversation.
    llm = ChatOpenAI(
        # model_name="gpt-3.5-turbo",
        # model_name="gpt-4o-mini",
        model_name="chatgpt-4o-latest",
        temperature=0.7,
    )
    print()
    print("Asking with standard cache:")
    answer = llm.invoke("What is the answer to everything?")
    print(answer.content)

    # Turn off cache.
    set_llm_cache(None)


def semantic_cache():
    """
    Demonstrate LangChain semantic cache with CrateDB.
    """

    # Configure LLM models.
    # model_name_embedding = "text-embedding-ada-002"
    model_name_embedding = "text-embedding-3-small"
    # model_name_embedding = "text-embedding-3-large"

    # model_name_chat = "gpt-3.5-turbo"
    # model_name_chat = "gpt-4o-mini"
    model_name_chat = "chatgpt-4o-latest"

    # Configure embeddings.
    embeddings = OpenAIEmbeddings(model=model_name_embedding)

    # Configure cache.
    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
    set_llm_cache(
        CrateDBSemanticCache(
            embedding=embeddings,
            connection=engine,
            search_threshold=1.0,
        )
    )

    # Invoke LLM conversation.
    llm = ChatOpenAI(
        model_name=model_name_chat,
    )
    print()
    print("Asking with semantic cache:")
    answer = llm.invoke("What is the answer to everything?")
    print(answer.content)

    # Turn off cache.
    set_llm_cache(None)


if __name__ == "__main__":
    standard_cache()
    semantic_cache()


"""
What is the answer to everything?
Date: 2024-12-23
## gpt-3.5-turbo
The answer to everything is subjective and may vary depending on individual
beliefs or philosophies. Some may say that love is the answer to everything,
while others may say that knowledge or self-awareness is the key. Ultimately,
the answer to everything may be different for each person and can only be
discovered through personal reflection and introspection.
## gpt-4o-mini
The answer to the ultimate question of life, the universe, and everything,
according to Douglas Adams' "The Hitchhiker's Guide to the Galaxy", is
famously given as the number 42. However, the context and meaning behind
that answer remains a philosophical and humorous mystery. In a broader
sense, different people and cultures may have various interpretations of
what the "answer to everything" truly is, often reflecting their beliefs,
values, and experiences.
## chatgpt-4o-latest, pure
Ah, you're referencing the famous answer from Douglas Adams'
*The Hitchhiker's Guide to the Galaxy*! In the book, the supercomputer
Deep Thought determines that the "Answer to the Ultimate Question of
Life, the Universe, and Everything" is **42**.
Of course, the real kicker is that no one actually knows what the Ultimate
Question is. So, while 42 is the answer, its true meaning remains a cosmic
mystery! 😊
## chatgpt-4o-latest, with text-embedding-3-small embeddings
Ah, you're referring to the famous answer from Douglas Adams'
*The Hitchhiker's Guide to the Galaxy*! The answer to the ultimate question
of life, the universe, and everything is **42**. However, as the story
humorously points out, the actual *question* remains unknown. 😊
If you're looking for a deeper or more philosophical answer, feel free to
elaborate!
"""
3 changes: 2 additions & 1 deletion langchain_cratedb/__init__.py
@@ -5,7 +5,7 @@
 
 patch_sqlalchemy_dialect()
 
-from langchain_cratedb.cache import CrateDBCache
+from langchain_cratedb.cache import CrateDBCache, CrateDBSemanticCache
 from langchain_cratedb.chat_history import CrateDBChatMessageHistory
 from langchain_cratedb.loaders import CrateDBLoader
 from langchain_cratedb.vectorstores import (
@@ -24,6 +24,7 @@
     "CrateDBCache",
     "CrateDBChatMessageHistory",
     "CrateDBLoader",
+    "CrateDBSemanticCache",
     "CrateDBVectorStore",
     "CrateDBVectorStoreMultiCollection",
     "__version__",
152 changes: 151 additions & 1 deletion langchain_cratedb/cache.py
@@ -1,9 +1,16 @@
 import typing as t
 
 import sqlalchemy as sa
-from langchain_community.cache import FullLLMCache, SQLAlchemyCache
+from langchain_community.cache import FullLLMCache, SQLAlchemyCache, _hash
+from langchain_core.caches import RETURN_VAL_TYPE, BaseCache
+from langchain_core.embeddings import Embeddings
+from langchain_core.load import dumps, loads
+from langchain_core.outputs import Generation
 from sqlalchemy_cratedb.support import refresh_after_dml
 
+from langchain_cratedb.vectorstores import CrateDBVectorStore
+from langchain_cratedb.vectorstores.main import DBConnection
+
 
 class CrateDBCache(SQLAlchemyCache):
     """
@@ -16,3 +23,146 @@ def __init__(
    ):
        refresh_after_dml(engine)
        super().__init__(engine, cache_schema)


class CrateDBSemanticCache(BaseCache):
    """
    CrateDB adapter for the LangChain semantic cache subsystem.
    It uses CrateDBVectorStore as a backend.
    """

    def __init__(
        self,
        embedding: Embeddings,
        *,
        connection: t.Union[
            None, DBConnection, sa.Engine, sa.ext.asyncio.AsyncEngine, str
        ] = None,
        cache_table_prefix: str = "cache_",
        search_threshold: float = 0.2,
        **kwargs: t.Any,
    ):
        """Initialize with necessary components.

        Args:
            embedding (Embeddings): A text embedding model.
            cache_table_prefix (str, optional): Prefix for the cache table name.
                Defaults to "cache_".
            search_threshold (float, optional): Score threshold; a search result
                is considered a match when its score is at or below this value.
                Defaults to 0.2.

        Examples:

            Basic usage:

            .. code-block:: python

                import langchain
                from langchain_cratedb import CrateDBSemanticCache
                from langchain_openai import OpenAIEmbeddings

                langchain.llm_cache = CrateDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    connection="crate://user:password@localhost:4200/?schema=testdrive",
                )

            Advanced usage:

            .. code-block:: python

                import langchain
                import sqlalchemy as sa
                from langchain_cratedb import CrateDBSemanticCache
                from langchain_openai import OpenAIEmbeddings

                engine = sa.create_engine(
                    "crate://user:password@localhost:4200/?schema=testdrive"
                )
                langchain.llm_cache = CrateDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    connection=engine,
                    search_threshold=1.0,
                )
        """

        self._cache_dict: t.Dict[str, CrateDBVectorStore] = {}
        self.embedding = embedding
        self.connection = connection
        self.cache_table_prefix = cache_table_prefix
        self.search_threshold = search_threshold

        # Pass the rest of the kwargs to the connection.
        self.connection_kwargs = kwargs

    def _index_name(self, llm_string: str) -> str:
        hashed_index = _hash(llm_string)
        return f"{self.cache_table_prefix}{hashed_index}"

    def _get_llm_cache(self, llm_string: str) -> CrateDBVectorStore:
        index_name = self._index_name(llm_string)

        # Return the vector store client for the specific llm string.
        if index_name not in self._cache_dict:
            vs = self._cache_dict[index_name] = CrateDBVectorStore(
                embeddings=self.embedding,
                connection=self.connection,
                collection_name=index_name,
                **self.connection_kwargs,
            )
            # Probe the embedding model once to determine the vector dimensionality.
            _embedding = self.embedding.embed_query(text="test")
            vs._init_models(_embedding)
            vs.create_tables_if_not_exists()
        llm_cache = self._cache_dict[index_name]
        llm_cache.create_collection()
        return llm_cache

    def lookup(self, prompt: str, llm_string: str) -> t.Optional[RETURN_VAL_TYPE]:
        """Look up based on prompt and llm_string."""
        llm_cache = self._get_llm_cache(llm_string)
        generations: t.List = []
        # Find the cached entry closest to the prompt in embedding space.
        results = llm_cache.similarity_search_with_score(
            query=prompt,
            k=1,
        )
        """
        from langchain_postgres.vectorstores import DistanceStrategy
        if llm_cache.distance_strategy != DistanceStrategy.EUCLIDEAN:
            raise NotImplementedError(f"CrateDB's vector store only implements Euclidean distance. "
                                      f"Your selection was: {llm_cache.distance_strategy}")
        """  # noqa: E501
        if results:
            for document_score in results:
                if document_score[1] <= self.search_threshold:
                    generations.extend(loads(document_score[0].metadata["return_val"]))
        return generations if generations else None

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on prompt and llm_string."""
        for gen in return_val:
            if not isinstance(gen, Generation):
                raise ValueError(
                    "CrateDBSemanticCache only supports caching of "
                    f"normal LLM generations, got {type(gen)}"
                )
        llm_cache = self._get_llm_cache(llm_string)
        metadata = {
            "llm_string": llm_string,
            "prompt": prompt,
            "return_val": dumps([g for g in return_val]),
        }
        llm_cache.add_texts(texts=[prompt], metadatas=[metadata])

    def clear(self, **kwargs: t.Any) -> None:
        """Clear semantic cache for a given llm_string."""
        if "llm_string" in kwargs:
            index_name = self._index_name(kwargs["llm_string"])
            if index_name in self._cache_dict:
                vs = self._cache_dict[index_name]
                with vs._make_sync_session() as session:
                    collection = vs.get_collection(session)
                    collection.embeddings.clear()
                    session.commit()
                del self._cache_dict[index_name]
        else:
            raise NotImplementedError(
                "Clearing cache elements without constraints is not implemented yet"
            )
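For illustration (not part of the commit), here is a minimal sketch of the BaseCache contract implemented above. It assumes a CrateDB instance on localhost:4200, uses DeterministicFakeEmbeddings so it runs without an OpenAI key, and mirrors the search_threshold=1.0 setting from examples/cache.py; the llm_string value is a hypothetical stand-in for a serialized LLM configuration.

from langchain_core.embeddings import DeterministicFakeEmbeddings
from langchain_core.outputs import Generation

from langchain_cratedb import CrateDBSemanticCache

cache = CrateDBSemanticCache(
    embedding=DeterministicFakeEmbeddings(size=1536),
    connection="crate://crate@localhost:4200/?schema=testdrive",
    search_threshold=1.0,
)

# The serialized LLM configuration acts as the cache partition key.
llm_string = "my-llm-config"

# `update` stores the prompt alongside its generations; `lookup` embeds the
# incoming prompt and returns the cached generations when the similarity
# score passes the threshold.
cache.update("What is 2 + 2?", llm_string, [Generation(text="4")])
print(cache.lookup("What is 2 + 2?", llm_string))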
