diff --git a/docs/encoders/aurelio-bm25.ipynb b/docs/encoders/aurelio-bm25.ipynb new file mode 100644 index 00000000..75a6bc70 --- /dev/null +++ b/docs/encoders/aurelio-bm25.ipynb @@ -0,0 +1,378 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/encoders/aurelio-bm25.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/encoders/aurelio-bm25.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Aurelio AI BM25 Encoder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `AurelioSparseEncoder` generates BM25 sparse embeddings using the Aurelio Platform API. Sparse embeddings capture exact term overlap, which complements the semantic matching provided by dense embeddings. In this notebook we pair the `AurelioSparseEncoder` with a dense OpenAI encoder in a `HybridRouter` that scores routes using both." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by installing semantic-router. The `AurelioSparseEncoder` and `HybridRouter` used in this notebook are available from `semantic-router==0.1.0.dev2`." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU \"semantic-router==0.1.0.dev2\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We continue by defining a list of `Route` objects, each with a set of example utterances that should trigger that route." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jamesbriggs/Library/Caches/pypoetry/virtualenvs/semantic-router-C1zr4a78-py3.12/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from semantic_router import Route\n", + "\n", + "politics = Route(\n", + " name=\"politics\",\n", + " utterances=[\n", + " \"isn't politics the best thing ever\",\n", + " \"why don't you tell me about your political opinions\",\n", + " \"don't you just love the president\",\n", + " \"don't you just hate the president\",\n", + " \"they're going to destroy this country!\",\n", + " \"they will save the country!\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's define another for good measure:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "chitchat = Route(\n", + " name=\"chitchat\",\n", + " utterances=[\n", + " \"how's the weather today?\",\n", + " \"how are things going?\",\n", + " \"lovely weather today\",\n", + " \"the weather is horrendous\",\n", + " \"let's go to the chippy\",\n", + " ],\n", + ")\n", + "\n", + "routes = [politics, chitchat]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we initialize our encoders, starting with the sparse encoder. We use the `AurelioSparseEncoder` with the `bm25` model:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from semantic_router.encoders.aurelio import AurelioSparseEncoder\n", + "\n", + "sparse_encoder = AurelioSparseEncoder(name=\"bm25\")" + ] + },
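+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an optional sanity check, we can peek at what a sparse embedding looks like. This cell calls the hosted Aurelio Platform, so it assumes your Aurelio API key is available to the client (typically via an `AURELIO_API_KEY` environment variable). Each embedding comes back as a mapping from token ID to BM25 weight:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# each sparse embedding is a dict of token ID -> weight\n", + "sparse_emb = sparse_encoder([\"how's the weather today?\"])[0]\n", + "list(sparse_emb.items())[:5]" + ] + },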
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we initialize our dense embedding model. We will use OpenAI's `text-embedding-3-large` model alongside a `dimensions` value of `256`. This will produce _tiny_ 256-dimensional vectors that — according to OpenAI — outperform the 1536-dimensional vectors produced by `text-embedding-ada-002`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "from semantic_router.encoders import OpenAIEncoder\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\") or getpass(\n", + " \"Enter OpenAI API Key: \"\n", + ")\n", + "\n", + "encoder = OpenAIEncoder(\n", + " name=\"text-embedding-3-large\", score_threshold=0.5, dimensions=256\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will specify our index. The `HybridLocalIndex` stores both dense and sparse embeddings locally:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from semantic_router.index.hybrid_local import HybridLocalIndex\n", + "\n", + "index = HybridLocalIndex()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we define the `HybridRouter`. When called, the router will consume text (a query) and output the category (`Route`) it belongs to. To initialize a `HybridRouter` we need our dense `encoder`, our `sparse_encoder`, a list of `routes`, and our hybrid `index`." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-24 12:25:32 - semantic_router.utils.logger - INFO - hybrid.py:157 - add() - Encoding route politics\n", + "2024-11-24 12:25:32 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "2024-11-24 12:25:33 - semantic_router.utils.logger - INFO - hybrid.py:157 - add() - Encoding route chitchat\n", + "2024-11-24 12:25:33 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + } + ], + "source": [ + "from semantic_router.routers import HybridRouter\n", + "\n", + "router = HybridRouter(\n", + " encoder=encoder,\n", + " sparse_encoder=sparse_encoder,\n", + " routes=routes,\n", + " index=index,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can check the dimensionality of our dense vectors by looking at the `index` attribute of the `HybridRouter`, and confirm that we have one sparse embedding per utterance:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11, 256)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "router.index.index.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(router.index.sparse_index)" + ] + },
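+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also ask the index to summarize itself via its `describe` method, which reports the vector dimensionality and vector count (another optional check; output not shown):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reports the index dimensions and number of stored vectors\n", + "router.index.describe()" + ] + },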
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We do have 256-dimensional dense vectors, plus one sparse embedding for each of our 11 utterances. Now let's test the router:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-24 12:25:37 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/plain": [ + "RouteChoice(name='politics', function_call=None, similarity_score=1.2995813276471633)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "router(\"don't you love politics?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-24 12:25:38 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/plain": [ + "RouteChoice(name='chitchat', function_call=None, similarity_score=1.8563758628277611)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "router(\"how's the weather today?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both are classified accurately. Note that the similarity scores can exceed `1.0` here because the hybrid score is the sum of the weighted dense and sparse similarities. What happens if we send a query that is unrelated to our existing `Route` objects?" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-24 12:25:41 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/plain": [ + "RouteChoice(name=None, function_call=None, similarity_score=None)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "router(\"I'm interested in learning about llama 2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, we return `None` because no matches were identified. We always recommend optimizing your router's score thresholds for the best performance; you can see how in [this notebook](https://github.com/aurelio-labs/semantic-router/blob/main/docs/06-threshold-optimization.ipynb)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "decision-layer", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/semantic_router/encoders/__init__.py b/semantic_router/encoders/__init__.py index 4256e2e5..893d1d61 100644 --- a/semantic_router/encoders/__init__.py +++ b/semantic_router/encoders/__init__.py @@ -51,6 +51,8 @@ def __init__(self, type: str, name: Optional[str]): self.model = CohereEncoder(name=name) elif self.type == EncoderType.OPENAI: self.model = OpenAIEncoder(name=name) + elif self.type == EncoderType.AURELIO: + self.model = AurelioSparseEncoder(name=name) elif self.type == EncoderType.BM25: if name is None: name = "bm25" diff --git a/semantic_router/encoders/aurelio.py b/semantic_router/encoders/aurelio.py index 3cc9fc86..c257b514 100644 --- a/semantic_router/encoders/aurelio.py +++ b/semantic_router/encoders/aurelio.py @@ -4,7 +4,7 @@ from aurelio_sdk import AurelioClient, AsyncAurelioClient, EmbeddingResponse -from semantic_router.encoders import BaseEncoder +from semantic_router.encoders.base import BaseEncoder class AurelioSparseEncoder(BaseEncoder): diff --git a/semantic_router/index/__init__.py b/semantic_router/index/__init__.py index 4e566d51..3a43abe9 100644 --- a/semantic_router/index/__init__.py +++ b/semantic_router/index/__init__.py @@ -1,6 +1,5 @@ from semantic_router.index.base import BaseIndex from semantic_router.index.hybrid_local import HybridLocalIndex -from semantic_router.index.hybrid_local_opt import HybridLocalOptIndex from semantic_router.index.local import LocalIndex from semantic_router.index.pinecone import PineconeIndex from semantic_router.index.qdrant import QdrantIndex @@ -8,7 +7,6 @@ __all__ = [ "BaseIndex", "HybridLocalIndex", - "HybridLocalOptIndex", "LocalIndex", "QdrantIndex", "PineconeIndex", diff --git a/semantic_router/index/hybrid_local.py b/semantic_router/index/hybrid_local.py index a56463b3..28a3d83b 100644 --- a/semantic_router/index/hybrid_local.py +++ b/semantic_router/index/hybrid_local.py @@ -11,12 +11,11 @@ class HybridLocalIndex(LocalIndex): type: str = "hybrid_local" - sparse_index: Optional[np.ndarray] = None + sparse_index: Optional[list[dict]] = None route_names: Optional[np.ndarray] = None - class Config: - # Stop pydantic from complaining about Optional[np.ndarray]type hints. 
- arbitrary_types_allowed = True + def __init__(self): + super().__init__() def add( self, @@ -25,7 +24,7 @@ def add( utterances: List[str], function_schemas: Optional[List[Dict[str, Any]]] = None, metadata_list: List[Dict[str, Any]] = [], - sparse_embeddings: Optional[List[List[float]]] = None, + sparse_embeddings: Optional[List[dict[int, float]]] = None, ): if sparse_embeddings is None: raise ValueError("Sparse embeddings are required for HybridLocalIndex.") @@ -34,7 +33,6 @@ def add( if metadata_list: raise ValueError("Metadata is not supported for HybridLocalIndex.") embeds = np.array(embeddings) - sparse_embeds = np.array(sparse_embeddings) routes_arr = np.array(routes) if isinstance(utterances[0], str): utterances_arr = np.array(utterances) @@ -42,13 +40,13 @@ def add( utterances_arr = np.array(utterances, dtype=object) if self.index is None or self.sparse_index is None: self.index = embeds - self.sparse_index = sparse_embeds + self.sparse_index = sparse_embeddings self.routes = routes_arr self.utterances = utterances_arr else: # TODO: we should probably switch to an `upsert` method and standardize elsewhere self.index = np.concatenate([self.index, embeds]) - self.sparse_index = np.concatenate([self.sparse_index, sparse_embeds]) + self.sparse_index.extend(sparse_embeddings) self.routes = np.concatenate([self.routes, routes_arr]) self.utterances = np.concatenate([self.utterances, utterances_arr]) @@ -68,13 +66,23 @@ def describe(self) -> Dict: "dimensions": self.index.shape[1] if self.index is not None else 0, "vectors": self.index.shape[0] if self.index is not None else 0, } + + def _sparse_dot_product(self, vec_a: dict[int, float], vec_b: dict[int, float]) -> float: + # switch vecs to ensure first is smallest for more efficiency + if len(vec_a) > len(vec_b): + vec_a, vec_b = vec_b, vec_a + return sum(vec_a[i] * vec_b.get(i, 0) for i in vec_a) + + def _sparse_index_dot_product(self, vec_a: dict[int, float]) -> list[float]: + dot_products = [self._sparse_dot_product(vec_a, vec_b) for vec_b in self.sparse_index] + return dot_products def query( self, vector: np.ndarray, top_k: int = 5, route_filter: Optional[List[str]] = None, - sparse_vector: Optional[np.ndarray] = None, + sparse_vector: Optional[dict[int, float]] = None, ) -> Tuple[np.ndarray, List[str]]: """Search the index for the query and return top_k results. @@ -85,7 +93,7 @@ def query( :param route_filter: A list of route names to filter the search results, defaults to None. :type route_filter: Optional[List[str]], optional :param sparse_vector: The sparse vector to search for, must be provided. 
- :type sparse_vector: np.ndarray + :type sparse_vector: dict[int, float] """ if route_filter: raise ValueError("Route filter is not supported for HybridLocalIndex.") @@ -101,11 +109,7 @@ def query( xq_d_norm = norm(xq_d) # TODO: this used to be xq_d.T, should work without sim_d = np.squeeze(np.dot(self.index, xq_d.T)) / (index_norm * xq_d_norm) # calculate sparse vec similarity - sparse_norm = norm(self.sparse_index, axis=1) - xq_s_norm = norm(xq_s) # TODO: this used to be xq_s.T, should work without - sim_s = np.squeeze(np.dot(self.sparse_index, xq_s.T)) / ( - sparse_norm * xq_s_norm - ) + sim_s = np.array(self._sparse_index_dot_product(xq_s)) total_sim = sim_d + sim_s # get indices of top_k records top_k = min(top_k, total_sim.shape[0]) @@ -122,7 +126,7 @@ async def aquery( vector: np.ndarray, top_k: int = 5, route_filter: Optional[List[str]] = None, - sparse_vector: Optional[np.ndarray] = None, + sparse_vector: Optional[dict[int, float]] = None, ) -> Tuple[np.ndarray, List[str]]: """Search the index for the query and return top_k results. This method calls the sync `query` method as everything uses numpy computations which is CPU-bound @@ -135,7 +139,7 @@ async def aquery( :param route_filter: A list of route names to filter the search results, defaults to None. :type route_filter: Optional[List[str]], optional :param sparse_vector: The sparse vector to search for, must be provided. - :type sparse_vector: np.ndarray + :type sparse_vector: dict[int, float] """ return self.query( vector=vector, @@ -145,10 +149,10 @@ async def aquery( ) def aget_routes(self): - logger.error("Sync remove is not implemented for LocalIndex.") + logger.error(f"Sync remove is not implemented for {self.__class__.__name__}.") def _write_config(self, config: ConfigParameter): - logger.warning("No config is written for LocalIndex.") + logger.warning(f"No config is written for {self.__class__.__name__}.") def delete(self, route_name: str): """ diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py index 6e66142a..5ecede72 100644 --- a/semantic_router/routers/hybrid.py +++ b/semantic_router/routers/hybrid.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Optional, Tuple import asyncio -from pydantic.v1 import validator, Field +from pydantic.v1 import Field import numpy as np @@ -21,14 +21,13 @@ class HybridRouter(BaseRouter): """A hybrid layer that uses both dense and sparse embeddings to classify routes.""" # there are a few additional attributes for hybrid - sparse_encoder: BM25Encoder = Field(default_factory=BM25Encoder) + sparse_encoder: Optional[BaseEncoder] = Field(default=None) alpha: float = 0.3 - index: HybridLocalIndex = Field(default_factory=HybridLocalIndex) def __init__( self, encoder: BaseEncoder, - sparse_encoder: Optional[BM25Encoder] = None, + sparse_encoder: Optional[BaseEncoder] = None, llm: Optional[BaseLLM] = None, routes: List[Route] = [], index: Optional[HybridLocalIndex] = None, @@ -40,46 +39,47 @@ def __init__( super().__init__( encoder=encoder, llm=llm, - routes=routes.copy(), + #routes=routes.copy(), index=index, top_k=top_k, aggregation=aggregation, auto_sync=auto_sync, ) # initialize sparse encoder - if sparse_encoder is None: - logger.warning("No sparse_encoder provided. 
Using default BM25Encoder.") - self.sparse_encoder = BM25Encoder() - else: - self.sparse_encoder = sparse_encoder + self._set_sparse_encoder(sparse_encoder=sparse_encoder) # set alpha self.alpha = alpha + # create copy of routes + routes_copy = routes.copy() # fit sparse encoder if needed if isinstance(self.sparse_encoder, TfidfEncoder) and hasattr( self.sparse_encoder, "fit" ): - self.sparse_encoder.fit(routes) + self.sparse_encoder.fit(routes_copy) # initialize index if not provided - # TODO: add check for hybrid compatible index - if self.index is None: - logger.warning("No index provided. Using default HybridLocalIndex.") - self.index = HybridLocalIndex() + self._set_index(index=index) # add routes if we have them - if routes: - for route in routes: + if routes_copy: + for route in routes_copy: self.add(route) # set score threshold using default method self._set_score_threshold() # TODO: we can't really use this with hybrid... - @validator("sparse_encoder", pre=True, always=True) - def set_sparse_encoder(cls, v): - return v if v is not None else BM25Encoder() - - @validator("index", pre=True, always=True) - def set_index(cls, v): - return v if v is not None else HybridLocalIndex() + def _set_index(self, index: Optional[HybridLocalIndex]): + if index is None: + logger.warning("No index provided. Using default HybridLocalIndex.") + self.index = HybridLocalIndex() + else: + self.index = index + + def _set_sparse_encoder(self, sparse_encoder: Optional[BaseEncoder]): + if sparse_encoder is None: + logger.warning("No sparse_encoder provided. Using default BM25Encoder.") + self.sparse_encoder = BM25Encoder() + else: + self.sparse_encoder = sparse_encoder - def _encode(self, text: List[str]) -> Any: + def _encode(self, text: list[str]) -> tuple[np.ndarray, list[dict[int, float]]]: """Given some text, generates dense and sparse embeddings, then scales them using the chosen alpha value. """ @@ -88,12 +88,12 @@ def _encode(self, text: List[str]) -> Any: # create dense query vector xq_d = np.array(self.encoder(text)) # xq_d = np.squeeze(xq_d) # Reduce to 1d array. 
- # create sparse query vector - xq_s = np.array(self.sparse_encoder(text)) + # create sparse query vector dict + xq_s_dict = self.sparse_encoder(text) # xq_s = np.squeeze(xq_s) # convex scaling - xq_d, xq_s = self._convex_scaling(xq_d, xq_s) - return xq_d, xq_s + xq_d, xq_s_dict = self._convex_scaling(xq_d, xq_s_dict) + return xq_d, xq_s_dict async def _async_encode(self, text: List[str]) -> Any: """Given some text, generates dense and sparse embeddings, then scales them @@ -121,7 +121,7 @@ def __call__( vector: Optional[List[float]] = None, simulate_static: bool = False, route_filter: Optional[List[str]] = None, - sparse_vector: Optional[List[float]] = None, + sparse_vector: Optional[dict[int, float]] = None, ) -> RouteChoice: # if no vector provided, encode text to get vector if vector is None: @@ -137,11 +137,7 @@ def __call__( vector=np.array(vector) if isinstance(vector, list) else vector, top_k=self.top_k, route_filter=route_filter, - sparse_vector=( - np.array(sparse_vector) - if isinstance(sparse_vector, list) - else sparse_vector - ), + sparse_vector=sparse_vector[0] ) top_class, top_class_scores = self._semantic_classify( list(zip(scores, route_names)) @@ -169,11 +165,13 @@ def add(self, route: Route): # TODO: in some places we say vector, sparse_vector and in others # TODO: we say embeddings, sparse_embeddings - def _convex_scaling(self, dense: np.ndarray, sparse: np.ndarray): + def _convex_scaling(self, dense: np.ndarray, sparse: list[dict[int, float]]): # scale sparse and dense vecs - dense = np.array(dense) * self.alpha - sparse = np.array(sparse) * (1 - self.alpha) - return dense, sparse + scaled_dense = np.array(dense) * self.alpha + scaled_sparse = [] + for sparse_dict in sparse: + scaled_sparse.append({k: v * (1 - self.alpha) for k, v in sparse_dict.items()}) + return scaled_dense, scaled_sparse def _set_aggregation_method(self, aggregation: str = "sum"): if aggregation == "sum": diff --git a/semantic_router/schema.py b/semantic_router/schema.py index ef2b2190..eea86b2e 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -7,6 +7,7 @@ class EncoderType(Enum): + AURELIO = "aurelio" AZURE = "azure" COHERE = "cohere" OPENAI = "openai"
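For reference, here is a minimal, self-contained sketch of the dict-based sparse scoring scheme adopted above (illustrative names only, not library code): sparse vectors become dict[int, float] maps from token ID to weight, scored with a sparse dot product and convex-scaled against the dense similarity using alpha.

import numpy as np

def sparse_dot_product(vec_a: dict[int, float], vec_b: dict[int, float]) -> float:
    # iterate over the smaller dict for efficiency, mirroring HybridLocalIndex._sparse_dot_product
    if len(vec_a) > len(vec_b):
        vec_a, vec_b = vec_b, vec_a
    return sum(weight * vec_b.get(token_id, 0.0) for token_id, weight in vec_a.items())

def convex_scale(dense: np.ndarray, sparse: dict[int, float], alpha: float = 0.3):
    # dense similarities are weighted by alpha, sparse by (1 - alpha),
    # mirroring HybridRouter._convex_scaling
    return dense * alpha, {token_id: w * (1 - alpha) for token_id, w in sparse.items()}

# toy sparse vectors: token ID -> BM25 weight
query_sparse = {3: 0.5, 17: 1.2}
doc_sparse = {3: 0.4, 99: 0.7}

scaled_dense, scaled_sparse = convex_scale(np.array([0.1, 0.9]), query_sparse)
# only token 3 overlaps, so the score is 0.5 * (1 - 0.3) * 0.4 = 0.14
print(sparse_dot_product(scaled_sparse, doc_sparse))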