Skip to content

Commit

Permalink
Merge branch 'main' into james/async-sync
Browse files Browse the repository at this point in the history
  • Loading branch information
jamescalam authored Dec 23, 2024
2 parents 2148380 + 2b9720f commit 0204002
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 28 deletions.
2 changes: 1 addition & 1 deletion docs/encoders/openai-embed-3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/encoders/fastembed.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/encoders/fastembed.ipynb)"
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/encoders/openai-embed-3.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/encoders/openai-embed-3.ipynb)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
project = "Semantic Router"
copyright = "2024, Aurelio AI"
author = "Aurelio AI"
release = "0.1.0.dev3"
release = "0.1.0.dev5"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "semantic-router"
version = "0.1.0.dev3"
version = "0.1.0.dev5"
description = "Super fast semantic router for AI decision making"
authors = ["Aurelio AI <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion semantic_router/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

__all__ = ["SemanticRouter", "HybridRouter", "Route", "RouterConfig"]

__version__ = "0.1.0.dev3"
__version__ = "0.1.0.dev5"
3 changes: 3 additions & 0 deletions semantic_router/index/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,9 @@ async def _async_fetch_metadata(self, vector_id: str) -> dict:
"ids": [vector_id],
}

if self.namespace:
params["namespace"] = [self.namespace]

headers = {
"Api-Key": self.api_key,
}
Expand Down
52 changes: 36 additions & 16 deletions semantic_router/routers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import random
import hashlib
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing_extensions import deprecated
from pydantic import BaseModel, Field

import numpy as np
Expand Down Expand Up @@ -280,6 +281,20 @@ def get_hash(self) -> ConfigParameter:
)


def xq_reshape(xq: List[float] | np.ndarray) -> np.ndarray:
# convert to numpy array if not already
if not isinstance(xq, np.ndarray):
xq = np.array(xq)
# check if vector is 1D and expand to 2D if necessary
if len(xq.shape) == 1:
xq = np.expand_dims(xq, axis=0)
if xq.shape[0] != 1:
raise ValueError(
f"Expected (1, x) dimensional input for query, got {xq.shape}."
)
return xq


class BaseRouter(BaseModel):
encoder: DenseEncoder = Field(default_factory=OpenAIEncoder)
index: BaseIndex = Field(default_factory=BaseIndex)
Expand Down Expand Up @@ -402,7 +417,7 @@ def check_for_matching_routes(self, top_class: str) -> Optional[Route]:
def __call__(
self,
text: Optional[str] = None,
vector: Optional[List[float]] = None,
vector: Optional[List[float] | np.ndarray] = None,
simulate_static: bool = False,
route_filter: Optional[List[str]] = None,
) -> RouteChoice:
Expand All @@ -411,6 +426,9 @@ def __call__(
if text is None:
raise ValueError("Either text or vector must be provided")
vector = self._encode(text=[text])
# convert to numpy array if not already
vector = xq_reshape(vector)
# calculate semantics
route, top_class_scores = self._retrieve_top_route(vector, route_filter)
passed = self._check_threshold(top_class_scores, route)
if passed and route is not None and not simulate_static:
Expand Down Expand Up @@ -444,7 +462,7 @@ def __call__(
async def acall(
self,
text: Optional[str] = None,
vector: Optional[List[float]] = None,
vector: Optional[List[float] | np.ndarray] = None,
simulate_static: bool = False,
route_filter: Optional[List[str]] = None,
) -> RouteChoice:
Expand All @@ -453,7 +471,9 @@ async def acall(
if text is None:
raise ValueError("Either text or vector must be provided")
vector = await self._async_encode(text=[text])

# convert to numpy array if not already
vector = xq_reshape(vector)
# calculate semantics
route, top_class_scores = await self._async_retrieve_top_route(
vector, route_filter
)
Expand Down Expand Up @@ -483,19 +503,21 @@ async def acall(
# if no route passes threshold, return empty route choice
return RouteChoice()

# TODO: add multiple routes return to __call__ and acall
@deprecated("This method is deprecated. Use `__call__` instead.")
def retrieve_multiple_routes(
self,
text: Optional[str] = None,
vector: Optional[List[float]] = None,
vector: Optional[List[float] | np.ndarray] = None,
) -> List[RouteChoice]:
if vector is None:
if text is None:
raise ValueError("Either text or vector must be provided")
vector_arr = self._encode(text=[text])
else:
vector_arr = np.array(vector)
vector = self._encode(text=[text])
# convert to numpy array if not already
vector = xq_reshape(vector)
# get relevant utterances
results = self._retrieve(xq=vector_arr)
results = self._retrieve(xq=vector)
# decide most relevant routes
categories_with_scores = self._semantic_classify_multiple_routes(results)
return [
Expand All @@ -514,28 +536,26 @@ def retrieve_multiple_routes(
# return route_choices

def _retrieve_top_route(
self, vector: List[float], route_filter: Optional[List[str]] = None
self, vector: np.ndarray, route_filter: Optional[List[str]] = None
) -> Tuple[Optional[Route], List[float]]:
"""
Retrieve the top matching route based on the given vector.
Returns a tuple of the route (if any) and the scores of the top class.
"""
# get relevant results (scores and routes)
results = self._retrieve(
xq=np.array(vector), top_k=self.top_k, route_filter=route_filter
)
results = self._retrieve(xq=vector, top_k=self.top_k, route_filter=route_filter)
# decide most relevant routes
top_class, top_class_scores = self._semantic_classify(results)
# TODO do we need this check?
route = self.check_for_matching_routes(top_class)
return route, top_class_scores

async def _async_retrieve_top_route(
self, vector: List[float], route_filter: Optional[List[str]] = None
self, vector: np.ndarray, route_filter: Optional[List[str]] = None
) -> Tuple[Optional[Route], List[float]]:
# get relevant results (scores and routes)
results = await self._async_retrieve(
xq=np.array(vector), top_k=self.top_k, route_filter=route_filter
xq=vector, top_k=self.top_k, route_filter=route_filter
)
# decide most relevant routes
top_class, top_class_scores = await self._async_semantic_classify(results)
Expand Down Expand Up @@ -1080,7 +1100,7 @@ def _retrieve(
"""Given a query vector, retrieve the top_k most similar records."""
# get scores and routes
scores, routes = self.index.query(
vector=xq, top_k=top_k, route_filter=route_filter
vector=xq[0], top_k=top_k, route_filter=route_filter
)
return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)]

Expand All @@ -1090,7 +1110,7 @@ async def _async_retrieve(
"""Given a query vector, retrieve the top_k most similar records."""
# get scores and routes
scores, routes = await self.index.aquery(
vector=xq, top_k=top_k, route_filter=route_filter
vector=xq[0], top_k=top_k, route_filter=route_filter
)
return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)]

Expand Down
12 changes: 6 additions & 6 deletions semantic_router/routers/hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from semantic_router.index import BaseIndex, HybridLocalIndex
from semantic_router.schema import RouteChoice, SparseEmbedding, Utterance
from semantic_router.utils.logger import logger
from semantic_router.routers.base import BaseRouter
from semantic_router.routers.base import BaseRouter, xq_reshape
from semantic_router.llms import BaseLLM


Expand Down Expand Up @@ -197,18 +197,19 @@ async def _async_encode(
def __call__(
self,
text: Optional[str] = None,
vector: Optional[List[float]] = None,
vector: Optional[List[float] | np.ndarray] = None,
simulate_static: bool = False,
route_filter: Optional[List[str]] = None,
sparse_vector: dict[int, float] | SparseEmbedding | None = None,
) -> RouteChoice:
vector_arr: np.ndarray | None = None
potential_sparse_vector: List[SparseEmbedding] | None = None
# if no vector provided, encode text to get vector
if vector is None:
if text is None:
raise ValueError("Either text or vector must be provided")
vector_arr, potential_sparse_vector = self._encode(text=[text])
vector, potential_sparse_vector = self._encode(text=[text])
# convert to numpy array if not already
vector = xq_reshape(vector)
if sparse_vector is None:
if text is None:
raise ValueError("Either text or sparse_vector must be provided")
Expand All @@ -217,10 +218,9 @@ def __call__(
)
if sparse_vector is None:
raise ValueError("Sparse vector is required for HybridLocalIndex.")
vector_arr = vector_arr if vector_arr is not None else np.array(vector)
# TODO: add alpha as a parameter
scores, route_names = self.index.query(
vector=vector_arr,
vector=vector,
top_k=self.top_k,
route_filter=route_filter,
sparse_vector=sparse_vector,
Expand Down
2 changes: 0 additions & 2 deletions semantic_router/routers/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,12 @@ def _encode(self, text: list[str]) -> Any:
"""Given some text, encode it."""
# create query vector
xq = np.array(self.encoder(text))
xq = np.squeeze(xq) # Reduce to 1d array.
return xq

async def _async_encode(self, text: list[str]) -> Any:
"""Given some text, encode it."""
# create query vector
xq = np.array(await self.encoder.acall(docs=text))
xq = np.squeeze(xq) # Reduce to 1d array.
return xq

def add(self, routes: List[Route] | Route):
Expand Down
42 changes: 42 additions & 0 deletions tests/unit/test_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,13 @@ def routes_4():
]


@pytest.fixture
def route_single_utterance():
    """Provide a one-element list holding a route with a single utterance."""
    lone_route = Route(name="Route 3", utterances=["Hello"])
    return [lone_route]


@pytest.fixture
def dynamic_routes():
return [
Expand Down Expand Up @@ -251,6 +258,39 @@ def test_initialization_dynamic_route(
)
assert route_layer.score_threshold == openai_encoder.score_threshold

def test_add_single_utterance(
    self, routes, route_single_utterance, openai_encoder, index_cls
):
    """Add a single-utterance route to a router built with ``routes``.

    Checks that the score threshold still matches the encoder's, that the
    router can be called afterwards, and that the index reports six
    utterances in total.
    """
    router = SemanticRouter(
        encoder=openai_encoder,
        routes=routes,
        index=init_index(index_cls),
        auto_sync="local",
    )
    router.add(routes=route_single_utterance)
    assert router.score_threshold == openai_encoder.score_threshold
    if index_cls is PineconeIndex:
        # allow for index to be updated
        time.sleep(PINECONE_SLEEP)
    # only the call succeeding matters here; the returned choice is unused
    router("Hello")
    stored_utterances = router.index.get_utterances()
    assert len(stored_utterances) == 6

def test_init_and_add_single_utterance(
    self, route_single_utterance, openai_encoder, index_cls
):
    """Add a single-utterance route to a router created without routes.

    Checks that the score threshold matches the encoder's, that the router
    can be called afterwards, and that the index reports exactly one
    utterance.
    """
    router = SemanticRouter(
        encoder=openai_encoder,
        index=init_index(index_cls),
        auto_sync="local",
    )
    if index_cls is PineconeIndex:
        # allow for index to be updated
        time.sleep(PINECONE_SLEEP)
    router.add(routes=route_single_utterance)
    assert router.score_threshold == openai_encoder.score_threshold
    # only the call succeeding matters here; the returned choice is unused
    router("Hello")
    stored_utterances = router.index.get_utterances()
    assert len(stored_utterances) == 1

def test_delete_index(self, openai_encoder, routes, index_cls):
# TODO merge .delete_index() and .delete_all() and get working
index = init_index(index_cls)
Expand Down Expand Up @@ -786,6 +826,8 @@ def test_retrieve_with_vector(self, openai_encoder, routes, index_cls):
auto_sync="local",
)
vector = [0.1, 0.2, 0.3]
if index_cls is PineconeIndex:
time.sleep(PINECONE_SLEEP) # allow for index to be populated
results = route_layer.retrieve_multiple_routes(vector=vector)
assert len(results) >= 1, "Expected at least one result"
assert any(
Expand Down

0 comments on commit 0204002

Please sign in to comment.