Skip to content

Commit

Permalink
fix: allow types to work between pinecone and hybrid
Browse files Browse the repository at this point in the history
  • Loading branch information
jamescalam committed Jan 1, 2025
1 parent 71311ef commit 69c2e9b
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 53 deletions.
119 changes: 68 additions & 51 deletions semantic_router/index/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,59 @@
def clean_route_name(route_name: str) -> str:
    """Normalize a route name for index storage.

    Trims surrounding whitespace and turns every remaining space into a
    hyphen so the name is safe to use as an identifier.

    :param route_name: Raw route name.
    :return: Trimmed, hyphenated route name.
    """
    trimmed = route_name.strip()
    # split(" ") keeps empty segments, so consecutive spaces become
    # consecutive hyphens — identical to str.replace(" ", "-").
    return "-".join(trimmed.split(" "))

def build_records(
    embeddings: List[List[float]],
    routes: List[str],
    utterances: List[str],
    function_schemas: Optional[List[Dict[str, Any]]] = None,
    metadata_list: Optional[List[Dict[str, Any]]] = None,
    sparse_embeddings: Optional[List[SparseEmbedding]] = None,
) -> List[Dict]:
    """Build Pinecone upsert payloads from parallel per-utterance lists.

    All list arguments are zipped positionally, so index ``i`` of each list
    describes one record; ``zip`` truncates to the shortest list.

    :param embeddings: Dense embedding vector for each utterance.
    :param routes: Route name for each utterance.
    :param utterances: Raw utterance texts.
    :param function_schemas: Optional per-utterance function schemas; when
        omitted, each record gets an empty ``{}`` schema.
    :param metadata_list: Optional per-record metadata dicts. Defaults to an
        empty list, so omitting it yields zero records via zip truncation —
        callers are expected to always pass it.
    :param sparse_embeddings: Optional sparse embeddings for hybrid search;
        when given, each record also carries Pinecone-format sparse values.
    :return: List of record dicts ready for Pinecone upsert.
    """
    if function_schemas is None:
        function_schemas = [{}] * len(embeddings)
    if metadata_list is None:
        # None sentinel instead of a mutable `[]` default (shared across
        # calls); preserves the original empty-list behavior.
        metadata_list = []
    if sparse_embeddings is None:
        # Dense-only records.
        return [
            PineconeRecord(
                values=vector,
                route=route,
                utterance=utterance,
                function_schema=json.dumps(function_schema),
                metadata=metadata,
            ).to_dict()
            for vector, route, utterance, function_schema, metadata in zip(
                embeddings,
                routes,
                utterances,
                function_schemas,
                metadata_list,
            )
        ]
    # Hybrid records: attach sparse values in Pinecone's expected format.
    return [
        PineconeRecord(
            values=vector,
            sparse_values=sparse_emb.to_pinecone(),
            route=route,
            utterance=utterance,
            function_schema=json.dumps(function_schema),
            metadata=metadata,
        ).to_dict()
        for vector, route, utterance, function_schema, metadata, sparse_emb in zip(
            embeddings,
            routes,
            utterances,
            function_schemas,
            metadata_list,
            sparse_embeddings,
        )
    ]


class PineconeRecord(BaseModel):
id: str = ""
values: List[float]
sparse_values: Optional[dict[int, float]] = None
sparse_values: Optional[dict[str, list]] = None
route: str
utterance: str
function_schema: str = "{}"
Expand All @@ -49,10 +97,7 @@ def to_dict(self):
"metadata": self.metadata,
}
if self.sparse_values:
d["sparse_values"] = {
"indices": list(self.sparse_values.keys()),
"values": list(self.sparse_values.values()),
}
d["sparse_values"] = self.sparse_values
return d


Expand Down Expand Up @@ -255,34 +300,20 @@ def add(
function_schemas: Optional[Optional[List[Dict[str, Any]]]] = None,
metadata_list: List[Dict[str, Any]] = [],
batch_size: int = 100,
sparse_embeddings: Optional[Optional[List[dict[int, float]]]] = None,
sparse_embeddings: Optional[Optional[List[SparseEmbedding]]] = None,
):
"""Add vectors to Pinecone in batches."""
if self.index is None:
self.dimensions = self.dimensions or len(embeddings[0])
self.index = self._init_index(force_create=True)
if function_schemas is None:
function_schemas = [{}] * len(embeddings)
if sparse_embeddings is None:
sparse_embeddings = [{}] * len(embeddings)
vectors_to_upsert = [
PineconeRecord(
values=vector,
sparse_values=sparse_dict,
route=route,
utterance=utterance,
function_schema=json.dumps(function_schema),
metadata=metadata,
).to_dict()
for vector, route, utterance, function_schema, metadata, sparse_dict in zip(
embeddings,
routes,
utterances,
function_schemas,
metadata_list,
sparse_embeddings,
)
]
vectors_to_upsert = build_records(
embeddings=embeddings,
routes=routes,
utterances=utterances,
function_schemas=function_schemas,
metadata_list=metadata_list,
sparse_embeddings=sparse_embeddings,
)

for i in range(0, len(vectors_to_upsert), batch_size):
batch = vectors_to_upsert[i : i + batch_size]
Expand All @@ -296,34 +327,20 @@ async def aadd(
function_schemas: Optional[Optional[List[Dict[str, Any]]]] = None,
metadata_list: List[Dict[str, Any]] = [],
batch_size: int = 100,
sparse_embeddings: Optional[Optional[List[dict[int, float]]]] = None,
sparse_embeddings: Optional[Optional[List[SparseEmbedding]]] = None,
):
"""Add vectors to Pinecone in batches."""
if self.index is None:
self.dimensions = self.dimensions or len(embeddings[0])
self.index = await self._init_async_index(force_create=True)
if function_schemas is None:
function_schemas = [{}] * len(embeddings)
if sparse_embeddings is None:
sparse_embeddings = [{}] * len(embeddings)
vectors_to_upsert = [
PineconeRecord(
values=vector,
sparse_values=sparse_dict,
route=route,
utterance=utterance,
function_schema=json.dumps(function_schema),
metadata=metadata,
).to_dict()
for vector, route, utterance, function_schema, metadata, sparse_dict in zip(
embeddings,
routes,
utterances,
function_schemas,
metadata_list,
sparse_embeddings,
)
]
vectors_to_upsert = build_records(
embeddings=embeddings,
routes=routes,
utterances=utterances,
function_schemas=function_schemas,
metadata_list=metadata_list,
sparse_embeddings=sparse_embeddings,
)

for i in range(0, len(vectors_to_upsert), batch_size):
batch = vectors_to_upsert[i : i + batch_size]
Expand Down
4 changes: 2 additions & 2 deletions semantic_router/routers/hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def add(self, routes: List[Route] | Route):
utterances=all_utterances,
function_schemas=all_function_schemas,
metadata_list=all_metadata,
sparse_embeddings=sparse_emb, # type: ignore
sparse_embeddings=sparse_emb,
)

self.routes.extend(routes)
Expand Down Expand Up @@ -129,7 +129,7 @@ def _execute_sync_strategy(self, strategy: Dict[str, Dict[str, List[Utterance]]]
utt.function_schemas for utt in strategy["remote"]["upsert"] # type: ignore
],
metadata_list=[utt.metadata for utt in strategy["remote"]["upsert"]],
sparse_embeddings=sparse_emb, # type: ignore
sparse_embeddings=sparse_emb,
)
if strategy["local"]["delete"]:
self._local_delete(utterances=strategy["local"]["delete"])
Expand Down

0 comments on commit 69c2e9b

Please sign in to comment.