diff --git a/poetry.lock b/poetry.lock index d6c0340d..668ba3e8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5562,4 +5562,4 @@ vision = ["pillow", "torch", "torchvision", "transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.14" -content-hash = "e88cbb248e65005bf3265b1ef31c768baf6f979d08e829458163cb567e09ffa3" +content-hash = "7a8c31e645e649de60985ecb0ddec1aefcdeb1cfcbe23b47185a0bd18407a900" diff --git a/pyproject.toml b/pyproject.toml index 5a52b774..a6b71d9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.9,<3.14" -pydantic = "^2.5.3" +pydantic = "^2.10.2" openai = ">=1.10.0,<2.0.0" cohere = {version = ">=5.9.4,<6.00", optional = true} mistralai= {version = ">=0.0.12,<0.1.0", optional = true} diff --git a/semantic_router/encoders/base.py b/semantic_router/encoders/base.py index 0a25f210..993093c1 100644 --- a/semantic_router/encoders/base.py +++ b/semantic_router/encoders/base.py @@ -1,6 +1,6 @@ from typing import Any, Coroutine, List, Optional -from pydantic.v1 import BaseModel, Field, validator +from pydantic import BaseModel, Field, field_validator import numpy as np from semantic_router.schema import SparseEmbedding @@ -14,7 +14,7 @@ class DenseEncoder(BaseModel): class Config: arbitrary_types_allowed = True - @validator("score_threshold", pre=True, always=True) + @field_validator("score_threshold") def set_score_threshold(cls, v): return float(v) if v is not None else None diff --git a/semantic_router/encoders/clip.py b/semantic_router/encoders/clip.py index 65fbdb8f..d30f5e1f 100644 --- a/semantic_router/encoders/clip.py +++ b/semantic_router/encoders/clip.py @@ -9,7 +9,6 @@ class CLIPEncoder(DenseEncoder): name: str = "openai/clip-vit-base-patch16" type: str = "huggingface" - score_threshold: float = 0.2 tokenizer_kwargs: Dict = {} processor_kwargs: Dict = {} model_kwargs: Dict = {} @@ -21,6 +20,8 @@ class CLIPEncoder(DenseEncoder): _Image: Any = PrivateAttr() 
def __init__(self, **data): + if data.get("score_threshold") is None: + data["score_threshold"] = 0.2 super().__init__(**data) self._tokenizer, self._processor, self._model = self._initialize_hf_model() diff --git a/semantic_router/encoders/huggingface.py b/semantic_router/encoders/huggingface.py index 7c7e56f7..c791e1c3 100644 --- a/semantic_router/encoders/huggingface.py +++ b/semantic_router/encoders/huggingface.py @@ -34,7 +34,6 @@ class HuggingFaceEncoder(DenseEncoder): name: str = "sentence-transformers/all-MiniLM-L6-v2" type: str = "huggingface" - score_threshold: float = 0.5 tokenizer_kwargs: Dict = {} model_kwargs: Dict = {} device: Optional[str] = None @@ -43,6 +42,8 @@ class HuggingFaceEncoder(DenseEncoder): _torch: Any = PrivateAttr() def __init__(self, **data): + if data.get("score_threshold") is None: + data["score_threshold"] = 0.5 super().__init__(**data) self._tokenizer, self._model = self._initialize_hf_model() @@ -153,7 +154,6 @@ class HFEndpointEncoder(DenseEncoder): name: str = "hugging_face_custom_endpoint" huggingface_url: Optional[str] = None huggingface_api_key: Optional[str] = None - score_threshold: float = 0.8 def __init__( self, @@ -180,6 +180,8 @@ def __init__( """ huggingface_url = huggingface_url or os.getenv("HF_API_URL") huggingface_api_key = huggingface_api_key or os.getenv("HF_API_KEY") + if score_threshold is None: + score_threshold = 0.8 super().__init__(name=name, score_threshold=score_threshold) # type: ignore diff --git a/semantic_router/encoders/vit.py b/semantic_router/encoders/vit.py index 73cb0582..dec768e4 100644 --- a/semantic_router/encoders/vit.py +++ b/semantic_router/encoders/vit.py @@ -1,6 +1,6 @@ -from typing import Any, List, Optional, Dict +from typing import Any, Dict, List, Optional -from pydantic.v1 import PrivateAttr +from pydantic import PrivateAttr from semantic_router.encoders import DenseEncoder @@ -8,7 +8,6 @@ class VitEncoder(DenseEncoder): name: str = "google/vit-base-patch16-224" type: str = 
"huggingface" - score_threshold: float = 0.5 processor_kwargs: Dict = {} model_kwargs: Dict = {} device: Optional[str] = None @@ -19,6 +18,8 @@ class VitEncoder(DenseEncoder): _Image: Any = PrivateAttr() def __init__(self, **data): + if data.get("score_threshold") is None: + data["score_threshold"] = 0.5 super().__init__(**data) self._processor, self._model = self._initialize_hf_model() diff --git a/semantic_router/index/postgres.py b/semantic_router/index/postgres.py index 6d445f2b..71ea32e8 100644 --- a/semantic_router/index/postgres.py +++ b/semantic_router/index/postgres.py @@ -103,7 +103,6 @@ class PostgresIndex(BaseIndex): connection_string: Optional[str] = None index_prefix: str = "semantic_router_" index_name: str = "index" - dimensions: int = 1536 metric: Metric = Metric.COSINE namespace: Optional[str] = "" conn: Optional["psycopg2.extensions.connection"] = None @@ -115,9 +114,9 @@ def __init__( connection_string: Optional[str] = None, index_prefix: str = "semantic_router_", index_name: str = "index", - dimensions: int = 1536, metric: Metric = Metric.COSINE, namespace: Optional[str] = "", + dimensions: Optional[int] = None, ): """ Initializes the Postgres index with the specified parameters. @@ -135,6 +134,8 @@ def __init__( :param namespace: An optional namespace for the index. :type namespace: Optional[str] """ + if dimensions is None: + dimensions = 1536 super().__init__() # try and import psycopg2 try: