From 42dea8dd0b1fe5f331abe602b043f2bd97f0f543 Mon Sep 17 00:00:00 2001 From: James Briggs <35938317+jamescalam@users.noreply.github.com> Date: Sat, 2 Dec 2023 14:03:55 -0800 Subject: [PATCH] update tests and removed WIP modules/objects --- semantic_router/encoders/huggingface.py | 9 ---- semantic_router/rankers/__init__.py | 0 semantic_router/rankers/base.py | 12 ----- semantic_router/rankers/cohere.py | 31 ----------- tests/unit/encoders/test_bm25.py | 21 ++++++++ tests/unit/test_layer.py | 70 ++++++++++++++++++++++++- 6 files changed, 90 insertions(+), 53 deletions(-) delete mode 100644 semantic_router/encoders/huggingface.py delete mode 100644 semantic_router/rankers/__init__.py delete mode 100644 semantic_router/rankers/base.py delete mode 100644 semantic_router/rankers/cohere.py create mode 100644 tests/unit/encoders/test_bm25.py diff --git a/semantic_router/encoders/huggingface.py b/semantic_router/encoders/huggingface.py deleted file mode 100644 index 52ddecd2..00000000 --- a/semantic_router/encoders/huggingface.py +++ /dev/null @@ -1,9 +0,0 @@ -from semantic_router.encoders import BaseEncoder - - -class HuggingFaceEncoder(BaseEncoder): - def __init__(self, name: str): - self.name = name - - def __call__(self, docs: list[str]) -> list[float]: - raise NotImplementedError diff --git a/semantic_router/rankers/__init__.py b/semantic_router/rankers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/semantic_router/rankers/base.py b/semantic_router/rankers/base.py deleted file mode 100644 index 5d326f33..00000000 --- a/semantic_router/rankers/base.py +++ /dev/null @@ -1,12 +0,0 @@ -from pydantic import BaseModel - - -class BaseRanker(BaseModel): - name: str - top_n: int = 5 - - class Config: - arbitrary_types_allowed = True - - def __call__(self, query: str, docs: list[str]) -> list[str]: - raise NotImplementedError("Subclasses must implement this method") diff --git a/semantic_router/rankers/cohere.py b/semantic_router/rankers/cohere.py deleted file mode 100644 index 7e6e8ad6..00000000 --- a/semantic_router/rankers/cohere.py +++ /dev/null @@ -1,31 +0,0 @@ -import os - -import cohere - -from semantic_router.rankers import BaseRanker - - -class CohereRanker(BaseRanker): - client: cohere.Client | None - - def __init__( - self, - name: str = "rerank-english-v2.0", - top_n: int = 5, - cohere_api_key: str | None = None, - ): - super().__init__(name=name, top_n=top_n) - cohere_api_key = cohere_api_key or os.getenv("COHERE_API_KEY") - if cohere_api_key is None: - raise ValueError("Cohere API key cannot be 'None'.") - self.client = cohere.Client(cohere_api_key) - - def __call__(self, query: str, docs: list[str]) -> list[str]: - # get top_n results - results = self.client.rerank( - query=query, documents=docs, top_n=self.top_n, model=self.name - ) - # get indices of entries that are ranked highest by cohere - top_idx = [r.index for r in results] - top_docs = [docs[i] for i in top_idx] - return top_idx, top_docs diff --git a/tests/unit/encoders/test_bm25.py b/tests/unit/encoders/test_bm25.py new file mode 100644 index 00000000..8c0e9bc4 --- /dev/null +++ b/tests/unit/encoders/test_bm25.py @@ -0,0 +1,21 @@ +import pytest + +from semantic_router.encoders import BM25Encoder + + +@pytest.fixture +def bm25_encoder(): + return BM25Encoder() + + +class TestBM25Encoder: + def test_initialization(self): + self.bm25_encoder = BM25Encoder() + assert len(self.bm25_encoder.idx_mapping) != 0 + + def test_call_method(self): + result = self.bm25_encoder(["test"]) + assert isinstance(result, list), "Result should be a list" + assert all( + isinstance(sublist, list) for sublist in result + ), "Each item in result should be a list" diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index 0b9842fb..611aff45 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -1,7 +1,7 @@ import pytest from semantic_router.encoders import BaseEncoder, CohereEncoder, OpenAIEncoder -from semantic_router.layer import DecisionLayer # Replace with the actual module name +from semantic_router.layer import DecisionLayer, HybridDecisionLayer # Replace with the actual module name from semantic_router.schema import Decision @@ -111,5 +111,73 @@ def test_failover_score_threshold(self, base_encoder): decision_layer = DecisionLayer(encoder=base_encoder) assert decision_layer.score_threshold == 0.82 +class TestHybridDecisionLayer: + def test_initialization(self, openai_encoder, decisions): + decision_layer = HybridDecisionLayer(encoder=openai_encoder, decisions=decisions) + assert decision_layer.score_threshold == 0.82 + assert len(decision_layer.index) == 5 + assert len(set(decision_layer.categories)) == 2 + + def test_initialization_different_encoders(self, cohere_encoder, openai_encoder): + decision_layer_cohere = HybridDecisionLayer(encoder=cohere_encoder) + assert decision_layer_cohere.score_threshold == 0.3 + + decision_layer_openai = HybridDecisionLayer(encoder=openai_encoder) + assert decision_layer_openai.score_threshold == 0.82 + + def test_add_decision(self, openai_encoder): + decision_layer = HybridDecisionLayer(encoder=openai_encoder) + decision = Decision(name="Decision 3", utterances=["Yes", "No"]) + decision_layer.add(decision) + assert len(decision_layer.index) == 2 + assert len(set(decision_layer.categories)) == 1 + + def test_add_multiple_decisions(self, openai_encoder, decisions): + decision_layer = HybridDecisionLayer(encoder=openai_encoder) + for decision in decisions: + decision_layer.add(decision) + assert len(decision_layer.index) == 5 + assert len(set(decision_layer.categories)) == 2 + + def test_query_and_classification(self, openai_encoder, decisions): + decision_layer = HybridDecisionLayer(encoder=openai_encoder, decisions=decisions) + query_result = decision_layer("Hello") + assert query_result in ["Decision 1", "Decision 2"] + + def test_query_with_no_index(self, openai_encoder): + decision_layer = HybridDecisionLayer(encoder=openai_encoder) + assert decision_layer("Anything") is None + + def test_semantic_classify(self, openai_encoder, decisions): + decision_layer = HybridDecisionLayer(encoder=openai_encoder, decisions=decisions) + classification, score = decision_layer._semantic_classify( + [ + {"decision": "Decision 1", "score": 0.9}, + {"decision": "Decision 2", "score": 0.1}, + ] + ) + assert classification == "Decision 1" + assert score == [0.9] + + def test_semantic_classify_multiple_decisions(self, openai_encoder, decisions): + decision_layer = HybridDecisionLayer(encoder=openai_encoder, decisions=decisions) + classification, score = decision_layer._semantic_classify( + [ + {"decision": "Decision 1", "score": 0.9}, + {"decision": "Decision 2", "score": 0.1}, + {"decision": "Decision 1", "score": 0.8}, + ] + ) + assert classification == "Decision 1" + assert score == [0.9, 0.8] + + def test_pass_threshold(self, openai_encoder): + decision_layer = HybridDecisionLayer(encoder=openai_encoder) + assert not decision_layer._pass_threshold([], 0.5) + assert decision_layer._pass_threshold([0.6, 0.7], 0.5) + + def test_failover_score_threshold(self, base_encoder): + decision_layer = HybridDecisionLayer(encoder=base_encoder) + assert decision_layer.score_threshold == 0.82 # Add more tests for edge cases and error handling as needed.