cleanup

aurelio-labs · Nov 20, 2023 · 605fb0e · 605fb0e
1 parent 30a50b4
commit 605fb0e
Show file tree

Hide file tree

Showing 6 changed files with 4 additions and 175 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,11 +1,12 @@
 [tool.poetry]
 name = "semantic-router"
-version = "0.0.1"
+version = "0.0.5"
 description = "Super fast semantic router for AI decision making"
 authors = [
     "James Briggs <[email protected]>",
     "Siraj Aizlewood <[email protected]>",
-    "Simonas Jakubonis <[email protected]>"
+    "Simonas Jakubonis <[email protected]>",
+    "Luca Mannini <[email protected]>"
 ]
 readme = "README.md"
 

diff --git a/semantic_router/layer.py b/semantic_router/layer.py
@@ -7,104 +7,9 @@
     OpenAIRetriever,
     BM25Retriever
 )
-from semantic_router.rankers import BaseRanker
-from semantic_router.matchers import BaseMatcher
 from semantic_router.schema import Decision
 
 
-class MatcherDecisionLayer:
-    index: None
-    decision_arr: None
-    score_threshold: float
-
-    def __init__(self, matcher: BaseMatcher, decisions: list[Decision] = []):
-        self.matcher = matcher
-        # if decisions list has been passed and we have retriever 
-        # we initialize index now
-        if matcher.retriever and decisions:
-            # initialize index now
-            for decision in decisions:
-                self._add_decision(decision=decision)
-
-    def __call__(self, text: str) -> str | None:
-        raise NotImplementedError
-
-class RankDecisionLayer:
-    def __init__(self, ranker: BaseRanker, decisions: list[Decision] = []):
-        self.ranker = ranker
-        # if decisions list has been passed, we initialize decision array
-        if decisions:
-            for decision in decisions:
-                self._add_decision(decision=decision)
-
-    def __call__(self, text: str) -> str | None:
-        results = self._query(text)
-        top_class, top_class_scores = self._semantic_classify(results)
-        passed = self._pass_threshold(top_class_scores, self.score_threshold)
-        if passed:
-            return top_class
-        else:
-            return None
-
-    def add(self, decision: Decision):
-        self._add_decision(decision.utterances)
-
-    def _add_decision(self, decision: Decision):
-        # create decision categories array
-        if self.categories is None:
-            self.categories = np.array([decision.name] * len(decision.utterances))
-            self.utterances = np.array(decision.utterances)
-        else:
-            str_arr = np.array([decision.name] * len(decision.utterances))
-            self.categories = np.concatenate([self.categories, str_arr])
-            self.utterances = np.concatenate([
-                self.utterances,
-                np.array(decision.utterances)
-            ])
-
-    def _query(self, text: str, top_k: int = 5):
-        """Given some text, encodes and searches the index vector space to
-        retrieve the top_k most similar records.
-        """
-        if self.categories:
-            self.rerank.top_n = top_k
-            idx, docs = self.ranker(query=text, docs=self.utterances)
-            # create scores based on rank
-            scores = [1/(i+1) for i in range(len(docs))]
-            # get the utterance categories (decision names)
-            decisions = self.categories[idx] if self.categories is not None else []
-            return [
-                {"decision": d, "score": s.item()} for d, s in zip(decisions, scores)
-            ]
-        else:
-            return []
-
-    def _semantic_classify(self, query_results: list[dict]) -> tuple[str, list[float]]:
-        scores_by_class = {}
-        for result in query_results:
-            score = result["score"]
-            decision = result["decision"]
-            if decision in scores_by_class:
-                scores_by_class[decision].append(score)
-            else:
-                scores_by_class[decision] = [score]
-
-        # Calculate total score for each class
-        total_scores = {
-            decision: sum(scores) for decision, scores in scores_by_class.items()
-        }
-        top_class = max(total_scores, key=lambda x: total_scores[x], default=None)
-
-        # Return the top class and its associated scores
-        return str(top_class), scores_by_class.get(top_class, [])
-
-    def _pass_threshold(self, scores: list[float], threshold: float) -> bool:
-        if scores:
-            return max(scores) > threshold
-        else:
-            return False
-
-
 class DecisionLayer:
     index = None
     categories = None
@@ -217,6 +122,7 @@ def __init__(
     ):
         self.encoder = encoder
         self.sparse_encoder = BM25Retriever()
+        self.alpha = alpha
         # decide on default threshold based on encoder
         if isinstance(encoder, OpenAIRetriever):
             self.score_threshold = 0.82

diff --git a/semantic_router/matchers/__init__.py b/semantic_router/matchers/__init__.py
diff --git a/semantic_router/matchers/base.py b/semantic_router/matchers/base.py
diff --git a/semantic_router/matchers/ranker_only.py b/semantic_router/matchers/ranker_only.py
diff --git a/semantic_router/matchers/two_stage.py b/semantic_router/matchers/two_stage.py