Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jamescalam committed Nov 20, 2023
1 parent 30a50b4 commit 605fb0e
Show file tree
Hide file tree
Showing 6 changed files with 4 additions and 175 deletions.
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
[tool.poetry]
name = "semantic-router"
version = "0.0.1"
version = "0.0.5"
description = "Super fast semantic router for AI decision making"
authors = [
"James Briggs <[email protected]>",
"Siraj Aizlewood <[email protected]>",
"Simonas Jakubonis <[email protected]>"
"Simonas Jakubonis <[email protected]>",
"Luca Mannini <[email protected]>"
]
readme = "README.md"

Expand Down
96 changes: 1 addition & 95 deletions semantic_router/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,104 +7,9 @@
OpenAIRetriever,
BM25Retriever
)
from semantic_router.rankers import BaseRanker
from semantic_router.matchers import BaseMatcher
from semantic_router.schema import Decision


class MatcherDecisionLayer:
    """Skeleton decision layer built around a matcher object.

    Only construction is implemented. Routing (``__call__``) and decision
    registration (``_add_decision``) both raise ``NotImplementedError``.
    """

    index: None
    decision_arr: None
    score_threshold: float

    def __init__(
        self, matcher: BaseMatcher, decisions: list[Decision] | None = None
    ):
        """Store the matcher and register any decisions that were passed.

        Args:
            matcher: Object exposing a ``retriever`` attribute.
            decisions: Optional decisions to register. They are only
                processed when ``matcher.retriever`` is truthy. Defaults to
                ``None`` rather than ``[]`` so no list object is shared
                between calls.
        """
        self.matcher = matcher
        # Decisions can only be indexed when the matcher has a retriever.
        if matcher.retriever and decisions:
            for decision in decisions:
                self._add_decision(decision=decision)

    def __call__(self, text: str) -> str | None:
        """Route ``text`` to a decision name (not implemented)."""
        raise NotImplementedError

    def _add_decision(self, decision: Decision):
        """Register a single decision (not implemented).

        Defined explicitly so that constructing the layer with decisions
        fails with a clear ``NotImplementedError`` instead of an accidental
        ``AttributeError`` for a missing method.
        """
        raise NotImplementedError

class RankDecisionLayer:
    """Route text to a decision by reranking every registered utterance.

    Utterances from all decisions are kept in one flat array next to a
    parallel array of decision names. A query is handed to the ranker, the
    returned documents are scored by rank position (``1 / (rank + 1)``), the
    scores are summed per decision, and the best decision is returned if its
    highest single score exceeds ``score_threshold``.
    """

    # Parallel arrays: categories[i] is the decision name owning utterances[i].
    # Both stay None until the first decision is registered.
    categories = None
    utterances = None

    def __init__(
        self,
        ranker: BaseRanker,
        decisions: list[Decision] | None = None,
        score_threshold: float = 0.5,
    ):
        """Create the layer and register any initial decisions.

        Args:
            ranker: Callable invoked as ``ranker(query=..., docs=...)`` that
                returns ``(indices, docs)``.
            decisions: Optional decisions to register immediately. Defaults
                to ``None`` rather than ``[]`` so no list object is shared
                between calls.
            score_threshold: Value the best rank score of the winning
                decision must exceed. With rank scores 1, 1/2, 1/3, ... the
                default of 0.5 only accepts a winner that owns the top-ranked
                utterance. NOTE(review): the default is a judgment call,
                confirm the intended value.
        """
        self.ranker = ranker
        self.score_threshold = score_threshold
        for decision in decisions or []:
            self._add_decision(decision=decision)

    def __call__(self, text: str) -> str | None:
        """Return the name of the best matching decision, or ``None``."""
        results = self._query(text)
        top_class, top_class_scores = self._semantic_classify(results)
        if self._pass_threshold(top_class_scores, self.score_threshold):
            return top_class
        return None

    def add(self, decision: Decision):
        """Register one more decision with the layer."""
        # Pass the decision itself: _add_decision needs both its name and
        # its utterances.
        self._add_decision(decision=decision)

    def _add_decision(self, decision: Decision):
        """Append a decision's utterances and matching names to the arrays."""
        if not decision.utterances:
            # Nothing to index; also avoids creating an empty float array.
            return
        names = np.array([decision.name] * len(decision.utterances))
        utterances = np.array(decision.utterances)
        if self.categories is None:
            self.categories = names
            self.utterances = utterances
        else:
            self.categories = np.concatenate([self.categories, names])
            self.utterances = np.concatenate([self.utterances, utterances])

    def _query(self, text: str, top_k: int = 5):
        """Rank the stored utterances against ``text``.

        Returns a list of ``{"decision": name, "score": float}`` dicts in
        rank order, or an empty list when no utterances are registered.
        """
        # Explicit None/length check: the truth value of a multi-element
        # NumPy array is ambiguous and raises ValueError.
        if self.categories is None or len(self.categories) == 0:
            return []
        # assumes the ranker exposes a writable ``top_n`` limit - TODO confirm
        self.ranker.top_n = top_k
        idx, _docs = self.ranker(query=text, docs=self.utterances)
        # Map the ranked utterance indices back to their decision names.
        decisions = self.categories[idx]
        # Score purely by rank position; these are plain Python floats.
        return [
            {"decision": str(name), "score": 1 / (rank + 1)}
            for rank, name in enumerate(decisions)
        ]

    def _semantic_classify(self, query_results: list[dict]) -> tuple[str, list[float]]:
        """Pick the decision with the highest summed score.

        Returns the winning decision name (``str(None)`` when there are no
        results) together with the individual scores of that decision.
        """
        scores_by_class = {}
        for result in query_results:
            scores_by_class.setdefault(result["decision"], []).append(
                result["score"]
            )

        # Total score per decision decides the winner.
        total_scores = {
            decision: sum(scores) for decision, scores in scores_by_class.items()
        }
        top_class = max(total_scores, key=lambda x: total_scores[x], default=None)

        return str(top_class), scores_by_class.get(top_class, [])

    def _pass_threshold(self, scores: list[float], threshold: float) -> bool:
        """Return True when the best score strictly exceeds ``threshold``."""
        return bool(scores) and max(scores) > threshold


class DecisionLayer:
index = None
categories = None
Expand Down Expand Up @@ -217,6 +122,7 @@ def __init__(
):
self.encoder = encoder
self.sparse_encoder = BM25Retriever()
self.alpha = alpha
# decide on default threshold based on encoder
if isinstance(encoder, OpenAIRetriever):
self.score_threshold = 0.82
Expand Down
Empty file.
18 changes: 0 additions & 18 deletions semantic_router/matchers/base.py

This file was deleted.

1 change: 0 additions & 1 deletion semantic_router/matchers/ranker_only.py

This file was deleted.

59 changes: 0 additions & 59 deletions semantic_router/matchers/two_stage.py

This file was deleted.

0 comments on commit 605fb0e

Please sign in to comment.