-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from Clarifai/add-ragas
[DEVX-385] - Add ragas
- Loading branch information
Showing
9 changed files
with
216 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
clarifai_model_utils/llm_eval/evaluator/harness_eval/python_templates/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from .ragas_eval import RAGAS | ||
|
||
# Registry of Python-defined evaluation templates, keyed by template name.
PYTHON_TEMPLATES = {"ragas": RAGAS}

# Names of every registered Python template.
ALL_PYTHON_TEMPLATES = list(PYTHON_TEMPLATES)
8 changes: 8 additions & 0 deletions
8
clarifai_model_utils/llm_eval/evaluator/harness_eval/python_templates/base.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from abc import ABC, abstractmethod | ||
|
||
|
||
class _BasePythonTemplate(ABC):
    """Abstract base for evaluation templates that are defined in Python.

    Subclasses must implement `to_harness_dict_config`.
    """

    @abstractmethod
    def to_harness_dict_config(self) -> dict:
        """Convert the current config to a Harness Eval TaskConfig dictionary."""
127 changes: 127 additions & 0 deletions
127
clarifai_model_utils/llm_eval/evaluator/harness_eval/python_templates/ragas_eval.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import logging | ||
import math | ||
from dataclasses import asdict, dataclass, field | ||
from typing import Dict, List, Optional | ||
|
||
from datasets import Dataset as HFDataset | ||
from langchain_community.embeddings import ClarifaiEmbeddings | ||
from langchain_community.llms import Clarifai | ||
from lm_eval.api.task import TaskConfig | ||
from ragas import evaluate | ||
from ragas.llms import LangchainLLMWrapper | ||
from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness | ||
|
||
from ....constant import BGE_BASE_EMBED_MODEL | ||
from .base import _BasePythonTemplate | ||
|
||
# Silence the third-party "ragas" logger so its output does not clutter
# evaluation runs.
logger = logging.getLogger("ragas")
logger.disabled = True
|
||
|
||
@dataclass
class RAGAS(_BasePythonTemplate):
    """Harness Eval task template that scores RAG output with RAGAS metrics.

    `__post_init__` fills `config` with the fixed "ragas" task settings and
    registers `process_results_func` as the per-row scoring callback.

    Attributes:
        langchain_llm_kwargs: Keyword arguments used to build the `Clarifai`
            judge LLM; its optional "pat" and "token" entries are also passed
            to the default embedder.
        langchain_llm: Judge LLM. Built from `langchain_llm_kwargs` on first
            use when left as None.
        has_ground_truth: When truthy, `context_precision` and
            `context_recall` are evaluated in addition to `faithfulness` and
            `answer_relevancy`.
        embedder: Embedding model handed to RAGAS. Built from
            `BGE_BASE_EMBED_MODEL` on first use when left as None.
        config: Harness `TaskConfig` that `__post_init__` populates.
    """

    langchain_llm_kwargs: dict = field(default_factory=dict)
    langchain_llm: Optional[Clarifai] = None
    has_ground_truth: Optional[bool] = True
    embedder: Optional[ClarifaiEmbeddings] = None
    config: TaskConfig = field(default_factory=TaskConfig)

    @staticmethod
    def _mean_metric(name: str) -> dict:
        """Build a metric_list entry: mean-aggregated, higher is better."""
        return {
            "metric": name,
            "aggregation": "mean",
            "higher_is_better": True,
        }

    def __post_init__(self) -> None:
        """Populate `config` and select the RAGAS metrics to compute."""
        self.config.task = "ragas"
        self.config.group = "ragas"
        self.config.dataset_path = "csv"
        self.config.dataset_name = None
        self.config.output_type = "generate_until"
        self.config.validation_split = "validation"
        self.config.doc_to_text = "{{question}}"
        self.config.doc_to_target = ""
        self.config.repeats = 1
        self.config.num_fewshot = 0

        self.config.metric_list = [
            self._mean_metric("faithfulness"),
            self._mean_metric("answer_relevancy"),
        ]
        self.ragas_metrics = [faithfulness, answer_relevancy]

        # These two metrics are only enabled when the dataset is declared to
        # carry a ground truth.
        if self.has_ground_truth:
            self.config.metric_list.extend([
                self._mean_metric("context_precision"),
                self._mean_metric("context_recall"),
            ])
            self.ragas_metrics.extend([context_precision, context_recall])

        self.config.process_results = self.process_results_func

    def process_results_func(self, doc: dict, results: List[List]) -> Dict[str, float]:
        """Compute RAGAS metrics for one row of the dataset.

        Args:
            doc (dict): A row of the dataset. It must include `question` and
                optionally `ground_truth`; a missing `ground_truth` is
                replaced with an empty string.
            results (List[List]): A non-empty list whose first item is
                `[context, answer]` as produced by the RAG workflow.

        Example:
            >>> doc = dict(question="What is Clarifai?")
            >>> rag_results = [["Context: Long document about Clarifai...", "Clarifai is the leading Full Stack AI, LLM, and computer vision production platform..."]]
            >>> scores = ragas_eval_instance.process_results_func(doc, rag_results)
            >>> print(scores)
            {'faithfulness': 0.999, 'answer_relevancy': 0.99}

        Returns:
            Dict[str, float]: RAGAS scores keyed by metric name, with NaN
            scores replaced by 0.

        Raises:
            AssertionError: If `results` is not a non-empty list whose first
                item has at least two elements.
            KeyError: If `doc` has no `question` entry.
        """
        # `len(results) > 0` guards the `results[0]` lookup so an empty list
        # reports the intended message instead of raising IndexError.
        assert (isinstance(results, list) and len(results) > 0 and
                len(results[0]) > 1), "results must be a list of [context, answer]"

        # Build the clients lazily and only once; instances supplied by the
        # caller are kept as they are.
        if self.embedder is None:
            self.embedder = ClarifaiEmbeddings(
                model_url=BGE_BASE_EMBED_MODEL,
                pat=self.langchain_llm_kwargs.get("pat"),
                token=self.langchain_llm_kwargs.get("token"),
            )
        if self.langchain_llm is None:
            self.langchain_llm = Clarifai(**self.langchain_llm_kwargs)

        # NOTE: any context stored in the dataset is discarded; only the
        # context and answer returned by the RAG workflow are evaluated.
        context, answer = results[0][0], results[0][1]

        dataset = HFDataset.from_dict({
            "question": [doc["question"]],
            "answer": [answer],
            "contexts": [[context]],
            "ground_truth": [doc.get("ground_truth", "")],
        })

        ragas_results = evaluate(
            dataset=dataset,
            llm=LangchainLLMWrapper(self.langchain_llm),
            embeddings=self.embedder,
            metrics=self.ragas_metrics,
        )

        # FIXME: NaN scores are replaced with 0.
        return {name: 0. if math.isnan(score) else score for name, score in ragas_results.items()}

    def to_harness_dict_config(self) -> dict:
        """Return `config` converted to a plain dictionary via `asdict`."""
        return asdict(self.config)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import os | ||
|
||
import pandas as pd | ||
|
||
from clarifai.client import Workflow | ||
from clarifai_model_utils import ClarifaiEvaluator | ||
from clarifai_model_utils.llm_eval.constant import JUDGE_LLMS | ||
|
||
# Set the Clarifai personal access token (fill in your own PAT here).
os.environ["CLARIFAI_PAT"] = ""

# Load the Clarifai RAG workflow; replace the `...` placeholder with the
# workflow URL.
wf = Workflow(url=...)

evaluator = ClarifaiEvaluator(predictor=wf, is_rag_workflow=True)

# Create a dummy dataset; each row only carries a `question` column.
df = pd.DataFrame([
    {"question": "What is WC 2022"},
    {"question": "Who won the title?"},
])

# Run the RAGAS evaluation without uploading the results.
out = evaluator.evaluate(
    template="ragas",
    upload=False,
    judge_llm_url=JUDGE_LLMS.DBRX_INSTRUCT,  # use databricks/DBRX-Instruct in RAGAS
    dataset=df,
)

print(out.df_to_pandas())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters