Skip to content

Commit

Permalink
Merge pull request #731 from NVIDIA/feature/eval-tags-support
Browse files Browse the repository at this point in the history
Add support for tags in the Eval UI.
  • Loading branch information
drazvan authored Sep 4, 2024
2 parents 74b105a + 2758bf0 commit 937c9d4
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 5 deletions.
4 changes: 4 additions & 0 deletions nemoguardrails/eval/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ class InteractionSet(BaseModel):
description="Additional context that can be used when evaluating the compliance for various policies. "
"Can be used in the prompt templates. ",
)
tags: List[str] = Field(
default_factory=list,
description="A list of tags that should be associated with the interactions. Useful for filtering when reporting.",
)

@root_validator(pre=True)
def instantiate_expected_output(cls, values: Any):
Expand Down
58 changes: 53 additions & 5 deletions nemoguardrails/eval/ui/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import streamlit as st
from pandas import DataFrame

from nemoguardrails.eval.models import EvalConfig
from nemoguardrails.eval.models import EvalConfig, EvalOutput
from nemoguardrails.eval.ui.chart_utils import (
plot_as_series,
plot_bar_series,
Expand All @@ -35,9 +35,12 @@
pd.options.mode.chained_assignment = None


def _render_sidebar(output_names: List[str], policy_options: List[str]):
def _render_sidebar(
output_names: List[str], policy_options: List[str], tags: List[str]
):
_output_names = []
_policy_options = []
_tags = []

with st.sidebar:
st.write(
Expand All @@ -64,7 +67,12 @@ def _render_sidebar(output_names: List[str], policy_options: List[str]):
if st.checkbox(policy, True):
_policy_options.append(policy)

return _output_names, _policy_options
with st.sidebar.expander("Tags", expanded=True):
for tag in tags:
if st.checkbox(tag, True, key=f"tag-{tag}"):
_tags.append(tag)

return _output_names, _policy_options, _tags


def _get_compliance_df(
Expand Down Expand Up @@ -116,6 +124,7 @@ def _render_compliance_data(
eval_data: EvalData,
short: bool = False,
):
st.text(f"({len(eval_data.eval_outputs[output_names[0]].results)} interactions)")
st.header("Compliance")
st.markdown(
"""
Expand Down Expand Up @@ -503,13 +512,52 @@ def render_summary(short: bool = False):
st.title("Evaluation Summary")

# Load the evaluation data
eval_data = load_eval_data()
eval_data = load_eval_data().copy()
eval_config = eval_data.eval_config

# Extract the list of tags from the interactions
all_tags = []
for interaction_set in eval_config.interactions:
for tag in interaction_set.tags:
if tag not in all_tags:
all_tags.append(tag)

output_names = list(eval_data.eval_outputs.keys())
policy_options = [policy.id for policy in eval_config.policies]

# Sidebar
output_names, policy_options = _render_sidebar(output_names, policy_options)
output_names, policy_options, tags = _render_sidebar(
output_names, policy_options, all_tags
)

# If all tags are selected, we skip the filtering entirely.
# This way, interactions that have no tags at all are still included.
if len(tags) != len(all_tags):
# Keep only the interaction sets that have at least one of the selected tags
filtered_interaction_ids = []
for interaction_set in eval_config.interactions:
include = False
for tag in tags:
if tag in interaction_set.tags:
include = True
break
if include:
filtered_interaction_ids.append(interaction_set.id)

new_eval_outputs = {}
for output_name in output_names:
eval_output = eval_data.eval_outputs[output_name]
_results = []
_logs = []
for i in range(len(eval_output.results)):
interaction_id = eval_output.results[i].id.split("/")[0]
if interaction_id in filtered_interaction_ids:
_results.append(eval_output.results[i])
_logs.append(eval_output.logs[i])

new_eval_outputs[output_name] = EvalOutput(results=_results, logs=_logs)

eval_data.eval_outputs = new_eval_outputs

# Compliance data
_render_compliance_data(output_names, policy_options, eval_data, short=short)
Expand Down

0 comments on commit 937c9d4

Please sign in to comment.