Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Legal Opinion Sentiment Classification scenario #3286

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions src/helm/benchmark/run_specs/enterprise_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
from helm.benchmark.scenarios.scenario import ScenarioSpec


# Finance


@run_spec_function("gold_commodity_news")
def get_news_headline_spec(category: str) -> RunSpec:
from helm.benchmark.scenarios.gold_commodity_news_scenario import GoldCommodityNewsScenario
Expand All @@ -44,7 +41,7 @@ def get_news_headline_spec(category: str) -> RunSpec:


@run_spec_function("legal_contract_summarization")
def get_legal_contract_spec() -> RunSpec:
def get_legal_contract_summarization_spec() -> RunSpec:
scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.legal_contract_summarization_scenario.LegalContractSummarizationScenario",
args={},
Expand All @@ -67,6 +64,28 @@ def get_legal_contract_spec() -> RunSpec:
)


@run_spec_function("legal_opinion_sentiment_classification")
def get_legal_opinion_sentiment_classification_spec() -> RunSpec:
    """Build the run spec for the legal opinion sentiment classification scenario.

    The scenario is adapted as a generation task: the model is shown the
    instructions followed by the input text and is expected to emit a single
    label after the "Label" output noun.

    Returns:
        A RunSpec named "legal_opinion_sentiment_classification" that combines
        exact-match metrics with classification metrics.
    """
    scenario_spec = ScenarioSpec(
        class_name="helm.benchmark.scenarios.legal_opinion_sentiment_classification_scenario.LegalOpinionSentimentClassificationScenario",  # noqa: E501
        # The scenario takes no constructor arguments; pass an explicit empty
        # dict, matching the other run specs in this file.
        args={},
    )

    instructions = (
        "Classify the sentences into one of the 3 sentiment categories. "
        "Possible labels: positive, neutral, negative."
    )
    adapter_spec = get_generation_adapter_spec(
        instructions=instructions,
        output_noun="Label",
    )

    return RunSpec(
        name="legal_opinion_sentiment_classification",
        scenario_spec=scenario_spec,
        adapter_spec=adapter_spec,
        # TODO: Switch to using weighted F1
        metric_specs=get_exact_match_metric_specs() + get_classification_metric_specs(),
        groups=["legal_opinion_sentiment_classification"],
    )


@run_spec_function("casehold")
def get_casehold_spec() -> RunSpec:
scenario_spec = ScenarioSpec(class_name="helm.benchmark.scenarios.casehold_scenario.CaseHOLDScenario", args={})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import os
from typing import List

import pandas as pd

from helm.benchmark.scenarios.scenario import (
Scenario,
Instance,
Reference,
TRAIN_SPLIT,
TEST_SPLIT,
CORRECT_TAG,
Input,
Output,
)
from helm.common.general import ensure_file_downloaded, ensure_directory_exists


class LegalOpinionSentimentClassificationScenario(Scenario):
    """
    Legal opinion sentiment classification, based on the paper
    Effective Approach to Develop a Sentiment Annotator For Legal Domain in a Low Resource Setting
    [(Ratnayaka et al., 2020)](https://arxiv.org/pdf/2011.00318.pdf).

    Every instance carries one piece of text as its input and exactly one
    correct reference whose text is an entry of SENTIMENT_CLASSES.

    Example prompt:
        Classify the sentences into one of the 3 sentiment categories. Possible labels: positive, neutral, negative.
        {Sentence}
        Label: {positive/neutral/negative}
    """

    name = "legal_opinion"
    description = "Predicting the sentiment of the legal text in the positive, negative, or neutral."
    tags = ["classification", "sentiment analysis", "legal"]

    # The integer stored in a spreadsheet's label column is used as an index
    # into this list.
    # NOTE(review): the index -> label order cannot be verified from this file
    # alone; confirm it against the dataset's label encoding.
    SENTIMENT_CLASSES = ["positive", "negative", "neutral"]

    # Download location of the spreadsheet for each split.
    SPLIT_TO_URL = {
        TRAIN_SPLIT: "https://osf.io/download/hfn62/",
        TEST_SPLIT: "https://osf.io/download/q4adh/",
    }

    def create_instances(self, df: pd.DataFrame, split: str) -> List[Instance]:
        """Convert each row of ``df`` into an Instance belonging to ``split``.

        Args:
            df: Table holding one example per row, with a text column and an
                integer-convertible label column.
            split: Either TRAIN_SPLIT or TEST_SPLIT; selects which column
                names are read and is stored on every produced instance.

        Returns:
            One Instance per row, in row order.
        """
        assert split in [TRAIN_SPLIT, TEST_SPLIT]

        # The two spreadsheets do not share column names.
        if split == TRAIN_SPLIT:
            text_column, class_column = "Phrase", "Label"
        else:
            text_column, class_column = "sentence", "label"

        class_names = LegalOpinionSentimentClassificationScenario.SENTIMENT_CLASSES
        built: List[Instance] = []
        for record in df.itertuples():
            text = getattr(record, text_column)
            sentiment = class_names[int(getattr(record, class_column))]
            gold = Reference(Output(text=sentiment), tags=[CORRECT_TAG])
            built.append(Instance(input=Input(text=text), references=[gold], split=split))
        return built

    def get_instances(self, output_path: str) -> List[Instance]:
        """Download the spreadsheet for every split and build all instances.

        Args:
            output_path: Directory under which a ``data`` subdirectory is
                created to hold the downloaded files.

        Returns:
            The instances of every split in SPLIT_TO_URL, concatenated in the
            mapping's iteration order.
        """
        self.data_dir = os.path.join(output_path, "data")
        ensure_directory_exists(self.data_dir)

        collected: List[Instance] = []
        for split, url in LegalOpinionSentimentClassificationScenario.SPLIT_TO_URL.items():
            spreadsheet_path = os.path.join(self.data_dir, f"{split.lower()}.xlsx")
            ensure_file_downloaded(source_url=url, target_path=spreadsheet_path)
            table = pd.read_excel(spreadsheet_path)
            collected += self.create_instances(table, split)
        return collected
18 changes: 18 additions & 0 deletions src/helm/benchmark/static/schema_enterprise.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ run_groups:
category: All scenarios
subgroups:
- legal_contract_summarization
- legal_opinion_sentiment_classification
- casehold

- name: climate_scenarios
display_name: Climate Scenarios
Expand Down Expand Up @@ -182,6 +184,22 @@ run_groups:
when: before 2021
language: English

# Run group for the legal opinion sentiment classification scenario; its name
# is the group listed by the run spec of the same name. The headline metric is
# quasi_exact_match, reported on the test split.
- name: legal_opinion_sentiment_classification
display_name: Legal Opinion Sentiment Classification
description: A legal opinion sentiment classification task based on the paper Effective Approach to Develop a Sentiment Annotator For Legal Domain in a Low Resource Setting [(Ratnayaka et al., 2020)](https://arxiv.org/pdf/2011.00318.pdf).
metric_groups:
- accuracy
- general_information
environment:
main_name: quasi_exact_match
main_split: test
taxonomy:
task: sentiment analysis
what: United States legal opinion texts
who: United States courts
when: Before 2020
language: English

- name: sumosum
display_name: SUMO Web Claims Summarization
description: A summarization benchmark based on the climate subset of the SUMO dataset ([Mishra et al., 2020](https://aclanthology.org/2020.wnut-1.12/)).
Expand Down
Loading