-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
82 lines (67 loc) · 2.85 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import torch
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from utils import make_data
from collections import defaultdict
from config import Config
from evaluate import Evaluator
from transformers import AutoTokenizer, AutoModelForCausalLM
def main(config):
    """Run the evaluation over every subject and print the result tables.

    Prepares the data directories on first use, loads the causal language
    model and tokenizer named by ``config.model_name``, runs an ``Evaluator``
    on each subject's test CSV, and prints mean accuracies per method, per
    sub-group and per group, followed by a perplexity score.

    Args:
        config: Settings object providing ``dev_dir``, ``test_dir``,
            ``model_name``, ``sub_categories`` (subject -> sequence whose
            first item names the subject's sub-group) and ``categories``
            (group -> iterable of sub-group names).

    Raises:
        ValueError: If ``config.sub_categories`` is empty, since there would
            be nothing to evaluate and no evaluator to ask for perplexity.
    """
    heavy_rule = "=================================================="
    light_rule = "--------------------------------------------------"

    # NOTE(review): the existence check uses the literal 'data/' while the
    # directories created come from config -- confirm they live under 'data/'.
    if not os.path.exists('data/'):
        print("Loading data...")
        os.makedirs(config.dev_dir, exist_ok=True)
        os.makedirs(config.test_dir, exist_ok=True)
        make_data()

    sub_categories = config.sub_categories
    categories = config.categories
    if not sub_categories:
        raise ValueError("config.sub_categories is empty; nothing to evaluate")

    # Pick the device first so the model is placed on the same device that is
    # handed to the evaluator (falls back to CPU when CUDA is unavailable).
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = AutoModelForCausalLM.from_pretrained(config.model_name, device_map=device.type)
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)

    print("================= Start evaluation =================")
    total_text, total_label = [], []
    sub_group = defaultdict(list)
    print(f"{'Subject':<30}{'':<15}{'Acc':<5}")
    print(light_rule)
    # Rows under this header are presumably printed by Evaluator.eval();
    # nothing in this function prints per-subject lines.
    for subject, sub_labels in sub_categories.items():
        test_df = pd.read_csv(os.path.join(config.test_dir, subject + '.csv'))
        evaluator = Evaluator(config, subject, test_df, model, tokenizer, device)
        text_acc, _, label_acc = evaluator.eval()
        total_text.append(text_acc)
        total_label.append(label_acc)
        # Only label accuracy feeds the sub-group / group roll-ups.
        sub_group[sub_labels[0]].append(label_acc)

    print(heavy_rule)
    print(f"{'Method':<40}{'Acc':<5}")
    print(light_rule)
    total_text_acc = np.mean(total_text)
    total_label_acc = np.mean(total_label)
    print(f"{'Text match':<40}{total_text_acc:<5.3f}")
    print(f"{'Label match':<40}{total_label_acc:<5.3f}")

    print(heavy_rule)
    print(f"{'Sub-group':<40}{'Acc':<5}")
    print(light_rule)
    for name, accs in sub_group.items():
        sub_acc = np.mean(accs)
        print(f"{name:<40}{sub_acc:<5.3f}")

    print(heavy_rule)
    print(f"{'Group':<40}{'Acc':<5}")
    print(light_rule)
    for group, members in categories.items():
        group_lst = []
        for sub in members:
            # .get avoids inserting empty entries into the defaultdict for
            # sub-groups that had no evaluated subject.
            group_lst.extend(sub_group.get(sub, ()))
        # A group with no evaluated subjects is reported as nan.
        group_acc = np.mean(group_lst) if group_lst else float('nan')
        print(f"{group:<40}{group_acc:<5.3f}")

    print(heavy_rule)
    print(light_rule)
    # NOTE(review): this queries the evaluator built for the last subject;
    # whether the score covers all subjects depends on Evaluator -- confirm.
    ppl_score = evaluator.get_ppl_score()
    print(f"{'Perplexity':<40}{'Score':<5}")
    print(f"{ppl_score:>45.3f}")
    print(heavy_rule)
# Script entry point: construct Config from the path './config.json' and run
# the evaluation. Nothing here executes when the module is imported.
if __name__ == "__main__":
    config = Config('./config.json')
    main(config)