eval_event_allentune.py
import argparse
import json
import os
import pathlib
import shutil
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict

import pandas as pd
from tabulate import tabulate

from eval_utils import (read_coref_file, depparse_base, allpairs_base, get_openie_predictor,
                        get_srl_predictor, allenlp_base_relations, ie_eval_event, ie_span_eval,
                        ie_errors)
"""
Usage:
python eval_event_allentune.py --pred_path ./predictions/cofie-t/ --gold_path ./cofie-t-gold/
python eval_event_allentune.py --pred_path ./predictions/cofie-t/ --gold_path ./cofie-t-gold/ --test_data --test_index 17
"""
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--pred_path',
                        type=Path,
                        help='dataset folder containing the predicted files',
                        default="",
                        required=True)
    parser.add_argument('--gold_path',
                        type=Path,
                        help='folder containing the gold data',
                        default="",
                        required=True)
    parser.add_argument('--test_data',
                        action='store_true')
    parser.add_argument('--test_index',
                        type=int,
                        default=0)
    args = parser.parse_args()
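
    # Pick the gold split: test-gold.tsv when --test_data is set, dev-gold.tsv otherwise.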
    if args.test_data:
        gold_path = pathlib.Path(args.gold_path) / 'test-gold.tsv'
    else:
        gold_path = pathlib.Path(args.gold_path) / 'dev-gold.tsv'
    pred_dir = pathlib.Path(args.pred_path)
    coref = None
    GOLD_PATH = pathlib.Path(gold_path)
    PREDS_PATH = pathlib.Path(pred_dir)
    golddf = pd.read_csv(GOLD_PATH, sep="\t", header=None, names=["id", "text", "arg0", "trigger", "arg1"])

    best_run_index = 0
    best_run_score = 0
    prediction_dict = {}
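
    # Collect predictions from every allentune trial directory ("run_<index>_...")
    # under pred_path; each is expected to contain a pred.tsv with the run's output.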
    for file in os.listdir(str(pred_dir)):
        trail_strat_str = "run_"
        if args.test_data:
            # restrict to the single run selected with --test_index
            trail_strat_str = trail_strat_str + str(args.test_index)
        if file.startswith(trail_strat_str):
            run_pred_dir = pred_dir / file / "pred.tsv"
            PREDS_PATH = pathlib.Path(run_pred_dir)
            # read predictions, place in dictionary keyed by run index
            run_index = file[4:file.index('_', 5)]
            try:
                predf = pd.read_csv(PREDS_PATH, sep="\t",
                                    names=["id", "text", "arg0", "trigger", "arg1",
                                           "arg0_logit", "trigger_logit", "arg1_logit",
                                           "arg0_softmax", "trigger_softmax", "arg1_softmax"])
            except Exception:
                # skip runs whose pred.tsv is missing or unreadable
                continue
            if len(predf) > 1000:
                continue
            prediction_dict["events" + '_run_' + run_index] = predf[["id", "arg0", "trigger", "arg1"]]
    # get results: score every run over a grid of evaluation settings
    res_list = []
    res_latex_list = []
    for k, v in prediction_dict.items():
        print(k)
        trial_score = 0.0
        print("****")
        if not len(v):
            print(k, " -- NO PREDICTIONS -- ")
            continue
        # only try non-collapsed labels for relations that have it (i.e. ours and gold)
        collapse_opt = [False, True]
        for match_metric in ["substring", "exact"]:
            for consider_reverse in [False]:
                for collapse in collapse_opt:
                    th_opts = [1]
                    for th in th_opts:
                        corr_pred, precision, recall, F1 = ie_eval_event(v, golddf, coref=coref, collapse=collapse,
                                                                         match_metric=match_metric, jaccard_thresh=th,
                                                                         consider_reverse=consider_reverse, transivity=False)
                        trial_score += F1
                        res = [k, 100 * round(precision, 3), 100 * round(recall, 3), 100 * round(F1, 3),
                               collapse, match_metric, th, consider_reverse]
                        # if collapse == True and consider_reverse == True:
                        res_latex = [k, match_metric, 100 * round(precision, 3), 100 * round(recall, 3), 100 * round(F1, 3)]
                        res_latex_list.append(res_latex)
                        res_list.append(res)
        if trial_score > best_run_score:
            best_run_score = trial_score
            best_run_index = k
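
    # Print the full results grid; on the dev split, also report the best-scoring run.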
    print(tabulate(res_list, headers=["model", "P", "R", "F1", "collapse", "match_metric", "threshold", "consider_reverse"]))
    print("****")
    if not args.test_data:
        print("best run is " + str(best_run_index))