-
Notifications
You must be signed in to change notification settings - Fork 0
/
TestingModels.py
160 lines (121 loc) · 4.4 KB
/
TestingModels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""
Script used to train and evaluate models with k-fold cross-validation.
To be tested with this script, a model needs to implement:
__init__(input_size)
display_models()
train(data, protected, labels, batch_size)
test(data, protected, labels, batch_size)
confusion_matrix(data, protected, labels, batch_size)
create_figs(epoch, folder)
result_graph_info() * static
model_types:
Basic model - 0
CAN - 1
Jack Amend
3/2/2020
"""
import csv
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from DataLoader import get_data_labels, get_balanced_data
from BasicModel import BasicModel
from CAN import CAN
from pathlib import Path
from datetime import datetime
def fold_bars(values, title, path):
    """Save a bar chart of ``values`` to ``path``.

    One bar is drawn per entry of ``values``; each bar is labelled with its
    position in the sequence ("0", "1", ...). The figure is closed once it
    has been written so repeated calls do not accumulate open figures.
    """
    plt.figure(figsize=(10, 15))
    plt.title(title)
    # Positions as strings so matplotlib treats the x axis as categorical.
    bar_labels = list(map(str, range(len(values))))
    plt.bar(bar_labels, values)
    plt.savefig(path)
    plt.close()
def write_to_file(file_name, information):
    """Append one row to a CSV file, creating the file if it does not exist.

    Args:
        file_name: Path of the CSV file to append to.
        information: Iterable of values written as a single CSV row.
    """
    # newline='' hands line-ending control to the csv module. Without it,
    # platforms that translate '\n' on write (e.g. Windows) turn the
    # writer's '\r\n' into '\r\r\n', which shows up as blank rows.
    with open(file_name, 'a', newline='') as csv_file:
        csv.writer(csv_file).writerow(information)
def record_test_stats(model, data, protected, labels, test_idx, batch_size, epoch,
                      fold_fig_folder, start, start_string, i, fold_model_folder,
                      data_type, fold_time_str):
    """Evaluate ``model`` on the test rows and append the results to the overview CSV.

    The rows selected by ``test_idx`` are taken from ``data``, ``protected``
    and ``labels`` and passed to ``model.test`` and ``model.confusion_matrix``.
    ``model.create_figs`` is then called for this epoch, and a single row is
    appended to ``fold_time_str + 'overview.csv'`` containing, in order:
    start time string, current time string, elapsed time, ``i``, ``epoch``,
    the unpacked test statistics, the unpacked confusion matrix,
    ``batch_size``, ``fold_fig_folder``, ``fold_model_folder`` and
    ``data_type``.
    """
    # Held-out slice, in the argument order the model methods expect.
    held_out = (data[test_idx], protected[test_idx], labels[test_idx])

    stats = model.test(*held_out, batch_size)
    conf_mat = model.confusion_matrix(*held_out, batch_size)

    # Let the model write its figures for this epoch.
    model.create_figs(epoch, fold_fig_folder)

    # Timestamp the record and measure time elapsed since the run started.
    now = datetime.now()
    row = [
        start_string,
        now.strftime("%d/%m/%Y %H:%M:%S"),
        now - start,
        i,
        epoch,
        *stats,
        *conf_mat,
        batch_size,
        fold_fig_folder,
        fold_model_folder,
        data_type,
    ]
    write_to_file(fold_time_str + 'overview.csv', row)
# Parameters for the script
model_type = 1  # 0 builds a BasicModel, 1 builds a CAN
data_type = 0  # 0 for original data, 1 for balanced gender
verbose = True
batch_size = 128
num_folds = 5
testing_inv = 10  # evaluate on the test fold every `testing_inv` epochs
epochs = 201

# Base output folder, with a sub-folder per model type
fig_folder = 'model_testing/'
if model_type == 0:
    fig_folder += 'basic/'
elif model_type == 1:
    fig_folder += 'can/'
else:
    print("Saving graphs to home directory")

# Record datetime; every output of this run lives under one timestamped folder
start = datetime.now()
start_string = start.strftime("%d/%m/%Y %H:%M:%S")
fold_time_str = fig_folder + 'epochs{}_'.format(epochs) + start.strftime("%d-%m-%Y_%H-%M-%S") + '/'
csv_file_str = fold_time_str
model_folder = fold_time_str + 'models/'
fig_folder = fold_time_str + 'graphs/'

# Loading data
if verbose:
    print("Loading data.")
if data_type == 0:
    data, labels, protected = get_data_labels("adult.data")
else:
    data, labels, protected = get_balanced_data("adult.data")

# Testing the selected model with k-fold cross-validation
if verbose:
    print("Creating model.")
input_size = data.shape[1]
kf = KFold(n_splits=num_folds)
# NOTE(review): nothing in this script ever adds values to test_results, so
# `incr` below is always 0 and the final fold_bars loop never runs. The
# intended layout of the collected statistics is not visible here - confirm
# before wiring the per-fold results into this array.
test_results = np.array([])

for i, (train_idx, test_idx) in enumerate(kf.split(data)):
    # A fresh model per fold so no weights leak between folds
    if model_type == 0:
        model = BasicModel(input_size)
    elif model_type == 1:
        model = CAN(input_size)
    else:
        # Fail with a clear message instead of a NameError on `model` below
        raise ValueError("Unsupported model_type: {}".format(model_type))
    if verbose:
        model.display_models()

    # Create folders for fold
    fold_fig_folder = fig_folder + 'fold{}/'.format(i)
    fold_model_folder = model_folder + 'fold{}/'.format(i)
    Path(fold_fig_folder).mkdir(parents=True, exist_ok=True)
    Path(fold_model_folder).mkdir(parents=True, exist_ok=True)

    # Get training data
    train_data = data[train_idx]
    train_prtd = protected[train_idx]
    train_label = labels[train_idx]

    for epoch in range(epochs):
        print("Epoch: {}".format(epoch))
        # Train model
        model.train(train_data, train_prtd, train_label, batch_size)
        # Testing at defined interval, and always after the final epoch
        if epoch % testing_inv == 0 or epoch == epochs-1:
            record_test_stats(model, data, protected, labels, test_idx, batch_size, epoch,
                              fold_fig_folder, start, start_string, i, fold_model_folder,
                              data_type, fold_time_str)

# Per-statistic bar charts across folds (see NOTE on test_results above)
result_names, fig_files = model.result_graph_info()
incr = len(test_results) // epochs
# Separate index name so the fold counter `i` is not reused
for stat_idx in range(incr):
    graph_vals = test_results[stat_idx::incr]
    fold_bars(graph_vals, result_names[stat_idx], fig_folder + fig_files[stat_idx])