-
Notifications
You must be signed in to change notification settings - Fork 0
/
post_processing.py
123 lines (89 loc) · 4.28 KB
/
post_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from preprocessor import Preprocessor
class PickleStructure:
    """Bundle a fitted model with its run metadata and pickle the bundle.

    Constructing an instance is the whole operation: ``__init__`` fills in the
    attributes, optionally times prediction, and finally passes
    ``self.__dict__`` to ``save_pickle``.
    """

    # Number of prediction passes that are timed and averaged into
    # ``time_to_predict``.
    _TIMING_RUNS = 30

    def __init__(self, horizontal_step, metric_name, model, model_name, preprocessor, training_level, series_name,
                 time_to_fit):
        """Collect the metadata, optionally time prediction, and save everything.

        Args:
            horizontal_step: Stored as-is on the instance.
            metric_name: Second component of the output folder.
            model: The fitted model; stored and used for the timing runs.
            model_name: Model identifier; also selects the GluonTS-specific
                test-set preparation for 'deep-ar', 'deep-state' and 'tft'.
            preprocessor: Object exposing ``sliding_window_size``, ``lags``,
                ``time_series``, ``training_set`` and ``validation_set``.
                Its ``testing_set`` is overwritten for the GluonTS models.
            training_level: Third component of the output folder.
            series_name: First component of the output folder.
            time_to_fit: Fit duration, or ``None`` when no timing was taken.
                It is divided by 30 before being stored - presumably the
                caller passes the total over 30 fits; confirm against caller.
        """
        self.folder = series_name + '/' + metric_name + '/' + training_level + '/'
        self.horizontal_step = horizontal_step
        self.model = model
        self.model_key = model_name + str(preprocessor.sliding_window_size)
        self.model_name = model_name
        self.model_path = self.folder + self.model_key
        self.preprocessor = preprocessor
        self.training_level = training_level
        self.series_name = series_name
        self.metric_name = metric_name
        # Dividing None raised TypeError, which made the `is not None` guard
        # below unreachable for untimed runs; keep None as None instead.
        self.time_to_fit = None if time_to_fit is None else time_to_fit / 30

        if self.model_name in ['deep-ar', 'deep-state', 'tft']:
            from gluonts.dataset.pandas import PandasDataset
            from pandas import DataFrame
            from gluonts.dataset.util import to_pandas

            # Everything after the training and validation portions is the test span.
            test_start = len(self.preprocessor.training_set) + len(self.preprocessor.validation_set)
            ds_test = PandasDataset(DataFrame(self.preprocessor.time_series, columns=['target'])[test_start:])
            true_values = to_pandas(list(ds_test)[0])
            window = int(self.preprocessor.sliding_window_size)
            # One dataset per step, each holding the test series from its start
            # up to (but excluding) position i + window.
            # NOTE(review): predict_data reads `testing_x` for these model
            # names, while this branch fills `testing_set` - confirm which
            # attribute is intended.
            self.preprocessor.testing_set = [
                PandasDataset([true_values[0:i + window]])
                for i in range(len(true_values) - window)
            ]

        if self.time_to_fit is not None:
            import time

            # Timed runs are stored under the bare model name, without the
            # sliding-window suffix used otherwise.
            self.model_key = self.model_name
            self.model_path = self.folder + self.model_key
            start_time = time.time()
            for _ in range(self._TIMING_RUNS):
                predict_data(self.preprocessor, model, self.model_name, self.preprocessor.lags)
            self.time_to_predict = (time.time() - start_time) / self._TIMING_RUNS

        # NOTE(review): saving is treated as unconditional and the timing block
        # as independent of the GluonTS block - confirm the intended nesting.
        save_pickle(self.__dict__)
def save_pickle(df: dict):
    """Pickle ``df`` to ``pickle/<model_path>.pkl``, creating the folder first.

    Args:
        df: Mapping with at least the keys ``'folder'`` and ``'model_path'``
            (both relative to the ``pickle/`` directory). The whole mapping
            is what gets serialised.

    Raises:
        KeyError: If ``'folder'`` or ``'model_path'`` is missing.
    """
    from pickle import dump
    from os import makedirs
    from os.path import dirname

    # dirname() drops whatever follows the last '/', so a folder value ending
    # in '/' has its full path created.
    makedirs(dirname('pickle/' + df['folder']), exist_ok=True)
    # The context manager closes (and thereby flushes) the file even when
    # dump() raises; the bare open() used before leaked the handle.
    with open('pickle/' + df['model_path'] + '.pkl', 'wb') as handle:
        dump(df, handle)
def load_pickle(file_path: str):
    """Load and return the object pickled at ``pickle/<file_path>.pkl``.

    Args:
        file_path: Path relative to the ``pickle/`` directory, without the
            ``.pkl`` extension.

    Returns:
        Whatever object the file contains.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    from pickle import load

    # SECURITY: unpickling can execute arbitrary code; only load files that
    # this project wrote itself.
    # The context manager closes the file; the bare open() used before leaked
    # the handle.
    with open('pickle/' + file_path + '.pkl', 'rb') as handle:
        return load(handle)
def save_model(metric_name: str, model: object, model_name: str, preprocessor: Preprocessor, training_level: str,
               series_name: str, horizontal_step: int = 1, time_to_fit=None):
    """Persist a model and its metadata by constructing a ``PickleStructure``.

    All arguments are forwarded unchanged; the constructed object is not kept
    and nothing is returned.

    Args:
        metric_name: Forwarded as ``metric_name``.
        model: The model object to store.
        model_name: Forwarded as ``model_name``.
        preprocessor: The preprocessor associated with the model.
        training_level: Forwarded as ``training_level``.
        series_name: Forwarded as ``series_name``.
        horizontal_step: Forwarded as ``horizontal_step``; defaults to 1.
        time_to_fit: Forwarded as ``time_to_fit``; defaults to ``None``.
    """
    PickleStructure(
        horizontal_step=horizontal_step,
        metric_name=metric_name,
        model=model,
        model_name=model_name,
        preprocessor=preprocessor,
        training_level=training_level,
        series_name=series_name,
        time_to_fit=time_to_fit,
    )
def measure_models_accuracy(actual: list, predicted: list, accuracy_measure: str = 'mse', **kwargs):
    """Score predictions against the actual values.

    Args:
        actual: Ground-truth values.
        predicted: Predicted values.
        accuracy_measure: Name of the measure; only ``'mse'`` is handled.
        **kwargs: ``weights`` (optional) is passed to scikit-learn as
            ``sample_weight``.

    Returns:
        The mean squared error for ``'mse'``; ``None`` for any other measure.
    """
    from sklearn.metrics import mean_squared_error

    if accuracy_measure != 'mse':
        # No other measure is implemented; callers get None.
        return None
    return mean_squared_error(actual, predicted, sample_weight=kwargs.get('weights'))
def predict_data(data, model, ml_model: str, lags, **kwargs):
    """Run ``model`` on the test data held by ``data``, dispatching on ``ml_model``.

    The model family is recognised by the prefix of ``ml_model``.

    Args:
        data: Object exposing ``testing_set`` (svr/mlp/rf/xgboost/lstm/arima)
            or ``testing_x`` (names starting with 'dee' or 'tft', and da-rnn).
        model: The fitted model to query.
        ml_model: Model name; only its prefix is inspected.
        lags: Column selector applied to ``testing_set`` for the
            svr/mlp/rf/xgboost/lstm families; unused by the others.
        **kwargs: ``arima_forecast`` may be ``'in_sample'`` or
            ``'out_sample'`` for arima models.

    Returns:
        The family-specific prediction result, or ``None`` when ``ml_model``
        matches no known prefix.
    """
    # These families all predict straight from the lag columns.
    if ml_model.startswith(('svr', 'mlp', 'rf', 'xgboost')):
        return model.predict(data.testing_set[:, lags])

    if ml_model.startswith('lstm'):
        lagged = data.testing_set[:, lags]
        # Add a trailing axis of size 1 before predicting, then flatten the output.
        shaped = lagged.reshape((lagged.shape[0], lagged.shape[1], 1))
        return model.predict(shaped).ravel()

    if ml_model.startswith(('dee', 'tft')):
        # First element of each forecast's `mean`, over every test window in order.
        return [
            forecast.mean[0]
            for window in data.testing_x
            for forecast in model.predict(window)
        ]

    if ml_model.startswith('da-rnn'):
        inputs = data.testing_x
        import torch
        # Moves the inputs with .cuda() before calling the model.
        with torch.no_grad():
            output = model(inputs.cuda())
            return list(output.cpu().repeat(1, 2).numpy()[:, -1])

    if ml_model.startswith('arima'):
        series = data.testing_set
        mode = kwargs.get('arima_forecast')
        if not mode:
            # No forecast mode given: predict as many steps as the test set has.
            return model.predict(len(series))
        if mode == 'in_sample':
            return model.predict_in_sample(series)[series]
        if mode == 'out_sample':
            return model.predict(series + 1)[-1]
        # NOTE(review): any other truthy mode yields None - confirm this is
        # intended rather than the len(series) fallback.
        return None

    return None