-
Notifications
You must be signed in to change notification settings - Fork 0
/
arima_weather.py
108 lines (91 loc) · 3.32 KB
/
arima_weather.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import itertools as iter
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as stats
from pandas.plotting import autocorrelation_plot
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
# Silence all Python warnings for the remainder of the script.
warnings.filterwarnings(action="ignore")

# Pre-processing of the data
# The first line of the CSV is skipped and no header row is parsed, so the
# columns of `df_raw` are addressed by integer position.
df_raw = pd.read_csv(
    'assets/hourly_load&weather_data.csv',
    skiprows=1,
    header=None,
)
df_raw_array = df_raw.values  # the same table as a plain numpy array

# Earlier experiment, kept for reference (column 2, scaled by 100):
# y_train = df_raw[2]/100
# y_train = y_train[0:328]

# Series used for both fitting and evaluation: column 1 divided by 100000.
load_column = df_raw[1]
y_test = load_column / 100000
# y_test = y_test[31925:]
# For daily data
# for i in range(0, len(df_raw_array)):
# if (i%24) == 0:
# y_test.append(df_raw[2].iloc[i:i+24].sum())
# y_test = np.array(y_test)
# print("y_test: ",y_test.shape,"\n", y_test, "\n")
# #For finding the value of d
# result = adfuller(y_test)
# print('ADF Statistic: %f' % result[0])
# print('p-value: %f' % result[1])
# # p-value < 0.05, so the series is already stationary and no differencing is needed: d = 0
#
#
# #For finding the value of p
# autocorrelation_plot(y_test)
# plt.show()
# fig = plt.figure(figsize=(12,8))
# ax1 = fig.add_subplot(211)
# fig = plot_acf(y_test.iloc[13:],lags=40,ax=ax1)
# ax2 = fig.add_subplot(212)
# fig = plot_pacf(y_test.iloc[13:],lags=40,ax=ax2)
#
# # For finding the best set of values by using a brute-force approach
# p = d = q = range(0, 2)
#
# # every (p, d, q) combination to try
# pdq = list(iter.product(p, d, q))
#
# i = 0
# AIC = []
# ARIMA_model = []
# for param in pdq:
# i += 1
# model = stats.tsa.arima.ARIMA(y_test, order=param,
# enforce_stationarity=False, enforce_invertibility=False)
#
# results = model.fit()
#
# print('ARIMA: ', param, '\nAIC:',results.aic,'\n')
# AIC.append(results.aic)
# ARIMA_model.append([param])
#
# print('The smallest AIC is {} for model ARIMA{}'.format(min(AIC), ARIMA_model[AIC.index(min(AIC))][0],
# ARIMA_model[AIC.index(min(AIC))][0]))
# ARIMA model
# Fit an ARIMA(1, 0, 0) model on the first TRAIN_SIZE observations of
# `y_test`, forecast the window that immediately follows, and report error
# metrics for that window.
TRAIN_SIZE = 354     # number of leading observations used for fitting
FORECAST_END = 363   # last position (inclusive) that is forecast
LOAD_SCALE = 100000  # `y_test` holds the raw column divided by this factor

model = stats.tsa.arima.ARIMA(
    y_test.iloc[:TRAIN_SIZE],
    order=(1, 0, 0),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
results = model.fit()
print(results.summary())

# `start` is the first position after the fitted sample, so every predicted
# value is an out-of-sample forecast.
y_pred = results.predict(start=TRAIN_SIZE, end=FORECAST_END, dynamic=True)
# print(y_pred)

# Compare against exactly the forecast window. Taking everything from
# TRAIN_SIZE to the end of the series would give a different length than
# `y_pred` whenever the data extends past FORECAST_END, and the metric
# functions reject inputs of unequal length.
y_actual = y_test.iloc[TRAIN_SIZE:FORECAST_END + 1]
# print(y_actual)

# MSE/RMSE are reported in the original (unscaled) units.
mse = mean_squared_error(y_actual * LOAD_SCALE, y_pred * LOAD_SCALE)
print("MSE: ", mse)
# RMSE is derived from the MSE directly: the `squared=False` keyword of
# mean_squared_error is not available in recent scikit-learn releases.
print('RMSE:', np.sqrt(mse))
# R-squared and MAPE are unaffected by the scaling factor, so the scaled
# values are used as they are.
print('R-squared:', r2_score(y_actual, y_pred))
print('MAPE:', np.mean(np.abs(y_actual - y_pred) / y_actual) * 100, '\n')
# Plotting the results
# Convert forecast and matching actuals back to the original units. The
# actuals are limited to as many points as were forecast so that the plot and
# the two output files cover the same observations.
predicted_test_result = y_pred * 100000
actual_test_result = y_test.iloc[354:354 + len(y_pred)] * 100000

# Make sure the output directory exists before anything is written to it;
# otherwise savefig/savetxt fail with FileNotFoundError on a fresh checkout.
output_dir = 'results/ARIMA_weather'
os.makedirs(output_dir, exist_ok=True)

fig = plt.figure(figsize=(60, 8))
plt.plot(actual_test_result, label='Actual')
plt.plot(predicted_test_result, label='Predicted')
plt.legend(loc='upper right')
plt.title("ARIMA", fontsize=14)
plt.xlabel('Hour')
plt.ylabel('Electricity load')

# Save before showing: plt.show() can block until the window is closed, and
# saving first guarantees the image is written from the fully drawn figure.
fig.savefig(f'{output_dir}/final_output.jpg', bbox_inches='tight')
plt.show()

# Storing the results: forecasts go to 'predicted_values.txt' and the
# corresponding actual values to 'test_values.txt'.
np.savetxt(f'{output_dir}/predicted_values.txt', predicted_test_result)
np.savetxt(f'{output_dir}/test_values.txt', actual_test_result)