forked from haugjo/fires
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ofs.py
129 lines (110 loc) · 4.92 KB
/
ofs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
class OFS:
    """Online Feature Selection (OFS) for binary classification.

    Implements the sparse-projection learner proposed in "Online Feature
    Selection and Its Applications" by Wang et al. (2014): a linear model
    is updated one sample at a time and truncated after every update so
    that at most ``n_selected_ftr`` weights stay non-zero.
    """

    def __init__(self, regularization_param, step_size,
                 n_selected_ftr, n_total_ftr):
        """Create an untrained model with an all-zero weight vector.

        :param regularization_param: Regularization strength; it shrinks
            the weights on every step and caps the L2 norm of an updated
            weight vector at ``1 / sqrt(regularization_param)``
        :type regularization_param: float
        :param step_size: Step size for adjusting the weight vector
        :type step_size: float
        :param n_selected_ftr: Number of features that should be selected
        :type n_selected_ftr: int
        :param n_total_ftr: Number of features in the dataset
        :type n_total_ftr: int
        """
        self.regularization_param = regularization_param
        self.step_size = step_size
        self.n_selected_ftr = n_selected_ftr
        self.w = np.zeros(n_total_ftr)

    def train(self, x, y):
        """Update the model with a single training sample.

        :param x: observation with all features
        :type x: np.ndarray
        :param y: label, should be -1 or 1; 0 is treated as -1
        :type y: int
        """
        x = np.asarray(x)
        if y == 0:
            y = -1
        decay = 1 - self.regularization_param * self.step_size
        # NOTE(review): the margin threshold of 1 is kept from the original
        # code, which carried an open question whether it should be 0.
        if np.dot(x, self.w) * y <= 1:
            w_tilde = decay * self.w + self.step_size * y * x
            w_hat = self.__project(w_tilde)
            self.w = self.__truncate(w_hat, self.n_selected_ftr)
        else:
            # Margin satisfied: only apply the regularization shrinkage.
            self.w *= decay

    def __project(self, w_tilde):
        """Rescale ``w_tilde`` so its L2 norm is at most 1/sqrt(lambda)."""
        norm = np.linalg.norm(w_tilde)
        if norm == 0:
            # Nothing to rescale; also avoids a division by zero.
            return w_tilde
        radius = 1 / np.sqrt(self.regularization_param)
        return min(1, radius / norm) * w_tilde

    def __truncate(self, weights_array, B):
        """Return a copy keeping only the ``B`` largest-magnitude weights.

        All other entries are set to zero. Ranking uses the absolute value
        so that strongly negative weights are kept as well.
        """
        w = np.zeros(len(weights_array))
        if B <= 0:
            return w
        # Stable sort on the negated magnitudes: descending order, ties
        # resolved in favour of the lower feature index.
        indices = np.argsort(-np.abs(weights_array), kind="stable")[:B]
        w[indices] = weights_array[indices]
        return w

    def get_feature_indices(self):
        """Return the indices of the selected features.

        :return: up to ``n_selected_ftr`` feature indices, ordered by
            decreasing absolute weight
        :rtype: np.ndarray
        """
        order = np.argsort(-np.abs(self.w), kind="stable")
        return order[:max(self.n_selected_ftr, 0)]
class MC_OFS:
    """Multiclass extension of the OFS algorithm by Wang et al.

    The idea is a work in progress and is motivated by multiclass
    perceptrons: one truncated weight vector is kept per class, and on a
    misclassification the predicted class is pushed away from the sample
    while the true class is pulled towards it.
    """

    def __init__(self, regularization_param, step_size, n_selected_ftr,
                 n_total_ftr, n_classes):
        """Create an untrained model with one zero weight row per class.

        :param regularization_param: Regularization strength; it shrinks
            the weights on every step and caps the L2 norm of an updated
            weight row at ``1 / sqrt(regularization_param)``
        :type regularization_param: float
        :param step_size: Step size for learning new input
        :type step_size: float
        :param n_selected_ftr: number of features that shall be selected
        :type n_selected_ftr: int
        :param n_total_ftr: total number of features in the dataset
        :type n_total_ftr: int
        :param n_classes: number of classes within the dataset
        :type n_classes: int
        """
        self.regularization_param = regularization_param
        self.step_size = step_size
        self.n_selected_ftr = n_selected_ftr
        self.w = np.zeros((n_classes, n_total_ftr))

    def train(self, x, y):
        """Update the model with a single training sample.

        Class labels are used directly as row indices into the weight
        matrix, so they have to be integers starting at 0.

        :param x: observation with all features
        :type x: np.ndarray
        :param y: class label
        :type y: int
        """
        x = np.asarray(x)
        decay = 1 - self.regularization_param * self.step_size
        # Predicted class = first row with the highest score.
        prediction = int(np.argmax(np.dot(self.w, x)))
        if y != prediction:
            # Reduce the wrongly predicted class.
            w_tilde = decay * self.w[prediction] - self.step_size * x
            self.w[prediction] = self.__truncate(
                self.__project(w_tilde), self.n_selected_ftr)
            # Increase the correct class.
            w_tilde = decay * self.w[y] + self.step_size * x
            self.w[y] = self.__truncate(
                self.__project(w_tilde), self.n_selected_ftr)
        else:
            # Correct prediction: only shrink the weights of that class.
            self.w[y] *= decay

    def __project(self, w_tilde):
        """Rescale ``w_tilde`` so its L2 norm is at most 1/sqrt(lambda)."""
        norm = np.linalg.norm(w_tilde)
        if norm == 0:
            # Nothing to rescale; also avoids a division by zero.
            return w_tilde
        radius = 1 / np.sqrt(self.regularization_param)
        return min(1, radius / norm) * w_tilde

    def __truncate(self, weights_array, B):
        """Return a copy keeping only the ``B`` largest-magnitude weights.

        All other entries are set to zero. Ranking uses the absolute value
        so that strongly negative weights are kept as well.
        """
        w = np.zeros(len(weights_array))
        if B <= 0:
            return w
        # Stable sort on the negated magnitudes: descending order, ties
        # resolved in favour of the lower feature index.
        indices = np.argsort(-np.abs(weights_array), kind="stable")[:B]
        w[indices] = weights_array[indices]
        return w

    def get_feature_indices(self, return_weights=False):
        """Return the indices of the selected features.

        Features are ranked by the magnitude of their weight averaged over
        all classes.
        NOTE(review): averaging signed weights lets opposite-signed class
        weights cancel out; confirm that this aggregation is intended.

        :param return_weights: whether the class-averaged weights shall be
            returned as well
        :type return_weights: bool
        :return: array with up to ``n_selected_ftr`` feature indices, or a
            tuple ``(feature_indices, w_mean)`` if ``return_weights`` is set
        :rtype: np.ndarray or tuple
        """
        w_mean = np.mean(self.w, axis=0)
        order = np.argsort(-np.abs(w_mean), kind="stable")
        feature_indices = order[:max(self.n_selected_ftr, 0)]
        if return_weights:
            return (feature_indices, w_mean)
        return feature_indices