-
Notifications
You must be signed in to change notification settings - Fork 0
/
LDA.py
100 lines (75 loc) · 2.79 KB
/
LDA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
import scipy.linalg as sc
def mcol(v):
    """Reshape a 1-D numpy array into an explicit (size, 1) column vector."""
    return np.reshape(v, (v.size, 1))
def computeBetweenClassCovarianceMatrix(dataset, labels, SVM=False):
    """Compute the between-class covariance matrix SB for a 3-class problem.

    Parameters
    ----------
    dataset : (D, N) array with one sample per column.
    labels : (N,) array of class labels. Classes are {0, 1, 2}, or
        {-1, 1, 2} when SVM=True (the first class is relabeled -1).
    SVM : if True, select the first class with label -1 instead of 0.

    Returns
    -------
    (D, D) array: sum over classes of n_c * (mu_c - mu)(mu_c - mu)^T,
    divided by the total number of samples.

    NOTE(review): if a class is empty (e.g. two-class SVM data with no
    label-2 samples), its mean is NaN and propagates — same as before.
    """
    # Global dataset mean as a (D, 1) column vector.
    mu = dataset.mean(axis=1).reshape(-1, 1)
    # Label used for the first class depends on the SVM convention.
    first = -1 if SVM else 0
    # Per-class means (column vectors) and sample counts.
    mu0 = dataset[:, labels == first].mean(axis=1).reshape(-1, 1)
    mu1 = dataset[:, labels == 1].mean(axis=1).reshape(-1, 1)
    mu2 = dataset[:, labels == 2].mean(axis=1).reshape(-1, 1)
    n0 = dataset[:, labels == first].shape[1]
    n1 = dataset[:, labels == 1].shape[1]
    n2 = dataset[:, labels == 2].shape[1]
    # BUG FIX: the normalization must use the TOTAL number of samples
    # n0 + n1 + n2. The original divided by (n0 + n1) only, which is
    # wrong whenever class 2 is non-empty.
    total = n0 + n1 + n2
    result = (
        n0 * np.dot(mu0 - mu, (mu0 - mu).T)
        + n1 * np.dot(mu1 - mu, (mu1 - mu).T)
        + n2 * np.dot(mu2 - mu, (mu2 - mu).T)
    ) / total
    return result
def computeWithinClassCovarianceMatrix(dataset, labels, SVM=False):
    """Compute the within-class covariance matrix SW for a 3-class problem.

    Parameters
    ----------
    dataset : (D, N) array with one sample per column.
    labels : (N,) array of class labels. Classes are {0, 1, 2}, or
        {-1, 1, 2} when SVM=True (the first class is relabeled -1).
    SVM : if True, select the first class with label -1 instead of 0.

    Returns
    -------
    (D, D) array: (1/N) * sum over classes of the centered scatter
    matrix of each class (equivalently the n_c-weighted average of the
    per-class covariance matrices).
    """
    # BUG FIX: the original always sliced `labels == 0` when building the
    # first class's scatter matrix, even with SVM=True where that class
    # is labeled -1 (yielding an empty slice). The selector is now the
    # same one used for the class mean and count.
    first = -1 if SVM else 0
    D = dataset.shape[0]
    SW = np.zeros((D, D))
    total = 0
    for cls in (first, 1, 2):
        Dc = dataset[:, labels == cls]          # samples of this class
        muc = Dc.mean(axis=1).reshape(-1, 1)    # class mean, column vector
        centered = Dc - muc
        # n_c * (1/n_c) * centered @ centered.T simplifies to the raw
        # scatter matrix, so the per-class 1/n_c factors cancel out.
        SW += np.dot(centered, centered.T)
        total += Dc.shape[1]
    return SW / total
def computeLDA(SB, SW, dataset, m):
    """Solve the generalized eigenproblem SB v = lambda SW v and project
    `dataset` onto the m most discriminant directions.

    Returns the (m, N) projected dataset.
    """
    # scipy's eigh returns eigenvalues in ascending order, so the columns
    # of U must be reversed before keeping the leading m directions.
    _, U = sc.eigh(SB, SW)
    W = np.flip(U, axis=1)[:, :m]
    return np.dot(W.T, dataset)
def LDA(dataset, labels, m, SVM=False):
    """Run the full LDA pipeline: build the between-class and within-class
    covariance matrices, then project `dataset` onto m directions."""
    between = computeBetweenClassCovarianceMatrix(dataset, labels, SVM)
    within = computeWithinClassCovarianceMatrix(dataset, labels, SVM)
    return computeLDA(between, within, dataset, m)