-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathPCA_examples.py
128 lines (103 loc) · 3.72 KB
/
PCA_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from PyDSTool import *
def princomp(A):
""" performs principal components analysis
(PCA) on the n-by-p data matrix A
Rows of A correspond to observations, columns to variables.
Returns :
coeff :
is a p-by-p matrix, each column containing coefficients
for one principal component.
score :
the principal component scores; that is, the representation
of A in the principal component space. Rows of SCORE
correspond to observations, columns to components.
latent :
a vector containing the eigenvalues
of the covariance matrix of A.
Taken from: http://glowingpython.blogspot.com/2011/07/principal-component-analysis-with-numpy.html
"""
# computing eigenvalues and eigenvectors of covariance matrix
# subtract the mean (along columns)
M = (A-mean(A.T,axis=1)).T
# attention:not always sorted
[latent,coeff] = linalg.eig(cov(M))
# projection of the data in the new space
score = dot(coeff.T,M)
return coeff,score,latent
def annotate(ax, name, start, end):
arrow = ax.annotate(name,
xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
return arrow
# Surrogate data
N = 1000
wTrue = np.linspace(0, 1000, N)
yTrue = 20*np.sin(wTrue/100.0) + 0.1 * wTrue
xTrue = 3 * wTrue + 10*yTrue
wData = wTrue + np.random.normal(0, 100, N)
xData = xTrue + np.random.normal(0, 100, N)
# Add to dimensions with uncorrelated random values
yData = yTrue + np.random.normal(0, 6, N)
#yData = np.random.uniform(-10, 10, N)
zData = np.random.uniform(-20, 20, N)
wData = np.reshape(wData, (N, 1))
xData = np.reshape(xData, (N, 1))
yData = np.reshape(yData, (N, 1))
zData = np.reshape(zData, (N, 1))
data = np.hstack((wData, xData, yData, zData))
mu = mean(data, axis=0)
def alt_SVD(data):
data = data - mu
# data = (data - mu)/data.std(axis=0) # Uncomment this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(
data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
sigma = projected_data.std(axis=0).mean()
##print(eigenvectors)
return eigenvectors, eigenvalues, projected_data, sigma
##fig, ax = plt.subplots()
##ax.scatter(xData, yData)
##ax.set_aspect('equal')
##for axis in eigenvectors:
## annotate(ax, '', mu, mu + sigma * axis)
coeff, score, latent = princomp(data)
perc = cumsum(latent)/sum(latent)
kscale = 300
plt.figure(1)
plt.subplot(121)
# every eigenvector describe the direction
# of a principal component.
ax = plt.gca()
plt.plot(data.T[0,:], data.T[1,:],'ob') # the data
plt.plot([0, -coeff[0,0]*kscale]+mu[0], [0, -coeff[0,1]*kscale]+mu[1],'-k', lw=3)
plt.plot([0, coeff[1,0]*kscale]+mu[0], [0, coeff[1,1]*kscale]+mu[1],'-k', lw=3)
eigenvectors, eigenvalues, proj_data, sigma = alt_SVD(data)
#proj_data = np.dot(data, coeff)
#sigma = proj_data.std(axis=0).mean()
#for eigvec in coeff:
# annotate(ax, '', mu, mu+sigma*eigvec)
for eigvec in eigenvectors[0:2]:
annotate(ax, '', mu[0:2], mu[0:2]+sigma*eigvec[0:2])
ax.set_aspect('equal') #plt.axis('equal')
plt.subplot(122)
# projected data
plt.plot(score[0,:],score[1,:],'*g')
plt.axis('equal')
1/0
plt.figure(2)
plt.subplot(121)
ax = plt.gca()
# every eigenvector describe the direction
# of a principal component.
plt.plot(data.T[2,:],data.T[3,:],'ob') # the data
plt.plot([0, -coeff[0,2]*kscale]+mu[2], [0, -coeff[0,3]*kscale]+mu[3],'-k', lw=3)
plt.plot([0, coeff[1,2]*kscale]+mu[2], [0, coeff[1,3]*kscale]+mu[3],'-k', lw=3)
for eigvec in eigenvectors[2:4]:
annotate(ax, '', mu[2:4], mu[2:4]+sigma*eigvec[2:4])
plt.axis('equal')
plt.subplot(122)
# projected data
plt.plot(score[2,:],score[3,:],'*g')
plt.axis('equal')
plt.show()