# feature_selection.py
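"""Analyzes the node channels of the SLIC-segmented Pascal VOC dataset.

Collects the node features of the whole training set, fits a PCA on the
standardized channels, and saves two plots: the channel covariance
(covariance.svg) and the cumulative explained variance ratio (pca.svg).
"""

# Use the non-interactive Agg backend so figures can be rendered without
# a display.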
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt  # nopep8
from sklearn.preprocessing import StandardScaler  # nopep8
from sklearn.decomposition import PCA  # nopep8
import tensorflow as tf  # nopep8
import numpy as np  # nopep8

from data import PascalVOC, read_tfrecord  # nopep8
from grapher import SegmentationGrapher  # nopep8
from segmentation.algorithm import slic_generator  # nopep8
from segmentation import adjacency_unweighted  # nopep8
from patchy import PatchySan  # nopep8
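
# Load Pascal VOC and build unweighted region adjacency graphs on top of
# SLIC superpixel segmentations with approximately 400 segments per image.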
pascal = PascalVOC()
grapher = SegmentationGrapher(slic_generator(num_segments=400),
                              [adjacency_unweighted])
num_channels = grapher.num_node_channels
patchy = PatchySan(pascal, grapher,
                   data_dir='/tmp/patchy_san_slic_pascal_voc_data',
                   num_nodes=400, node_stride=1, neighborhood_size=1)
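
# Queue the training record files for a single, non-shuffled epoch.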
filename_queue = tf.train.string_input_producer(patchy.train_filenames,
                                                num_epochs=1, shuffle=False)
# Load the node features. We are not interested in the labels.
data, _ = read_tfrecord(filename_queue,
                        {'nodes': [-1, num_channels],
                         'neighborhood': [400, 1]})
data = data['nodes']

# Queue up batches of node features, zero-padding every batch to the
# size of its largest example.
data_batch = tf.train.batch(
    [data],
    batch_size=128,
    num_threads=16,
    capacity=300,
    dynamic_pad=True,
    allow_smaller_final_batch=True)
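# Flatten the batch so that every row holds the features of one node.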
data_batch = tf.reshape(data_batch, [-1, num_channels])

sess = tf.Session()
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

# Start the input pipeline threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

all_nodes = np.zeros((0, num_channels))

print('Analyzing dataset. This can take a few minutes.')
try:
    while True:
        nodes = sess.run(data_batch)
        # Discard the all-zero rows introduced by the dynamic padding.
        nodes = nodes[~np.all(nodes == 0, axis=1)]
        all_nodes = np.concatenate((all_nodes, nodes))
except KeyboardInterrupt:
    print('')
except tf.errors.OutOfRangeError:
    pass
finally:
    coord.request_stop()
    coord.join(threads)
    sess.close()
pca = PCA()
pca.fit(StandardScaler().fit_transform(all_nodes))

# Plot the covariance of the standardized node channels, which equals
# their correlation matrix because the channels were standardized.
cov = pca.get_covariance()
plt.pcolor(cov)
plt.colorbar()
plt.yticks(np.arange(0.5, num_channels + 0.5, 5),
           np.arange(0, num_channels, 5))
plt.xticks(np.arange(0.5, num_channels + 0.5, 5),
           np.arange(0, num_channels, 5))
plt.savefig('covariance.svg', format='svg')
plt.close()

# Plot the cumulative explained variance ratio.
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance')
plt.savefig('pca.svg', format='svg')
plt.close()
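
# A possible extension (not part of the original script): report the
# smallest number of components needed to reach a given cumulative
# explained variance. The 0.95 threshold is an illustrative assumption.
num_components = np.searchsorted(
    np.cumsum(pca.explained_variance_ratio_), 0.95) + 1
print('Components explaining 95% of the variance: {}'.format(num_components))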