forked from philipperemy/tensorflow-cnn-time-series
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
philipperemy
committed
Apr 26, 2017
1 parent
34bf3cb
commit 8cb6ce6
Showing
4 changed files
with
261 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import os | ||
from glob import glob | ||
from random import shuffle | ||
|
||
import numpy as np | ||
import skimage.io | ||
import skimage.transform | ||
|
||
|
||
def load_image(path):
    """Load an image file as a 224x224 float array divided by 255.

    Args:
        path: Location of the image, passed to ``skimage.io.imread``.

    Returns:
        The resized image as a float array divided by 255, or ``None`` when
        the file cannot be read or resized, or when the resulting array has
        an unsupported layout (fewer than 2 dimensions, exactly 4
        dimensions, or more than 4 channels). A 2-D (grayscale) image is
        replicated into 3 channels; a 4-channel image keeps only its first
        three channels.
    """
    try:
        img = skimage.io.imread(path).astype(float)
        # TODO http://scikit-image.org/docs/dev/api/skimage.color.html rgb2gray
        # TODO cropping.
        img = skimage.transform.resize(img, (224, 224), mode='constant')
    except Exception:
        # Best effort: an unreadable or corrupt file is reported as None.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate so a long loading loop can still be interrupted.
        return None

    num_dims = len(img.shape)
    if num_dims < 2 or num_dims == 4:
        return None
    if num_dims == 2:
        # Grayscale: replicate the single plane into three channels.
        img = np.tile(img[:, :, None], 3)
    if img.shape[2] == 4:
        # Keep the first three channels, drop the fourth.
        img = img[:, :, :3]
    if img.shape[2] > 4:
        return None

    img /= 255.
    return img
|
||
|
||
def next_batch(x_y, index, batch_size):
    """Return the ``index``-th mini-batch of ``x_y``, wrapping at the end.

    Args:
        x_y: Sequence of ``(x, y)`` pairs.
        index: Zero-based batch counter; it may keep growing past the end of
            the data, in which case batches wrap around.
        batch_size: Number of pairs per batch.

    Returns:
        A tuple ``(x, y, has_reset)``. ``x`` and ``y`` are arrays built from
        the first and second elements of the selected pairs. ``has_reset``
        is True when this batch was taken from the start of ``x_y`` again
        after the end of the data was reached.

    Raises:
        ValueError: If ``x_y`` is empty.
    """
    total = len(x_y)
    if total == 0:
        raise ValueError('next_batch() requires a non-empty dataset')

    offset = index * batch_size
    start = offset % total
    has_reset = False
    if start + batch_size > total:
        # Not enough items left for a full batch: restart from the beginning.
        start = 0
        has_reset = True
    elif offset > 0 and start == 0:
        # The modulo wrapped exactly onto the start (e.g. len(x_y) is a
        # multiple of batch_size). Report it too, otherwise a caller that
        # loops until has_reset is True would never terminate.
        has_reset = True

    output = x_y[start:start + batch_size]
    x = np.array([pair[0] for pair in output])
    y = np.array([pair[1] for pair in output])
    return x, y, has_reset
|
||
|
||
def read_dataset(folder, max_num_training_images, max_num_testing_images, class_mapper):
    """Read the 'train' and 'test' image sets found under ``folder``.

    Args:
        folder: Root directory holding the ``train`` and ``test`` sub-folders.
        max_num_training_images: Upper bound on the number of training items.
        max_num_testing_images: Upper bound on the number of testing items.
        class_mapper: Mapping from class (directory) name to label.

    Returns:
        A tuple ``(training_inputs, testing_inputs)`` as produced by
        ``read_set`` for each phase.
    """
    phase_limits = (
        ('train', max_num_training_images),
        ('test', max_num_testing_images),
    )
    # The training set is read first, then the testing set.
    training_inputs, testing_inputs = [
        read_set(folder, phase, limit, class_mapper)
        for phase, limit in phase_limits
    ]
    return training_inputs, testing_inputs
|
||
|
||
def read_set(folder, phase, max_num_of_images, class_mapper):
    """Load up to ``max_num_of_images`` labelled PNG images for one phase.

    Files are searched with the pattern ``<folder>/<phase>/**/*.png``; the
    name of the directory that directly contains a file is used as its
    class name. The file list is shuffled before loading.

    Args:
        folder: Root directory of the dataset.
        phase: Sub-folder to read, e.g. ``'train'`` or ``'test'``.
        max_num_of_images: Maximum number of items to return.
        class_mapper: Mapping from class (directory) name to label.

    Returns:
        A list of ``[image, label]`` pairs. Files that ``load_image`` could
        not load (it returned ``None``) are skipped instead of being stored,
        so every returned image is a real array.

    Raises:
        KeyError: If a class directory name is missing from ``class_mapper``.
    """
    images_folder = os.path.join(folder, phase)
    list_images = glob(os.path.join(images_folder, '**', '*.png'))
    shuffle(list_images)

    inputs = []
    for i, image_name in enumerate(list_images):
        if len(inputs) >= max_num_of_images:
            break
        if i % 100 == 0:
            print(i)
        # Parent directory name, independent of the platform path separator.
        class_name = os.path.basename(os.path.dirname(image_name))
        image = load_image(image_name)
        if image is None:
            # Unreadable/unsupported file: a None entry would break the
            # mean computation and the batching done later on.
            continue
        inputs.append([image, class_mapper[class_name]])  # TODO make them 256x256
    return inputs
|
||
|
||
def compute_mean_not_optimised(inputs):
    """Return the element-wise mean image of ``inputs``.

    All images are first stacked into a single array, so the whole set is
    held in memory at once.

    Args:
        inputs: Iterable of ``(image, label)`` pairs.

    Returns:
        The mean over the first axis of the stacked images.
    """
    stacked_images = np.array([image for image, _ in inputs])
    return np.mean(stacked_images, axis=0)
|
||
|
||
def compute_mean(inputs):
    """Return the element-wise mean image of ``inputs`` using a running sum.

    Only one accumulator the size of a single image is allocated. The index
    of every 100th processed item is printed as a progress indicator.

    Args:
        inputs: Sequence of ``(image, label)`` pairs whose images share the
            shape of the first image.

    Returns:
        The mean image. Floating-point (or complex) images keep their dtype;
        any other dtype (e.g. uint8) is accumulated in float64 so the sum
        cannot wrap around or be truncated.

    Raises:
        ValueError: If ``inputs`` is empty.
    """
    if len(inputs) == 0:
        raise ValueError('compute_mean() requires at least one input')

    first_image = np.asarray(inputs[0][0])
    if np.issubdtype(first_image.dtype, np.inexact):
        accumulator_dtype = first_image.dtype
    else:
        # Integer/bool images: summing in their own dtype would overflow.
        accumulator_dtype = np.float64

    image_mean = np.zeros(first_image.shape, dtype=accumulator_dtype)
    for i, (image, _) in enumerate(inputs):
        image_mean += image
        if i % 100 == 0:
            print(i)
    return image_mean / len(inputs)
|
||
|
||
def subtract_mean(inputs, mean_image):
    """Return a new list of ``[image - mean_image, label]`` pairs.

    Args:
        inputs: Iterable of ``(image, label)`` pairs.
        mean_image: Value subtracted from every image.

    Returns:
        A new list; the pairs in ``inputs`` are not modified.
    """
    return [[image - mean_image, label] for image, label in inputs]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import tensorflow as tf | ||
from keras.layers.convolutional import Conv2D, MaxPooling2D | ||
from keras.layers.core import Dense, Activation, Flatten | ||
|
||
|
||
# import tensorflow as tf | ||
# tf.python.control_flow_ops = tf # some hack to get tf running with Dropout | ||
|
||
# 224x224 | ||
# https://gist.github.com/JBed/c2fb3ce8ed299f197eff | ||
def alex_net_keras(x, num_classes=2, keep_prob=0.5):
    """Stack AlexNet-style Keras layers on ``x`` and return the logits.

    Layout: five 'same'-padded convolutions, each followed by ReLU, with a
    3x3/stride-2 max pooling after the first two; then Flatten, two
    4096-unit dense layers (each followed by dropout and then ReLU) and a
    final dense layer with ``num_classes`` units. No softmax is applied.

    Args:
        x: Input tensor the first convolution is applied to.
        num_classes: Number of units of the final dense layer.
        keep_prob: Value forwarded as ``keep_prob`` to ``tf.nn.dropout``.

    Returns:
        The output of the final dense layer (unnormalised logits).
    """

    def relu(tensor):
        return Activation('relu')(tensor)

    def max_pool(tensor):
        return MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(tensor)

    def dense_dropout_relu(tensor):
        # Order used here: Dense -> dropout -> ReLU.
        tensor = Dense(4096, kernel_initializer='normal')(tensor)
        tensor = tf.nn.dropout(tensor, keep_prob=keep_prob)
        return relu(tensor)

    # conv 1 + pool 1. LRN is missing here - Caffe.
    # NOTE(review): 92 filters; AlexNet is usually described with 96 in the
    # first convolution - confirm whether 92 is intentional.
    net = Conv2D(92, kernel_size=(11, 11), strides=(4, 4), padding='same')(x)
    net = max_pool(relu(net))

    # conv 2 + pool 2 (miss group and pad param).
    net = Conv2D(256, kernel_size=(5, 5), padding='same')(net)
    net = max_pool(relu(net))

    # conv 3, conv 4, conv 5: no pooling in between.
    for num_filters in (384, 384, 256):
        net = relu(Conv2D(num_filters, kernel_size=(3, 3), padding='same')(net))

    net = Flatten()(net)
    net = dense_dropout_relu(net)  # fc6
    net = dense_dropout_relu(net)  # fc7

    # Softmax is intentionally left out: logits are returned.
    return Dense(num_classes)(net)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import numpy as np | ||
import tensorflow as tf | ||
|
||
from alexnet_data import read_dataset, next_batch, compute_mean, subtract_mean | ||
from generate_data import DATA_FOLDER | ||
|
||
if __name__ == '__main__':
    # Script entry point: load the image dataset, centre it on the training
    # mean, build the AlexNet-style graph and train with plain SGD, running
    # an evaluation pass over the test set every 100 training steps.

    # --- dataset configuration ---
    names = ['DOWN', 'UP']  # maybe to be changed.
    NUM_CLASSES = len(names)
    # NOTE(review): labels are floats while `y` below is an int64
    # placeholder - confirm the implicit conversion on feed is intended.
    class_mapper = {names[0]: 0.0, names[1]: 1.0}
    NUM_TRAINING_IMAGES = 1000
    NUM_TESTING_IMAGES = 1000
    data_percentage = 1
    num_training_images = data_percentage * NUM_TRAINING_IMAGES
    num_testing_images = data_percentage * NUM_TESTING_IMAGES

    # --- model / optimisation configuration ---
    HEIGHT = 224
    WIDTH = 224
    CHANNELS = 3
    BATCH_SIZE = 128
    LEARNING_RATE = 0.01

    # --- data loading and mean-centering ---
    print('read_dataset() start')
    training_inputs, testing_inputs = read_dataset(
        DATA_FOLDER, num_training_images, num_testing_images, class_mapper)
    print('read_dataset() done')
    print('compute_mean() start')
    mean_image = compute_mean(training_inputs)
    print('compute_mean() done')
    # The training mean is subtracted from both sets.
    training_inputs = subtract_mean(training_inputs, mean_image)
    testing_inputs = subtract_mean(testing_inputs, mean_image)
    print(len(training_inputs), 'training inputs')
    print(len(testing_inputs), 'testing inputs')

    # --- graph definition ---
    x = tf.placeholder(tf.float32, shape=[None, HEIGHT, WIDTH, CHANNELS])
    y = tf.placeholder(tf.int64, [None])
    keep_prob = tf.placeholder(tf.float32)

    from alexnet_keras import alex_net_keras

    logits = alex_net_keras(x, num_classes=len(names), keep_prob=keep_prob)

    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    cross_entropy = tf.reduce_mean(per_example_loss)

    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(logits, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # --- training loop ---
    for step in range(int(1e9)):
        batch_xs, batch_ys, _ = next_batch(training_inputs, step, BATCH_SIZE)
        train_feed = {x: batch_xs, y: batch_ys, keep_prob: 0.5}
        tr_loss, _ = sess.run([cross_entropy, train_step], feed_dict=train_feed)
        print('[TRAINING] #batch = {0}, tr_loss = {1:.3f}'.format(step, tr_loss))
        if step % 100 != 0:
            continue

        # Evaluation pass: walk the test set until next_batch reports a
        # reset. NOTE(review): if the very first test batch already reports
        # a reset, accuracy_list stays empty and np.mean yields nan.
        accuracy_list = []
        test_batch = 0
        batch_xt, batch_yt, reset = next_batch(testing_inputs, test_batch, BATCH_SIZE)
        while not reset:
            test_feed = {x: batch_xt, y: batch_yt, keep_prob: 1.0}
            te_loss, te_acc = sess.run([cross_entropy, accuracy], feed_dict=test_feed)
            # The training step (not the test batch index) is what gets logged.
            print('[TESTING] #batch = {0}, te_loss = {1:.3f}, te_acc = {2:.3f}'.format(step, te_loss, te_acc))
            accuracy_list.append(te_acc)
            test_batch += 1
            batch_xt, batch_yt, reset = next_batch(testing_inputs, test_batch, BATCH_SIZE)
        print('[ALL] total batches = {0} total mean accuracy on testing set = {1:.2f}'.format(
            step, np.mean(accuracy_list)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import errno | ||
import os | ||
|
||
import matplotlib | ||
|
||
matplotlib.use('Agg') | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
|
||
DATA_FOLDER = '/tmp/cnn-time-series/' | ||
|
||
|
||
def mkdir_p(path):
    """Create ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.

    Args:
        path: Directory to create.

    Raises:
        OSError: If creation fails for any reason other than ``path``
            already existing as a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
|
||
|
||
def generate_time_series(arr, filename):
    """Plot ``arr`` as a line chart and save it to ``filename``.

    Args:
        arr: Values passed to ``plt.plot``.
        filename: Destination passed to ``plt.savefig``.
    """
    fig = plt.figure()
    try:
        plt.plot(arr)
        plt.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise figures pile up when this is called in a loop.
        plt.close(fig)
|
||
|
||
def generate():
    """Write 200 random-walk plots into the train and test folders.

    For each index a cumulative sum of 1024 standard-normal samples is
    plotted and saved as ``img_<i>.png`` under
    ``DATA_FOLDER/<phase>/<direction>`` for both phases. Even indices go to
    ``'UP'`` and odd indices to ``'DOWN'``.
    """
    for i in range(200):
        direction = 'DOWN' if i % 2 else 'UP'
        image_file = 'img_{}.png'.format(i)
        series = np.cumsum(np.random.standard_normal(1024))
        # NOTE(review): the same series is written to both 'train' and
        # 'test' (a separate draw for the test set was commented out in the
        # original) - confirm this duplication is intended.
        for phase in ('train', 'test'):
            output_dir = os.path.join(DATA_FOLDER, phase, direction)
            mkdir_p(output_dir)
            generate_time_series(series, os.path.join(output_dir, image_file))


if __name__ == '__main__':
    generate()