# alexnet_data.py -- forked from philipperemy/tensorflow-cnn-time-series
# (123 lines, 97 loc, 3.13 KB)
import matplotlib
matplotlib.use('Agg')
from random import shuffle
import errno
import os
from glob import glob
import skimage.io
import skimage.transform
import matplotlib.pyplot as plt
import numpy as np
# Folder path constant; no function visible in this file references it.
# NOTE(review): presumably read by other modules of the project -- confirm.
DATA_FOLDER = '/tmp/cnn-time-series/'
def load_image(path):
    """Load an image, resize it to 224x224 and scale its values by 1/255.

    Args:
        path: Location of the image; passed unchanged to ``skimage.io.imread``.

    Returns:
        A float array whose first two dimensions are 224x224, or ``None``
        when the image cannot be read or resized, when the resized array is
        neither 2-D nor 3-D, or when it has more than 4 channels. A 2-D
        (single-plane) image is replicated into 3 channels, and a 4-channel
        image keeps only its first 3 channels.
    """
    try:
        img = skimage.io.imread(path).astype(float)
        # TODO http://scikit-image.org/docs/dev/api/skimage.color.html rgb2gray
        # TODO cropping.
        img = skimage.transform.resize(img, (224, 224), mode='constant')
    except Exception:
        # Loading is best-effort: any read/decode/resize failure yields None.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long loading loop can still be aborted.
        return None

    if img.ndim == 2:
        # Replicate the single plane along a new last axis to get 3 channels.
        img = np.tile(img[:, :, None], 3)
    elif img.ndim != 3:
        # Anything that is not height x width [x channels] is unsupported.
        return None

    channels = img.shape[2]
    if channels > 4:
        return None
    if channels == 4:
        # Keep only the first three channels.
        img = img[:, :, :3]

    img /= 255.
    return img
def next_batch(x_y, index, batch_size):
    """Slice the batch number ``index`` out of a list of ``(x, y)`` pairs.

    The start offset is ``index * batch_size`` modulo ``len(x_y)``. When a
    full batch would run past the end of ``x_y`` from that offset, the batch
    is taken from the beginning instead and the reset flag is set.

    Args:
        x_y: Sliceable sequence whose items hold x at position 0 and y at 1.
        index: Batch counter.
        batch_size: Number of items per batch.

    Returns:
        A tuple ``(x, y, has_reset)`` where ``x`` and ``y`` are numpy arrays
        built from the selected items and ``has_reset`` tells whether the
        offset was rewound to 0.
    """
    total = len(x_y)
    start = (index * batch_size) % total

    if start + batch_size > total:
        # Not enough items left for a full batch: restart from the beginning.
        start = 0
        has_reset = True
    else:
        has_reset = False

    batch = x_y[start:start + batch_size]
    x = np.array([pair[0] for pair in batch])
    y = np.array([pair[1] for pair in batch])
    return x, y, has_reset
def read_dataset(folder, max_num_training_images, max_num_testing_images, class_mapper):
    """Read the 'train' and 'test' subsets found under ``folder``.

    Args:
        folder: Root folder containing the 'train' and 'test' sub-folders.
        max_num_training_images: Cap forwarded to ``read_set`` for 'train'.
        max_num_testing_images: Cap forwarded to ``read_set`` for 'test'.
        class_mapper: Mapping forwarded to ``read_set`` for both subsets.

    Returns:
        A tuple ``(training_inputs, testing_inputs)`` holding the two
        ``read_set`` results, in that order.
    """
    phase_limits = (
        ('train', max_num_training_images),
        ('test', max_num_testing_images),
    )
    # The training subset is read first, then the testing subset.
    training_inputs, testing_inputs = [
        read_set(folder, phase, limit, class_mapper)
        for phase, limit in phase_limits
    ]
    return training_inputs, testing_inputs
def read_set(folder, phase, max_num_of_images, class_mapper):
    """Load up to ``max_num_of_images`` labelled PNG images for one phase.

    PNG files are looked up one directory level below ``folder/phase``; the
    name of the directory that directly contains a file is its class name.
    Files are visited in random order. Images that ``load_image`` could not
    load (it returned ``None``) are skipped, so every returned entry holds a
    usable image and skipped files do not count against the cap.

    Args:
        folder: Root folder of the dataset.
        phase: Name of the sub-folder to read (e.g. 'train' or 'test').
        max_num_of_images: Maximum number of entries to return.
        class_mapper: Mapping from class (directory) name to label.

    Returns:
        A list of ``[image, label]`` pairs.

    Raises:
        KeyError: If a file's class name is missing from ``class_mapper``.
    """
    images_folder = os.path.join(folder, phase)
    # glob is called without recursive=True, so '**' matches exactly one
    # directory level here (the per-class folders).
    list_images = glob(images_folder + '/**/*.png')
    shuffle(list_images)

    inputs = []
    for i, image_name in enumerate(list_images):
        if len(inputs) >= max_num_of_images:
            break
        if i % 100 == 0:
            print(i)
        # Use os.path instead of splitting on '/' so the class name is
        # extracted correctly whatever path separator glob returns.
        class_name = os.path.basename(os.path.dirname(image_name))
        image = load_image(image_name)
        label = class_mapper[class_name]
        if image is None:
            # A None image would break the array maths done on the result.
            continue
        inputs.append([image, label])  # TODO make them 256x256
    return inputs
def compute_mean_not_optimised(inputs):
    """Return the element-wise mean image of ``inputs``.

    All images are stacked into a single array first, so the whole set is
    held in memory at once.

    Args:
        inputs: Iterable of ``(image, label)`` pairs; labels are ignored.

    Returns:
        The mean over the first axis of the stacked images.
    """
    stacked = np.array([image for image, _label in inputs])
    return np.mean(stacked, axis=0)
def compute_mean(inputs):
    """Return the element-wise mean image of ``inputs`` using a running sum.

    Only one accumulator the size of a single image is allocated. The index
    of every 100th processed entry is printed as a progress indicator.

    Args:
        inputs: Non-empty sequence of ``(image, label)`` pairs; labels are
            ignored. The accumulator takes the shape and dtype of the first
            image.

    Returns:
        The accumulated sum divided by ``len(inputs)``.
    """
    # Fresh zeroed accumulator; the first image itself is left untouched.
    running_sum = np.zeros_like(np.asarray(inputs[0][0]))
    for position, (image, _label) in enumerate(inputs):
        running_sum += image
        if position % 100 == 0:
            print(position)
    return running_sum / len(inputs)
def subtract_mean(inputs, mean_image):
    """Return a new list of pairs with ``mean_image`` subtracted from each image.

    Args:
        inputs: Iterable of ``(image, label)`` pairs.
        mean_image: Value subtracted from every image.

    Returns:
        A new list of ``[image - mean_image, label]`` lists, in input order.
    """
    return [[image - mean_image, label] for image, label in inputs]
def mkdir_p(path):
    """Create ``path`` and any missing parents; do nothing if it already exists.

    Args:
        path: Directory to create.

    Raises:
        OSError: If creation fails for any reason other than ``path``
            already existing as a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
def generate_time_series(arr, filename):
    """Plot a single series and save the figure to ``filename``.

    Wraps ``arr`` in a one-element list and delegates to
    ``generate_multi_time_series``.

    Args:
        arr: The series to plot.
        filename: Destination passed on for saving the figure.
    """
    single_series = [arr]
    generate_multi_time_series(single_series, filename)
def generate_multi_time_series(arr_list, filename):
    """Plot every series of ``arr_list`` on one figure and save it to ``filename``.

    Args:
        arr_list: Iterable of series; each one is drawn with a ``plot`` call
            on the same axes.
        filename: Destination passed to ``Figure.savefig``.
    """
    fig = plt.figure()
    try:
        for arr in arr_list:
            # Draw on this figure's own axes rather than on pyplot's implicit
            # "current" figure, so other open figures cannot interfere.
            fig.gca().plot(arr)
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise pyplot keeps it alive and memory grows over many calls.
        plt.close(fig)