-
Notifications
You must be signed in to change notification settings - Fork 6
/
utils.py
212 lines (181 loc) · 8.26 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
from keras.preprocessing import image
from keras import backend as K
import numpy as np
import pandas as pd
import os
import cv2
def path_to_tensor(img_path, size=(224, 224)):
    """Load one image file and return it as a single-image batch.

    # Arguments
        img_path: path of the image file, passed to `image.load_img`.
        size: `target_size` handed to `image.load_img`.

    # Returns
        The array produced by `image.img_to_array` with one extra leading
        axis, e.g. (1, 224, 224, 3) for the default size.
    """
    pil_image = image.load_img(img_path, target_size=size)
    # Prepend the batch axis so several results can be stacked along axis 0.
    return np.expand_dims(image.img_to_array(pil_image), axis=0)
def paths_to_tensor(img_paths, size=(224, 224)):
    """Load several image files and stack them into one batch.

    # Arguments
        img_paths: iterable of image file paths.
        size: target size forwarded to `path_to_tensor` for every image.

    # Returns
        The per-image tensors stacked along the first axis with `np.vstack`.
    """
    batch = []
    for single_path in img_paths:
        batch.append(path_to_tensor(single_path, size))
    return np.vstack(batch)
def calculate_mean_std(img_paths):
    """
    Calculate the image per channel mean and standard deviation.

    Pixel values are scaled by 1/255 before the statistics are accumulated,
    and the statistics are taken over every pixel of every image.

    # Arguments
        img_paths: iterable of image file paths readable by `cv2.imread`.

    # Returns
        Tuple `(rgb_mean, rgb_std)`, each a list of three values in RGB order.

    # Raises
        ValueError: if `img_paths` yields no pixels (e.g. it is empty) or an
            image cannot be read.

    # References
        https://gist.github.com/jdhao/9a86d4b9e4f79c5330d54de991461fd6
    """
    # Number of channels of the dataset image, 3 for color jpg, 1 for grayscale img
    channel_num = 3
    pixel_num = 0  # total number of pixels seen across the dataset
    channel_sum = np.zeros(channel_num)
    channel_sum_squared = np.zeros(channel_num)

    for path in img_paths:
        im = cv2.imread(path)  # image in M*N*CHANNEL_NUM shape, channel in BGR order
        if im is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # rather than raising; fail here with the offending path.
            raise ValueError("Could not read image: {}".format(path))
        im = im / 255.
        pixel_num += im.shape[0] * im.shape[1]
        channel_sum += np.sum(im, axis=(0, 1))
        channel_sum_squared += np.sum(np.square(im), axis=(0, 1))

    if pixel_num == 0:
        raise ValueError("Cannot compute mean/std: no image pixels were provided")

    bgr_mean = channel_sum / pixel_num
    # E[x^2] - E[x]^2 can come out marginally negative through rounding;
    # clamp at zero so the square root never yields NaN.
    bgr_variance = np.maximum(channel_sum_squared / pixel_num - np.square(bgr_mean), 0.0)
    bgr_std = np.sqrt(bgr_variance)

    # change the format from bgr to rgb
    rgb_mean = list(bgr_mean)[::-1]
    rgb_std = list(bgr_std)[::-1]
    return rgb_mean, rgb_std
def preprocess_input(x, data_format=None, **kwargs):
    """Normalize a numpy array encoding an image or a batch of images.

    Only the 'torch' mode is implemented: pixels are scaled to [0, 1], then
    each of the first three channels has the mean subtracted and is divided
    by the standard deviation of the approach 1 training set (validation set
    not included).

    A floating-point `x` is modified in place and returned; any other dtype
    is first converted to `K.floatx()`.

    # Arguments
        x: a 3D or 4D numpy array consists of RGB values within [0, 255].
        data_format: data format of the image tensor; `None` falls back to
            `K.image_data_format()`.
        **kwargs: accepted and ignored.

    # Returns
        Preprocessed array.

    # References
        https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py
    """
    # Rounded statistics of the approach 1 training set:
    #   Mean: [0.6236094091893962, 0.5198354883713194, 0.5038435406338101]
    #   STD:  [0.2421814437693499, 0.22354427793687906, 0.2314805420919389]
    # (ImageNet, for reference: mean [0.485, 0.456, 0.406], std [0.229, 0.224, 0.225])
    mean = [0.6236, 0.5198, 0.5038]
    std = [0.2422, 0.2235, 0.2315]

    if not issubclass(x.dtype.type, np.floating):
        x = x.astype(K.floatx(), copy=False)
    x /= 255.

    if data_format is None:
        data_format = K.image_data_format()

    # One index tuple per channel, selecting that channel in the given layout.
    if data_format != 'channels_first':
        channel_index = [(Ellipsis, c) for c in range(3)]
    elif x.ndim == 3:
        channel_index = [(c, slice(None), slice(None)) for c in range(3)]
    else:
        channel_index = [(slice(None), c, slice(None), slice(None)) for c in range(3)]

    # Zero-center every channel first, then scale every channel.
    for index, channel_mean in zip(channel_index, mean):
        x[index] -= channel_mean
    for index, channel_std in zip(channel_index, std):
        x[index] /= channel_std
    return x
def preprocess_input_2(x, data_format=None, **kwargs):
    """Normalize a numpy array encoding an image or a batch of images.

    Only the 'torch' mode is implemented: pixels are scaled to [0, 1], then
    each of the first three channels has the mean subtracted and is divided
    by the standard deviation of the approach 2 training set (validation set
    not included).

    A floating-point `x` is modified in place and returned; any other dtype
    is first converted to `K.floatx()`.

    # Arguments
        x: a 3D or 4D numpy array consists of RGB values within [0, 255].
        data_format: data format of the image tensor; `None` falls back to
            `K.image_data_format()`.
        **kwargs: accepted and ignored.

    # Returns
        Preprocessed array.

    # References
        https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py
    """
    # Rounded statistics of the approach 2 training set:
    #   Mean: [0.6296238064420809, 0.5202302775509949, 0.5032952297664738]
    #   STD:  [0.24130893564897463, 0.22150225707876617, 0.2297057828857888]
    mean = [0.6296, 0.5202, 0.5033]
    std = [0.2413, 0.2215, 0.2297]

    if not issubclass(x.dtype.type, np.floating):
        x = x.astype(K.floatx(), copy=False)
    x /= 255.

    if data_format is None:
        data_format = K.image_data_format()

    # One index tuple per channel, selecting that channel in the given layout.
    if data_format != 'channels_first':
        channel_index = [(Ellipsis, c) for c in range(3)]
    elif x.ndim == 3:
        channel_index = [(c, slice(None), slice(None)) for c in range(3)]
    else:
        channel_index = [(slice(None), c, slice(None), slice(None)) for c in range(3)]

    # Zero-center every channel first, then scale every channel.
    for index, channel_mean in zip(channel_index, mean):
        x[index] -= channel_mean
    for index, channel_std in zip(channel_index, std):
        x[index] /= channel_std
    return x
def ensemble_predictions(result_folder, category_names, save_file=True,
                         model_names=('DenseNet201', 'Xception', 'ResNeXt50'),
                         postfixes=('best_balanced_acc', 'best_loss', 'latest')):
    """Ensemble predictions of different models by averaging them.

    For every postfix, the file "<model_name>_<postfix>.csv" is read from
    `result_folder` for each model. The per-category columns are averaged
    across models and `pred_category` is recomputed as the argmax of the
    averaged values.

    # Arguments
        result_folder: folder holding the per-model CSV files; ensemble files
            are written here as well.
        category_names: names of the prediction columns to average. The
            argmax is taken over the columns at positions
            1..len(category_names), so those columns must directly follow
            the first column of the CSV files.
        save_file: if true, write "Ensemble_<postfix>.csv" for every postfix.
        model_names: sequence of model names whose predictions are combined.
        postfixes: sequence of file-name postfixes to process.

    # Returns
        The ensemble DataFrame of the last postfix. Ensembles of earlier
        postfixes are only available through the saved files.

    # Raises
        ValueError: if `model_names` or `postfixes` is empty, or if the
            models' files differ in row count or in their `image` column.
    """
    model_names = list(model_names)
    postfixes = list(postfixes)
    if not model_names:
        raise ValueError("model_names must contain at least one model name")
    if not postfixes:
        raise ValueError("postfixes must contain at least one postfix")

    first_model = model_names[0]
    other_models = model_names[1:]
    df_ensemble = None

    for postfix in postfixes:
        # Load models' predictions
        df_dict = {
            model_name: pd.read_csv(
                os.path.join(result_folder, "{}_{}.csv".format(model_name, postfix)))
            for model_name in model_names
        }
        reference = df_dict[first_model]

        # Check row number
        for model_name in other_models:
            if len(reference) != len(df_dict[model_name]):
                raise ValueError("Row numbers are inconsistent between {} and {}".format(first_model, model_name))

        # Check whether values of image column are consistent
        for model_name in other_models:
            inconsistent_idx = np.where(reference.image != df_dict[model_name].image)[0]
            if len(inconsistent_idx) > 0:
                raise ValueError("{} values of image column are inconsistent between {} and {}"
                                 .format(len(inconsistent_idx), first_model, model_name))

        # Start from the first model's predictions; pred_category is
        # recomputed from the averaged values below.
        df_ensemble = reference.drop(columns=['pred_category'])

        # Add up the predictions of all models, then take the average
        for category_name in category_names:
            total = reference[category_name]
            for model_name in other_models:
                total = total + df_dict[model_name][category_name]
            df_ensemble[category_name] = total / len(model_names)

        # Ensemble Predictions
        df_ensemble['pred_category'] = np.argmax(
            np.array(df_ensemble.iloc[:, 1:(1 + len(category_names))]), axis=1)

        # Save Ensemble Predictions
        if save_file:
            ensemble_file = os.path.join(result_folder, "Ensemble_{}.csv".format(postfix))
            df_ensemble.to_csv(path_or_buf=ensemble_file, index=False)
            print('Save "{}"'.format(ensemble_file))

    return df_ensemble
def logistic(x, x0=0, L=1, k=1):
    """ Evaluate the logistic function L / (1 + exp(-k * (x - x0))).

    # Arguments
        x: The point (or numpy array of points) at which to evaluate.
        x0: The x-value of the sigmoid's midpoint.
        L: The curve's maximum value.
        k: The logistic growth rate or steepness of the curve.

    # Returns
        The value of the logistic function at `x`.

    # References
        https://en.wikipedia.org/wiki/Logistic_function
    """
    exponent = -k * (x - x0)
    denominator = 1 + np.exp(exponent)
    return L / denominator