#-----------------------------------
# GLOBAL FEATURE EXTRACTION
#-----------------------------------
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
#--------------------
# tunable-parameters
#--------------------
images_per_class = 80
fixed_size = (300, 300)
train_path = "dataset/train"
h5_data = 'output/data.h5'
h5_labels = 'output/labels.h5'
bins = 8
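# optional guard: h5py.File(..., 'w') cannot create a missing parent folder,
# so make sure "output/" exists before the HDF5 writes at the end of this script
os.makedirs("output", exist_ok=True)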
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    # convert to grayscale, then flatten the seven Hu moment shape invariants
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature
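# note: the seven Hu moments span several orders of magnitude; a common
# optional refinement (not applied here) is a signed log transform, e.g.
#   -np.sign(feature) * np.log10(np.abs(feature))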
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    # convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # compute the haralick texture feature vector
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    # return the result
    return haralick
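# note: mahotas.features.haralick returns one 13-value row per co-occurrence
# direction (4 directions for a 2-D image); mean(axis=0) averages them into a
# single 13-dimensional texture descriptor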
# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    # convert the image to HSV color-space
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram (pass the optional mask through instead of
    # hard-coding None, so the mask parameter actually takes effect)
    hist = cv2.calcHist([image], [0, 1, 2], mask, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram
    cv2.normalize(hist, hist)
    # return the flattened histogram
    return hist.flatten()
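# note: with bins = 8 the flattened histogram has 8*8*8 = 512 entries, so the
# stacked global feature built below is 512 + 13 (Haralick) + 7 (Hu moments)
# = 532 values per image; also, OpenCV stores 8-bit hue in [0, 180), so the
# [0, 256] hue range above is wider than strictly needed but still valid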
# get the training labels
train_labels = os.listdir(train_path)
# sort the training labels
train_labels.sort()
print(train_labels)
# empty lists to hold feature vectors and labels
global_features = []
labels = []
# loop over the training data sub-folders
for training_name in train_labels:
    # join the training data path and each species training folder
    dir_path = os.path.join(train_path, training_name)
    # get the current training label
    current_label = training_name
    # loop over the images in each sub-folder
    for x in range(1, images_per_class + 1):
        # get the image file name
        # file = dir_path + "/" + str(x) + ".jpg"
        file = os.path.join(dir_path, "{}_{}.jpg".format(training_name, int(training_name) * 100 + x))
        # read the image and resize it to a fixed-size
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)
        ####################################
        # Global Feature extraction
        ####################################
        fv_hu_moments = fd_hu_moments(image)
        fv_haralick = fd_haralick(image)
        fv_histogram = fd_histogram(image)
        ###################################
        # Concatenate global features
        ###################################
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])
        # update the list of labels and feature vectors
        labels.append(current_label)
        global_features.append(global_feature)
    print("[STATUS] processed folder: {}".format(current_label))
print("[STATUS] completed Global Feature Extraction...")
# get the overall feature vector size
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))
# get the overall training label size
print("[STATUS] training Labels {}".format(np.array(labels).shape))
# encode the target labels
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
print("[STATUS] training labels encoded...")
# scale features in the range (0-1)
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print("[STATUS] feature vector normalized...")
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))
# save the feature vector using HDF5
h5f_data = h5py.File(h5_data, 'w')
h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))
h5f_label = h5py.File(h5_labels, 'w')
h5f_label.create_dataset('dataset_1', data=np.array(target))
h5f_data.close()
h5f_label.close()
print("[STATUS] end of training..")