-
Notifications
You must be signed in to change notification settings - Fork 0
/
algorithms.py
186 lines (150 loc) · 6.06 KB
/
algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import cv2
import numpy as np
from skimage.metrics import structural_similarity
def compare_p_hash(image1, image2):
    """Return the perceptual-hash (pHash) similarity of two images.

    Both images are fingerprinted with ``p_hash`` and the fingerprints are
    compared with ``cmp_hash``.

    Returns:
        ``1 - distance / 64`` where ``distance`` is the value returned by
        ``cmp_hash``; 1.0 means the two 64-bit fingerprints are identical.
    """
    distance = cmp_hash(p_hash(image1), p_hash(image2))
    return 1 - distance / 64
def round_clip(image, crop_width, crop_height):
    """Trim a border off every side of an image.

    Args:
        image: Array indexed as ``[row, column, ...]``.
        crop_width: Number of columns removed from both the left and the
            right edge.
        crop_height: Number of rows removed from both the top and the
            bottom edge.

    Returns:
        The sliced view ``image[crop_height:H - crop_height,
        crop_width:W - crop_width]``.
    """
    height, width = image.shape[0], image.shape[1]
    rows = slice(crop_height, height - crop_height)
    cols = slice(crop_width, width - crop_width)
    return image[rows, cols]
# Average hash (aHash)
def a_hash(img):
    """Compute the average hash of a BGR image.

    The image is resized to 8x8, converted to grayscale, and every pixel is
    compared with the mean gray level of those 64 pixels.

    Args:
        img: BGR image array accepted by ``cv2.resize`` / ``cv2.cvtColor``.

    Returns:
        A 64-character string of ``'1'`` / ``'0'`` in row-major order, where
        ``'1'`` marks a pixel strictly brighter than the mean.
    """
    img = cv2.resize(img, (8, 8))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Take the mean at array level (accumulated in float64). Summing the
    # uint8 pixels one scalar at a time can wrap around at 255 under
    # NumPy 2 scalar-promotion rules, which would corrupt the threshold.
    avg = gray.mean()
    return ''.join('1' if pixel > avg else '0' for pixel in gray.flatten())
# Difference hash (dHash)
def d_hash(img):
    """Compute the difference hash of a BGR image.

    The image is resized to 9 columns by 8 rows and converted to grayscale;
    each of the first 8 pixels in a row is compared with its right-hand
    neighbour.

    Returns:
        A 64-character string of ``'1'`` / ``'0'`` in row-major order, where
        ``'1'`` marks a pixel strictly brighter than the pixel to its right.
    """
    resized = cv2.resize(img, (9, 8))  # dsize is (width, height)
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    # Columns 0..7 against columns 1..8, all rows at once.
    brighter_than_right = gray[:, :-1] > gray[:, 1:]
    return ''.join('1' if flag else '0' for flag in brighter_than_right.flatten())
# Perceptual hash (pHash)
def p_hash(img):
    """Compute the perceptual hash of a BGR image.

    The image is resized to 32x32, converted to grayscale and transformed
    with a DCT; the top-left 8x8 block of coefficients is then thresholded
    against its own mean.

    Returns:
        A list of 64 ints (``1`` / ``0``) in row-major order, where ``1``
        marks a coefficient strictly greater than the block mean.
    """
    small = cv2.resize(img, (32, 32))
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    low_freq = cv2.dct(np.float32(gray))[0:8, 0:8]
    threshold = np.mean(low_freq)
    return [1 if coeff > threshold else 0 for coeff in low_freq.flatten()]
# Grayscale (single-channel) histogram comparison
def compare_hist_with_split(image1, image2):
    """Score how closely the channel-0 histograms of two images match.

    A 256-bin histogram is taken from channel 0 of each image. Every bin
    contributes ``1 - |a - b| / max(a, b)`` (or 1 when the two counts are
    equal, including both being empty), and the contributions are averaged.

    Args:
        image1: First image; only channel 0 is histogrammed.
        image2: Second image; only channel 0 is histogrammed.

    Returns:
        A 1-element float array in the range 0-1; 1 means the histograms are
        identical. The array form is returned in every case so that callers
        can always index the result with ``[0]``.
    """
    # calcHist treats the upper range bound as exclusive, so the range must
    # end at 256 for pixels of value 255 to be counted.
    hist1 = cv2.calcHist([image1], [0], None, [256], [0.0, 256.0])
    hist2 = cv2.calcHist([image2], [0], None, [256], [0.0, 256.0])
    gap = np.abs(hist1 - hist2)
    peak = np.maximum(hist1, hist2)
    # A bin whose peak is 0 is empty in both histograms: treat it as a
    # perfect match instead of dividing by zero.
    relative_gap = np.divide(gap, peak, out=np.zeros_like(gap), where=peak > 0)
    # Histograms have shape (256, 1); averaging over axis 0 leaves shape (1,).
    return (1 - relative_gap).mean(axis=0)
def normalize_compared_hist_with_split(image1, image2, size=(256, 256)):
    """Average the per-channel histogram similarity of two images.

    Both images are resized to ``size``, split into channels, and each pair
    of corresponding channels is scored with ``compare_hist_with_split``.

    Args:
        image1: First image.
        image2: Second image.
        size: ``(width, height)`` both images are resized to before
            comparison.

    Returns:
        The mean of the per-channel scores, in whatever form
        ``compare_hist_with_split`` returns them.
    """
    image1 = cv2.resize(image1, size)
    image2 = cv2.resize(image2, size)
    channel_pairs = list(zip(cv2.split(image1), cv2.split(image2)))
    total = sum(compare_hist_with_split(ch1, ch2) for ch1, ch2 in channel_pairs)
    # Divide by the number of channels actually compared rather than a fixed
    # 3, so the result is still a true mean for non-3-channel images.
    return total / len(channel_pairs)
# Hash comparison
# The ordered sequence of 1s and 0s produced by a hashing algorithm is the
# image's fingerprint. The bit order itself is arbitrary, but both hashes
# being compared must use the same order.
# Two fingerprints are compared by their Hamming distance, i.e. how many of
# the 64 positions differ; the fewer differing bits, the more similar the
# images.
# Hamming distance: the number of positions that must change to turn one bit
# sequence into the other. A distance of 0 means the two images are treated
# as identical.
def cmp_hash(hash1, hash2):
    """Return the Hamming distance between two equal-length hashes.

    Args:
        hash1: Indexable sequence of hash symbols (string or list).
        hash2: Sequence compared position by position with ``hash1``.

    Returns:
        The number of positions at which the two sequences differ, or ``-1``
        when their lengths are not equal.
    """
    if len(hash1) != len(hash2):
        return -1
    return sum(1 for left, right in zip(hash1, hash2) if left != right)
def compare_ssim(image1, image2):
    """Return the structural similarity (SSIM) of two BGR images.

    Both images are cropped to their common top-left region, converted to
    grayscale, and compared with ``structural_similarity`` using a data
    range of 255.

    Args:
        image1: First BGR image.
        image2: Second BGR image.

    Returns:
        The mean SSIM score reported by ``structural_similarity``.
    """
    rows = min(image1.shape[0], image2.shape[0])
    cols = min(image1.shape[1], image2.shape[1])
    before_gray = cv2.cvtColor(image1[0:rows, 0:cols], cv2.COLOR_BGR2GRAY)
    after_gray = cv2.cvtColor(image2[0:rows, 0:cols], cv2.COLOR_BGR2GRAY)
    # The grayscale inputs are 2-D, so the default channel_axis=None applies.
    # The deprecated `multichannel` keyword is no longer passed, and the
    # full difference image is not requested because only the score is used.
    return structural_similarity(before_gray, after_gray, data_range=255)
def is_similarity(image1, image2, debug=False):
    """Decide whether two BGR images should be considered similar.

    Three metrics are computed: the pHash similarity, the SSIM score and the
    per-channel histogram similarity. The images are similar when any of the
    following holds:

    * histogram similarity >= 0.8
    * SSIM > 0.7
    * histogram similarity > 0.7 and pHash similarity > 0.89 and SSIM > 0.2

    Args:
        image1: First BGR image.
        image2: Second BGR image.
        debug: When true, print the three metric values.

    Returns:
        A plain ``bool``.
    """
    phash = compare_p_hash(image1, image2)
    ssim = compare_ssim(image1, image2)
    hist_with_split_rs = normalize_compared_hist_with_split(image1, image2)
    if debug:
        print("hist_with_split: %s, ssim: %s, phash: %s" % (hist_with_split_rs, ssim, phash))
    # The histogram score may arrive as a 1-element array or as a scalar;
    # unwrap it so the comparisons below operate on plain numbers.
    hist_score = float(np.ravel(hist_with_split_rs)[0])
    return bool(
        hist_score >= 0.8
        or ssim > 0.7
        or (hist_score > 0.7 and phash > 0.89 and ssim > 0.2)
    )
def compare_a_hash(image1, image2):
    """Return the average-hash (aHash) similarity of two images.

    Both images are fingerprinted with ``a_hash`` and the fingerprints are
    compared with ``cmp_hash``.

    Returns:
        ``1 - distance / 64`` where ``distance`` is the value returned by
        ``cmp_hash``; 1.0 means the two 64-bit fingerprints are identical.
    """
    fingerprint1 = a_hash(image1)
    fingerprint2 = a_hash(image2)
    return 1 - cmp_hash(fingerprint1, fingerprint2) / 64
def compare_d_hash(image1, image2):
    """Return the difference-hash (dHash) similarity of two images.

    Both images are fingerprinted with ``d_hash`` and the fingerprints are
    compared with ``cmp_hash``.

    Returns:
        ``1 - distance / 64`` where ``distance`` is the value returned by
        ``cmp_hash``; 1.0 means the two 64-bit fingerprints are identical.
    """
    differing_bits = cmp_hash(d_hash(image1), d_hash(image2))
    similarity = 1 - differing_bits / 64
    return similarity
def runAllImageSimilaryFun(para1, para2, crop_width=0, crop_height=0):
    """Load two images, run every similarity metric on them and print the results.

    Args:
        para1: Path of the first image file.
        para2: Path of the second image file.
        crop_width: Columns trimmed from the left and right edge of each
            image before comparison (0 keeps the full width).
        crop_height: Rows trimmed from the top and bottom edge of each image
            before comparison (0 keeps the full height).

    Raises:
        FileNotFoundError: If either image cannot be read by ``cv2.imread``.
    """
    # aHash / dHash / pHash Hamming distances: the smaller the value, the more
    # similar the images; identical images give 0.
    # Three-channel and single-channel histogram scores lie between 0 and 1:
    # the larger the value, the more similar; identical images give 1.
    # Sample outputs (aHash dHash pHash distances, then the two histogram scores):
    # t1,t2 14;19;10; 0.70;0.75
    # t1,t3 39 33 18 0.58 0.49
    # s1,s2 7 23 11 0.83 0.86   fairly similar images
    # c1,c2 11 29 17 0.30 0.31
    print("---------------------")
    raw1 = cv2.imread(para1)
    raw2 = cv2.imread(para2)
    # cv2.imread reports an unreadable file by returning None, not by raising.
    for path, raw in ((para1, raw1), (para2, raw2)):
        if raw is None:
            raise FileNotFoundError("cannot read image: %s" % path)
    img1 = round_clip(raw1, crop_width, crop_height)
    img2 = round_clip(raw2, crop_width, crop_height)
    print(is_similarity(img1, img2, True))
    n1 = compare_a_hash(img1, img2)
    print('均值哈希算法相似度aHash:', n1)
    n2 = compare_d_hash(img1, img2)
    print('差值哈希算法相似度dHash:', n2)
    n3 = compare_p_hash(img1, img2)
    print('感知哈希算法相似度pHash:', n3)
    n4 = normalize_compared_hist_with_split(img1, img2)
    print('三直方图算法相似度:', n4)
    n5 = compare_hist_with_split(img1, img2)
    print("单通道的直方图", n5)
    ssim1 = compare_ssim(img1, img2)
    print('ssim: ', ssim1)
    # The histogram scores may be 1-element arrays or plain floats; unwrap
    # them so that formatting works in both cases.
    hist3 = float(np.ravel(n4)[0])
    hist1 = float(np.ravel(n5)[0])
    # n1..n3 are similarities (1 - distance / 64). The compact summary line
    # uses %d, so convert them back to whole Hamming distances instead of
    # letting %d truncate a 0-1 value to 0 or 1.
    a_dist, d_dist, p_dist = (round((1 - score) * 64) for score in (n1, n2, n3))
    print("%d %d %d %.2f %.2f " % (a_dist, d_dist, p_dist, round(hist3, 2), hist1))
    # All six values go to the % operator as one tuple; n2 is already a
    # similarity and is printed as-is.
    print("aHash: %.2f, dHash: %.2f, pHash: %.2f, 三直方图: %.2f, 单通道: %.2f, ssim: %.2f" %
          (n1, n2, n3, round(hist3, 2), hist1, ssim1))
if __name__ == "__main__":
    # No crop sizes were given for this pair; 0, 0 compares the full images.
    # NOTE(review): confirm that no cropping is what was intended here.
    runAllImageSimilaryFun("target/category/35.png", "target/category/36.png", 0, 0)
    runAllImageSimilaryFun("target/category/34.png", "target/grouped/34/107.png", 3, 3)