-
Notifications
You must be signed in to change notification settings - Fork 1
/
create_es_index.py
80 lines (63 loc) · 1.9 KB
/
create_es_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import hashlib
import os
from PIL import ImageFile
from elasticsearch import Elasticsearch
from gevent.pool import Pool, joinall
from redis import Redis
from image_match.elasticsearch_driver import SignatureES
from local_config import IMAGE_PATHS
# Module-level setup: everything below runs at import time and creates the
# shared clients used by the functions in this file.

# Ask PIL to load truncated image files instead of refusing them.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Elasticsearch client built with no arguments, i.e. the library defaults
# (presumably a local node -- confirm against the deployment).
es = Elasticsearch()
print(es.cluster)
# image_match signature store backed by the Elasticsearch client above.
ses = SignatureES(es)
# Redis client with library defaults; insert_es uses it to remember which
# paths have already been indexed.
redis_client = Redis()
# gevent pool of size 100 used by handle_one to run insert_es concurrently.
pool = Pool(100)
print('初始化完毕')
def insert_es(img_path):
    """Index one image into Elasticsearch unless it was indexed before.

    The path is skipped when ``validate_format`` rejects it or when Redis
    already holds a key for the path's MD5 (see ``get_md5``). On success the
    key is stored in Redis so later runs skip the file.

    Args:
        img_path: Path of the candidate image file.

    Returns:
        None. Failures from ``ses.add_image`` are reported on stdout and
        otherwise ignored, so one bad file does not stop the crawl.
    """
    if not validate_format(img_path):
        return

    file_hash = get_md5(img_path)
    if redis_client.get(file_hash):
        # Already recorded as indexed.
        return

    try:
        ses.add_image(img_path)
    except Exception as e:
        # Deliberately broad: indexing is best-effort per file. The original
        # built this message but never emitted it, hiding every failure.
        print('图片入库异常,路径:{},异常:{}'.format(img_path, str(e)))
    else:
        print('入库完成:%s' % img_path)
        # Mark as done only after a successful insert so failures are retried.
        redis_client.set(file_hash, 1)
def get_md5(file_path):
    """Return the hex MD5 digest of the *path string* encoded as UTF-8.

    Note that the file's contents are not read; the digest identifies the
    path text only.
    """
    encoded_path = file_path.encode('utf-8')
    return hashlib.md5(encoded_path).hexdigest()
def validate_format(image_path):
    """Return True if *image_path* has a supported image file extension.

    Only the name is inspected (case-insensitively); the file is not opened,
    so a non-image file with an image extension still passes.

    Args:
        image_path: Path (or bare file name) to check.

    Returns:
        True for ``.jpg``, ``.png``, ``.gif``, ``.jpeg`` and ``.bmp`` files,
        False otherwise, including names that have no extension at all.
    """
    # splitext yields '' when there is no extension, so a file literally
    # named "jpg" is rejected; split('.')[-1] used to return the whole name
    # and accept it.
    extension = os.path.splitext(image_path)[1].lower()
    return extension in ('.jpg', '.png', '.gif', '.jpeg', '.bmp')
# return file_md5
def handle_one(path):
    """Walk *path* recursively and schedule ``insert_es`` for every file.

    Each file found by ``os.walk`` is handed to the module-level ``pool``;
    the function returns once all spawned greenlets have finished.
    """
    print('处理根目录:%s' % path)
    greenlets = []
    for current_dir, _subdirs, file_names in os.walk(path):
        # One greenlet per file; extension filtering happens in insert_es.
        greenlets.extend(
            pool.spawn(insert_es, os.path.join(current_dir, name))
            for name in file_names
        )
    joinall(greenlets)
def main():
    """Process every root directory listed in ``IMAGE_PATHS``, in order."""
    print('启动程序')
    for root_path in IMAGE_PATHS:
        handle_one(root_path)
# Script entry point: run the crawl only when executed directly.
if __name__ == '__main__':
    # Left disabled: flushall() would clear Redis, including the
    # already-indexed markers written by insert_es.
    # redis_client.flushall()
    main()