diff --git a/.directory b/.directory index a20d0f7..744bba8 100644 --- a/.directory +++ b/.directory @@ -1,3 +1,3 @@ [Dolphin] -Timestamp=2019,4,25,22,44,42 +Timestamp=2019,5,17,5,4,4 Version=4 diff --git a/CHKIPCamera.py b/CHKIPCamera.py deleted file mode 100644 index 0540a04..0000000 --- a/CHKIPCamera.py +++ /dev/null @@ -1,23 +0,0 @@ -import numpy as np -import ctypes as C - -lib = C.cdll.LoadLibrary('libHKCamera_v4.so') - -class HKIPCamera(object): - - def __init__(self, ip, port, name, password): - self.obj = lib.HKIPCamera_init(ip, port, name, password) - - def start(self): - lib.HKIPCamera_start(self.obj) - - def stop(self): - lib.HKIPCamera_stop(self.obj) - - def frame(self, rows=1080, cols=1920): - res = np.zeros(dtype=np.uint8, shape=(rows, cols, 3)) - - lib.HKIPCamera_frame(self.obj, rows, cols, - res.ctypes.data_as(C.POINTER(C.c_ubyte))) - - return res diff --git a/CXMIPCamera.py b/CXMIPCamera.py deleted file mode 100644 index e49932f..0000000 --- a/CXMIPCamera.py +++ /dev/null @@ -1,23 +0,0 @@ -import numpy as np -import ctypes as C - -lib = C.cdll.LoadLibrary('libXMCamera_v4.so') - -class XMIPCamera(object): - - def __init__(self, ip, port, name, password): - self.obj = lib.XMIPCamera_init(ip, port, name, password) - - def start(self): - lib.XMIPCamera_start(self.obj) - - def stop(self): - lib.XMIPCamera_stop(self.obj) - - def frame(self, rows=1080, cols=1920): - res = np.zeros(dtype=np.uint8, shape=(rows, cols, 3)) - - lib.XMIPCamera_frame(self.obj, rows, cols, - res.ctypes.data_as(C.POINTER(C.c_ubyte))) - - return res diff --git a/Embedding/danger.npy b/Embedding/danger.npy new file mode 100644 index 0000000..ad54041 Binary files /dev/null and b/Embedding/danger.npy differ diff --git a/Embedding/safe.npy b/Embedding/safe.npy new file mode 100644 index 0000000..efcbdff Binary files /dev/null and b/Embedding/safe.npy differ diff --git a/Embedding/train.npy b/Embedding/train.npy new file mode 100644 index 0000000..bc38c13 Binary files /dev/null and 
b/Embedding/train.npy differ diff --git a/README.md b/README.md index b96457c..09f5e62 100644 --- a/README.md +++ b/README.md @@ -36,77 +36,7 @@ Please check [Dataset-Zoo](https://github.com/deepinsight/insightface/wiki/Datas * Please check *src/data/face2rec2.py* on how to build a binary face dataset. Any public available *MTCNN* can be used to align the faces, and the performance should not change. We will improve the face normalisation step by full pose alignment methods recently. -### Train - -1. Install `MXNet` with GPU support (Python 2.7). - -``` -pip install mxnet-cu90 -``` - -2. Clone the InsightFace repository. We call the directory insightface as *`INSIGHTFACE_ROOT`*. - -``` -git clone --recursive https://github.com/deepinsight/insightface.git -``` - -3. Download the training set (`MS1M-Arcface`) and place it in *`$INSIGHTFACE_ROOT/datasets/`*. Each training dataset includes at least following 6 files: - -```Shell - faces_emore/ - train.idx - train.rec - property - lfw.bin - cfp_fp.bin - agedb_30.bin -``` - -The first three files are the training dataset while the last three files are verification sets. - -4. Train deep face recognition models. -In this part, we assume you are in the directory *`$INSIGHTFACE_ROOT/recognition/`*. -```Shell -export MXNET_CPU_WORKER_NTHREADS=24 -export MXNET_ENGINE_TYPE=ThreadedEnginePerDevice -``` - -Place and edit config file: -```Shell -cp sample_config.py config.py -vim config.py # edit dataset path etc.. -``` - -We give some examples below. Our experiments were conducted on the Tesla P40 GPU. - -(1). Train ArcFace with LResNet100E-IR. - -```Shell -CUDA_VISIBLE_DEVICES='0,1,2,3' python -u train.py --network r100 --loss arcface --dataset emore -``` -It will output verification results of *LFW*, *CFP-FP* and *AgeDB-30* every 2000 batches. You can check all options in *config.py*. -This model can achieve *LFW 99.80+* and *MegaFace 98.3%+*. - -(2). Train CosineFace with LResNet50E-IR. 
- -```Shell -CUDA_VISIBLE_DEVICES='0,1,2,3' python -u train.py --network r50 --loss cosface --dataset emore -``` - -(3). Train Softmax with LMobileNet-GAP. - -```Shell -CUDA_VISIBLE_DEVICES='0,1,2,3' python -u train.py --network m1 --loss softmax --dataset emore -``` - -(4). Fine-turn the above Softmax model with Triplet loss. - -```Shell -CUDA_VISIBLE_DEVICES='0,1,2,3' python -u train.py --network m1 --loss triplet --lr 0.005 --pretrained ./models/m1-softmax-emore,1 -``` - - -5. Verification results. +### Verification results. *LResNet100E-IR* network trained on *MS1M-Arcface* dataset with ArcFace loss: diff --git a/face_detector.py b/face_detector.py index d389e62..f5bae1d 100644 --- a/face_detector.py +++ b/face_detector.py @@ -5,47 +5,44 @@ import mxnet as mx import cv2 import time -from mtcnn_detector import MtcnnDetector - -import face_preprocess -import face_image +from retinaface import RetinaFace +from face_preprocess import preprocess class DetectorModel: def __init__(self, args): - ctx = mx.cpu() if args.gpu == -1 else mx.gpu(args.gpu) - mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model') - + self.detector = RetinaFace(args.retina_model, 0, args.gpu, 'net3') + self.threshold = args.threshold + self.scales = args.scales self.max_face_number = args.max_face_number - self.face_counter = 0 - self.detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1, - minsize=args.mtcnn_minsize, factor=args.mtcnn_factor, - accurate_landmark=True, threshold=args.mtcnn_threshold) + self.counter = 0 + self.image_size = args.image_size - def get_all_boxes(self, face_img, save_img=False): - face_num = self.max_face_number - ret = self.detector.detect_face(face_img, det_type=0) - if ret is None: - return [] + def save_image(self, image): + cv2.imwrite('./Temp/{}-{}.jpg'.format(time.time(), self.counter), + image) + self.counter += 1 - bbox, points = ret + def get_all_boxes(self, img, save_img=False): + faces, landmarks = self.detector.detect(img, 
+ self.threshold, + scales=self.scales) - sorted_index = bbox[:, 0].argsort() - bbox = bbox[sorted_index] - points = points[sorted_index] + sorted_index = faces[:, 0].argsort() + faces = faces[sorted_index] + landmarks = landmarks[sorted_index] aligned = [] - for index in range(0, len(bbox[:face_num])): - item_bbox = bbox[index, 0:4] - item_points = points[index, :].reshape((2, 5)).T - nimg = face_preprocess.preprocess( - face_img, item_bbox, item_points, image_size='112,112') + # print('find', faces.shape[0], 'faces') + for i in range(len(faces[:self.max_face_number])): + nimg = preprocess(img, + faces[i], + landmarks[i], + image_size=self.image_size) if save_img: - cv2.imwrite('./Temp/{}-{}.jpg'. - format(time.time(), self.face_counter), nimg) - self.face_counter += 1 + self.save_image(nimg) aligned.append(nimg) - return zip(aligned, bbox) + return zip(aligned, faces) \ No newline at end of file diff --git a/face_embedding.py b/face_embedding.py index 29d8b06..0d2f10f 100644 --- a/face_embedding.py +++ b/face_embedding.py @@ -6,10 +6,7 @@ import cv2 import time import sklearn -from time import sleep - import face_preprocess -import face_image def get_model(ctx, image_size, model_str, layer): @@ -17,10 +14,10 @@ def get_model(ctx, image_size, model_str, layer): assert len(_vec) == 2 prefix = _vec[0] epoch = int(_vec[1]) - print('loading', prefix, epoch) + # print('loading', prefix, epoch) sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) all_layers = sym.get_internals() - sym = all_layers[layer+'_output'] + sym = all_layers[layer + '_output'] model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))]) model.set_params(arg_params, aux_params) @@ -34,23 +31,88 @@ def __init__(self, args): assert len(_vec) == 2 image_size = (int(_vec[0]), int(_vec[1])) - self.model = get_model(ctx, image_size, args.model, 'fc1') + self.model = get_model(ctx, image_size, 
args.arcface_model, 'fc1') self.face_counter = 0 - - def get_feature(self, aligned, from_disk=False): + def get_one_feature(self, aligned, from_disk=True): if from_disk: - aligned = np.transpose(cv2.cvtColor( - aligned, cv2.COLOR_BGR2RGB), (2, 0, 1)) + aligned = np.transpose(cv2.cvtColor(aligned, cv2.COLOR_BGR2RGB), + (2, 0, 1)) input_blob = np.expand_dims(aligned, axis=0) data = mx.nd.array(input_blob) - db = mx.io.DataBatch(data=(data,)) + db = mx.io.DataBatch(data=(data, )) self.model.forward(db, is_train=False) embedding = self.model.get_outputs()[0].asnumpy() embedding = sklearn.preprocessing.normalize(embedding).flatten() return embedding + def get_features_from_path(self, img_paths): + result = [] + for counter, path in enumerate(img_paths): + # print(type(len(img_paths))) + # print(type(counter)) + nimg = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB) + aligned = np.transpose(nimg, (2, 0, 1)) + embedding = None + for flipid in [0, 1]: + if flipid == 1 and self.args.flip == 1: + do_flip(aligned) + + input_blob = np.expand_dims(aligned, axis=0) + data = mx.nd.array(input_blob) + db = mx.io.DataBatch(data=(data, )) + self.model.forward(db, is_train=False) + _embedding = self.model.get_outputs()[0].asnumpy() + # print(_embedding.shape) + if embedding is None: + embedding = _embedding + else: + embedding += _embedding + embedding = sklearn.preprocessing.normalize(embedding).flatten() + result.append(embedding) + # print() + print('特征转换已完成%2f%%' % (counter * 100 / len(img_paths))) + return result + + def get_feature_from_raw(self, face_img): + # face_img is bgr image + ret = self.detector.detect_face_limited(face_img, + det_type=self.args.det) + if ret is None: + return None + bbox, points = ret + if bbox.shape[0] == 0: + return None + bbox = bbox[0, 0:4] + points = points[0, :].reshape((2, 5)).T + # print(bbox) + # print(points) + nimg = face_preprocess.preprocess(face_img, + bbox, + points, + image_size='112,112') - def get_one_feature(self, nimg): - return 
self.get_feature( - np.transpose(cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB), (2, 0, 1))) + # cv2.imshow(' ', nimg) + # cv2.waitKey(0) + + nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB) + aligned = np.transpose(nimg, (2, 0, 1)) + # print(nimg.shape) + embedding = None + for flipid in [0, 1]: + if flipid == 1: + if self.args.flip == 0: + break + do_flip(aligned) + input_blob = np.expand_dims(aligned, axis=0) + data = mx.nd.array(input_blob) + db = mx.io.DataBatch(data=(data, )) + self.model.forward(db, is_train=False) + _embedding = self.model.get_outputs()[0].asnumpy() + # print(_embedding.shape) + if embedding is None: + embedding = _embedding + else: + embedding += _embedding + embedding = sklearn.preprocessing.normalize(embedding).flatten() + return embedding diff --git a/face_model.py b/face_model.py deleted file mode 100644 index 688a9f2..0000000 --- a/face_model.py +++ /dev/null @@ -1,326 +0,0 @@ -from scipy import misc -import sys -import os -import argparse -import numpy as np -import mxnet as mx -import random -import cv2 -import time -import sklearn -from sklearn.decomposition import PCA -from time import sleep -from easydict import EasyDict as edict -from mtcnn_detector import MtcnnDetector - -import face_preprocess -import face_image - - -def do_flip(data): - for idx in range(data.shape[0]): - data[idx, :, :] = np.fliplr(data[idx, :, :]) - - -def get_model(ctx, image_size, model_str, layer): - _vec = model_str.split(',') - assert len(_vec) == 2 - prefix = _vec[0] - epoch = int(_vec[1]) - print('loading', prefix, epoch) - sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) - all_layers = sym.get_internals() - sym = all_layers[layer+'_output'] - model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) - #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))]) - model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))]) - 
model.set_params(arg_params, aux_params) - return model - - -class FaceModel: - def __init__(self, args): - self.args = args - ctx = mx.cpu() if args.gpu == -1 else mx.gpu(args.gpu) - #if args.gpu == -1: - # ctx = mx.cpu() - #else: - # ctx = mx.gpu(args.gpu) - _vec = args.image_size.split(',') - assert len(_vec) == 2 - image_size = (int(_vec[0]), int(_vec[1])) - - self.max_face_number = args.max_face_number - self.threshold = args.threshold - self.image_size = image_size - - if args.only_detector: - self.det_minsize = args.mtcnn_minsize - self.det_threshold = args.mtcnn_threshold - self.det_factor = args.mtcnn_factor - mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model') - if args.det == 0: - detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1, - minsize=self.det_minsize, factor=self.det_factor, - accurate_landmark=True, threshold=self.det_threshold) - else: - detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, - num_worker=1, accurate_landmark=True, threshold=[0.0, 0.0, 0.2]) - self.detector = detector - else: - self.model = None - if len(args.model) > 0: - self.model = get_model(ctx, image_size, args.model, 'fc1') - - self.face_counter = 0 - - # def __init__(self, args): - # self.args = args - # if args.gpu == -1: - # ctx = mx.cpu() - # else: - # ctx = mx.gpu(args.gpu) - # _vec = args.image_size.split(',') - # assert len(_vec) == 2 - # image_size = (int(_vec[0]), int(_vec[1])) - # self.model = None - # if len(args.model) > 0: - # self.model = get_model(ctx, image_size, args.model, 'fc1') - - # self.max_face_number = args.max_face_number - # self.threshold = args.threshold - # self.det_minsize = args.mtcnn_minsize - # self.det_threshold = args.mtcnn_threshold - # self.det_factor = args.mtcnn_factor - # self.image_size = image_size - # mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model') - # if args.det == 0: - # detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1, - # 
minsize=self.det_minsize, factor=self.det_factor, - # accurate_landmark=True, threshold=self.det_threshold) - # else: - # detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, - # num_worker=1, accurate_landmark=True, threshold=[0.0, 0.0, 0.2]) - # self.detector = detector - # self.face_counter = 0 - - def get_input(self, face_img): - ret = self.detector.detect_face(face_img, det_type=self.args.det) - if ret is None: - return None - bbox, points = ret - if bbox.shape[0] == 0: - return None - bbox = bbox[0, 0:4] - points = points[0, :].reshape((2, 5)).T - # print(bbox) - # print(points) - nimg = face_preprocess.preprocess( - face_img, bbox, points, image_size='112,112') - nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB) - aligned = np.transpose(nimg, (2, 0, 1)) - return aligned - - def get_all_input(self, face_img, save_img=False): - face_num = self.max_face_number - ret = self.detector.detect_face(face_img, det_type=self.args.det) - if ret is None: - return [] - - bbox, points = ret - - sorted_index = bbox[:, 0].argsort() - bbox = bbox[sorted_index] - points = points[sorted_index] - - # print(bbox) - # print(points) - - if bbox.shape[0] == 0: - return None - - aligned = [] - for index in range(0, len(bbox[:face_num])): - item_bbox = bbox[index, 0:4] - item_points = points[index, :].reshape((2, 5)).T - # print(bbox) - # print(points) - nimg = face_preprocess.preprocess( - face_img, item_bbox, item_points, image_size='112,112') - - if save_img: - cv2.imwrite('./Temp/{}-{}.jpg'.format(time.time(), - self.face_counter), nimg) - # print(self.face_counter) - self.face_counter += 1 - - nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB) - aligned.append(np.transpose(nimg, (2, 0, 1))) - - # print(aligned) - return zip(aligned, bbox) - - def get_feature(self, aligned, from_disk=False): - if from_disk: - aligned = np.transpose(cv2.cvtColor( - aligned, cv2.COLOR_BGR2RGB), (2, 0, 1)) - input_blob = np.expand_dims(aligned, axis=0) - data = mx.nd.array(input_blob) - db = 
mx.io.DataBatch(data=(data,)) - self.model.forward(db, is_train=False) - embedding = self.model.get_outputs()[0].asnumpy() - embedding = sklearn.preprocessing.normalize(embedding).flatten() - return embedding - - def get_all_boxes(self, face_img, save_img=False): - face_num = self.max_face_number - ret = self.detector.detect_face(face_img, det_type=self.args.det) - if ret is None: - return [] - - bbox, points = ret - - sorted_index = bbox[:, 0].argsort() - bbox = bbox[sorted_index] - points = points[sorted_index] - - aligned = [] - for index in range(0, len(bbox[:face_num])): - item_bbox = bbox[index, 0:4] - item_points = points[index, :].reshape((2, 5)).T - nimg = face_preprocess.preprocess( - face_img, item_bbox, item_points, image_size='112,112') - - if save_img: - cv2.imwrite('./Temp/{}-{}.jpg'. - format(time.time(), self.face_counter), nimg) - self.face_counter += 1 - - aligned.append(nimg) - - return zip(aligned, bbox) - - def get_all_features(self, face_img, save_img=False): - face_num = self.max_face_number - ret = self.detector.detect_face(face_img, det_type=self.args.det) - if ret is None: - return [] - - bbox, points = ret - - sorted_index = bbox[:, 0].argsort() - bbox = bbox[sorted_index] - points = points[sorted_index] - - # print(bbox) - # print(points) - - if bbox.shape[0] == 0: - return None - - aligned = [] - features = [] - for index in range(0, len(bbox[:face_num])): - item_bbox = bbox[index, 0:4] - item_points = points[index, :].reshape((2, 5)).T - # print(bbox) - # print(points) - nimg = face_preprocess.preprocess( - face_img, item_bbox, item_points, image_size='112,112') - - if save_img: - cv2.imwrite('./Temp/{}-{}.jpg'.format(time.time(), - self.face_counter), nimg) - # print(self.face_counter) - self.face_counter += 1 - aligned.append(nimg) - - features.append(self.get_feature( - np.transpose( - cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB), (2, 0, 1)))) - - # print(aligned) - return zip(aligned, features, bbox) - - def get_one_feature(self, nimg): 
- return self.get_feature( - np.transpose(cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB), (2, 0, 1))) - - def get_features_from_path(self, img_paths): - result = [] - for counter, path in enumerate(img_paths): - # print(type(len(img_paths))) - # print(type(counter)) - nimg = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB) - aligned = np.transpose(nimg, (2, 0, 1)) - embedding = None - for flipid in [0, 1]: - if flipid == 1 and self.args.flip == 1: - do_flip(aligned) - - input_blob = np.expand_dims(aligned, axis=0) - data = mx.nd.array(input_blob) - db = mx.io.DataBatch(data=(data,)) - self.model.forward(db, is_train=False) - _embedding = self.model.get_outputs()[0].asnumpy() - # print(_embedding.shape) - if embedding is None: - embedding = _embedding - else: - embedding += _embedding - embedding = sklearn.preprocessing.normalize(embedding).flatten() - result.append(embedding) - # print() - print('特征转换已完成%2f%%' % (counter*100/len(img_paths))) - return result - - def get_feature_from_raw(self, face_img): - # face_img is bgr image - ret = self.detector.detect_face_limited( - face_img, det_type=self.args.det) - if ret is None: - return None - bbox, points = ret - if bbox.shape[0] == 0: - return None - bbox = bbox[0, 0:4] - points = points[0, :].reshape((2, 5)).T - # print(bbox) - # print(points) - nimg = face_preprocess.preprocess( - face_img, bbox, points, image_size='112,112') - - # cv2.imshow(' ', nimg) - # cv2.waitKey(0) - - nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB) - aligned = np.transpose(nimg, (2, 0, 1)) - # print(nimg.shape) - embedding = None - for flipid in [0, 1]: - if flipid == 1: - if self.args.flip == 0: - break - do_flip(aligned) - input_blob = np.expand_dims(aligned, axis=0) - data = mx.nd.array(input_blob) - db = mx.io.DataBatch(data=(data,)) - self.model.forward(db, is_train=False) - _embedding = self.model.get_outputs()[0].asnumpy() - # print(_embedding.shape) - if embedding is None: - embedding = _embedding - else: - embedding += _embedding - embedding = 
sklearn.preprocessing.normalize(embedding).flatten() - return embedding - -# frame = cv2.imread('./t1.jpg') -# aligned = arcface.get_all_input(frame) - -# for face in aligned: -# f1 = arcface.get_feature(face) -# print(f1[:10]) - -# [h, w] = frame.shape[:2] -# "%s (%d, %d)" % (ip_address, w, h) diff --git a/face_preprocess.py b/face_preprocess.py index b6a70e8..134038d 100644 --- a/face_preprocess.py +++ b/face_preprocess.py @@ -70,6 +70,7 @@ def preprocess(img, bbox=None, landmark=None, **kwargs): tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2, :] + #M = cv2.estimateRigidTransform( dst.reshape(1,5,2), src.reshape(1,5,2), False) if M is None: diff --git a/helper.py b/helper.py index 46523bf..adf0f78 100644 --- a/helper.py +++ b/helper.py @@ -1,4 +1,5 @@ # coding: utf-8 +import base64 import math import cv2 import pickle @@ -7,6 +8,7 @@ import os import sys from CImageName import ImageName +from scipy.spatial import distance as dist def nms(boxes, overlap_threshold, mode='Union'): @@ -65,8 +67,9 @@ def nms(boxes, overlap_threshold, mode='Union'): overlap = inter / (area[i] + area[idxs[:last]] - inter) # delete all indexes from the index list that have - idxs = np.delete(idxs, np.concatenate(([last], - np.where(overlap > overlap_threshold)[0]))) + idxs = np.delete( + idxs, + np.concatenate(([last], np.where(overlap > overlap_threshold)[0]))) return pick @@ -91,7 +94,7 @@ def adjust_input(in_data): out_data = out_data.transpose((2, 0, 1)) out_data = np.expand_dims(out_data, 0) - out_data = (out_data - 127.5)*0.0078125 + out_data = (out_data - 127.5) * 0.0078125 return out_data @@ -125,12 +128,12 @@ def generate_bbox(map, reg, scale, threshold): reg = np.array([dx1, dy1, dx2, dy2]) score = map[t_index[0], t_index[1]] - boundingbox = np.vstack([np.round((stride*t_index[1]+1)/scale), - np.round((stride*t_index[0]+1)/scale), - np.round((stride*t_index[1]+1+cellsize)/scale), - np.round((stride*t_index[0]+1+cellsize)/scale), - score, - 
reg]) + boundingbox = np.vstack([ + np.round((stride * t_index[1] + 1) / scale), + np.round((stride * t_index[0] + 1) / scale), + np.round((stride * t_index[1] + 1 + cellsize) / scale), + np.round((stride * t_index[0] + 1 + cellsize) / scale), score, reg + ]) return boundingbox.T @@ -189,36 +192,46 @@ def start_up_init(): # if not train_mode: # parser.add_argument('ip_address', type=str, # help='相机的IP地址或测试用视频文件名') - # parser.add_argument('--face_recognize_threshold', type=float, - # help='可疑人员识别阈值', default=0.95) - parser.add_argument('--max_face_number', type=int, - help='同时检测的最大人脸数量', default=8) - parser.add_argument('--max_frame_rate', type=int, - help='最大FPS', default=25) - parser.add_argument('--image-size', default='112,112', + parser.add_argument('--max_face_number', + type=int, + help='同时检测的最大人脸数量', + default=8) + parser.add_argument('--max_frame_rate', type=int, help='最大FPS', default=25) + parser.add_argument('--image_size', + default='112,112', help='输入特征提取网络的图片大小') - # parser.add_argument('--dangerous_threshold', type=int, - # help='1/2报警窗口长度', default=16) - parser.add_argument('--model', default='./model-r100-ii/arcface,0', + parser.add_argument('--arcface_model', + default='./model/arcface, 0', help='特征提取网络预训练模型路径') - parser.add_argument('--gpu', default=0, type=int, - help='GPU设备ID,-1代表使用CPU') - parser.add_argument('--det', default=0, type=int, + parser.add_argument('--retina_model', + default='./model/R50', + help='人脸检测网络预训练模型路径') + parser.add_argument('--classification', + default='./model/mlp.pkl', + help='人脸识别分类器模型路径') + parser.add_argument('--gpu', default=0, type=int, help='GPU设备ID,-1代表使用CPU') + parser.add_argument('--det', + default=0, + type=int, help='设置为1代表使用R+O网络进行检测, 0代表使用P+R+O进行检测') - parser.add_argument('--flip', default=1, type=int, - help='是否在训练时进行左右翻转相加操作') - parser.add_argument('--threshold', default=1.24, type=float, - help='空间向量距离阈值') + parser.add_argument('--flip', default=1, type=int, help='是否在训练时进行左右翻转相加操作') + 
parser.add_argument('--threshold', + default=0.7, + type=float, + help='RetinaNet的人脸检测阈值') + parser.add_argument('--embedding_threshold', + default=0.97, + type=float, + help='需要进行特征提取的人脸可信度阈值') # parser.add_argument('-v', '--video_mode', action="store_true", # help='设置从视频读取帧数据', default=False) # parser.add_argument('-c', '--cv_test_mode', action="store_true", # help='设置本地预览', default=False) - parser.add_argument('--mtcnn_minsize', type=int, - help='mtcnn最小检测框的尺寸(越小检测精度越高)', default=48) - parser.add_argument('--mtcnn_factor', type=float, - help='mtcnn图像缩放系数(关联图像金字塔层数,越大检测精度越高)', default=0.809) - parser.add_argument('--mtcnn_threshold', type=float, nargs='+', - help='mtcnn三层阈值', default=[0.6, 0.7, 0.92]) + parser.add_argument('--scales', + type=float, + nargs='+', + help='RetinaNet的图像缩放系数', + default=[0.6]) return parser.parse_args() @@ -234,8 +247,10 @@ def get_image_paths(facedir): def get_dataset(path, has_class_directories=True): dataset = [] path_exp = os.path.expanduser(path) - classes = [path for path in os.listdir(path_exp) - if os.path.isdir(os.path.join(path_exp, path))] + classes = [ + path for path in os.listdir(path_exp) + if os.path.isdir(os.path.join(path_exp, path)) + ] classes.sort() nrof_classes = len(classes) for i in range(nrof_classes): @@ -269,3 +284,77 @@ def load_data(image_paths): def encode_image(image, quality=80): encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality] return cv2.imencode('.jpg', image, encode_param)[1].tostring() + + +def draw_points(image, poi, margin=5, color=[255, 255, 0]): + for index in range(5): + image[poi[index, 1] - margin:poi[index, 1] + margin, poi[index, 0] - + margin:poi[index, 0] + margin] = color + + +K = [ + 6.5308391993466671e+002, 0.0, 3.1950000000000000e+002, 0.0, + 6.5308391993466671e+002, 2.3950000000000000e+002, 0.0, 0.0, 1.0 +] +D = [ + 7.0834633684407095e-002, 6.9140193737175351e-002, 0.0, 0.0, + -1.3073460323689292e+000 +] + +cam_matrix = np.array(K).reshape(3, 3).astype(np.float32) +dist_coeffs 
= np.array(D).reshape(5, 1).astype(np.float32) + +object_pts = np.float32([[6.825897, 6.760612, 4.402142], + [1.330353, 7.122144, 6.903745], + [-1.330353, 7.122144, 6.903745], + [-6.825897, 6.760612, 4.402142], + [5.311432, 5.485328, 3.987654], + [1.789930, 5.393625, 4.413414], + [-1.789930, 5.393625, 4.413414], + [-5.311432, 5.485328, 3.987654], + [2.005628, 1.409845, 6.165652], + [-2.005628, 1.409845, 6.165652], + [2.774015, -2.080775, 5.048531], + [-2.774015, -2.080775, 5.048531], + [0.000000, -3.116408, 6.097667], + [0.000000, -7.415691, 4.070434]]) + +reprojectsrc = np.float32([[10.0, 10.0, 10.0], [10.0, 10.0, -10.0], + [10.0, -10.0, -10.0], [10.0, -10.0, 10.0], + [-10.0, 10.0, 10.0], [-10.0, 10.0, -10.0], + [-10.0, -10.0, -10.0], [-10.0, -10.0, 10.0]]) + +line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], + [0, 4], [1, 5], [2, 6], [3, 7]] + + +def get_head_pose(shape): + image_pts = np.float32([ + shape[17], shape[21], shape[22], shape[26], shape[36], shape[39], + shape[42], shape[45], shape[31], shape[35], shape[48], shape[54], + shape[57], shape[8] + ]) + + _, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, + cam_matrix, dist_coeffs) + + reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, + translation_vec, cam_matrix, + dist_coeffs) + + reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2))) + + # calc euler angle + rotation_mat, _ = cv2.Rodrigues(rotation_vec) + pose_mat = cv2.hconcat((rotation_mat, translation_vec)) + _, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat) + + return reprojectdst, euler_angle + + +def eye_aspect_ratio(eye): + A = dist.euclidean(eye[1], eye[5]) + B = dist.euclidean(eye[2], eye[4]) + C = dist.euclidean(eye[0], eye[3]) + ear = (A + B) / (2.0 * C) + return ear diff --git a/mikomiko_hk_v2.py b/mikomiko_hk_v3.py similarity index 77% rename from mikomiko_hk_v2.py rename to mikomiko_hk_v3.py index f30ec9b..496e61b 100644 --- a/mikomiko_hk_v2.py +++ 
b/mikomiko_hk_v3.py @@ -1,22 +1,22 @@ # coding: utf-8 -import face_embedding -import face_detector import cv2 import os -import sys import numpy as np import time from termcolor import colored from helper import read_pkl_model, start_up_init, encode_image -import asyncio from multiprocessing import Process, Queue +import asyncio import socketio -from CHKIPCamera import HKIPCamera +import IPCamera.interface as ipc +import face_embedding +import face_detector async def upload_loop(url="http://127.0.0.1:6789"): # =====================Uploader Setsup======================== sio = socketio.AsyncClient() + @sio.on('response', namespace='/remilia') async def on_response(data): current_address, upload_frame = upstream_frame_queue.get() @@ -28,13 +28,16 @@ async def on_response(data): await sio.emit('frame_data', image_string, namespace='/remilia') try: img, dt, prob, name = result_queue.get_nowait() - result_string = {'image': encode_image(img), - 'time': dt, 'name': name, 'prob': prob} + result_string = { + 'image': encode_image(img), + 'time': dt, + 'name': name, + 'prob': prob + } await sio.emit('result_data', result_string, namespace='/remilia') except Exception as e: pass # print(mid_time-strat_time, time.time()-mid_time) - # sys.stdout.flush() @sio.on('connect', namespace='/remilia') async def on_connect(): @@ -46,23 +49,23 @@ async def on_connect(): async def embedding_loop(preload): # =================== FR MODEL ==================== - mlp, class_names = read_pkl_model('./model-mlp/mlp.pkl') - preload.gpu = -1 + mlp, class_names = read_pkl_model(preload.classification) embedding = face_embedding.EmbeddingModel(preload) while True: img = suspicion_face_queue.get() dt = time.strftime('%m-%d %H:%M:%S') - predict = mlp.predict_proba([embedding.get_one_feature(img)]) prob = predict.max(1)[0] - - result_queue.put((img, dt, prob, class_names[predict.argmax(1)[0]])) + name = class_names[predict.argmax(1)[0]] + result_queue.put((img, dt, prob, name)) + # [[0.30044544 
0.31831665 0.30363247 0.07760544]] async def detection_loop(preload, frame_queue): # =================== FD MODEL ==================== detector = face_detector.DetectorModel(preload) ip_address = preload.ip_address + embedding_threshold = preload.embedding_threshold loop = asyncio.get_running_loop() while True: @@ -72,25 +75,26 @@ async def detection_loop(preload, frame_queue): # tracker = cv2.MultiTracker_create() # t_box = [] for img, box in detector.get_all_boxes(head_frame, save_img=False): - try: - if box[4] > 0.98: + if box[4] > embedding_threshold: + try: suspicion_face_queue.put_nowait(img) - except Exception as e: - pass + except Exception as _: + pass - box = box.astype(int) - cv2.rectangle( - head_frame, (box[0], box[1]), (box[2], box[3]), [255, 255, 0], 2) + box = box.astype(np.int) + cv2.rectangle(head_frame, (box[0], box[1]), (box[2], box[3]), + [255, 255, 0], 2) # t_box.append(box[:4]/2) - print(colored(loop.time()-start_time, 'blue')) + # print(colored(loop.time() - start_time, 'blue')) # head_frame = cv2.resize(head_frame, (960, 540), cv2.INTER_AREA) # for item in t_box: # tracker.add(cv2.TrackerMedianFlow_create(), head_frame, tuple(item)) upstream_frame_queue.put((ip_address, head_frame)) + print(colored(loop.time() - start_time, 'red'), flush=True) - for i in range(int((loop.time() - start_time) * 25 + 1)): + for i in range(int((loop.time() - start_time) * 25)): body_frame = frame_queue.get() # ok, tricker_boxes = tracker.update(body_frame) # if ok: @@ -103,11 +107,10 @@ async def detection_loop(preload, frame_queue): # end_time = loop.time() # print(colored(loop.time()-track_time, 'red')) - sys.stdout.flush() async def camera_loop(preload): - reciprocal_of_max_frame_rate = 1/preload.max_frame_rate + reciprocal_of_max_frame_rate = 1 / preload.max_frame_rate address_dict = preload.address_dict camera_dict = {} @@ -117,9 +120,9 @@ async def camera_loop(preload): # xmcp.start() # camera_dict[address] = xmcp - from CHKIPCamera import HKIPCamera for 
address in address_dict: - hkcp = HKIPCamera(address.encode('UTF-8'), 8000, b"admin", b"humanmotion01") + hkcp = ipc.HKIPCamera(address.encode('UTF-8'), 8000, b"admin", + b"humanmotion01") hkcp.start() camera_dict[address] = hkcp @@ -129,7 +132,8 @@ async def camera_loop(preload): # =================== ETERNAL LOOP ==================== while True: start_time = loop.time() - frame_queue_231.put(camera_dict['10.41.0.231'].frame(rows=540, cols=960)) + frame_queue_231.put(camera_dict['10.41.0.231'].frame(rows=540, + cols=960)) # frame_queue_231.put(camera_dict['10.41.0.198'].frame(rows=540, cols=960)) # frame_queue_232.put(camera_dict['10.41.0.199'].frame(rows=540, cols=960)) @@ -144,11 +148,15 @@ async def camera_loop(preload): if restime > 0: await asyncio.sleep(restime) + # =================== INIT ==================== # address_dict = ['10.41.0.198', '10.41.0.199'] address_dict = ['10.41.0.231'] +# frame_queue_232 = Queue(maxsize=frame_buffer_size) +# Process(target=lambda: asyncio.run( +# detection_loop(args, frame_queue_232))).start() os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' -frame_buffer_size = 10 * len(address_dict) +frame_buffer_size = 25 * len(address_dict) upstream_frame_queue = Queue(maxsize=frame_buffer_size) suspicion_face_queue = Queue(maxsize=frame_buffer_size) result_queue = Queue(maxsize=frame_buffer_size) @@ -160,8 +168,8 @@ async def camera_loop(preload): # =================== Process On ==================== args.ip_address = '10.41.0.231' frame_queue_231 = Queue(maxsize=frame_buffer_size) -Process(target=lambda: asyncio.run( - detection_loop(args, frame_queue_231))).start() +Process( + target=lambda: asyncio.run(detection_loop(args, frame_queue_231))).start() # args.ip_address = '10.41.0.232' # frame_queue_232 = Queue(maxsize=frame_buffer_size) diff --git a/mlp_update.py b/mlp_update.py new file mode 100644 index 0000000..53a0bdd --- /dev/null +++ b/mlp_update.py @@ -0,0 +1,17 @@ +import pickle +from sklearn.preprocessing import 
label_binarize +from sklearn.neural_network import MLPClassifier +from helper import get_dataset + +data_train = './Temp/train_data' +dataset_train = get_dataset(data_train) + +class_names = [cls.name.replace('_', ' ') for cls in dataset_train] +print(class_names) + +with open('./zoo/model-mlp/updated2.pkl', 'rb') as infile: + (mlp, class_names) = pickle.load(infile) + +with open('./zoo/model-mlp/updated.pkl', 'wb') as outfile: + pickle.dump((mlp, class_names), outfile) + diff --git a/model-mlp/tongzhou_mlp.pkl b/model-mlp/tongzhou_mlp.pkl deleted file mode 100644 index f7c26b7..0000000 Binary files a/model-mlp/tongzhou_mlp.pkl and /dev/null differ diff --git a/model/M25-0000.params b/model/M25-0000.params new file mode 100644 index 0000000..252905e --- /dev/null +++ b/model/M25-0000.params @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97714773ae67259a62fbde6c23c5defb14604dd2227ce9a2a1523797ad6331ed +size 1757080 diff --git a/model/M25-symbol.json b/model/M25-symbol.json new file mode 100644 index 0000000..195ddb2 --- /dev/null +++ b/model/M25-symbol.json @@ -0,0 +1,5464 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_conv0_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(8L, 3L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv0_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "8", + "num_group": "1", + "pad": "(1, 1)", + "stride": "(2, 2)" + }, + "inputs": [[0, 0, 0], [1, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm0_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm0_beta", 
+ "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm0_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm0_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm0_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[2, 0, 0], [3, 0, 0], [4, 0, 0], [5, 0, 1], [6, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu0_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[7, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv1_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(8L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv1_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "8", + "num_group": "8", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[8, 0, 0], [9, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm1_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm1_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + 
"__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm1_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm1_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(8L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm1_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[10, 0, 0], [11, 0, 0], [12, 0, 0], [13, 0, 1], [14, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu1_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[15, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv2_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(16L, 8L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv2_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "16", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[16, 0, 0], [17, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm2_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm2_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm2_running_mean", + 
"attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm2_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm2_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[18, 0, 0], [19, 0, 0], [20, 0, 0], [21, 0, 1], [22, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu2_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[23, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv3_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(16L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv3_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "16", + "num_group": "16", + "pad": "(1, 1)", + "stride": "(2, 2)" + }, + "inputs": [[24, 0, 0], [25, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm3_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm3_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm3_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + 
"__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm3_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(16L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm3_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[26, 0, 0], [27, 0, 0], [28, 0, 0], [29, 0, 1], [30, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu3_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[31, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv4_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(32L, 16L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv4_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "32", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[32, 0, 0], [33, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm4_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm4_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm4_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"mobilenet0_batchnorm4_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm4_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[34, 0, 0], [35, 0, 0], [36, 0, 0], [37, 0, 1], [38, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu4_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[39, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv5_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(32L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv5_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "32", + "num_group": "32", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[40, 0, 0], [41, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm5_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm5_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm5_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm5_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + 
"__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm5_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[42, 0, 0], [43, 0, 0], [44, 0, 0], [45, 0, 1], [46, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu5_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[47, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv6_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(32L, 32L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv6_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "32", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[48, 0, 0], [49, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm6_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm6_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm6_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm6_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": 
"BatchNorm", + "name": "mobilenet0_batchnorm6_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[50, 0, 0], [51, 0, 0], [52, 0, 0], [53, 0, 1], [54, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu6_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[55, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv7_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(32L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv7_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "32", + "num_group": "32", + "pad": "(1, 1)", + "stride": "(2, 2)" + }, + "inputs": [[56, 0, 0], [57, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm7_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm7_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm7_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm7_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(32L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm7_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": 
"False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[58, 0, 0], [59, 0, 0], [60, 0, 0], [61, 0, 1], [62, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu7_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[63, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv8_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(64L, 32L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv8_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[64, 0, 0], [65, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm8_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm8_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm8_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm8_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm8_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[66, 0, 0], [67, 0, 0], [68, 0, 0], [69, 
0, 1], [70, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu8_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[71, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv9_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(64L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv9_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "64", + "num_group": "64", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[72, 0, 0], [73, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm9_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm9_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm9_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm9_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm9_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[74, 0, 0], [75, 0, 0], [76, 0, 0], [77, 0, 1], [78, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu9_fwd", + "attrs": {"act_type": "relu"}, + 
"inputs": [[79, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv10_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(64L, 64L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv10_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[80, 0, 0], [81, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm10_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm10_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm10_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm10_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm10_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[82, 0, 0], [83, 0, 0], [84, 0, 0], [85, 0, 1], [86, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu10_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[87, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv11_weight", + "attrs": { + 
"__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(64L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv11_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "64", + "num_group": "64", + "pad": "(1, 1)", + "stride": "(2, 2)" + }, + "inputs": [[88, 0, 0], [89, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm11_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm11_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm11_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm11_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(64L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm11_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[90, 0, 0], [91, 0, 0], [92, 0, 0], [93, 0, 1], [94, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu11_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[95, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv12_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 64L, 1L, 1L)", + "__storage_type__": "0", + 
"__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv12_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[96, 0, 0], [97, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm12_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm12_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm12_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm12_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm12_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[98, 0, 0], [99, 0, 0], [100, 0, 0], [101, 0, 1], [102, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu12_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[103, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv13_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": 
"mobilenet0_conv13_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[104, 0, 0], [105, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm13_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm13_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm13_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm13_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm13_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[106, 0, 0], [107, 0, 0], [108, 0, 0], [109, 0, 1], [110, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu13_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[111, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv14_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 128L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv14_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": 
"NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[112, 0, 0], [113, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm14_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm14_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm14_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm14_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm14_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[114, 0, 0], [115, 0, 0], [116, 0, 0], [117, 0, 1], [118, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu14_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[119, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv15_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv15_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "128", + "pad": "(1, 1)", + 
"stride": "(1, 1)" + }, + "inputs": [[120, 0, 0], [121, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm15_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm15_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm15_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm15_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm15_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[122, 0, 0], [123, 0, 0], [124, 0, 0], [125, 0, 1], [126, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu15_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[127, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv16_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 128L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv16_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[128, 0, 0], [129, 0, 0]] + }, + { + "op": "null", + "name": 
"mobilenet0_batchnorm16_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm16_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm16_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm16_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm16_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[130, 0, 0], [131, 0, 0], [132, 0, 0], [133, 0, 1], [134, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu16_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[135, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv17_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv17_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[136, 0, 0], [137, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm17_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + 
"__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm17_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm17_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm17_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm17_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[138, 0, 0], [139, 0, 0], [140, 0, 0], [141, 0, 1], [142, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu17_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[143, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv18_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 128L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv18_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[144, 0, 0], [145, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm18_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" 
+ }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm18_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm18_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm18_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm18_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[146, 0, 0], [147, 0, 0], [148, 0, 0], [149, 0, 1], [150, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu18_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[151, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv19_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv19_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[152, 0, 0], [153, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm19_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm19_beta", + "attrs": 
{ + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm19_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm19_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm19_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[154, 0, 0], [155, 0, 0], [156, 0, 0], [157, 0, 1], [158, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu19_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[159, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv20_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 128L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv20_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[160, 0, 0], [161, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm20_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm20_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + 
"__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm20_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm20_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm20_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[162, 0, 0], [163, 0, 0], [164, 0, 0], [165, 0, 1], [166, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu20_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[167, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv21_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv21_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[168, 0, 0], [169, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm21_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm21_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + 
"name": "mobilenet0_batchnorm21_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm21_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm21_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[170, 0, 0], [171, 0, 0], [172, 0, 0], [173, 0, 1], [174, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu21_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[175, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv22_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 128L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv22_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[176, 0, 0], [177, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm22_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm22_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm22_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": 
"zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm22_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm22_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[178, 0, 0], [179, 0, 0], [180, 0, 0], [181, 0, 1], [182, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu22_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[183, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv23_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(128L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv23_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "128", + "num_group": "128", + "pad": "(1, 1)", + "stride": "(2, 2)" + }, + "inputs": [[184, 0, 0], [185, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm23_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm23_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm23_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + 
"__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm23_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(128L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm23_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[186, 0, 0], [187, 0, 0], [188, 0, 0], [189, 0, 1], [190, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu23_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[191, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv24_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(256L, 128L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv24_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[192, 0, 0], [193, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm24_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm24_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm24_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"mobilenet0_batchnorm24_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm24_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[194, 0, 0], [195, 0, 0], [196, 0, 0], [197, 0, 1], [198, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu24_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[199, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv25_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(256L, 1L, 3L, 3L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv25_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(3, 3)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "256", + "num_group": "256", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[200, 0, 0], [201, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm25_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm25_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm25_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm25_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + 
"__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm25_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[202, 0, 0], [203, 0, 0], [204, 0, 0], [205, 0, 1], [206, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu25_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[207, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_conv26_weight", + "attrs": { + "__dtype__": "0", + "__lr_mult__": "1.0", + "__shape__": "(256L, 256L, 1L, 1L)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "mobilenet0_conv26_fwd", + "attrs": { + "dilate": "(1, 1)", + "kernel": "(1, 1)", + "layout": "NCHW", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[208, 0, 0], [209, 0, 0]] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm26_gamma", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm26_beta", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm26_running_mean", + "attrs": { + "__dtype__": "0", + "__init__": "zeros", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "mobilenet0_batchnorm26_running_var", + "attrs": { + "__dtype__": "0", + "__init__": "ones", + "__lr_mult__": "1.0", + "__shape__": "(256L,)", + "__storage_type__": "0", + "__wd_mult__": "1.0" 
+ }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "mobilenet0_batchnorm26_fwd", + "attrs": { + "axis": "1", + "eps": "1e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[210, 0, 0], [211, 0, 0], [212, 0, 0], [213, 0, 1], [214, 0, 1]] + }, + { + "op": "Activation", + "name": "mobilenet0_relu26_fwd", + "attrs": {"act_type": "relu"}, + "inputs": [[215, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_lateral_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_lateral_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c3_lateral", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "64", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[216, 0, 0], [217, 0, 0], [218, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_lateral_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_lateral_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_lateral_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_lateral_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c3_lateral_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[219, 0, 0], [220, 0, 0], [221, 0, 0], [222, 0, 1], [223, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c3_lateral_relu", + "attrs": {"act_type": "relu"}, + 
"inputs": [[224, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c3_det_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "32", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[225, 0, 0], [226, 0, 0], [227, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c3_det_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[228, 0, 0], [229, 0, 0], [230, 0, 0], [231, 0, 1], [232, 0, 1]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c3_det_context_conv1", + "attrs": { + "kernel": "(3, 3)", + 
"num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[225, 0, 0], [234, 0, 0], [235, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c3_det_context_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[236, 0, 0], [237, 0, 0], [238, 0, 0], [239, 0, 1], [240, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c3_det_context_conv1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[241, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c3_det_context_conv2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[242, 0, 0], [243, 0, 0], [244, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", 
+ "name": "rf_c3_det_context_conv2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c3_det_context_conv2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[245, 0, 0], [246, 0, 0], [247, 0, 0], [248, 0, 1], [249, 0, 1]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c3_det_context_conv3_1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[242, 0, 0], [251, 0, 0], [252, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_1_bn_moving_var", + 
"attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c3_det_context_conv3_1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[253, 0, 0], [254, 0, 0], [255, 0, 0], [256, 0, 1], [257, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c3_det_context_conv3_1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[258, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c3_det_context_conv3_2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[259, 0, 0], [260, 0, 0], [261, 0, 0]] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c3_det_context_conv3_2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c3_det_context_conv3_2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": 
[[262, 0, 0], [263, 0, 0], [264, 0, 0], [265, 0, 1], [266, 0, 1]] + }, + { + "op": "Concat", + "name": "rf_c3_det_concat", + "attrs": { + "dim": "1", + "num_args": "3" + }, + "inputs": [[233, 0, 0], [250, 0, 0], [267, 0, 0]] + }, + { + "op": "Activation", + "name": "rf_c3_det_concat_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[268, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride32_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride32_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_cls_score_stride32", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "4", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[269, 0, 0], [270, 0, 0], [271, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_score_reshape_stride32", + "attrs": {"shape": "(0, 2, -1, 0)"}, + "inputs": [[272, 0, 0]] + }, + { + "op": "SoftmaxActivation", + "name": "face_rpn_cls_prob_stride32", + "attrs": {"mode": "channel"}, + "inputs": [[273, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_prob_reshape_stride32", + "attrs": {"shape": "(0, 4, -1, 0)"}, + "inputs": [[274, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride32_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride32_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_bbox_pred_stride32", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "8", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[269, 0, 0], [276, 0, 0], [277, 0, 0]] + }, + { + "op": 
"null", + "name": "face_rpn_landmark_pred_stride32_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride32_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_landmark_pred_stride32", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "20", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[269, 0, 0], [279, 0, 0], [280, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_lateral_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_lateral_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c2_lateral", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "64", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[184, 0, 0], [282, 0, 0], [283, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_lateral_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_lateral_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_lateral_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_lateral_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c2_lateral_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + 
}, + "inputs": [[284, 0, 0], [285, 0, 0], [286, 0, 0], [287, 0, 1], [288, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c2_lateral_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[289, 0, 0]] + }, + { + "op": "UpSampling", + "name": "rf_c3_upsampling", + "attrs": { + "num_args": "1", + "sample_type": "nearest", + "scale": "2", + "workspace": "512" + }, + "inputs": [[225, 0, 0]] + }, + { + "op": "Crop", + "name": "crop0", + "attrs": {"num_args": "2"}, + "inputs": [[291, 0, 0], [290, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus0", + "inputs": [[290, 0, 0], [292, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_aggr_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_aggr_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c2_aggr", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "64", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[293, 0, 0], [294, 0, 0], [295, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_aggr_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_aggr_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_aggr_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_aggr_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c2_aggr_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[296, 0, 0], 
[297, 0, 0], [298, 0, 0], [299, 0, 1], [300, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c2_aggr_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[301, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_det_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c2_det_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "32", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[302, 0, 0], [303, 0, 0], [304, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_det_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c2_det_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[305, 0, 0], [306, 0, 0], [307, 0, 0], [308, 0, 1], [309, 0, 1]] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + 
"__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c2_det_context_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[302, 0, 0], [311, 0, 0], [312, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c2_det_context_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[313, 0, 0], [314, 0, 0], [315, 0, 0], [316, 0, 1], [317, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c2_det_context_conv1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[318, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c2_det_context_conv2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[319, 0, 0], [320, 0, 0], [321, 0, 0]] + }, + { + "op": "null", + "name": 
"rf_c2_det_context_conv2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c2_det_context_conv2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[322, 0, 0], [323, 0, 0], [324, 0, 0], [325, 0, 1], [326, 0, 1]] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c2_det_context_conv3_1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[319, 0, 0], [328, 0, 0], [329, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": 
"2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c2_det_context_conv3_1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[330, 0, 0], [331, 0, 0], [332, 0, 0], [333, 0, 1], [334, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c2_det_context_conv3_1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[335, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c2_det_context_conv3_2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[336, 0, 0], [337, 0, 0], [338, 0, 0]] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c2_det_context_conv3_2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + 
"op": "BatchNorm", + "name": "rf_c2_det_context_conv3_2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[339, 0, 0], [340, 0, 0], [341, 0, 0], [342, 0, 1], [343, 0, 1]] + }, + { + "op": "Concat", + "name": "rf_c2_det_concat", + "attrs": { + "dim": "1", + "num_args": "3" + }, + "inputs": [[310, 0, 0], [327, 0, 0], [344, 0, 0]] + }, + { + "op": "Activation", + "name": "rf_c2_det_concat_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[345, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride16_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride16_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_cls_score_stride16", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "4", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[346, 0, 0], [347, 0, 0], [348, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_score_reshape_stride16", + "attrs": {"shape": "(0, 2, -1, 0)"}, + "inputs": [[349, 0, 0]] + }, + { + "op": "SoftmaxActivation", + "name": "face_rpn_cls_prob_stride16", + "attrs": {"mode": "channel"}, + "inputs": [[350, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_prob_reshape_stride16", + "attrs": {"shape": "(0, 4, -1, 0)"}, + "inputs": [[351, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride16_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride16_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_bbox_pred_stride16", + "attrs": { + 
"kernel": "(1, 1)", + "num_filter": "8", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[346, 0, 0], [353, 0, 0], [354, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride16_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride16_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_landmark_pred_stride16", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "20", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[346, 0, 0], [356, 0, 0], [357, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_red_conv_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_red_conv_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c1_red_conv", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "64", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[88, 0, 0], [359, 0, 0], [360, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_red_conv_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_red_conv_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_red_conv_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_red_conv_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": 
"0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c1_red_conv_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[361, 0, 0], [362, 0, 0], [363, 0, 0], [364, 0, 1], [365, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c1_red_conv_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[366, 0, 0]] + }, + { + "op": "UpSampling", + "name": "rf_c2_upsampling", + "attrs": { + "num_args": "1", + "sample_type": "nearest", + "scale": "2", + "workspace": "512" + }, + "inputs": [[302, 0, 0]] + }, + { + "op": "Crop", + "name": "crop1", + "attrs": {"num_args": "2"}, + "inputs": [[368, 0, 0], [367, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus1", + "inputs": [[367, 0, 0], [369, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_aggr_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_aggr_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c1_aggr", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "64", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[370, 0, 0], [371, 0, 0], [372, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_aggr_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_aggr_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_aggr_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_aggr_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": 
[] + }, + { + "op": "BatchNorm", + "name": "rf_c1_aggr_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[373, 0, 0], [374, 0, 0], [375, 0, 0], [376, 0, 1], [377, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c1_aggr_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[378, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c1_det_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "32", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[379, 0, 0], [380, 0, 0], [381, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c1_det_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[382, 0, 0], [383, 0, 0], [384, 0, 0], [385, 0, 1], [386, 0, 1]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + 
"inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c1_det_context_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[379, 0, 0], [388, 0, 0], [389, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c1_det_context_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[390, 0, 0], [391, 0, 0], [392, 0, 0], [393, 0, 1], [394, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c1_det_context_conv1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[395, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c1_det_context_conv2", + "attrs": { + "kernel": "(3, 
3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[396, 0, 0], [397, 0, 0], [398, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c1_det_context_conv2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[399, 0, 0], [400, 0, 0], [401, 0, 0], [402, 0, 1], [403, 0, 1]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c1_det_context_conv3_1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[396, 0, 0], [405, 0, 0], [406, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": 
"0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c1_det_context_conv3_1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[407, 0, 0], [408, 0, 0], [409, 0, 0], [410, 0, 1], [411, 0, 1]] + }, + { + "op": "Activation", + "name": "rf_c1_det_context_conv3_1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[412, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "rf_c1_det_context_conv3_2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "16", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[413, 0, 0], [414, 0, 0], [415, 0, 0]] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "rf_c1_det_context_conv3_2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"rf_c1_det_context_conv3_2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "rf_c1_det_context_conv3_2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[416, 0, 0], [417, 0, 0], [418, 0, 0], [419, 0, 1], [420, 0, 1]] + }, + { + "op": "Concat", + "name": "rf_c1_det_concat", + "attrs": { + "dim": "1", + "num_args": "3" + }, + "inputs": [[387, 0, 0], [404, 0, 0], [421, 0, 0]] + }, + { + "op": "Activation", + "name": "rf_c1_det_concat_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[422, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride8_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride8_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_cls_score_stride8", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "4", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[423, 0, 0], [424, 0, 0], [425, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_score_reshape_stride8", + "attrs": {"shape": "(0, 2, -1, 0)"}, + "inputs": [[426, 0, 0]] + }, + { + "op": "SoftmaxActivation", + "name": "face_rpn_cls_prob_stride8", + "attrs": {"mode": "channel"}, + "inputs": [[427, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_prob_reshape_stride8", + "attrs": {"shape": "(0, 4, -1, 0)"}, + "inputs": [[428, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride8_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride8_bias", + "attrs": { + "__init__": "[\"constant\", 
{\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_bbox_pred_stride8", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "8", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[423, 0, 0], [430, 0, 0], [431, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride8_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride8_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_landmark_pred_stride8", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "20", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[423, 0, 0], [433, 0, 0], [434, 0, 0]] + } + ], + "arg_nodes": [ + 0, + 1, + 3, + 4, + 5, + 6, + 9, + 11, + 12, + 13, + 14, + 17, + 19, + 20, + 21, + 22, + 25, + 27, + 28, + 29, + 30, + 33, + 35, + 36, + 37, + 38, + 41, + 43, + 44, + 45, + 46, + 49, + 51, + 52, + 53, + 54, + 57, + 59, + 60, + 61, + 62, + 65, + 67, + 68, + 69, + 70, + 73, + 75, + 76, + 77, + 78, + 81, + 83, + 84, + 85, + 86, + 89, + 91, + 92, + 93, + 94, + 97, + 99, + 100, + 101, + 102, + 105, + 107, + 108, + 109, + 110, + 113, + 115, + 116, + 117, + 118, + 121, + 123, + 124, + 125, + 126, + 129, + 131, + 132, + 133, + 134, + 137, + 139, + 140, + 141, + 142, + 145, + 147, + 148, + 149, + 150, + 153, + 155, + 156, + 157, + 158, + 161, + 163, + 164, + 165, + 166, + 169, + 171, + 172, + 173, + 174, + 177, + 179, + 180, + 181, + 182, + 185, + 187, + 188, + 189, + 190, + 193, + 195, + 196, + 197, + 198, + 201, + 203, + 204, + 205, + 206, + 209, + 211, + 212, + 213, + 214, + 217, + 218, + 220, + 221, + 222, + 223, + 226, + 227, + 229, + 230, + 231, + 232, + 234, + 235, + 237, + 238, + 239, + 240, + 243, + 244, + 246, + 247, + 248, + 
249, + 251, + 252, + 254, + 255, + 256, + 257, + 260, + 261, + 263, + 264, + 265, + 266, + 270, + 271, + 276, + 277, + 279, + 280, + 282, + 283, + 285, + 286, + 287, + 288, + 294, + 295, + 297, + 298, + 299, + 300, + 303, + 304, + 306, + 307, + 308, + 309, + 311, + 312, + 314, + 315, + 316, + 317, + 320, + 321, + 323, + 324, + 325, + 326, + 328, + 329, + 331, + 332, + 333, + 334, + 337, + 338, + 340, + 341, + 342, + 343, + 347, + 348, + 353, + 354, + 356, + 357, + 359, + 360, + 362, + 363, + 364, + 365, + 371, + 372, + 374, + 375, + 376, + 377, + 380, + 381, + 383, + 384, + 385, + 386, + 388, + 389, + 391, + 392, + 393, + 394, + 397, + 398, + 400, + 401, + 402, + 403, + 405, + 406, + 408, + 409, + 410, + 411, + 414, + 415, + 417, + 418, + 419, + 420, + 424, + 425, + 430, + 431, + 433, + 434 + ], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 220, + 221, + 222, + 223, 
+ 224, + 225, + 226, + 227, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530 + ], + "heads": [[275, 0, 0], [278, 0, 0], [281, 0, 0], [352, 0, 0], [355, 0, 0], [358, 0, 0], [429, 0, 0], [432, 0, 0], [435, 0, 0]], + "attrs": {"mxnet_version": ["int", 10400]} +} \ No newline at 
end of file diff --git a/model/R50-0000.params b/model/R50-0000.params new file mode 100644 index 0000000..3010f21 --- /dev/null +++ b/model/R50-0000.params @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb0ae8c33324b5b0b48718c7e7792e5bd1c01bb255fa53cbc0b6ede599832c43 +size 118010124 diff --git a/model/R50-symbol.json b/model/R50-symbol.json new file mode 100644 index 0000000..7bcea37 --- /dev/null +++ b/model/R50-symbol.json @@ -0,0 +1,6979 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "bn_data_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "bn_data_beta", + "inputs": [] + }, + { + "op": "null", + "name": "bn_data_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "True", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "bn_data_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "True", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "bn_data", + "attrs": { + "eps": "2e-05", + "fix_gamma": "True", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0], [3, 0, 0], [4, 0, 0]] + }, + { + "op": "null", + "name": "conv0_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "conv0", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(7,7)", + "no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(3,3)", + "stride": "(2,2)", + "workspace": "256" + }, + "inputs": [[5, 0, 0], [6, 0, 0]] + }, + { + "op": "null", + "name": "bn0_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "bn0_beta", + "inputs": [] + }, + { + "op": "null", + "name": "bn0_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + 
"use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "bn0_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "bn0", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[7, 0, 0], [8, 0, 0], [9, 0, 0], [10, 0, 0], [11, 0, 0]] + }, + { + "op": "Activation", + "name": "relu0", + "attrs": {"act_type": "relu"}, + "inputs": [[12, 0, 0]] + }, + { + "op": "Pooling", + "name": "pooling0", + "attrs": { + "global_pool": "False", + "kernel": "(3,3)", + "pad": "(1,1)", + "pool_type": "max", + "stride": "(2,2)" + }, + "inputs": [[13, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit1_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit1_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[14, 0, 0], [15, 0, 0], [16, 0, 0], [17, 0, 0], [18, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit1_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[19, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit1_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit1_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + 
"no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[20, 0, 0], [21, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit1_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit1_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[22, 0, 0], [23, 0, 0], [24, 0, 0], [25, 0, 0], [26, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit1_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[27, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit1_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit1_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[28, 0, 0], [29, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit1_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit1_bn3_moving_var", + "attrs": { + "__init__": 
"[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit1_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[30, 0, 0], [31, 0, 0], [32, 0, 0], [33, 0, 0], [34, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit1_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[35, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit1_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit1_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[36, 0, 0], [37, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit1_sc_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit1_sc", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[20, 0, 0], [39, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus0", + "inputs": [[38, 0, 0], [40, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit2_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": 
"BatchNorm", + "name": "stage1_unit2_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[41, 0, 0], [42, 0, 0], [43, 0, 0], [44, 0, 0], [45, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit2_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[46, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit2_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit2_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[47, 0, 0], [48, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit2_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit2_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[49, 0, 0], [50, 0, 0], [51, 0, 0], [52, 0, 0], [53, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit2_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[54, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit2_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit2_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "64", + 
"num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[55, 0, 0], [56, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit2_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit2_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit2_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[57, 0, 0], [58, 0, 0], [59, 0, 0], [60, 0, 0], [61, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit2_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[62, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit2_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit2_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[63, 0, 0], [64, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus1", + "inputs": [[65, 0, 0], [41, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit3_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"stage1_unit3_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit3_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[66, 0, 0], [67, 0, 0], [68, 0, 0], [69, 0, 0], [70, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit3_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[71, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit3_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit3_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[72, 0, 0], [73, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit3_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit3_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[74, 0, 0], [75, 0, 0], [76, 0, 0], [77, 0, 0], [78, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit3_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[79, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit3_conv2_weight", + "inputs": [] + 
}, + { + "op": "Convolution", + "name": "stage1_unit3_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "64", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[80, 0, 0], [81, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit3_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage1_unit3_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage1_unit3_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[82, 0, 0], [83, 0, 0], [84, 0, 0], [85, 0, 0], [86, 0, 0]] + }, + { + "op": "Activation", + "name": "stage1_unit3_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[87, 0, 0]] + }, + { + "op": "null", + "name": "stage1_unit3_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage1_unit3_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[88, 0, 0], [89, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus2", + "inputs": [[90, 0, 0], [66, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit1_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn1_moving_mean", + 
"attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit1_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[91, 0, 0], [92, 0, 0], [93, 0, 0], [94, 0, 0], [95, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit1_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[96, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit1_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit1_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[97, 0, 0], [98, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit1_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit1_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[99, 0, 0], [100, 0, 0], [101, 0, 0], [102, 0, 0], [103, 0, 0]] + }, + 
{ + "op": "Activation", + "name": "stage2_unit1_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[104, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit1_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit1_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(1,1)", + "stride": "(2,2)", + "workspace": "256" + }, + "inputs": [[105, 0, 0], [106, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit1_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit1_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit1_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[107, 0, 0], [108, 0, 0], [109, 0, 0], [110, 0, 0], [111, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit1_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[112, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit1_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit1_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[113, 0, 0], [114, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit1_sc_weight", + "inputs": [] + }, + { + "op": "Convolution", 
+ "name": "stage2_unit1_sc", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(2,2)", + "workspace": "256" + }, + "inputs": [[97, 0, 0], [116, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus3", + "inputs": [[115, 0, 0], [117, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit2_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit2_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[118, 0, 0], [119, 0, 0], [120, 0, 0], [121, 0, 0], [122, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit2_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[123, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit2_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit2_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[124, 0, 0], [125, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit2_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn2_moving_mean", + "attrs": { + "__init__": 
"[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit2_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[126, 0, 0], [127, 0, 0], [128, 0, 0], [129, 0, 0], [130, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit2_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[131, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit2_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit2_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[132, 0, 0], [133, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit2_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit2_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit2_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[134, 0, 0], [135, 0, 0], [136, 0, 0], [137, 0, 0], [138, 0, 0]] + }, + { + "op": 
"Activation", + "name": "stage2_unit2_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[139, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit2_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit2_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[140, 0, 0], [141, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus4", + "inputs": [[142, 0, 0], [118, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit3_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit3_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[143, 0, 0], [144, 0, 0], [145, 0, 0], [146, 0, 0], [147, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit3_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[148, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit3_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit3_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[149, 0, 0], [150, 0, 0]] + }, + { + "op": "null", + 
"name": "stage2_unit3_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit3_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[151, 0, 0], [152, 0, 0], [153, 0, 0], [154, 0, 0], [155, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit3_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[156, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit3_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit3_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[157, 0, 0], [158, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit3_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit3_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": 
"stage2_unit3_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[159, 0, 0], [160, 0, 0], [161, 0, 0], [162, 0, 0], [163, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit3_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[164, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit3_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit3_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[165, 0, 0], [166, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus5", + "inputs": [[167, 0, 0], [143, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit4_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit4_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[168, 0, 0], [169, 0, 0], [170, 0, 0], [171, 0, 0], [172, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit4_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[173, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit4_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit4_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + 
"dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[174, 0, 0], [175, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit4_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit4_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[176, 0, 0], [177, 0, 0], [178, 0, 0], [179, 0, 0], [180, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit4_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[181, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit4_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit4_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "128", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[182, 0, 0], [183, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit4_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage2_unit4_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"stage2_unit4_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage2_unit4_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[184, 0, 0], [185, 0, 0], [186, 0, 0], [187, 0, 0], [188, 0, 0]] + }, + { + "op": "Activation", + "name": "stage2_unit4_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[189, 0, 0]] + }, + { + "op": "null", + "name": "stage2_unit4_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage2_unit4_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[190, 0, 0], [191, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus6", + "inputs": [[192, 0, 0], [168, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit1_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit1_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[193, 0, 0], [194, 0, 0], [195, 0, 0], [196, 0, 0], [197, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit1_relu1", + "attrs": {"act_type": "relu"}, + 
"inputs": [[198, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit1_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit1_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[199, 0, 0], [200, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit1_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit1_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[201, 0, 0], [202, 0, 0], [203, 0, 0], [204, 0, 0], [205, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit1_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[206, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit1_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit1_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(1,1)", + "stride": "(2,2)", + "workspace": "256" + }, + "inputs": [[207, 0, 0], [208, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit1_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": 
"stage3_unit1_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit1_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit1_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[209, 0, 0], [210, 0, 0], [211, 0, 0], [212, 0, 0], [213, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit1_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[214, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit1_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit1_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "1024", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[215, 0, 0], [216, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit1_sc_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit1_sc", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "1024", + "num_group": "1", + "pad": "(0,0)", + "stride": "(2,2)", + "workspace": "256" + }, + "inputs": [[199, 0, 0], [218, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus7", + "inputs": [[217, 0, 0], [219, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit2_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": 
"0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit2_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[220, 0, 0], [221, 0, 0], [222, 0, 0], [223, 0, 0], [224, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit2_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[225, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit2_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit2_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[226, 0, 0], [227, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit2_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit2_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[228, 0, 0], [229, 0, 0], [230, 0, 0], [231, 0, 0], [232, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit2_relu2", + "attrs": {"act_type": "relu"}, + 
"inputs": [[233, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit2_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit2_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[234, 0, 0], [235, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit2_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit2_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit2_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[236, 0, 0], [237, 0, 0], [238, 0, 0], [239, 0, 0], [240, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit2_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[241, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit2_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit2_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "1024", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[242, 0, 0], [243, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus8", + "inputs": [[244, 0, 0], [220, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit3_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + 
"name": "stage3_unit3_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit3_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[245, 0, 0], [246, 0, 0], [247, 0, 0], [248, 0, 0], [249, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit3_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[250, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit3_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit3_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[251, 0, 0], [252, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit3_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit3_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": 
"0.9", + "use_global_stats": "False" + }, + "inputs": [[253, 0, 0], [254, 0, 0], [255, 0, 0], [256, 0, 0], [257, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit3_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[258, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit3_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit3_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[259, 0, 0], [260, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit3_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit3_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit3_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[261, 0, 0], [262, 0, 0], [263, 0, 0], [264, 0, 0], [265, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit3_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[266, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit3_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit3_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "1024", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[267, 
0, 0], [268, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus9", + "inputs": [[269, 0, 0], [245, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit4_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit4_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[270, 0, 0], [271, 0, 0], [272, 0, 0], [273, 0, 0], [274, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit4_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[275, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit4_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit4_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[276, 0, 0], [277, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit4_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + 
"fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit4_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[278, 0, 0], [279, 0, 0], [280, 0, 0], [281, 0, 0], [282, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit4_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[283, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit4_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit4_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[284, 0, 0], [285, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit4_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit4_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit4_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[286, 0, 0], [287, 0, 0], [288, 0, 0], [289, 0, 0], [290, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit4_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[291, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit4_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit4_conv3", + "attrs": { + 
"cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "1024", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[292, 0, 0], [293, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus10", + "inputs": [[294, 0, 0], [270, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit5_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit5_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[295, 0, 0], [296, 0, 0], [297, 0, 0], [298, 0, 0], [299, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit5_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[300, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit5_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit5_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[301, 0, 0], [302, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit5_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + 
"fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit5_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[303, 0, 0], [304, 0, 0], [305, 0, 0], [306, 0, 0], [307, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit5_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[308, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit5_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit5_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[309, 0, 0], [310, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit5_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit5_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit5_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[311, 0, 0], [312, 0, 0], [313, 0, 0], [314, 0, 0], [315, 0, 0]] + }, + { + "op": "Activation", + "name": 
"stage3_unit5_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[316, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit5_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit5_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "1024", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[317, 0, 0], [318, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus11", + "inputs": [[319, 0, 0], [295, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit6_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit6_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[320, 0, 0], [321, 0, 0], [322, 0, 0], [323, 0, 0], [324, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit6_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[325, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit6_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit6_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[326, 0, 0], [327, 0, 0]] + }, + { + "op": "null", + "name": 
"stage3_unit6_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit6_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[328, 0, 0], [329, 0, 0], [330, 0, 0], [331, 0, 0], [332, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit6_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[333, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit6_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit6_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "256", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[334, 0, 0], [335, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit6_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage3_unit6_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage3_unit6_bn3", + 
"attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[336, 0, 0], [337, 0, 0], [338, 0, 0], [339, 0, 0], [340, 0, 0]] + }, + { + "op": "Activation", + "name": "stage3_unit6_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[341, 0, 0]] + }, + { + "op": "null", + "name": "stage3_unit6_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage3_unit6_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "1024", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[342, 0, 0], [343, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus12", + "inputs": [[344, 0, 0], [320, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit1_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit1_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[345, 0, 0], [346, 0, 0], [347, 0, 0], [348, 0, 0], [349, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit1_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[350, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit1_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit1_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + 
"kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[351, 0, 0], [352, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit1_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit1_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[353, 0, 0], [354, 0, 0], [355, 0, 0], [356, 0, 0], [357, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit1_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[358, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit1_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit1_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(1,1)", + "stride": "(2,2)", + "workspace": "256" + }, + "inputs": [[359, 0, 0], [360, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit1_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit1_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"stage4_unit1_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit1_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[361, 0, 0], [362, 0, 0], [363, 0, 0], [364, 0, 0], [365, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit1_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[366, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit1_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit1_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "2048", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[367, 0, 0], [368, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit1_sc_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit1_sc", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "2048", + "num_group": "1", + "pad": "(0,0)", + "stride": "(2,2)", + "workspace": "256" + }, + "inputs": [[351, 0, 0], [370, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus13", + "inputs": [[369, 0, 0], [371, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit2_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": 
"0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit2_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[372, 0, 0], [373, 0, 0], [374, 0, 0], [375, 0, 0], [376, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit2_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[377, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit2_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit2_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[378, 0, 0], [379, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit2_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit2_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[380, 0, 0], [381, 0, 0], [382, 0, 0], [383, 0, 0], [384, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit2_relu2", + "attrs": {"act_type": "relu"}, + "inputs": [[385, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit2_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit2_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + 
"dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[386, 0, 0], [387, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit2_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit2_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit2_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[388, 0, 0], [389, 0, 0], [390, 0, 0], [391, 0, 0], [392, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit2_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[393, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit2_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit2_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "2048", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[394, 0, 0], [395, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus14", + "inputs": [[396, 0, 0], [372, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit3_bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": 
"0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit3_bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[397, 0, 0], [398, 0, 0], [399, 0, 0], [400, 0, 0], [401, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit3_relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[402, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit3_conv1_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit3_conv1", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[403, 0, 0], [404, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit3_bn2_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn2_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn2_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn2_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit3_bn2", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[405, 0, 0], [406, 0, 0], [407, 0, 0], [408, 0, 0], [409, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit3_relu2", + "attrs": {"act_type": "relu"}, + 
"inputs": [[410, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit3_conv2_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit3_conv2", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(3,3)", + "no_bias": "True", + "num_filter": "512", + "num_group": "1", + "pad": "(1,1)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[411, 0, 0], [412, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit3_bn3_gamma", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn3_beta", + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn3_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "stage4_unit3_bn3_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "stage4_unit3_bn3", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[413, 0, 0], [414, 0, 0], [415, 0, 0], [416, 0, 0], [417, 0, 0]] + }, + { + "op": "Activation", + "name": "stage4_unit3_relu3", + "attrs": {"act_type": "relu"}, + "inputs": [[418, 0, 0]] + }, + { + "op": "null", + "name": "stage4_unit3_conv3_weight", + "inputs": [] + }, + { + "op": "Convolution", + "name": "stage4_unit3_conv3", + "attrs": { + "cudnn_tune": "limited_workspace", + "dilate": "(1,1)", + "kernel": "(1,1)", + "no_bias": "True", + "num_filter": "2048", + "num_group": "1", + "pad": "(0,0)", + "stride": "(1,1)", + "workspace": "256" + }, + "inputs": [[419, 0, 0], [420, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus15", + "inputs": [[421, 0, 0], [397, 0, 0]] + }, + { + "op": "null", + "name": "bn1_gamma", + "inputs": [] + }, + { + "op": "null", + "name": 
"bn1_beta", + "inputs": [] + }, + { + "op": "null", + "name": "bn1_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "null", + "name": "bn1_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "bn1", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9", + "use_global_stats": "False" + }, + "inputs": [[422, 0, 0], [423, 0, 0], [424, 0, 0], [425, 0, 0], [426, 0, 0]] + }, + { + "op": "Activation", + "name": "relu1", + "attrs": {"act_type": "relu"}, + "inputs": [[427, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c3_lateral_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c3_lateral_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_c3_lateral", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "256", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[428, 0, 0], [429, 0, 0], [430, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c3_lateral_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c3_lateral_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c3_lateral_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c3_lateral_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", 
+ "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_c3_lateral_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[431, 0, 0], [432, 0, 0], [433, 0, 0], [434, 0, 1], [435, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_c3_lateral_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[436, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m3_det_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "256", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[437, 0, 0], [438, 0, 0], [439, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m3_det_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[440, 0, 0], [441, 0, 0], [442, 0, 0], [443, 0, 1], [444, 0, 1]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv1_weight", + "attrs": { + 
"__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m3_det_context_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[437, 0, 0], [446, 0, 0], [447, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m3_det_context_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[448, 0, 0], [449, 0, 0], [450, 0, 0], [451, 0, 1], [452, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_m3_det_context_conv1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[453, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + 
"op": "Convolution", + "name": "ssh_m3_det_context_conv2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[454, 0, 0], [455, 0, 0], [456, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m3_det_context_conv2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[457, 0, 0], [458, 0, 0], [459, 0, 0], [460, 0, 1], [461, 0, 1]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m3_det_context_conv3_1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[454, 0, 0], [463, 0, 0], [464, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"ssh_m3_det_context_conv3_1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m3_det_context_conv3_1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[465, 0, 0], [466, 0, 0], [467, 0, 0], [468, 0, 1], [469, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_m3_det_context_conv3_1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[470, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m3_det_context_conv3_2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[471, 0, 0], [472, 0, 0], [473, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + 
"fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m3_det_context_conv3_2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m3_det_context_conv3_2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[474, 0, 0], [475, 0, 0], [476, 0, 0], [477, 0, 1], [478, 0, 1]] + }, + { + "op": "Concat", + "name": "ssh_m3_det_concat", + "attrs": { + "dim": "1", + "num_args": "3" + }, + "inputs": [[445, 0, 0], [462, 0, 0], [479, 0, 0]] + }, + { + "op": "Activation", + "name": "ssh_m3_det_concat_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[480, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride32_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride32_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_cls_score_stride32", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "4", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[481, 0, 0], [482, 0, 0], [483, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_score_reshape_stride32", + "attrs": {"shape": "(0, 2, -1, 0)"}, + "inputs": [[484, 0, 0]] + }, + { + "op": "SoftmaxActivation", + "name": "face_rpn_cls_prob_stride32", + "attrs": {"mode": "channel"}, + "inputs": [[485, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_prob_reshape_stride32", + "attrs": {"shape": "(0, 4, -1, 0)"}, + "inputs": [[486, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride32_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + 
"op": "null", + "name": "face_rpn_bbox_pred_stride32_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_bbox_pred_stride32", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "8", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[481, 0, 0], [488, 0, 0], [489, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride32_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride32_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_landmark_pred_stride32", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "20", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[481, 0, 0], [491, 0, 0], [492, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c2_lateral_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_lateral_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_c2_lateral", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "256", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[359, 0, 0], [494, 0, 0], [495, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c2_lateral_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_lateral_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_lateral_bn_moving_mean", + "attrs": { + 
"__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_lateral_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_c2_lateral_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[496, 0, 0], [497, 0, 0], [498, 0, 0], [499, 0, 1], [500, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_c2_lateral_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[501, 0, 0]] + }, + { + "op": "UpSampling", + "name": "ssh_c3_up", + "attrs": { + "num_args": "1", + "sample_type": "nearest", + "scale": "2", + "workspace": "512" + }, + "inputs": [[437, 0, 0]] + }, + { + "op": "Crop", + "name": "crop0", + "attrs": {"num_args": "2"}, + "inputs": [[503, 0, 0], [502, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus0", + "inputs": [[502, 0, 0], [504, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c2_aggr_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_aggr_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_c2_aggr", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "256", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[505, 0, 0], [506, 0, 0], [507, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c2_aggr_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_aggr_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_aggr_bn_moving_mean", + "attrs": { + "__init__": 
"[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c2_aggr_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_c2_aggr_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[508, 0, 0], [509, 0, 0], [510, 0, 0], [511, 0, 1], [512, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_c2_aggr_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[513, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m2_det_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "256", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[514, 0, 0], [515, 0, 0], [516, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m2_det_conv1_bn", + "attrs": { + "eps": 
"2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[517, 0, 0], [518, 0, 0], [519, 0, 0], [520, 0, 1], [521, 0, 1]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m2_det_context_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[514, 0, 0], [523, 0, 0], [524, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m2_det_context_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[525, 0, 0], [526, 0, 0], [527, 0, 0], [528, 0, 1], [529, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_m2_det_context_conv1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[530, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + 
"inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m2_det_context_conv2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[531, 0, 0], [532, 0, 0], [533, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m2_det_context_conv2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[534, 0, 0], [535, 0, 0], [536, 0, 0], [537, 0, 1], [538, 0, 1]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m2_det_context_conv3_1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[531, 0, 0], [540, 0, 0], 
[541, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m2_det_context_conv3_1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[542, 0, 0], [543, 0, 0], [544, 0, 0], [545, 0, 1], [546, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_m2_det_context_conv3_1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[547, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m2_det_context_conv3_2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[548, 0, 0], [549, 0, 0], [550, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_2_bn_beta", + "attrs": { + "eps": 
"2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m2_det_context_conv3_2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m2_det_context_conv3_2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[551, 0, 0], [552, 0, 0], [553, 0, 0], [554, 0, 1], [555, 0, 1]] + }, + { + "op": "Concat", + "name": "ssh_m2_det_concat", + "attrs": { + "dim": "1", + "num_args": "3" + }, + "inputs": [[522, 0, 0], [539, 0, 0], [556, 0, 0]] + }, + { + "op": "Activation", + "name": "ssh_m2_det_concat_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[557, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride16_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride16_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_cls_score_stride16", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "4", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[558, 0, 0], [559, 0, 0], [560, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_score_reshape_stride16", + "attrs": {"shape": "(0, 2, -1, 0)"}, + "inputs": [[561, 0, 0]] + }, + { + "op": "SoftmaxActivation", + "name": "face_rpn_cls_prob_stride16", + "attrs": {"mode": "channel"}, + "inputs": [[562, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_prob_reshape_stride16", + "attrs": {"shape": "(0, 4, -1, 0)"}, 
+ "inputs": [[563, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride16_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride16_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_bbox_pred_stride16", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "8", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[558, 0, 0], [565, 0, 0], [566, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride16_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride16_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_landmark_pred_stride16", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "20", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[558, 0, 0], [568, 0, 0], [569, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_red_conv_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_red_conv_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m1_red_conv", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "256", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[207, 0, 0], [571, 0, 0], [572, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_red_conv_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": 
"null", + "name": "ssh_m1_red_conv_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_red_conv_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_red_conv_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m1_red_conv_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[573, 0, 0], [574, 0, 0], [575, 0, 0], [576, 0, 1], [577, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_m1_red_conv_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[578, 0, 0]] + }, + { + "op": "UpSampling", + "name": "ssh_m2_red_up", + "attrs": { + "num_args": "1", + "sample_type": "nearest", + "scale": "2", + "workspace": "512" + }, + "inputs": [[514, 0, 0]] + }, + { + "op": "Crop", + "name": "crop1", + "attrs": {"num_args": "2"}, + "inputs": [[580, 0, 0], [579, 0, 0]] + }, + { + "op": "elemwise_add", + "name": "_plus1", + "inputs": [[579, 0, 0], [581, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c1_aggr_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c1_aggr_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_c1_aggr", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "256", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[582, 0, 0], [583, 0, 0], [584, 0, 0]] + }, + { + "op": "null", + "name": "ssh_c1_aggr_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": 
"null", + "name": "ssh_c1_aggr_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c1_aggr_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_c1_aggr_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_c1_aggr_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[585, 0, 0], [586, 0, 0], [587, 0, 0], [588, 0, 1], [589, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_c1_aggr_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[590, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_det_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m1_det_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "256", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[591, 0, 0], [592, 0, 0], [593, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_det_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": 
"ssh_m1_det_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m1_det_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[594, 0, 0], [595, 0, 0], [596, 0, 0], [597, 0, 1], [598, 0, 1]] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m1_det_context_conv1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[591, 0, 0], [600, 0, 0], [601, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m1_det_context_conv1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[602, 0, 0], [603, 0, 0], [604, 0, 0], [605, 0, 1], [606, 0, 1]] + }, + { + "op": "Activation", + "name": 
"ssh_m1_det_context_conv1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[607, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m1_det_context_conv2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[608, 0, 0], [609, 0, 0], [610, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m1_det_context_conv2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[611, 0, 0], [612, 0, 0], [613, 0, 0], [614, 0, 1], [615, 0, 1]] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_1_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_1_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + 
"__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m1_det_context_conv3_1", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[608, 0, 0], [617, 0, 0], [618, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_1_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_1_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_1_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_1_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m1_det_context_conv3_1_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[619, 0, 0], [620, 0, 0], [621, 0, 0], [622, 0, 1], [623, 0, 1]] + }, + { + "op": "Activation", + "name": "ssh_m1_det_context_conv3_1_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[624, 0, 0]] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_2_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_2_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "ssh_m1_det_context_conv3_2", + "attrs": { + "kernel": "(3, 3)", + "num_filter": "128", + "pad": "(1, 1)", + "stride": "(1, 1)" + }, + "inputs": [[625, 0, 0], [626, 0, 0], [627, 0, 0]] + }, + { + 
"op": "null", + "name": "ssh_m1_det_context_conv3_2_bn_gamma", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_2_bn_beta", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_2_bn_moving_mean", + "attrs": { + "__init__": "[\"zero\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "null", + "name": "ssh_m1_det_context_conv3_2_bn_moving_var", + "attrs": { + "__init__": "[\"one\", {}]", + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [] + }, + { + "op": "BatchNorm", + "name": "ssh_m1_det_context_conv3_2_bn", + "attrs": { + "eps": "2e-05", + "fix_gamma": "False", + "momentum": "0.9" + }, + "inputs": [[628, 0, 0], [629, 0, 0], [630, 0, 0], [631, 0, 1], [632, 0, 1]] + }, + { + "op": "Concat", + "name": "ssh_m1_det_concat", + "attrs": { + "dim": "1", + "num_args": "3" + }, + "inputs": [[599, 0, 0], [616, 0, 0], [633, 0, 0]] + }, + { + "op": "Activation", + "name": "ssh_m1_det_concat_relu", + "attrs": {"act_type": "relu"}, + "inputs": [[634, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride8_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_cls_score_stride8_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_cls_score_stride8", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "4", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[635, 0, 0], [636, 0, 0], [637, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_score_reshape_stride8", + "attrs": {"shape": "(0, 2, -1, 0)"}, + "inputs": [[638, 0, 0]] + 
}, + { + "op": "SoftmaxActivation", + "name": "face_rpn_cls_prob_stride8", + "attrs": {"mode": "channel"}, + "inputs": [[639, 0, 0]] + }, + { + "op": "Reshape", + "name": "face_rpn_cls_prob_reshape_stride8", + "attrs": {"shape": "(0, 4, -1, 0)"}, + "inputs": [[640, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride8_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_bbox_pred_stride8_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_bbox_pred_stride8", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "8", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[635, 0, 0], [642, 0, 0], [643, 0, 0]] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride8_weight", + "attrs": { + "__init__": "[\"normal\", {\"sigma\": 0.01}]", + "__lr_mult__": "1.0" + }, + "inputs": [] + }, + { + "op": "null", + "name": "face_rpn_landmark_pred_stride8_bias", + "attrs": { + "__init__": "[\"constant\", {\"value\": 0.0}]", + "__lr_mult__": "2.0", + "__wd_mult__": "0.0" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "face_rpn_landmark_pred_stride8", + "attrs": { + "kernel": "(1, 1)", + "num_filter": "20", + "pad": "(0, 0)", + "stride": "(1, 1)" + }, + "inputs": [[635, 0, 0], [645, 0, 0], [646, 0, 0]] + } + ], + "arg_nodes": [ + 0, + 1, + 2, + 3, + 4, + 6, + 8, + 9, + 10, + 11, + 15, + 16, + 17, + 18, + 21, + 23, + 24, + 25, + 26, + 29, + 31, + 32, + 33, + 34, + 37, + 39, + 42, + 43, + 44, + 45, + 48, + 50, + 51, + 52, + 53, + 56, + 58, + 59, + 60, + 61, + 64, + 67, + 68, + 69, + 70, + 73, + 75, + 76, + 77, + 78, + 81, + 83, + 84, + 85, + 86, + 89, + 92, + 93, + 94, + 95, + 98, + 100, + 101, + 102, + 103, + 106, + 108, + 109, + 110, + 111, + 114, + 116, + 119, + 120, + 121, + 122, + 125, + 127, + 128, 
+ 129, + 130, + 133, + 135, + 136, + 137, + 138, + 141, + 144, + 145, + 146, + 147, + 150, + 152, + 153, + 154, + 155, + 158, + 160, + 161, + 162, + 163, + 166, + 169, + 170, + 171, + 172, + 175, + 177, + 178, + 179, + 180, + 183, + 185, + 186, + 187, + 188, + 191, + 194, + 195, + 196, + 197, + 200, + 202, + 203, + 204, + 205, + 208, + 210, + 211, + 212, + 213, + 216, + 218, + 221, + 222, + 223, + 224, + 227, + 229, + 230, + 231, + 232, + 235, + 237, + 238, + 239, + 240, + 243, + 246, + 247, + 248, + 249, + 252, + 254, + 255, + 256, + 257, + 260, + 262, + 263, + 264, + 265, + 268, + 271, + 272, + 273, + 274, + 277, + 279, + 280, + 281, + 282, + 285, + 287, + 288, + 289, + 290, + 293, + 296, + 297, + 298, + 299, + 302, + 304, + 305, + 306, + 307, + 310, + 312, + 313, + 314, + 315, + 318, + 321, + 322, + 323, + 324, + 327, + 329, + 330, + 331, + 332, + 335, + 337, + 338, + 339, + 340, + 343, + 346, + 347, + 348, + 349, + 352, + 354, + 355, + 356, + 357, + 360, + 362, + 363, + 364, + 365, + 368, + 370, + 373, + 374, + 375, + 376, + 379, + 381, + 382, + 383, + 384, + 387, + 389, + 390, + 391, + 392, + 395, + 398, + 399, + 400, + 401, + 404, + 406, + 407, + 408, + 409, + 412, + 414, + 415, + 416, + 417, + 420, + 423, + 424, + 425, + 426, + 429, + 430, + 432, + 433, + 434, + 435, + 438, + 439, + 441, + 442, + 443, + 444, + 446, + 447, + 449, + 450, + 451, + 452, + 455, + 456, + 458, + 459, + 460, + 461, + 463, + 464, + 466, + 467, + 468, + 469, + 472, + 473, + 475, + 476, + 477, + 478, + 482, + 483, + 488, + 489, + 491, + 492, + 494, + 495, + 497, + 498, + 499, + 500, + 506, + 507, + 509, + 510, + 511, + 512, + 515, + 516, + 518, + 519, + 520, + 521, + 523, + 524, + 526, + 527, + 528, + 529, + 532, + 533, + 535, + 536, + 537, + 538, + 540, + 541, + 543, + 544, + 545, + 546, + 549, + 550, + 552, + 553, + 554, + 555, + 559, + 560, + 565, + 566, + 568, + 569, + 571, + 572, + 574, + 575, + 576, + 577, + 583, + 584, + 586, + 587, + 588, + 589, + 592, + 593, + 595, + 596, + 
597, + 598, + 600, + 601, + 603, + 604, + 605, + 606, + 609, + 610, + 612, + 613, + 614, + 615, + 617, + 618, + 620, + 621, + 622, + 623, + 626, + 627, + 629, + 630, + 631, + 632, + 636, + 637, + 642, + 643, + 645, + 646 + ], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 321, + 322, + 323, + 324, 
+ 325, + 326, + 327, + 328, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 
674, + 675, + 676, + 677, + 678, + 679, + 680, + 681, + 682, + 685, + 686, + 687, + 688, + 689, + 690, + 691, + 692, + 693, + 694, + 695, + 696, + 697, + 698, + 699, + 700, + 701, + 702, + 703, + 704, + 705, + 706, + 709, + 710, + 711, + 712, + 713, + 714, + 715, + 716, + 717, + 718, + 719, + 720, + 723, + 724, + 725, + 726, + 727, + 728, + 729, + 730, + 731, + 734, + 735, + 736, + 737, + 738, + 739, + 740, + 741, + 744, + 745, + 746, + 747, + 748, + 749, + 750, + 751, + 752, + 755, + 756, + 757, + 758, + 759, + 760, + 761, + 762, + 765, + 766, + 767, + 768, + 769, + 770, + 771, + 772, + 773, + 776, + 777, + 778, + 779, + 780, + 781, + 782, + 783, + 784, + 785, + 786, + 787, + 788, + 789, + 790 + ], + "heads": [[487, 0, 0], [490, 0, 0], [493, 0, 0], [564, 0, 0], [567, 0, 0], [570, 0, 0], [641, 0, 0], [644, 0, 0], [647, 0, 0]], + "attrs": {"mxnet_version": ["int", 10400]} +} \ No newline at end of file diff --git a/model-r100-ii/arcface-0000.params b/model/arcface-0000.params similarity index 100% rename from model-r100-ii/arcface-0000.params rename to model/arcface-0000.params diff --git a/model-r100-ii/arcface-symbol.json b/model/arcface-symbol.json similarity index 100% rename from model-r100-ii/arcface-symbol.json rename to model/arcface-symbol.json diff --git a/model/landmarks.dat b/model/landmarks.dat new file mode 100644 index 0000000..e0ec20d Binary files /dev/null and b/model/landmarks.dat differ diff --git a/model-mlp/mlp.pkl b/model/mlp.pkl similarity index 99% rename from model-mlp/mlp.pkl rename to model/mlp.pkl index 3c31eff..ebd3abe 100644 Binary files a/model-mlp/mlp.pkl and b/model/mlp.pkl differ diff --git a/mtcnn-model/det1-0001.params b/mtcnn-model/det1-0001.params deleted file mode 100644 index 618cf8f..0000000 --- a/mtcnn-model/det1-0001.params +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5e974b82ac1d8c88f64fd1a1af2d6be693d1b8c9937808238dc54b38197c98f7 -size 27406 diff --git 
a/mtcnn-model/det1-symbol.json b/mtcnn-model/det1-symbol.json deleted file mode 100644 index 0c4a203..0000000 --- a/mtcnn-model/det1-symbol.json +++ /dev/null @@ -1,259 +0,0 @@ -{ - "nodes": [ - { - "op": "null", - "name": "data", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv1", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "10", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[3, 0, 0], [4, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1", - "attrs": { - "global_pool": "False", - "kernel": "(2,2)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[5, 0, 0]] - }, - { - "op": "null", - "name": "conv2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "16", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[6, 0, 0], [7, 0, 0], [8, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" 
- }, - "inputs": [[9, 0, 0], [10, 0, 0]] - }, - { - "op": "null", - "name": "conv3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv3_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "32", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[11, 0, 0], [12, 0, 0], [13, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[14, 0, 0], [15, 0, 0]] - }, - { - "op": "null", - "name": "conv4_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv4_2_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv4_2", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(1,1)", - "no_bias": "False", - "num_filter": "4", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[16, 0, 0], [17, 0, 0], [18, 0, 0]] - }, - { - "op": "null", - "name": "conv4_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv4_1_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv4_1", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(1,1)", - "no_bias": "False", - "num_filter": "2", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[16, 0, 0], [20, 0, 0], [21, 0, 0]] - }, - { - "op": "SoftmaxActivation", - "name": "prob1", - "attrs": {"mode": "channel"}, - "inputs": [[22, 0, 0]] - } - ], - "arg_nodes": [ - 0, - 1, - 2, - 4, - 7, - 8, - 10, - 12, - 13, - 15, - 17, - 18, - 20, - 21 - ], - 
"node_row_ptr": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24 - ], - "heads": [[19, 0, 0], [23, 0, 0]], - "attrs": {"mxnet_version": ["int", 10400]} -} \ No newline at end of file diff --git a/mtcnn-model/det1.caffemodel b/mtcnn-model/det1.caffemodel deleted file mode 100644 index 79e93b4..0000000 Binary files a/mtcnn-model/det1.caffemodel and /dev/null differ diff --git a/mtcnn-model/det1.prototxt b/mtcnn-model/det1.prototxt deleted file mode 100644 index c5c1657..0000000 --- a/mtcnn-model/det1.prototxt +++ /dev/null @@ -1,177 +0,0 @@ -name: "PNet" -input: "data" -input_dim: 1 -input_dim: 3 -input_dim: 12 -input_dim: 12 - -layer { - name: "conv1" - type: "Convolution" - bottom: "data" - top: "conv1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 10 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "PReLU1" - type: "PReLU" - bottom: "conv1" - top: "conv1" -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} - -layer { - name: "conv2" - type: "Convolution" - bottom: "pool1" - top: "conv2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 16 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "PReLU2" - type: "PReLU" - bottom: "conv2" - top: "conv2" -} - -layer { - name: "conv3" - type: "Convolution" - bottom: "conv2" - top: "conv3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 32 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - 
value: 0 - } - } -} -layer { - name: "PReLU3" - type: "PReLU" - bottom: "conv3" - top: "conv3" -} - - -layer { - name: "conv4-1" - type: "Convolution" - bottom: "conv3" - top: "conv4-1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 2 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} - -layer { - name: "conv4-2" - type: "Convolution" - bottom: "conv3" - top: "conv4-2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 4 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prob1" - type: "Softmax" - bottom: "conv4-1" - top: "prob1" -} diff --git a/mtcnn-model/det2-0001.params b/mtcnn-model/det2-0001.params deleted file mode 100644 index b3daed6..0000000 --- a/mtcnn-model/det2-0001.params +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e75323f91687c2016a4890a7626e797911d881132b9a88b2f5080ed6febbd6db -size 401732 diff --git a/mtcnn-model/det2-symbol.json b/mtcnn-model/det2-symbol.json deleted file mode 100644 index 6c38521..0000000 --- a/mtcnn-model/det2-symbol.json +++ /dev/null @@ -1,314 +0,0 @@ -{ - "nodes": [ - { - "op": "null", - "name": "data", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv1", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "28", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, 
- "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[3, 0, 0], [4, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[5, 0, 0]] - }, - { - "op": "null", - "name": "conv2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "48", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[6, 0, 0], [7, 0, 0], [8, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[9, 0, 0], [10, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool2", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[11, 0, 0]] - }, - { - "op": "null", - "name": "conv3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv3_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(2,2)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[12, 0, 0], [13, 0, 0], [14, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_gamma", - "attrs": {"__init__": 
"[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[15, 0, 0], [16, 0, 0]] - }, - { - "op": "null", - "name": "conv4_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv4_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "conv4", - "attrs": { - "no_bias": "False", - "num_hidden": "128" - }, - "inputs": [[17, 0, 0], [18, 0, 0], [19, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[20, 0, 0], [21, 0, 0]] - }, - { - "op": "null", - "name": "conv5_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv5_2_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "conv5_2", - "attrs": { - "no_bias": "False", - "num_hidden": "4" - }, - "inputs": [[22, 0, 0], [23, 0, 0], [24, 0, 0]] - }, - { - "op": "null", - "name": "conv5_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv5_1_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "conv5_1", - "attrs": { - "no_bias": "False", - "num_hidden": "2" - }, - "inputs": [[22, 0, 0], [26, 0, 0], [27, 0, 0]] - }, - { - "op": "null", - "name": "prob1_label", - "inputs": [] - }, - { - "op": "SoftmaxOutput", - "name": "prob1", - "attrs": { - "grad_scale": "1", - "ignore_label": "-1", - "multi_output": "False", - "normalization": "null", - "use_ignore": "False" - }, - "inputs": [[28, 0, 0], [29, 0, 0]] - } - ], - "arg_nodes": [ - 0, - 1, - 2, - 4, - 7, - 8, - 10, - 13, - 14, - 16, - 18, - 19, - 21, - 23, - 24, - 26, - 27, - 29 - ], - "node_row_ptr": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 
15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31 - ], - "heads": [[25, 0, 0], [30, 0, 0]], - "attrs": {"mxnet_version": ["int", 10400]} -} \ No newline at end of file diff --git a/mtcnn-model/det2.caffemodel b/mtcnn-model/det2.caffemodel deleted file mode 100644 index a5a540c..0000000 Binary files a/mtcnn-model/det2.caffemodel and /dev/null differ diff --git a/mtcnn-model/det2.prototxt b/mtcnn-model/det2.prototxt deleted file mode 100644 index 51093e6..0000000 --- a/mtcnn-model/det2.prototxt +++ /dev/null @@ -1,228 +0,0 @@ -name: "RNet" -input: "data" -input_dim: 1 -input_dim: 3 -input_dim: 24 -input_dim: 24 - - -########################## -###################### -layer { - name: "conv1" - type: "Convolution" - bottom: "data" - top: "conv1" - param { - lr_mult: 0 - decay_mult: 0 - } - param { - lr_mult: 0 - decay_mult: 0 - } - convolution_param { - num_output: 28 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prelu1" - type: "PReLU" - bottom: "conv1" - top: "conv1" - propagate_down: true -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} - -layer { - name: "conv2" - type: "Convolution" - bottom: "pool1" - top: "conv2" - param { - lr_mult: 0 - decay_mult: 0 - } - param { - lr_mult: 0 - decay_mult: 0 - } - convolution_param { - num_output: 48 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prelu2" - type: "PReLU" - bottom: "conv2" - top: "conv2" - propagate_down: true -} -layer { - name: "pool2" - type: "Pooling" - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -#################################### - -################################## -layer { - name: "conv3" - type: "Convolution" - 
bottom: "pool2" - top: "conv3" - param { - lr_mult: 0 - decay_mult: 0 - } - param { - lr_mult: 0 - decay_mult: 0 - } - convolution_param { - num_output: 64 - kernel_size: 2 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prelu3" - type: "PReLU" - bottom: "conv3" - top: "conv3" - propagate_down: true -} -############################### - -############################### - -layer { - name: "conv4" - type: "InnerProduct" - bottom: "conv3" - top: "conv4" - param { - lr_mult: 0 - decay_mult: 0 - } - param { - lr_mult: 0 - decay_mult: 0 - } - inner_product_param { - num_output: 128 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prelu4" - type: "PReLU" - bottom: "conv4" - top: "conv4" -} - -layer { - name: "conv5-1" - type: "InnerProduct" - bottom: "conv4" - top: "conv5-1" - param { - lr_mult: 0 - decay_mult: 0 - } - param { - lr_mult: 0 - decay_mult: 0 - } - inner_product_param { - num_output: 2 - #kernel_size: 1 - #stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv5-2" - type: "InnerProduct" - bottom: "conv4" - top: "conv5-2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 4 - #kernel_size: 1 - #stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prob1" - type: "Softmax" - bottom: "conv5-1" - top: "prob1" -} \ No newline at end of file diff --git a/mtcnn-model/det3-0001.params b/mtcnn-model/det3-0001.params deleted file mode 100644 index 52126c2..0000000 --- a/mtcnn-model/det3-0001.params +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e2f75887f8440f1343e1fb507dd6647ac27118f096c3a83c0e9bcbe64843ed97 -size 1557492 diff --git a/mtcnn-model/det3-symbol.json 
b/mtcnn-model/det3-symbol.json deleted file mode 100644 index aab3b15..0000000 --- a/mtcnn-model/det3-symbol.json +++ /dev/null @@ -1,404 +0,0 @@ -{ - "nodes": [ - { - "op": "null", - "name": "data", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv1", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "32", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[3, 0, 0], [4, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[5, 0, 0]] - }, - { - "op": "null", - "name": "conv2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[6, 0, 0], [7, 0, 0], [8, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[9, 0, 0], 
[10, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool2", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[11, 0, 0]] - }, - { - "op": "null", - "name": "conv3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv3_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[12, 0, 0], [13, 0, 0], [14, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[15, 0, 0], [16, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool3", - "attrs": { - "global_pool": "False", - "kernel": "(2,2)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[17, 0, 0]] - }, - { - "op": "null", - "name": "conv4_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv4_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv4", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(2,2)", - "no_bias": "False", - "num_filter": "128", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[18, 0, 0], [19, 0, 0], [20, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": 
"0.334" - }, - "inputs": [[21, 0, 0], [22, 0, 0]] - }, - { - "op": "null", - "name": "conv5_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv5_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "conv5", - "attrs": { - "no_bias": "False", - "num_hidden": "256" - }, - "inputs": [[23, 0, 0], [24, 0, 0], [25, 0, 0]] - }, - { - "op": "null", - "name": "prelu5_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu5", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[26, 0, 0], [27, 0, 0]] - }, - { - "op": "null", - "name": "conv6_3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv6_3_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "conv6_3", - "attrs": { - "no_bias": "False", - "num_hidden": "10" - }, - "inputs": [[28, 0, 0], [29, 0, 0], [30, 0, 0]] - }, - { - "op": "null", - "name": "conv6_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv6_2_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "conv6_2", - "attrs": { - "no_bias": "False", - "num_hidden": "4" - }, - "inputs": [[28, 0, 0], [32, 0, 0], [33, 0, 0]] - }, - { - "op": "null", - "name": "conv6_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv6_1_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "conv6_1", - "attrs": { - "no_bias": "False", - "num_hidden": "2" - }, - "inputs": [[28, 0, 0], [35, 0, 0], [36, 0, 0]] - }, - { - "op": "null", - "name": "prob1_label", - "inputs": [] - }, - { - "op": "SoftmaxOutput", - "name": "prob1", - "attrs": { - "grad_scale": "1", - "ignore_label": "-1", - "multi_output": "False", - "normalization": "null", - "use_ignore": "False" - }, - "inputs": [[37, 0, 0], [38, 0, 0]] - } - ], - "arg_nodes": [ - 0, - 1, - 2, - 4, - 7, - 8, - 10, - 13, - 14, - 16, - 19, - 20, - 22, - 24, - 25, - 27, - 29, - 30, - 32, 
- 33, - 35, - 36, - 38 - ], - "node_row_ptr": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, - 36, - 37, - 38, - 39, - 40 - ], - "heads": [[31, 0, 0], [34, 0, 0], [39, 0, 0]], - "attrs": {"mxnet_version": ["int", 10400]} -} \ No newline at end of file diff --git a/mtcnn-model/det3.caffemodel b/mtcnn-model/det3.caffemodel deleted file mode 100644 index 7b4b8a4..0000000 Binary files a/mtcnn-model/det3.caffemodel and /dev/null differ diff --git a/mtcnn-model/det3.prototxt b/mtcnn-model/det3.prototxt deleted file mode 100644 index a192307..0000000 --- a/mtcnn-model/det3.prototxt +++ /dev/null @@ -1,294 +0,0 @@ -name: "ONet" -input: "data" -input_dim: 1 -input_dim: 3 -input_dim: 48 -input_dim: 48 -################################## -layer { - name: "conv1" - type: "Convolution" - bottom: "data" - top: "conv1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 32 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prelu1" - type: "PReLU" - bottom: "conv1" - top: "conv1" -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layer { - name: "conv2" - type: "Convolution" - bottom: "pool1" - top: "conv2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 64 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} - -layer { - name: "prelu2" - type: "PReLU" - bottom: "conv2" - top: "conv2" -} -layer { - name: "pool2" - type: "Pooling" - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} 
- -layer { - name: "conv3" - type: "Convolution" - bottom: "pool2" - top: "conv3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 64 - kernel_size: 3 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prelu3" - type: "PReLU" - bottom: "conv3" - top: "conv3" -} -layer { - name: "pool3" - type: "Pooling" - bottom: "conv3" - top: "pool3" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} -layer { - name: "conv4" - type: "Convolution" - bottom: "pool3" - top: "conv4" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 128 - kernel_size: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prelu4" - type: "PReLU" - bottom: "conv4" - top: "conv4" -} - - -layer { - name: "conv5" - type: "InnerProduct" - bottom: "conv4" - top: "conv5" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - #kernel_size: 3 - num_output: 256 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} - -layer { - name: "drop5" - type: "Dropout" - bottom: "conv5" - top: "conv5" - dropout_param { - dropout_ratio: 0.25 - } -} -layer { - name: "prelu5" - type: "PReLU" - bottom: "conv5" - top: "conv5" -} - - -layer { - name: "conv6-1" - type: "InnerProduct" - bottom: "conv5" - top: "conv6-1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - #kernel_size: 1 - num_output: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6-2" - type: "InnerProduct" - bottom: "conv5" - top: "conv6-2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - 
#kernel_size: 1 - num_output: 4 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6-3" - type: "InnerProduct" - bottom: "conv5" - top: "conv6-3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - #kernel_size: 1 - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "prob1" - type: "Softmax" - bottom: "conv6-1" - top: "prob1" -} diff --git a/mtcnn-model/det4-0001.params b/mtcnn-model/det4-0001.params deleted file mode 100644 index 0209bd2..0000000 --- a/mtcnn-model/det4-0001.params +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:00f98948d7cb1486afa97a17dc80b335319ad5436aa216eea4197fcbfe05c761 -size 3797020 diff --git a/mtcnn-model/det4-symbol.json b/mtcnn-model/det4-symbol.json deleted file mode 100644 index 7930fb0..0000000 --- a/mtcnn-model/det4-symbol.json +++ /dev/null @@ -1,1343 +0,0 @@ -{ - "nodes": [ - { - "op": "null", - "name": "data", - "inputs": [] - }, - { - "op": "SliceChannel", - "name": "slice", - "attrs": { - "axis": "1", - "num_outputs": "5", - "squeeze_axis": "False" - }, - "inputs": [[0, 0, 0]] - }, - { - "op": "null", - "name": "conv1_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_1_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv1_1", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "28", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[1, 0, 0], [2, 0, 0], [3, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_1_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1_1", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", 
- "upper_bound": "0.334" - }, - "inputs": [[4, 0, 0], [5, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1_1", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[6, 0, 0]] - }, - { - "op": "null", - "name": "conv2_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_1_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2_1", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "48", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[7, 0, 0], [8, 0, 0], [9, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_1_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2_1", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[10, 0, 0], [11, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool2_1", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[12, 0, 0]] - }, - { - "op": "null", - "name": "conv3_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv3_1_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3_1", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(2,2)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[13, 0, 0], [14, 0, 0], [15, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_1_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3_1", - "attrs": { - "act_type": 
"prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[16, 0, 0], [17, 0, 0]] - }, - { - "op": "null", - "name": "conv1_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_2_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv1_2", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "28", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[1, 1, 0], [19, 0, 0], [20, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_2_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1_2", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[21, 0, 0], [22, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1_2", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[23, 0, 0]] - }, - { - "op": "null", - "name": "conv2_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_2_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2_2", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "48", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[24, 0, 0], [25, 0, 0], [26, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_2_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2_2", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[27, 0, 0], [28, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool2_2", - 
"attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[29, 0, 0]] - }, - { - "op": "null", - "name": "conv3_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv3_2_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3_2", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(2,2)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[30, 0, 0], [31, 0, 0], [32, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_2_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3_2", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[33, 0, 0], [34, 0, 0]] - }, - { - "op": "null", - "name": "conv1_3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_3_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv1_3", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "28", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[1, 2, 0], [36, 0, 0], [37, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_3_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1_3", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[38, 0, 0], [39, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1_3", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[40, 
0, 0]] - }, - { - "op": "null", - "name": "conv2_3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_3_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2_3", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "48", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[41, 0, 0], [42, 0, 0], [43, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_3_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2_3", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[44, 0, 0], [45, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool2_3", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[46, 0, 0]] - }, - { - "op": "null", - "name": "conv3_3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv3_3_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3_3", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(2,2)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[47, 0, 0], [48, 0, 0], [49, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_3_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3_3", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[50, 0, 0], [51, 0, 0]] - }, - { - "op": "null", - "name": "conv1_4_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_4_bias", - "inputs": [] - }, - { - "op": "Convolution", 
- "name": "conv1_4", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "28", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[1, 3, 0], [53, 0, 0], [54, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_4_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1_4", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[55, 0, 0], [56, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1_4", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[57, 0, 0]] - }, - { - "op": "null", - "name": "conv2_4_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_4_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2_4", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "48", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[58, 0, 0], [59, 0, 0], [60, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_4_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2_4", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[61, 0, 0], [62, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool2_4", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[63, 0, 0]] - }, - { - "op": "null", - "name": "conv3_4_weight", - "inputs": [] - }, - { - "op": "null", - "name": 
"conv3_4_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3_4", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(2,2)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[64, 0, 0], [65, 0, 0], [66, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_4_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3_4", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[67, 0, 0], [68, 0, 0]] - }, - { - "op": "null", - "name": "conv1_5_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv1_5_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv1_5", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(3,3)", - "no_bias": "False", - "num_filter": "28", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[1, 4, 0], [70, 0, 0], [71, 0, 0]] - }, - { - "op": "null", - "name": "prelu1_5_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu1_5", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[72, 0, 0], [73, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool1_5", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[74, 0, 0]] - }, - { - "op": "null", - "name": "conv2_5_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv2_5_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv2_5", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - 
"kernel": "(3,3)", - "no_bias": "False", - "num_filter": "48", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[75, 0, 0], [76, 0, 0], [77, 0, 0]] - }, - { - "op": "null", - "name": "prelu2_5_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu2_5", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[78, 0, 0], [79, 0, 0]] - }, - { - "op": "Pooling", - "name": "pool2_5", - "attrs": { - "global_pool": "False", - "kernel": "(3,3)", - "pad": "(0,0)", - "pool_type": "max", - "pooling_convention": "full", - "stride": "(2,2)" - }, - "inputs": [[80, 0, 0]] - }, - { - "op": "null", - "name": "conv3_5_weight", - "inputs": [] - }, - { - "op": "null", - "name": "conv3_5_bias", - "inputs": [] - }, - { - "op": "Convolution", - "name": "conv3_5", - "attrs": { - "cudnn_off": "False", - "cudnn_tune": "off", - "dilate": "(1,1)", - "kernel": "(2,2)", - "no_bias": "False", - "num_filter": "64", - "num_group": "1", - "pad": "(0,0)", - "stride": "(1,1)", - "workspace": "1024" - }, - "inputs": [[81, 0, 0], [82, 0, 0], [83, 0, 0]] - }, - { - "op": "null", - "name": "prelu3_5_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu3_5", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[84, 0, 0], [85, 0, 0]] - }, - { - "op": "Concat", - "name": "concat", - "attrs": { - "dim": "1", - "num_args": "5" - }, - "inputs": [[18, 0, 0], [35, 0, 0], [52, 0, 0], [69, 0, 0], [86, 0, 0]] - }, - { - "op": "null", - "name": "fc4_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc4_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc4", - "attrs": { - "no_bias": "False", - "num_hidden": "256" - }, - "inputs": [[87, 0, 0], [88, 0, 
0], [89, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[90, 0, 0], [91, 0, 0]] - }, - { - "op": "null", - "name": "fc4_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc4_1_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc4_1", - "attrs": { - "no_bias": "False", - "num_hidden": "64" - }, - "inputs": [[92, 0, 0], [93, 0, 0], [94, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_1_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4_1", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[95, 0, 0], [96, 0, 0]] - }, - { - "op": "null", - "name": "fc5_1_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc5_1_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc5_1", - "attrs": { - "no_bias": "False", - "num_hidden": "2" - }, - "inputs": [[97, 0, 0], [98, 0, 0], [99, 0, 0]] - }, - { - "op": "null", - "name": "fc4_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc4_2_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc4_2", - "attrs": { - "no_bias": "False", - "num_hidden": "64" - }, - "inputs": [[92, 0, 0], [101, 0, 0], [102, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_2_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4_2", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[103, 0, 0], [104, 0, 0]] - }, - { - "op": "null", - "name": "fc5_2_weight", - "inputs": [] - }, - { - "op": "null", - "name": 
"fc5_2_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc5_2", - "attrs": { - "no_bias": "False", - "num_hidden": "2" - }, - "inputs": [[105, 0, 0], [106, 0, 0], [107, 0, 0]] - }, - { - "op": "null", - "name": "fc4_3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc4_3_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc4_3", - "attrs": { - "no_bias": "False", - "num_hidden": "64" - }, - "inputs": [[92, 0, 0], [109, 0, 0], [110, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_3_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4_3", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[111, 0, 0], [112, 0, 0]] - }, - { - "op": "null", - "name": "fc5_3_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc5_3_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc5_3", - "attrs": { - "no_bias": "False", - "num_hidden": "2" - }, - "inputs": [[113, 0, 0], [114, 0, 0], [115, 0, 0]] - }, - { - "op": "null", - "name": "fc4_4_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc4_4_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc4_4", - "attrs": { - "no_bias": "False", - "num_hidden": "64" - }, - "inputs": [[92, 0, 0], [117, 0, 0], [118, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_4_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4_4", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[119, 0, 0], [120, 0, 0]] - }, - { - "op": "null", - "name": "fc5_4_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc5_4_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc5_4", - "attrs": { - "no_bias": "False", - "num_hidden": "2" - }, - 
"inputs": [[121, 0, 0], [122, 0, 0], [123, 0, 0]] - }, - { - "op": "null", - "name": "fc4_5_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc4_5_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc4_5", - "attrs": { - "no_bias": "False", - "num_hidden": "64" - }, - "inputs": [[92, 0, 0], [125, 0, 0], [126, 0, 0]] - }, - { - "op": "null", - "name": "prelu4_5_gamma", - "attrs": {"__init__": "[\"Constant\", {\"value\": 0.25}]"}, - "inputs": [] - }, - { - "op": "LeakyReLU", - "name": "prelu4_5", - "attrs": { - "act_type": "prelu", - "lower_bound": "0.125", - "slope": "0.25", - "upper_bound": "0.334" - }, - "inputs": [[127, 0, 0], [128, 0, 0]] - }, - { - "op": "null", - "name": "fc5_5_weight", - "inputs": [] - }, - { - "op": "null", - "name": "fc5_5_bias", - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc5_5", - "attrs": { - "no_bias": "False", - "num_hidden": "2" - }, - "inputs": [[129, 0, 0], [130, 0, 0], [131, 0, 0]] - } - ], - "arg_nodes": [ - 0, - 2, - 3, - 5, - 8, - 9, - 11, - 14, - 15, - 17, - 19, - 20, - 22, - 25, - 26, - 28, - 31, - 32, - 34, - 36, - 37, - 39, - 42, - 43, - 45, - 48, - 49, - 51, - 53, - 54, - 56, - 59, - 60, - 62, - 65, - 66, - 68, - 70, - 71, - 73, - 76, - 77, - 79, - 82, - 83, - 85, - 88, - 89, - 91, - 93, - 94, - 96, - 98, - 99, - 101, - 102, - 104, - 106, - 107, - 109, - 110, - 112, - 114, - 115, - 117, - 118, - 120, - 122, - 123, - 125, - 126, - 128, - 130, - 131 - ], - "node_row_ptr": [ - 0, - 1, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, - 36, - 37, - 38, - 39, - 40, - 41, - 42, - 43, - 44, - 45, - 46, - 47, - 48, - 49, - 50, - 51, - 52, - 53, - 54, - 55, - 56, - 57, - 58, - 59, - 60, - 61, - 62, - 63, - 64, - 65, - 66, - 67, - 68, - 69, - 70, - 71, - 72, - 73, - 74, - 75, - 76, - 77, - 78, - 79, - 80, - 81, - 82, - 83, - 84, - 85, - 86, - 87, - 88, - 89, - 90, - 
91, - 92, - 93, - 94, - 95, - 96, - 97, - 98, - 99, - 100, - 101, - 102, - 103, - 104, - 105, - 106, - 107, - 108, - 109, - 110, - 111, - 112, - 113, - 114, - 115, - 116, - 117, - 118, - 119, - 120, - 121, - 122, - 123, - 124, - 125, - 126, - 127, - 128, - 129, - 130, - 131, - 132, - 133, - 134, - 135, - 136, - 137 - ], - "heads": [[100, 0, 0], [108, 0, 0], [116, 0, 0], [124, 0, 0], [132, 0, 0]], - "attrs": {"mxnet_version": ["int", 10400]} -} \ No newline at end of file diff --git a/mtcnn-model/det4.caffemodel b/mtcnn-model/det4.caffemodel deleted file mode 100644 index 38353c4..0000000 Binary files a/mtcnn-model/det4.caffemodel and /dev/null differ diff --git a/mtcnn-model/det4.prototxt b/mtcnn-model/det4.prototxt deleted file mode 100644 index 4cdc329..0000000 --- a/mtcnn-model/det4.prototxt +++ /dev/null @@ -1,995 +0,0 @@ -name: "LNet" -input: "data" -input_dim: 1 -input_dim: 15 -input_dim: 24 -input_dim: 24 - -layer { - name: "slicer_data" - type: "Slice" - bottom: "data" - top: "data241" - top: "data242" - top: "data243" - top: "data244" - top: "data245" - slice_param { - axis: 1 - slice_point: 3 - slice_point: 6 - slice_point: 9 - slice_point: 12 - } -} -layer { - name: "conv1_1" - type: "Convolution" - bottom: "data241" - top: "conv1_1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 28 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu1_1" - type: "PReLU" - bottom: "conv1_1" - top: "conv1_1" - -} -layer { - name: "pool1_1" - type: "Pooling" - bottom: "conv1_1" - top: "pool1_1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} - -layer { - name: "conv2_1" - type: "Convolution" - bottom: "pool1_1" - top: "conv2_1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 48 - kernel_size: 3 - stride: 1 - 
weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu2_1" - type: "PReLU" - bottom: "conv2_1" - top: "conv2_1" -} -layer { - name: "pool2_1" - type: "Pooling" - bottom: "conv2_1" - top: "pool2_1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - -} -layer { - name: "conv3_1" - type: "Convolution" - bottom: "pool2_1" - top: "conv3_1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 64 - kernel_size: 2 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu3_1" - type: "PReLU" - bottom: "conv3_1" - top: "conv3_1" -} -########################## -layer { - name: "conv1_2" - type: "Convolution" - bottom: "data242" - top: "conv1_2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 28 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu1_2" - type: "PReLU" - bottom: "conv1_2" - top: "conv1_2" - -} -layer { - name: "pool1_2" - type: "Pooling" - bottom: "conv1_2" - top: "pool1_2" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} - -layer { - name: "conv2_2" - type: "Convolution" - bottom: "pool1_2" - top: "conv2_2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 48 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu2_2" - type: "PReLU" - bottom: "conv2_2" - top: "conv2_2" -} -layer { - name: "pool2_2" - type: "Pooling" - bottom: "conv2_2" - top: "pool2_2" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - -} -layer { - name: "conv3_2" - type: "Convolution" - bottom: "pool2_2" - 
top: "conv3_2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 64 - kernel_size: 2 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu3_2" - type: "PReLU" - bottom: "conv3_2" - top: "conv3_2" -} -########################## -########################## -layer { - name: "conv1_3" - type: "Convolution" - bottom: "data243" - top: "conv1_3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 28 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu1_3" - type: "PReLU" - bottom: "conv1_3" - top: "conv1_3" - -} -layer { - name: "pool1_3" - type: "Pooling" - bottom: "conv1_3" - top: "pool1_3" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} - -layer { - name: "conv2_3" - type: "Convolution" - bottom: "pool1_3" - top: "conv2_3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 48 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu2_3" - type: "PReLU" - bottom: "conv2_3" - top: "conv2_3" -} -layer { - name: "pool2_3" - type: "Pooling" - bottom: "conv2_3" - top: "pool2_3" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - -} -layer { - name: "conv3_3" - type: "Convolution" - bottom: "pool2_3" - top: "conv3_3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 64 - kernel_size: 2 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu3_3" - type: "PReLU" - bottom: "conv3_3" - top: "conv3_3" -} 
-########################## -########################## -layer { - name: "conv1_4" - type: "Convolution" - bottom: "data244" - top: "conv1_4" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 28 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu1_4" - type: "PReLU" - bottom: "conv1_4" - top: "conv1_4" - -} -layer { - name: "pool1_4" - type: "Pooling" - bottom: "conv1_4" - top: "pool1_4" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} - -layer { - name: "conv2_4" - type: "Convolution" - bottom: "pool1_4" - top: "conv2_4" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 48 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu2_4" - type: "PReLU" - bottom: "conv2_4" - top: "conv2_4" -} -layer { - name: "pool2_4" - type: "Pooling" - bottom: "conv2_4" - top: "pool2_4" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - -} -layer { - name: "conv3_4" - type: "Convolution" - bottom: "pool2_4" - top: "conv3_4" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 64 - kernel_size: 2 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu3_4" - type: "PReLU" - bottom: "conv3_4" - top: "conv3_4" -} -########################## -########################## -layer { - name: "conv1_5" - type: "Convolution" - bottom: "data245" - top: "conv1_5" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 28 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - 
value: 0 - } - } - -} -layer { - name: "prelu1_5" - type: "PReLU" - bottom: "conv1_5" - top: "conv1_5" - -} -layer { - name: "pool1_5" - type: "Pooling" - bottom: "conv1_5" - top: "pool1_5" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} - -layer { - name: "conv2_5" - type: "Convolution" - bottom: "pool1_5" - top: "conv2_5" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 48 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu2_5" - type: "PReLU" - bottom: "conv2_5" - top: "conv2_5" -} -layer { - name: "pool2_5" - type: "Pooling" - bottom: "conv2_5" - top: "pool2_5" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - -} -layer { - name: "conv3_5" - type: "Convolution" - bottom: "pool2_5" - top: "conv3_5" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - convolution_param { - num_output: 64 - kernel_size: 2 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu3_5" - type: "PReLU" - bottom: "conv3_5" - top: "conv3_5" -} -########################## -layer { - name: "concat" - bottom: "conv3_1" - bottom: "conv3_2" - bottom: "conv3_3" - bottom: "conv3_4" - bottom: "conv3_5" - top: "conv3" - type: "Concat" - concat_param { - axis: 1 - } -} -########################## -layer { - name: "fc4" - type: "InnerProduct" - bottom: "conv3" - top: "fc4" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 256 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu4" - type: "PReLU" - bottom: "fc4" - top: "fc4" -} -############################ -layer { - name: "fc4_1" - type: "InnerProduct" - bottom: "fc4" - top: "fc4_1" - param { - 
lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 64 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu4_1" - type: "PReLU" - bottom: "fc4_1" - top: "fc4_1" -} -layer { - name: "fc5_1" - type: "InnerProduct" - bottom: "fc4_1" - top: "fc5_1" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 2 - weight_filler { - type: "xavier" - #type: "constant" - #value: 0 - } - bias_filler { - type: "constant" - value: 0 - } - } -} - - -######################### -layer { - name: "fc4_2" - type: "InnerProduct" - bottom: "fc4" - top: "fc4_2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 64 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu4_2" - type: "PReLU" - bottom: "fc4_2" - top: "fc4_2" -} -layer { - name: "fc5_2" - type: "InnerProduct" - bottom: "fc4_2" - top: "fc5_2" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 2 - weight_filler { - type: "xavier" - #type: "constant" - #value: 0 - } - bias_filler { - type: "constant" - value: 0 - } - } -} - -######################### -layer { - name: "fc4_3" - type: "InnerProduct" - bottom: "fc4" - top: "fc4_3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 64 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu4_3" - type: "PReLU" - bottom: "fc4_3" - top: "fc4_3" -} -layer { - name: "fc5_3" - type: "InnerProduct" - bottom: "fc4_3" - top: "fc5_3" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 2 - 
weight_filler { - type: "xavier" - #type: "constant" - #value: 0 - } - bias_filler { - type: "constant" - value: 0 - } - } -} - -######################### -layer { - name: "fc4_4" - type: "InnerProduct" - bottom: "fc4" - top: "fc4_4" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 64 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu4_4" - type: "PReLU" - bottom: "fc4_4" - top: "fc4_4" -} -layer { - name: "fc5_4" - type: "InnerProduct" - bottom: "fc4_4" - top: "fc5_4" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 2 - weight_filler { - type: "xavier" - #type: "constant" - #value: 0 - } - bias_filler { - type: "constant" - value: 0 - } - } -} - -######################### -layer { - name: "fc4_5" - type: "InnerProduct" - bottom: "fc4" - top: "fc4_5" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 64 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } - -} -layer { - name: "prelu4_5" - type: "PReLU" - bottom: "fc4_5" - top: "fc4_5" -} -layer { - name: "fc5_5" - type: "InnerProduct" - bottom: "fc4_5" - top: "fc5_5" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 1 - } - inner_product_param { - num_output: 2 - weight_filler { - type: "xavier" - #type: "constant" - #value: 0 - } - bias_filler { - type: "constant" - value: 0 - } - } -} - -######################### - diff --git a/mtcnn_detector.py b/mtcnn_detector.py deleted file mode 100644 index b704138..0000000 --- a/mtcnn_detector.py +++ /dev/null @@ -1,691 +0,0 @@ -# coding: utf-8 -import os -import mxnet as mx -import numpy as np -import math -import cv2 -from multiprocessing import Pool -from itertools import repeat -from helper import nms, 
adjust_input, generate_bbox, detect_first_stage_warpper - - -class MtcnnDetector(object): - """ - Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks - see https://github.com/kpzhang93/MTCNN_face_detection_alignment - this is a mxnet version - """ - - def __init__(self, - model_folder='.', - minsize=200, - threshold=[0.6, 0.7, 0.8], - factor=0.709, - num_worker=1, - accurate_landmark=True, - ctx=mx.cpu()): - """ - Initialize the detector - - Parameters: - ---------- - model_folder : string - path for the models - minsize : float number - minimal face to detect - threshold : float number - detect threshold for 3 stages - factor: float number - scale factor for image pyramid - num_worker: int number - number of processes we use for first stage - accurate_landmark: bool - use accurate landmark localization or not - - """ - self.num_worker = num_worker - self.accurate_landmark = accurate_landmark - - # load 4 models from folder - models = ['det1', 'det2', 'det3', 'det4'] - models = [os.path.join(model_folder, f) for f in models] - - self.PNets = [] - for i in range(num_worker): - workner_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx) - self.PNets.append(workner_net) - - #self.Pool = Pool(num_worker) - - self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx) - self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx) - self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx) - - self.minsize = float(minsize) - self.factor = float(factor) - self.threshold = threshold - - def convert_to_square(self, bbox): - """ - convert bbox to square - - Parameters: - ---------- - bbox: numpy array , shape n x 5 - input bbox - - Returns: - ------- - square bbox - """ - square_bbox = bbox.copy() - - h = bbox[:, 3] - bbox[:, 1] + 1 - w = bbox[:, 2] - bbox[:, 0] + 1 - max_side = np.maximum(h, w) - square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5 - square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5 - square_bbox[:, 2] = 
square_bbox[:, 0] + max_side - 1 - square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1 - return square_bbox - - def calibrate_box(self, bbox, reg): - """ - calibrate bboxes - - Parameters: - ---------- - bbox: numpy array, shape n x 5 - input bboxes - reg: numpy array, shape n x 4 - bboxex adjustment - - Returns: - ------- - bboxes after refinement - - """ - w = bbox[:, 2] - bbox[:, 0] + 1 - w = np.expand_dims(w, 1) - h = bbox[:, 3] - bbox[:, 1] + 1 - h = np.expand_dims(h, 1) - reg_m = np.hstack([w, h, w, h]) - aug = reg_m * reg - bbox[:, 0:4] = bbox[:, 0:4] + aug - return bbox - - def pad(self, bboxes, w, h): - """ - pad the the bboxes, alse restrict the size of it - - Parameters: - ---------- - bboxes: numpy array, n x 5 - input bboxes - w: float number - width of the input image - h: float number - height of the input image - Returns : - ------s - dy, dx : numpy array, n x 1 - start point of the bbox in target image - edy, edx : numpy array, n x 1 - end point of the bbox in target image - y, x : numpy array, n x 1 - start point of the bbox in original image - ex, ex : numpy array, n x 1 - end point of the bbox in original image - tmph, tmpw: numpy array, n x 1 - height and width of the bbox - - """ - tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + \ - 1, bboxes[:, 3] - bboxes[:, 1] + 1 - num_box = bboxes.shape[0] - - dx, dy = np.zeros((num_box, )), np.zeros((num_box, )) - edx, edy = tmpw.copy()-1, tmph.copy()-1 - - x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3] - - tmp_index = np.where(ex > w-1) - edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index] - ex[tmp_index] = w - 1 - - tmp_index = np.where(ey > h-1) - edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index] - ey[tmp_index] = h - 1 - - tmp_index = np.where(x < 0) - dx[tmp_index] = 0 - x[tmp_index] - x[tmp_index] = 0 - - tmp_index = np.where(y < 0) - dy[tmp_index] = 0 - y[tmp_index] - y[tmp_index] = 0 - - return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] - return_list = 
[item.astype(np.int32) for item in return_list] - - return return_list - - def slice_index(self, number): - """ - slice the index into (n,n,m), m < n - Parameters: - ---------- - number: int number - number - """ - def chunks(l, n): - """Yield successive n-sized chunks from l.""" - for i in range(0, len(l), n): - yield l[i:i + n] - num_list = range(number) - return list(chunks(num_list, self.num_worker)) - - def detect_face_limited(self, img, det_type=2): - height, width, _ = img.shape - if det_type >= 2: - total_boxes = np.array( - [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32) - num_box = total_boxes.shape[0] - - # pad the bbox - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad( - total_boxes, width, height) - # (3, 24, 24) is the input shape for RNet - input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32) - - for i in range(num_box): - tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) - tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, - :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :] - # input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24))) - input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24))) - - output = self.RNet.predict(input_buf) - - # filter the total_boxes with threshold - passed = np.where(output[1][:, 1] > self.threshold[1]) - total_boxes = total_boxes[passed] - - if total_boxes.size == 0: - return None - - total_boxes[:, 4] = output[1][passed, 1].reshape((-1,)) - reg = output[0][passed] - - # nms - pick = nms(total_boxes, 0.7, 'Union') - total_boxes = total_boxes[pick] - total_boxes = self.calibrate_box(total_boxes, reg[pick]) - total_boxes = self.convert_to_square(total_boxes) - total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) - else: - total_boxes = np.array( - [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32) - num_box = total_boxes.shape[0] - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad( - total_boxes, width, height) - # (3, 48, 48) is the input shape for ONet - input_buf = 
np.zeros((num_box, 3, 48, 48), dtype=np.float32) - - for i in range(num_box): - tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32) - tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, - :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :] - input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48))) - - output = self.ONet.predict(input_buf) - # print(output[2]) - - # filter the total_boxes with threshold - passed = np.where(output[2][:, 1] > self.threshold[2]) - total_boxes = total_boxes[passed] - - if total_boxes.size == 0: - return None - - total_boxes[:, 4] = output[2][passed, 1].reshape((-1,)) - reg = output[1][passed] - points = output[0][passed] - - # compute landmark points - bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 - bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 - points[:, 0:5] = np.expand_dims( - total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5] - points[:, 5:10] = np.expand_dims( - total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10] - - # nms - total_boxes = self.calibrate_box(total_boxes, reg) - pick = nms(total_boxes, 0.7, 'Min') - total_boxes = total_boxes[pick] - points = points[pick] - - if not self.accurate_landmark: - return total_boxes, points - - ############################################# - # extended stage - ############################################# - num_box = total_boxes.shape[0] - patchw = np.maximum( - total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1) - patchw = np.round(patchw*0.25) - - # make it even - patchw[np.where(np.mod(patchw, 2) == 1)] += 1 - - input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32) - for i in range(5): - x, y = points[:, i], points[:, i+5] - x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw) - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T, - width, - height) - for j in range(num_box): - tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32) - tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, - :] = 
img[y[j]:ey[j]+1, x[j]:ex[j]+1, :] - input_buf[j, i*3:i*3+3, :, - :] = adjust_input(cv2.resize(tmpim, (24, 24))) - - output = self.LNet.predict(input_buf) - - pointx = np.zeros((num_box, 5)) - pointy = np.zeros((num_box, 5)) - - for k in range(5): - # do not make a large movement - tmp_index = np.where(np.abs(output[k]-0.5) > 0.35) - output[k][tmp_index[0]] = 0.5 - - pointx[:, k] = np.round( - points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw - pointy[:, k] = np.round( - points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw - - points = np.hstack([pointx, pointy]) - points = points.astype(np.int32) - - return total_boxes, points - - def detect_face(self, img, det_type=0): - """ - detect face over img - Parameters: - ---------- - img: numpy array, bgr order of shape (1, 3, n, m) - input image - Retures: - ------- - bboxes: numpy array, n x 5 (x1,y2,x2,y2,score) - bboxes - points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5) - landmarks - """ - - # check input - height, width, _ = img.shape - if det_type == 0: - MIN_DET_SIZE = 12 - - if img is None: - return None - - # only works for color image - if len(img.shape) != 3: - return None - - # detected boxes - total_boxes = [] - - minl = min(height, width) - - # get all the valid scales - scales = [] - m = MIN_DET_SIZE/self.minsize - minl *= m - factor_count = 0 - while minl > MIN_DET_SIZE: - scales.append(m*self.factor**factor_count) - minl *= self.factor - factor_count += 1 - - ############################################# - # first stage - ############################################# - # for scale in scales: - # return_boxes = self.detect_first_stage(img, scale, 0) - # if return_boxes is not None: - # total_boxes.append(return_boxes) - - sliced_index = self.slice_index(len(scales)) - total_boxes = [] - for batch in sliced_index: - # local_boxes = self.Pool.map( detect_first_stage_warpper, \ - # izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) ) - local_boxes = 
map(detect_first_stage_warpper, - zip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0]))) - total_boxes.extend(local_boxes) - - # remove the Nones - total_boxes = [i for i in total_boxes if i is not None] - - if len(total_boxes) == 0: - return None - - total_boxes = np.vstack(total_boxes) - - if total_boxes.size == 0: - return None - - # merge the detection from first stage - pick = nms(total_boxes[:, 0:5], 0.7, 'Union') - total_boxes = total_boxes[pick] - - bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 - bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 - - # refine the bboxes - total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw, - total_boxes[:, 1]+total_boxes[:, 6] * bbh, - total_boxes[:, 2]+total_boxes[:, 7] * bbw, - total_boxes[:, 3]+total_boxes[:, 8] * bbh, - total_boxes[:, 4] - ]) - - total_boxes = total_boxes.T - total_boxes = self.convert_to_square(total_boxes) - total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) - else: - total_boxes = np.array( - [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32) - - ############################################# - # second stage - ############################################# - - num_box = total_boxes.shape[0] - - # pad the bbox - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad( - total_boxes, width, height) - # (3, 24, 24) is the input shape for RNet - input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32) - - for i in range(num_box): - tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) - tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, - :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :] - - # print(img[0:20, 0:20, :]) - # cv2.imshow('ss',tmp) - # cv2.waitKey(0) - - input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24))) - - output = self.RNet.predict(input_buf) - - # filter the total_boxes with threshold - passed = np.where(output[1][:, 1] > self.threshold[1]) - total_boxes = total_boxes[passed] - - if total_boxes.size == 0: - return None - - 
total_boxes[:, 4] = output[1][passed, 1].reshape((-1,)) - reg = output[0][passed] - - # nms - pick = nms(total_boxes, 0.7, 'Union') - total_boxes = total_boxes[pick] - total_boxes = self.calibrate_box(total_boxes, reg[pick]) - total_boxes = self.convert_to_square(total_boxes) - total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) - - ############################################# - # third stage - ############################################# - num_box = total_boxes.shape[0] - - # pad the bbox - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad( - total_boxes, width, height) - # (3, 48, 48) is the input shape for ONet - input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32) - - for i in range(num_box): - tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32) - tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, - :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :] - input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48))) - - output = self.ONet.predict(input_buf) - - # filter the total_boxes with threshold - passed = np.where(output[2][:, 1] > self.threshold[2]) - total_boxes = total_boxes[passed] - - if total_boxes.size == 0: - return None - - total_boxes[:, 4] = output[2][passed, 1].reshape((-1,)) - reg = output[1][passed] - points = output[0][passed] - - # compute landmark points - bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 - bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 - points[:, 0:5] = np.expand_dims( - total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5] - points[:, 5:10] = np.expand_dims( - total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10] - - # nms - total_boxes = self.calibrate_box(total_boxes, reg) - pick = nms(total_boxes, 0.7, 'Min') - total_boxes = total_boxes[pick] - points = points[pick] - - if not self.accurate_landmark: - return total_boxes, points - - ############################################# - # extended stage - ############################################# - num_box = total_boxes.shape[0] - patchw = np.maximum( - 
total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1) - patchw = np.round(patchw*0.25) - - # make it even - patchw[np.where(np.mod(patchw, 2) == 1)] += 1 - - input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32) - for i in range(5): - x, y = points[:, i], points[:, i+5] - x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw) - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T, - width, - height) - for j in range(num_box): - tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32) - tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, - :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :] - input_buf[j, i*3:i*3+3, :, - :] = adjust_input(cv2.resize(tmpim, (24, 24))) - - output = self.LNet.predict(input_buf) - - pointx = np.zeros((num_box, 5)) - pointy = np.zeros((num_box, 5)) - - for k in range(5): - # do not make a large movement - tmp_index = np.where(np.abs(output[k]-0.5) > 0.35) - output[k][tmp_index[0]] = 0.5 - - pointx[:, k] = np.round( - points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw - pointy[:, k] = np.round( - points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw - - points = np.hstack([pointx, pointy]) - points = points.astype(np.int32) - - return total_boxes, points - - def list2colmatrix(self, pts_list): - """ - convert list to column matrix - Parameters: - ---------- - pts_list: - input list - Retures: - ------- - colMat: - - """ - assert len(pts_list) > 0 - colMat = [] - for i in range(len(pts_list)): - colMat.append(pts_list[i][0]) - colMat.append(pts_list[i][1]) - colMat = np.matrix(colMat).transpose() - return colMat - - def find_tfrom_between_shapes(self, from_shape, to_shape): - """ - find transform between shapes - Parameters: - ---------- - from_shape: - to_shape: - Retures: - ------- - tran_m: - tran_b: - """ - assert from_shape.shape[0] == to_shape.shape[0] and from_shape.shape[0] % 2 == 0 - - sigma_from = 0.0 - sigma_to = 0.0 - cov = np.matrix([[0.0, 0.0], [0.0, 0.0]]) - - # compute 
the mean and cov - from_shape_points = from_shape.reshape(from_shape.shape[0]/2, 2) - to_shape_points = to_shape.reshape(to_shape.shape[0]/2, 2) - mean_from = from_shape_points.mean(axis=0) - mean_to = to_shape_points.mean(axis=0) - - for i in range(from_shape_points.shape[0]): - temp_dis = np.linalg.norm(from_shape_points[i] - mean_from) - sigma_from += temp_dis * temp_dis - temp_dis = np.linalg.norm(to_shape_points[i] - mean_to) - sigma_to += temp_dis * temp_dis - cov += (to_shape_points[i].transpose() - - mean_to.transpose()) * (from_shape_points[i] - mean_from) - - sigma_from = sigma_from / to_shape_points.shape[0] - sigma_to = sigma_to / to_shape_points.shape[0] - cov = cov / to_shape_points.shape[0] - - # compute the affine matrix - s = np.matrix([[1.0, 0.0], [0.0, 1.0]]) - u, d, vt = np.linalg.svd(cov) - - if np.linalg.det(cov) < 0: - if d[1] < d[0]: - s[1, 1] = -1 - else: - s[0, 0] = -1 - r = u * s * vt - c = 1.0 - if sigma_from != 0: - c = 1.0 / sigma_from * np.trace(np.diag(d) * s) - - tran_b = mean_to.transpose() - c * r * mean_from.transpose() - tran_m = c * r - - return tran_m, tran_b - - def extract_image_chips(self, img, points, desired_size=256, padding=0): - """ - crop and align face - Parameters: - ---------- - img: numpy array, bgr order of shape (1, 3, n, m) - input image - points: numpy array, n x 10 (x1, x2 ... 
x5, y1, y2 ..y5) - desired_size: default 256 - padding: default 0 - Retures: - ------- - crop_imgs: list, n - cropped and aligned faces - """ - crop_imgs = [] - for p in points: - shape = [] - for k in range(len(p)/2): - shape.append(p[k]) - shape.append(p[k+5]) - - if padding > 0: - padding = padding - else: - padding = 0 - # average positions of face points - mean_face_shape_x = [0.224152, 0.75610125, - 0.490127, 0.254149, 0.726104] - mean_face_shape_y = [0.2119465, 0.2119465, - 0.628106, 0.780233, 0.780233] - - from_points = [] - to_points = [] - - for i in range(len(shape)/2): - x = (padding + mean_face_shape_x[i]) / \ - (2 * padding + 1) * desired_size - y = (padding + mean_face_shape_y[i]) / \ - (2 * padding + 1) * desired_size - to_points.append([x, y]) - from_points.append([shape[2*i], shape[2*i+1]]) - - # convert the points to Mat - from_mat = self.list2colmatrix(from_points) - to_mat = self.list2colmatrix(to_points) - - # compute the similar transfrom - tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat) - - probe_vec = np.matrix([1.0, 0.0]).transpose() - probe_vec = tran_m * probe_vec - - scale = np.linalg.norm(probe_vec) - angle = 180.0 / math.pi * \ - math.atan2(probe_vec[1, 0], probe_vec[0, 0]) - - from_center = [(shape[0]+shape[2])/2.0, (shape[1]+shape[3])/2.0] - to_center = [0, 0] - to_center[1] = desired_size * 0.4 - to_center[0] = desired_size * 0.5 - - ex = to_center[0] - from_center[0] - ey = to_center[1] - from_center[1] - - rot_mat = cv2.getRotationMatrix2D( - (from_center[0], from_center[1]), -1*angle, scale) - rot_mat[0][2] += ex - rot_mat[1][2] += ey - - chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size)) - crop_imgs.append(chips) - - return crop_imgs diff --git a/peropero_xm_v2.py b/peropero_xm_v3.py similarity index 54% rename from peropero_xm_v2.py rename to peropero_xm_v3.py index 14524d5..e9fb6fa 100644 --- a/peropero_xm_v2.py +++ b/peropero_xm_v3.py @@ -4,19 +4,19 @@ import numpy as np import time from 
termcolor import colored +from helper import read_pkl_model, start_up_init, encode_image +from multiprocessing import Process, Queue import asyncio -from multiprocessing import Process, Queue, Manager import socketio -from helper import read_pkl_model, start_up_init, encode_image -from CXMIPCamera import XMIPCamera -import face_detector +import IPCamera.interface as ipc import face_embedding -import functools +import face_detector async def upload_loop(url="http://127.0.0.1:6789"): # =====================Uploader Setsup======================== sio = socketio.AsyncClient() + @sio.on('response', namespace='/flandre') async def on_response(data): current_address, upload_frame = upstream_queue.get() @@ -28,8 +28,13 @@ async def on_response(data): await sio.emit('frame_data', image_string, namespace='/flandre') try: ip, img, dt, prob, name = result_queue.get_nowait() - result_string = {'image': encode_image(img), - 'time': dt, 'name': name, 'prob': prob, 'ip': ip} + result_string = { + 'image': encode_image(img), + 'time': dt, + 'name': name, + 'prob': prob, + 'ip': ip + } await sio.emit('result_data', result_string, namespace='/flandre') except Exception as e: pass @@ -45,7 +50,7 @@ async def on_connect(): async def embedding_loop(preload): # =================== FR MODEL ==================== - mlp, class_names = read_pkl_model('./model-mlp/mlp.pkl') + mlp, class_names = read_pkl_model(preload.classification) embedding = face_embedding.EmbeddingModel(preload) while True: ip, img = suspicion_face_queue.get() @@ -56,53 +61,37 @@ async def embedding_loop(preload): result_queue.put((ip, img, dt, prob, name)) -async def detection_loop(preload): +async def detection_loop(preload, address): # =================== FD MODEL ==================== detector = face_detector.DetectorModel(preload) - rate = preload.max_frame_rate + rate_time = 1 / preload.max_frame_rate + embedding_threshold = preload.embedding_threshold + loop = asyncio.get_running_loop() + camera = 
ipc.XMIPCamera(address.encode('UTF-8'), 34567, b"admin", b"") + camera.start() + while True: start_time = loop.time() - head_frame_list = frame_queue.get() + frame = camera.frame(rows=540, cols=960) - for (ip_address, head_frame) in head_frame_list: - for img, box in detector.get_all_boxes(head_frame, save_img=False): + for img, box in detector.get_all_boxes(frame, save_img=False): + if box[4] > embedding_threshold: try: - suspicion_face_queue.put_nowait((ip_address, img)) - except Exception as e: + suspicion_face_queue.put_nowait((address, img)) + except Exception as _: pass + # print(box[4]) + + box = box.astype(np.int) + cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), + [255, 255, 0], 2) - box = box.astype(int) - cv2.rectangle( - head_frame, (box[0], box[1]), (box[2], box[3]), [255, 255, 0], 2) - - upstream_queue.put((ip_address, head_frame)) + upstream_queue.put((address, frame)) + # print(colored(loop.time() - start_time, 'red'), flush=True) - print(colored(loop.time()-start_time, 'red'), flush=True) - - for i in range(int((loop.time() - start_time) * rate + 1)): - for item in frame_queue.get(): - upstream_queue.put(item) - - -async def camera_loop(preload): - reciprocal_of_max_frame_rate = 1/preload.max_frame_rate - address_dict = preload.address_dict - camera_dict = {} - - for address in address_dict: - xmcp = XMIPCamera(address.encode('UTF-8'), 34567, b"admin", b"") - xmcp.start() - camera_dict[address] = xmcp - - # =================== ETERNAL LOOP ==================== - loop = asyncio.get_running_loop() - while True: - start_time = loop.time() - frame_queue.put([(address, camera_dict[address].frame(rows=540, cols=960)) - for address in address_dict]) - restime = reciprocal_of_max_frame_rate - loop.time() + start_time + restime = rate_time - loop.time() + start_time if restime > 0: await asyncio.sleep(restime) @@ -111,19 +100,16 @@ async def camera_loop(preload): os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' address_dict = ['10.41.0.198', 
'10.41.0.199'] args = start_up_init() -args.mtcnn_minsize = 288 -args.mtcnn_factor = 0.1 -args.mtcnn_threshold = [0.92, 0.95, 0.99] -args.address_dict = address_dict +args.retina_model = './model/M25' +args.scales = [0.2] # =================== INIT ==================== -frame_queue = Queue(maxsize=args.max_frame_rate) upstream_queue = Queue(maxsize=args.max_frame_rate * len(address_dict)) suspicion_face_queue = Queue(maxsize=1) result_queue = Queue(maxsize=1) # =================== Process On ==================== -Process(target=lambda: asyncio.run(detection_loop(args))).start() +for address in address_dict: + Process(target=lambda: asyncio.run(detection_loop(args, address))).start() Process(target=lambda: asyncio.run(embedding_loop(args))).start() -Process(target=lambda: asyncio.run(camera_loop(args))).start() asyncio.run(upload_loop()) diff --git a/rcnn/Makefile b/rcnn/Makefile new file mode 100644 index 0000000..b1d9024 --- /dev/null +++ b/rcnn/Makefile @@ -0,0 +1,6 @@ +all: + cd cython/; python3 setup.py build_ext --inplace; rm -rf build; cd ../ + # cd rcnn/pycocotools/; python3 setup.py build_ext --inplace; rm -rf build; cd ../../ +clean: + cd cython/; rm *.so *.c *.cpp; cd ../ + # cd rcnn/pycocotools/; rm *.so; cd ../../ diff --git a/rcnn/cython/.gitignore b/rcnn/cython/.gitignore new file mode 100644 index 0000000..15a165d --- /dev/null +++ b/rcnn/cython/.gitignore @@ -0,0 +1,3 @@ +*.c +*.cpp +*.so diff --git a/rcnn/cython/anchors.pyx b/rcnn/cython/anchors.pyx new file mode 100755 index 0000000..7005199 --- /dev/null +++ b/rcnn/cython/anchors.pyx @@ -0,0 +1,35 @@ +cimport cython +import numpy as np +cimport numpy as np + +DTYPE = np.float32 +ctypedef np.float32_t DTYPE_t + +def anchors_cython(int height, int width, int stride, np.ndarray[DTYPE_t, ndim=2] base_anchors): + """ + Parameters + ---------- + height: height of plane + width: width of plane + stride: stride ot the original image + anchors_base: (A, 4) a base set of anchors + Returns + ------- + 
all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane + """ + cdef unsigned int A = base_anchors.shape[0] + cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE) + cdef unsigned int iw, ih + cdef unsigned int k + cdef unsigned int sh + cdef unsigned int sw + for iw in range(width): + sw = iw * stride + for ih in range(height): + sh = ih * stride + for k in range(A): + all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw + all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh + all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw + all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh + return all_anchors \ No newline at end of file diff --git a/rcnn/cython/bbox.pyx b/rcnn/cython/bbox.pyx new file mode 100644 index 0000000..0c49e12 --- /dev/null +++ b/rcnn/cython/bbox.pyx @@ -0,0 +1,55 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Sergey Karayev +# -------------------------------------------------------- + +cimport cython +import numpy as np +cimport numpy as np + +DTYPE = np.float +ctypedef np.float_t DTYPE_t + +def bbox_overlaps_cython( + np.ndarray[DTYPE_t, ndim=2] boxes, + np.ndarray[DTYPE_t, ndim=2] query_boxes): + """ + Parameters + ---------- + boxes: (N, 4) ndarray of float + query_boxes: (K, 4) ndarray of float + Returns + ------- + overlaps: (N, K) ndarray of overlap between boxes and query_boxes + """ + cdef unsigned int N = boxes.shape[0] + cdef unsigned int K = query_boxes.shape[0] + cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + cdef DTYPE_t iw, ih, box_area + cdef DTYPE_t ua + cdef unsigned int k, n + for k in range(K): + box_area = ( + (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + (query_boxes[k, 3] - query_boxes[k, 1] + 1) + ) + for n in range(N): + iw = ( + min(boxes[n, 2], query_boxes[k, 2]) - + max(boxes[n, 0], query_boxes[k, 
0]) + 1 + ) + if iw > 0: + ih = ( + min(boxes[n, 3], query_boxes[k, 3]) - + max(boxes[n, 1], query_boxes[k, 1]) + 1 + ) + if ih > 0: + ua = float( + (boxes[n, 2] - boxes[n, 0] + 1) * + (boxes[n, 3] - boxes[n, 1] + 1) + + box_area - iw * ih + ) + overlaps[n, k] = iw * ih / ua + return overlaps diff --git a/rcnn/cython/cpu_nms.pyx b/rcnn/cython/cpu_nms.pyx new file mode 100644 index 0000000..1d0bef3 --- /dev/null +++ b/rcnn/cython/cpu_nms.pyx @@ -0,0 +1,68 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef np.float32_t inter, ovr + + keep = [] + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 
1: + continue + keep.append(i) + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return keep diff --git a/rcnn/cython/gpu_nms.hpp b/rcnn/cython/gpu_nms.hpp new file mode 100644 index 0000000..68b6d42 --- /dev/null +++ b/rcnn/cython/gpu_nms.hpp @@ -0,0 +1,2 @@ +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id); diff --git a/rcnn/cython/gpu_nms.pyx b/rcnn/cython/gpu_nms.pyx new file mode 100644 index 0000000..59d84af --- /dev/null +++ b/rcnn/cython/gpu_nms.pyx @@ -0,0 +1,31 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +assert sizeof(int) == sizeof(np.int32_t) + +cdef extern from "gpu_nms.hpp": + void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + +def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + np.int32_t device_id=0): + cdef int boxes_num = dets.shape[0] + cdef int boxes_dim = dets.shape[1] + cdef int num_out + cdef np.ndarray[np.int32_t, ndim=1] \ + keep = np.zeros(boxes_num, dtype=np.int32) + cdef np.ndarray[np.float32_t, ndim=1] \ + scores = dets[:, 4] + cdef np.ndarray[np.int_t, ndim=1] \ + order = scores.argsort()[::-1] + cdef np.ndarray[np.float32_t, ndim=2] \ + sorted_dets = dets[order, :] + _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + keep = keep[:num_out] + return 
list(order[keep]) diff --git a/rcnn/cython/nms_kernel.cu b/rcnn/cython/nms_kernel.cu new file mode 100644 index 0000000..038a590 --- /dev/null +++ b/rcnn/cython/nms_kernel.cu @@ -0,0 +1,144 @@ +// ------------------------------------------------------------------ +// Faster R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Shaoqing Ren +// ------------------------------------------------------------------ + +#include "gpu_nms.hpp" +#include +#include + +#define CUDA_CHECK(condition) \ + /* Code block avoids redefinition of cudaError_t error */ \ + do { \ + cudaError_t error = condition; \ + if (error != cudaSuccess) { \ + std::cout << cudaGetErrorString(error) << std::endl; \ + } \ + } while (0) + +#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) +int const threadsPerBlock = sizeof(unsigned long long) * 8; + +__device__ inline float devIoU(float const * const a, float const * const b) { + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); + float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, + const float *dev_boxes, unsigned long long *dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] 
= + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const float *cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + const int col_blocks = DIVUP(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +void _set_device(int device_id) { + int current_device; + CUDA_CHECK(cudaGetDevice(¤t_device)); + if (current_device == device_id) { + return; + } + // The call to cudaSetDevice must come before any calls to Get, which + // may perform initialization using the GPU. 
+ CUDA_CHECK(cudaSetDevice(device_id)); +} + +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id) { + _set_device(device_id); + + float* boxes_dev = NULL; + unsigned long long* mask_dev = NULL; + + const int col_blocks = DIVUP(boxes_num, threadsPerBlock); + + CUDA_CHECK(cudaMalloc(&boxes_dev, + boxes_num * boxes_dim * sizeof(float))); + CUDA_CHECK(cudaMemcpy(boxes_dev, + boxes_host, + boxes_num * boxes_dim * sizeof(float), + cudaMemcpyHostToDevice)); + + CUDA_CHECK(cudaMalloc(&mask_dev, + boxes_num * col_blocks * sizeof(unsigned long long))); + + dim3 blocks(DIVUP(boxes_num, threadsPerBlock), + DIVUP(boxes_num, threadsPerBlock)); + dim3 threads(threadsPerBlock); + nms_kernel<<>>(boxes_num, + nms_overlap_thresh, + boxes_dev, + mask_dev); + + std::vector mask_host(boxes_num * col_blocks); + CUDA_CHECK(cudaMemcpy(&mask_host[0], + mask_dev, + sizeof(unsigned long long) * boxes_num * col_blocks, + cudaMemcpyDeviceToHost)); + + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + int num_to_keep = 0; + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long *p = &mask_host[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + *num_out = num_to_keep; + + CUDA_CHECK(cudaFree(boxes_dev)); + CUDA_CHECK(cudaFree(mask_dev)); +} diff --git a/rcnn/cython/setup.py b/rcnn/cython/setup.py new file mode 100644 index 0000000..3e27add --- /dev/null +++ b/rcnn/cython/setup.py @@ -0,0 +1,169 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import os +from 
os.path import join as pjoin +from setuptools import setup +from distutils.extension import Extension +from Cython.Distutils import build_ext +import numpy as np + + +def find_in_path(name, path): + "Find a file in a search path" + # Adapted fom + # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + for dir in path.split(os.pathsep): + binpath = pjoin(dir, name) + if os.path.exists(binpath): + return os.path.abspath(binpath) + return None + + +def locate_cuda(): + """Locate the CUDA environment on the system + + Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' + and values giving the absolute path to each directory. + + Starts by looking for the CUDAHOME env variable. If not found, everything + is based on finding 'nvcc' in the PATH. + """ + + # first check if the CUDAHOME env variable is in use + if 'CUDAHOME' in os.environ: + home = os.environ['CUDAHOME'] + nvcc = pjoin(home, 'bin', 'nvcc') + else: + # otherwise, search the PATH for NVCC + default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') + nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) + if nvcc is None: + raise EnvironmentError('The nvcc binary could not be ' + 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + home = os.path.dirname(os.path.dirname(nvcc)) + + cudaconfig = {'home':home, 'nvcc':nvcc, + 'include': pjoin(home, 'include'), + 'lib64': pjoin(home, 'lib64')} + for k, v in cudaconfig.items(): + if not os.path.exists(v): + raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + + return cudaconfig + + +# Test if cuda could be foun +try: + CUDA = locate_cuda() +except EnvironmentError: + CUDA = None + + +# Obtain the numpy include directory. This logic works across numpy versions. 
+try: + numpy_include = np.get_include() +except AttributeError: + numpy_include = np.get_numpy_include() + + +def customize_compiler_for_nvcc(self): + """inject deep into distutils to customize how the dispatch + to gcc/nvcc works. + + If you subclass UnixCCompiler, it's not trivial to get your subclass + injected in, and still have the right customizations (i.e. + distutils.sysconfig.customize_compiler) run on it. So instead of going + the OO route, I have this. Note, it's kindof like a wierd functional + subclassing going on.""" + + # tell the compiler it can processes .cu + self.src_extensions.append('.cu') + + # save references to the default compiler_so and _comple methods + default_compiler_so = self.compiler_so + super = self._compile + + # now redefine the _compile method. This gets executed for each + # object but distutils doesn't have the ability to change compilers + # based on source extension: we add it. + def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): + if os.path.splitext(src)[1] == '.cu': + # use the cuda for .cu files + self.set_executable('compiler_so', CUDA['nvcc']) + # use only a subset of the extra_postargs, which are 1-1 translated + # from the extra_compile_args in the Extension class + postargs = extra_postargs['nvcc'] + else: + postargs = extra_postargs['gcc'] + + super(obj, src, ext, cc_args, postargs, pp_opts) + # reset the default compiler_so, which we might have changed for cuda + self.compiler_so = default_compiler_so + + # inject our redefined _compile method into the class + self._compile = _compile + + +# run the customize_compiler +class custom_build_ext(build_ext): + def build_extensions(self): + customize_compiler_for_nvcc(self.compiler) + build_ext.build_extensions(self) + + +ext_modules = [ + Extension( + "bbox", + ["bbox.pyx"], + extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, + include_dirs=[numpy_include] + ), + Extension( + "anchors", + ["anchors.pyx"], + extra_compile_args={'gcc': 
["-Wno-cpp", "-Wno-unused-function"]}, + include_dirs=[numpy_include] + ), + Extension( + "cpu_nms", + ["cpu_nms.pyx"], + extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, + include_dirs = [numpy_include] + ), +] + +if CUDA is not None: + ext_modules.append( + Extension('gpu_nms', + ['nms_kernel.cu', 'gpu_nms.pyx'], + library_dirs=[CUDA['lib64']], + libraries=['cudart'], + language='c++', + runtime_library_dirs=[CUDA['lib64']], + # this syntax is specific to this build system + # we're only going to use certain compiler args with nvcc and not with + # gcc the implementation of this trick is in customize_compiler() below + extra_compile_args={'gcc': ["-Wno-unused-function"], + 'nvcc': ['-arch=sm_35', + '--ptxas-options=-v', + '-c', + '--compiler-options', + "'-fPIC'"]}, + include_dirs = [numpy_include, CUDA['include']] + ) + ) +else: + print('Skipping GPU_NMS') + + +setup( + name='frcnn_cython', + ext_modules=ext_modules, + # inject our custom trigger + cmdclass={'build_ext': custom_build_ext}, +) diff --git a/rcnn/processing/bbox_transform.py b/rcnn/processing/bbox_transform.py new file mode 100644 index 0000000..0c4fc7c --- /dev/null +++ b/rcnn/processing/bbox_transform.py @@ -0,0 +1,218 @@ +import numpy as np +from ..cython.bbox import bbox_overlaps_cython +#from rcnn.config import config + + +def bbox_overlaps(boxes, query_boxes): + return bbox_overlaps_cython(boxes, query_boxes) + + +def bbox_overlaps_py(boxes, query_boxes): + """ + determine overlaps between boxes and query_boxes + :param boxes: n * 4 bounding boxes + :param query_boxes: k * 4 bounding boxes + :return: overlaps: n * k overlaps + """ + n_ = boxes.shape[0] + k_ = query_boxes.shape[0] + overlaps = np.zeros((n_, k_), dtype=np.float) + for k in range(k_): + query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + for n in range(n_): + iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 + if iw > 0: + 
ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 + if ih > 0: + box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) + all_area = float(box_area + query_box_area - iw * ih) + overlaps[n, k] = iw * ih / all_area + return overlaps + + +def clip_boxes(boxes, im_shape): + """ + Clip boxes to image boundaries. + :param boxes: [N, 4* num_classes] + :param im_shape: tuple of 2 + :return: [N, 4* num_classes] + """ + # x1 >= 0 + boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) + # y2 < im_shape[0] + boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) + return boxes + + +def nonlinear_transform(ex_rois, gt_rois): + """ + compute bounding box regression targets from ex_rois to gt_rois + :param ex_rois: [N, 4] + :param gt_rois: [N, 4] + :return: [N, 4] + """ + assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 + ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) + ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) + + gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 + gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 + gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) + gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) + + targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) + targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) + targets_dw = np.log(gt_widths / ex_widths) + targets_dh = np.log(gt_heights / ex_heights) + + if gt_rois.shape[1]<=4: + targets = np.vstack( + (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() + return targets + else: + targets = [targets_dx, targets_dy, targets_dw, targets_dh] + #if config.USE_BLUR: + # for i in 
range(4, gt_rois.shape[1]): + # t = gt_rois[:,i] + # targets.append(t) + targets = np.vstack(targets).transpose() + return targets + +def landmark_transform(ex_rois, gt_rois): + + assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 + ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) + ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) + + + targets = [] + for i in range(gt_rois.shape[1]): + for j in range(gt_rois.shape[2]): + #if not config.USE_OCCLUSION and j==2: + # continue + if j==2: + continue + if j==0: #w + target = (gt_rois[:,i,j] - ex_ctr_x) / (ex_widths + 1e-14) + elif j==1: #h + target = (gt_rois[:,i,j] - ex_ctr_y) / (ex_heights + 1e-14) + else: #visibile + target = gt_rois[:,i,j] + targets.append(target) + + + targets = np.vstack(targets).transpose() + return targets + + +def nonlinear_pred(boxes, box_deltas): + """ + Transform the set of class-agnostic boxes into class-specific boxes + by applying the predicted offsets (box_deltas) + :param boxes: !important [N 4] + :param box_deltas: [N, 4 * num_classes] + :return: [N 4 * num_classes] + """ + if boxes.shape[0] == 0: + return np.zeros((0, box_deltas.shape[1])) + + boxes = boxes.astype(np.float, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) + ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) + + dx = box_deltas[:, 0::4] + dy = box_deltas[:, 1::4] + dw = box_deltas[:, 2::4] + dh = box_deltas[:, 3::4] + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(box_deltas.shape) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 
1.0) + # x2 + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) + # y2 + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) + + return pred_boxes + +def landmark_pred(boxes, landmark_deltas): + if boxes.shape[0] == 0: + return np.zeros((0, landmark_deltas.shape[1])) + boxes = boxes.astype(np.float, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) + ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) + preds = [] + for i in range(landmark_deltas.shape[1]): + if i%2==0: + pred = (landmark_deltas[:,i]*widths + ctr_x) + else: + pred = (landmark_deltas[:,i]*heights + ctr_y) + preds.append(pred) + preds = np.vstack(preds).transpose() + return preds + +def iou_transform(ex_rois, gt_rois): + """ return bbox targets, IoU loss uses gt_rois as gt """ + assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' + return gt_rois + + +def iou_pred(boxes, box_deltas): + """ + Transform the set of class-agnostic boxes into class-specific boxes + by applying the predicted offsets (box_deltas) + :param boxes: !important [N 4] + :param box_deltas: [N, 4 * num_classes] + :return: [N 4 * num_classes] + """ + if boxes.shape[0] == 0: + return np.zeros((0, box_deltas.shape[1])) + + boxes = boxes.astype(np.float, copy=False) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + dx1 = box_deltas[:, 0::4] + dy1 = box_deltas[:, 1::4] + dx2 = box_deltas[:, 2::4] + dy2 = box_deltas[:, 3::4] + + pred_boxes = np.zeros(box_deltas.shape) + # x1 + pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] + # y1 + pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] + # x2 + pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] + # y2 + pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] + + return pred_boxes + + +# define bbox_transform and bbox_pred +bbox_transform = nonlinear_transform +bbox_pred = nonlinear_pred diff --git a/rcnn/processing/generate_anchor.py 
b/rcnn/processing/generate_anchor.py new file mode 100644 index 0000000..eb47d8e --- /dev/null +++ b/rcnn/processing/generate_anchor.py @@ -0,0 +1,128 @@ +""" +Generate base anchors on index 0 +""" +from __future__ import print_function +import sys +from builtins import range +import numpy as np +from ..cython.anchors import anchors_cython +#from ..config import config + + +def anchors_plane(feat_h, feat_w, stride, base_anchor): + return anchors_cython(feat_h, feat_w, stride, base_anchor) + +def generate_anchors(base_size=16, ratios=[0.5, 1, 2], + scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): + """ + Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, 15, 15) window. + """ + + base_anchor = np.array([1, 1, base_size, base_size]) - 1 + ratio_anchors = _ratio_enum(base_anchor, ratios) + anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) + for i in range(ratio_anchors.shape[0])]) + if dense_anchor: + assert stride%2==0 + anchors2 = anchors.copy() + anchors2[:,:] += int(stride/2) + anchors = np.vstack( (anchors, anchors2) ) + #print('GA',base_anchor.shape, ratio_anchors.shape, anchors.shape) + return anchors + +#def generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2], scales=8): +# """ +# Generate anchor (reference) windows by enumerating aspect ratios X +# scales wrt a reference (0, 0, 15, 15) window. 
+# """ +# anchors = [] +# _ratios = ratios.reshape( (len(base_size), -1) ) +# _scales = scales.reshape( (len(base_size), -1) ) +# for i,bs in enumerate(base_size): +# __ratios = _ratios[i] +# __scales = _scales[i] +# #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) +# r = generate_anchors(bs, __ratios, __scales) +# #print('anchors_fpn', r.shape, file=sys.stderr) +# anchors.append(r) +# return anchors + +def generate_anchors_fpn(dense_anchor=False, cfg = None): + #assert(False) + """ + Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, 15, 15) window. + """ + if cfg is None: + from ..config import config + cfg = config.RPN_ANCHOR_CFG + RPN_FEAT_STRIDE = [] + for k in cfg: + RPN_FEAT_STRIDE.append( int(k) ) + RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) + anchors = [] + for k in RPN_FEAT_STRIDE: + v = cfg[str(k)] + bs = v['BASE_SIZE'] + __ratios = np.array(v['RATIOS']) + __scales = np.array(v['SCALES']) + stride = int(k) + #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) + r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) + #print('anchors_fpn', r.shape, file=sys.stderr) + anchors.append(r) + + return anchors + +def _whctrs(anchor): + """ + Return width, height, x center, and y center for an anchor (window). + """ + + w = anchor[2] - anchor[0] + 1 + h = anchor[3] - anchor[1] + 1 + x_ctr = anchor[0] + 0.5 * (w - 1) + y_ctr = anchor[1] + 0.5 * (h - 1) + return w, h, x_ctr, y_ctr + + +def _mkanchors(ws, hs, x_ctr, y_ctr): + """ + Given a vector of widths (ws) and heights (hs) around a center + (x_ctr, y_ctr), output a set of anchors (windows). + """ + + ws = ws[:, np.newaxis] + hs = hs[:, np.newaxis] + anchors = np.hstack((x_ctr - 0.5 * (ws - 1), + y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), + y_ctr + 0.5 * (hs - 1))) + return anchors + + +def _ratio_enum(anchor, ratios): + """ + Enumerate a set of anchors for each aspect ratio wrt an anchor. 
+ """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + size = w * h + size_ratios = size / ratios + ws = np.round(np.sqrt(size_ratios)) + hs = np.round(ws * ratios) + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + + +def _scale_enum(anchor, scales): + """ + Enumerate a set of anchors for each scale wrt an anchor. + """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + ws = w * scales + hs = h * scales + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors diff --git a/rcnn/processing/nms.py b/rcnn/processing/nms.py new file mode 100644 index 0000000..230139c --- /dev/null +++ b/rcnn/processing/nms.py @@ -0,0 +1,64 @@ +import numpy as np +from ..cython.cpu_nms import cpu_nms +try: + from ..cython.gpu_nms import gpu_nms +except ImportError: + gpu_nms = None + + +def py_nms_wrapper(thresh): + def _nms(dets): + return nms(dets, thresh) + return _nms + + +def cpu_nms_wrapper(thresh): + def _nms(dets): + return cpu_nms(dets, thresh) + return _nms + + +def gpu_nms_wrapper(thresh, device_id): + def _nms(dets): + return gpu_nms(dets, thresh, device_id) + if gpu_nms is not None: + return _nms + else: + return cpu_nms_wrapper(thresh) + + +def nms(dets, thresh): + """ + greedily select boxes with high confidence and overlap with current maximum <= thresh + rule out overlap >= thresh + :param dets: [[x1, y1, x2, y2 score]] + :param thresh: retain overlap < thresh + :return: indexes to keep + """ + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= 
thresh)[0] + order = order[inds + 1] + + return keep diff --git a/retinaface.py b/retinaface.py new file mode 100644 index 0000000..730b059 --- /dev/null +++ b/retinaface.py @@ -0,0 +1,557 @@ +import sys +import os +import datetime +import time +import numpy as np +import mxnet as mx +from mxnet import ndarray as nd +import cv2 +from rcnn.processing.bbox_transform import clip_boxes +from rcnn.processing.generate_anchor import generate_anchors_fpn, anchors_plane +from rcnn.processing.nms import gpu_nms_wrapper, cpu_nms_wrapper +from rcnn.processing.bbox_transform import bbox_overlaps + + +class RetinaFace: + def __init__(self, + prefix, + epoch, + ctx_id=0, + network='net3', + nms=0.4, + nocrop=False, + decay4=0.5, + vote=False): + self.ctx_id = ctx_id + self.network = network + self.decay4 = decay4 + self.nms_threshold = nms + self.vote = vote + self.nocrop = nocrop + self.debug = False + self.fpn_keys = [] + self.anchor_cfg = None + pixel_means = [0.0, 0.0, 0.0] + pixel_stds = [1.0, 1.0, 1.0] + pixel_scale = 1.0 + self.preprocess = False + _ratio = (1., ) + fmc = 3 + + if fmc == 3: + self._feat_stride_fpn = [32, 16, 8] + self.anchor_cfg = { + '32': { + 'SCALES': (32, 16), + 'BASE_SIZE': 16, + 'RATIOS': _ratio, + 'ALLOWED_BORDER': 9999 + }, + '16': { + 'SCALES': (8, 4), + 'BASE_SIZE': 16, + 'RATIOS': _ratio, + 'ALLOWED_BORDER': 9999 + }, + '8': { + 'SCALES': (2, 1), + 'BASE_SIZE': 16, + 'RATIOS': _ratio, + 'ALLOWED_BORDER': 9999 + }, + } + + if self.debug: + print(self._feat_stride_fpn, self.anchor_cfg) + + for s in self._feat_stride_fpn: + self.fpn_keys.append('stride%s' % s) + + dense_anchor = False + self._anchors_fpn = dict( + zip( + self.fpn_keys, + generate_anchors_fpn(dense_anchor=dense_anchor, + cfg=self.anchor_cfg))) + for k in self._anchors_fpn: + v = self._anchors_fpn[k].astype(np.float32) + self._anchors_fpn[k] = v + + self._num_anchors = dict( + zip(self.fpn_keys, + [anchors.shape[0] for anchors in self._anchors_fpn.values()])) + + sym, arg_params, 
def get_input(self, img):
    """Convert an H x W x 3 image into the 1 x 3 x H x W network input.

    Each output channel ``i`` is taken from input channel ``2 - i`` (the
    channel order is reversed; presumably BGR -> RGB for OpenCV images --
    confirm against the caller), divided by ``self.pixel_scale``, then
    normalised with ``self.pixel_means`` / ``self.pixel_stds``.

    :param img: image array indexed as ``[row, col, channel]``
    :return: ``mxnet.ndarray`` built from the normalised tensor
    """
    im = img.astype(np.float32)
    im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
    for i in range(3):
        im_tensor[0, i, :, :] = (
            im[:, :, 2 - i] / self.pixel_scale -
            self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
    return nd.array(im_tensor)


def detect(self, img, threshold=0.5, scales=(1.0,), do_flip=False):
    """Run the detector over *img* at every scale (and optionally mirrored).

    :param img: image array indexed as ``[row, col, channel]``
    :param threshold: minimum score for a proposal to be kept
    :param scales: iterable of resize factors to evaluate
    :param do_flip: when true, every scale is also evaluated on the
        horizontally mirrored image and results are mapped back
    :return: ``(det, landmarks)``. ``det`` has one row per detection whose
        first five columns are ``x1, y1, x2, y2, score`` in the coordinates
        of the unscaled input. ``landmarks`` is an ``(N, 5, 2)`` array when
        the model has a landmark head and voting is off, otherwise ``None``
        (or an empty ``(0, 5, 2)`` array when nothing was detected and the
        model has a landmark head).
    """
    if self.debug:
        print('in_detect', threshold, scales, do_flip)

    timea = datetime.datetime.now()

    def lap(tag):
        # Debug-only stopwatch; prints elapsed time since detect() started.
        if self.debug:
            elapsed = datetime.datetime.now() - timea
            print('%s uses' % tag, elapsed.total_seconds(), 'seconds')

    proposals_list = []
    scores_list = []
    landmarks_list = []
    want_landmarks = not self.vote and self.use_landmarks
    # The network emits (score, bbox[, landmark]) per pyramid level.
    outputs_per_stride = 3 if self.use_landmarks else 2
    flips = (0, 1) if do_flip else (0,)

    for im_scale in scales:
        for flip in flips:
            if im_scale != 1.0:
                im = cv2.resize(img,
                                None,
                                None,
                                fx=im_scale,
                                fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            else:
                im = img.copy()
            if flip:
                im = im[:, ::-1, :]

            # Width of the real image content. Mirrored coordinates must be
            # mapped back against this value, not against the padded width:
            # padding is appended on the right *after* mirroring.
            content_width = im.shape[1]

            if self.nocrop:
                # Pad bottom/right with zeros up to the next multiple of 32.
                h = (im.shape[0] + 31) // 32 * 32
                w = (im.shape[1] + 31) // 32 * 32
                padded = np.zeros((h, w, 3), dtype=np.float32)
                padded[0:im.shape[0], 0:im.shape[1], :] = im
                im = padded
            else:
                im = im.astype(np.float32)
            lap('X1')

            im_info = [im.shape[0], im.shape[1]]
            data = self.get_input(im)
            lap('X2')
            db = mx.io.DataBatch(data=(data, ),
                                 provide_data=[('data', data.shape)])
            lap('X3')
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()

            for level, s in enumerate(self._feat_stride_fpn):
                key = 'stride%s' % s
                stride = int(s)
                num_anchors = self._num_anchors[key]
                out_idx = level * outputs_per_stride

                scores = net_out[out_idx].asnumpy()
                lap('A')
                # Only the channels from ``num_anchors`` onward are used as
                # scores (presumably the foreground half -- confirm).
                scores = scores[:, num_anchors:, :, :]
                bbox_deltas = net_out[out_idx + 1].asnumpy()

                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                anchors = anchors_plane(height, width, stride,
                                        self._anchors_fpn[key])
                anchors = anchors.reshape((height * width * num_anchors, 4))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                bbox_pred_len = bbox_deltas.shape[3] // num_anchors
                bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))

                proposals = self.bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, im_info[:2])

                selected = np.where(scores.ravel() >= threshold)[0]
                proposals = proposals[selected, :]
                scores = scores[selected]
                if stride == 4 and self.decay4 < 1.0:
                    scores *= self.decay4
                if flip:
                    oldx1 = proposals[:, 0].copy()
                    oldx2 = proposals[:, 2].copy()
                    proposals[:, 0] = content_width - oldx2 - 1
                    proposals[:, 2] = content_width - oldx1 - 1

                proposals[:, 0:4] /= im_scale

                proposals_list.append(proposals)
                scores_list.append(scores)

                if want_landmarks:
                    landmark_deltas = net_out[out_idx + 2].asnumpy()
                    landmark_deltas = self._clip_pad(landmark_deltas,
                                                     (height, width))
                    landmark_pred_len = landmark_deltas.shape[1] // num_anchors
                    landmark_deltas = landmark_deltas.transpose(
                        (0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5))
                    landmarks = self.landmark_pred(anchors, landmark_deltas)
                    landmarks = landmarks[selected, :]

                    if flip:
                        landmarks[:, :, 0] = (content_width -
                                              landmarks[:, :, 0] - 1)
                        # Mirroring swaps points 0<->1 and 3<->4.
                        landmarks = landmarks[:, [1, 0, 2, 4, 3], :]
                    landmarks[:, :, 0:2] /= im_scale
                    landmarks_list.append(landmarks)

    lap('B')
    proposals = np.vstack(proposals_list)
    landmarks = None
    if proposals.shape[0] == 0:
        if self.use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        return np.zeros((0, 5)), landmarks

    scores = np.vstack(scores_list)
    ranking = scores.ravel().argsort()[::-1]

    proposals = proposals[ranking, :]
    scores = scores[ranking]
    if want_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[ranking].astype(np.float32, copy=False)

    pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    det = np.hstack((pre_det, proposals[:, 4:]))
    if not self.vote:
        keep = self.nms(pre_det)
        det = det[keep, :]
        if self.use_landmarks:
            landmarks = landmarks[keep]
    else:
        det = self.bbox_vote(det)

    lap('C')
    return det, landmarks
det[:, 0]) * (det[:, 3] - + det[:, 1]) + img_center = img_size / 2 + offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1], + (det[:, 1] + det[:, 3]) / 2 - img_center[0]]) + offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) + bindex = np.argmax(bounding_box_size - offset_dist_squared * 2.0) + + bbox = det[bindex, :] + landmark = landmarks[bindex, :, :] + return bbox, landmark + + @staticmethod + def check_large_pose(landmark, bbox): + assert landmark.shape == (5, 2) + assert len(bbox) == 4 + + def get_theta(base, x, y): + vx = x - base + vy = y - base + vx[1] *= -1 + vy[1] *= -1 + tx = np.arctan2(vx[1], vx[0]) + ty = np.arctan2(vy[1], vy[0]) + d = ty - tx + d = np.degrees(d) + + if d < -180.0: + d += 360. + elif d > 180.0: + d -= 360.0 + return d + + landmark = landmark.astype(np.float32) + + theta1 = get_theta(landmark[0], landmark[3], landmark[2]) + theta2 = get_theta(landmark[1], landmark[2], landmark[4]) + #print(va, vb, theta2) + theta3 = get_theta(landmark[0], landmark[2], landmark[1]) + theta4 = get_theta(landmark[1], landmark[0], landmark[2]) + theta5 = get_theta(landmark[3], landmark[4], landmark[2]) + theta6 = get_theta(landmark[4], landmark[2], landmark[3]) + theta7 = get_theta(landmark[3], landmark[2], landmark[0]) + theta8 = get_theta(landmark[4], landmark[1], landmark[2]) + #print(theta1, theta2, theta3, theta4, theta5, theta6, theta7, theta8) + left_score = 0.0 + right_score = 0.0 + up_score = 0.0 + down_score = 0.0 + if theta1 <= 0.0: + left_score = 10.0 + elif theta2 <= 0.0: + right_score = 10.0 + else: + left_score = theta2 / theta1 + right_score = theta1 / theta2 + if theta3 <= 10.0 or theta4 <= 10.0: + up_score = 10.0 + else: + up_score = max(theta1 / theta3, theta2 / theta4) + if theta5 <= 10.0 or theta6 <= 10.0: + down_score = 10.0 + else: + down_score = max(theta7 / theta5, theta8 / theta6) + mleft = (landmark[0][0] + landmark[3][0]) / 2 + mright = (landmark[1][0] + landmark[4][0]) / 2 + box_center = ((bbox[0] + bbox[2]) / 2, 
(bbox[1] + bbox[3]) / 2) + ret = 0 + if left_score >= 3.0: + ret = 1 + if ret == 0 and left_score >= 2.0: + if mright <= box_center[0]: + ret = 1 + if ret == 0 and right_score >= 3.0: + ret = 2 + if ret == 0 and right_score >= 2.0: + if mleft >= box_center[0]: + ret = 2 + if ret == 0 and up_score >= 2.0: + ret = 3 + if ret == 0 and down_score >= 5.0: + ret = 4 + return ret, left_score, right_score, up_score, down_score + + @staticmethod + def _filter_boxes(boxes, min_size): + """ Remove all boxes with any side smaller than min_size """ + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws >= min_size) & (hs >= min_size))[0] + return keep + + @staticmethod + def _filter_boxes2(boxes, max_size, min_size): + """ Remove all boxes with any side smaller than min_size """ + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + if max_size > 0: + keep = np.where(np.minimum(ws, hs) < max_size)[0] + elif min_size > 0: + keep = np.where(np.maximum(ws, hs) > min_size)[0] + return keep + + @staticmethod + def _clip_pad(tensor, pad_shape): + """ + Clip boxes of the pad area. 
+ :param tensor: [n, c, H, W] + :param pad_shape: [h, w] + :return: [n, c, h, w] + """ + H, W = tensor.shape[2:] + h, w = pad_shape + + if h < H or w < W: + tensor = tensor[:, :, :h, :w].copy() + + return tensor + + @staticmethod + def bbox_pred(boxes, box_deltas): + """ + Transform the set of class-agnostic boxes into class-specific boxes + by applying the predicted offsets (box_deltas) + :param boxes: !important [N 4] + :param box_deltas: [N, 4 * num_classes] + :return: [N 4 * num_classes] + """ + if boxes.shape[0] == 0: + return np.zeros((0, box_deltas.shape[1])) + + boxes = boxes.astype(np.float, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) + ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) + + dx = box_deltas[:, 0:1] + dy = box_deltas[:, 1:2] + dw = box_deltas[:, 2:3] + dh = box_deltas[:, 3:4] + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(box_deltas.shape) + # x1 + pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0) + # y1 + pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0) + # x2 + pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0) + # y2 + pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0) + + if box_deltas.shape[1] > 4: + pred_boxes[:, 4:] = box_deltas[:, 4:] + + return pred_boxes + + @staticmethod + def landmark_pred(boxes, landmark_deltas): + if boxes.shape[0] == 0: + return np.zeros((0, landmark_deltas.shape[1])) + boxes = boxes.astype(np.float, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) + ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) + pred = landmark_deltas.copy() + for i in range(5): + pred[:, i, 0] = landmark_deltas[:, i, 0] * widths + ctr_x + 
pred[:, i, 1] = landmark_deltas[:, i, 1] * heights + ctr_y + return pred + + def bbox_vote(self, det): + if det.shape[0] == 0: + dets = np.array([[10, 10, 20, 20, 0.002]]) + det = np.empty(shape=[0, 5]) + while det.shape[0] > 0: + # IOU + area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) + xx1 = np.maximum(det[0, 0], det[:, 0]) + yy1 = np.maximum(det[0, 1], det[:, 1]) + xx2 = np.minimum(det[0, 2], det[:, 2]) + yy2 = np.minimum(det[0, 3], det[:, 3]) + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + o = inter / (area[0] + area[:] - inter) + + # nms + merge_index = np.where(o >= self.nms_threshold)[0] + det_accu = det[merge_index, :] + det = np.delete(det, merge_index, 0) + if merge_index.shape[0] <= 1: + if det.shape[0] == 0: + try: + dets = np.row_stack((dets, det_accu)) + except: + dets = det_accu + continue + det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile( + det_accu[:, -1:], (1, 4)) + max_score = np.max(det_accu[:, 4]) + det_accu_sum = np.zeros((1, 5)) + det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum( + det_accu[:, -1:]) + det_accu_sum[:, 4] = max_score + try: + dets = np.row_stack((dets, det_accu_sum)) + except: + dets = det_accu_sum + dets = dets[0:750, :] + return dets diff --git a/train.py b/train.py index cb4453e..3414bf7 100644 --- a/train.py +++ b/train.py @@ -1,4 +1,3 @@ -import face_model import cv2 import sys import numpy as np @@ -13,12 +12,13 @@ from sklearn import metrics from helper import read_pkl_model, start_up_init, get_dataset, get_image_paths_and_labels +import face_embedding # =================== ARGS ==================== -args = start_up_init(True) +args = start_up_init() # =================== MODEL CLASS ==================== -arcface = face_model.FaceModel(args) +arcface = face_embedding.EmbeddingModel(args) # =================== LOAD DATASET ====================. 
async def upload_loop(url="http://127.0.0.1:6789"):
    """Stream captured frames and detection results to a Socket.IO server.

    Every ``response`` event from the server triggers one upload: the next
    frame from ``upstream_queue`` is sent as ``frame_data``, followed by a
    ``result_data`` message if a detection result is currently available.
    The exchange is kicked off on connect by sending a dummy ``frame_data``.

    :param url: address of the Socket.IO server
    """
    # Local import: only this coroutine needs the exception type.
    from queue import Empty

    loop = asyncio.get_running_loop()
    sio = socketio.AsyncClient()

    @sio.on('response', namespace='/sakuya')
    async def on_response(data):
        # Queue.get() blocks; run it in a worker thread so the event loop
        # (and with it the client's own background traffic) keeps running
        # while waiting for the next frame.
        upload_frame = await loop.run_in_executor(None, upstream_queue.get)
        await sio.emit('frame_data', encode_image(upload_frame), namespace='/sakuya')

        try:
            euler_angle, shape, leftEAR, rightEAR = result_queue.get_nowait()
        except Empty:
            # No detection result for this frame; this is the normal case
            # whenever the detector is slower than the uploader.
            return

        try:
            result_string = {'X': euler_angle[0, 0], 'Y': euler_angle[1, 0], 'Z': euler_angle[2, 0],
                             'shape': shape.tolist(), 'leftEAR': leftEAR, 'rightEAR': rightEAR}
            await sio.emit('result_data', result_string, namespace='/sakuya')
        except Exception as e:
            # Best effort: a failed result upload must not stop the stream.
            print(e)

    @sio.on('connect', namespace='/sakuya')
    async def on_connect():
        await sio.emit('frame_data', 0, namespace='/sakuya')

    await sio.connect(url)
    await sio.wait()


async def camera_loop(preload):
    """Grab the screen at a fixed rate and push frames to ``upstream_queue``.

    :param preload: start-up settings object (not read by the capture loop;
        the rate is currently fixed at 60 frames per second)
    """
    # reciprocal_of_max_frame_rate = 1/preload.max_frame_rate
    reciprocal_of_max_frame_rate = 1/60
    loop = asyncio.get_running_loop()

    with mss.mss() as sct:
        monitor = {"top": 0, "left": 0, "width": 1920, "height": 1080}
        while True:
            start_time = loop.time()

            img = np.array(sct.grab(monitor))
            upstream_queue.put(img)

            # Sleep for whatever is left of this frame's time slot.
            restime = reciprocal_of_max_frame_rate - loop.time() + start_time
            if restime > 0:
                await asyncio.sleep(restime)


async def detection_loop(preload):
    """Read camera frames, estimate head pose and eye openness, queue results.

    For every face returned by the detector a landmark shape is predicted,
    the head pose is computed from it, and the tuple
    ``(euler_angle, shape, leftEAR, rightEAR)`` is put on ``result_queue``.
    Errors while processing a frame are printed and the loop continues.

    :param preload: start-up settings; reads ``max_frame_rate``, ``address``
        and ``face_landmark_path`` (and is passed on to the face detector)
    """
    reciprocal_of_max_frame_rate = 1/preload.max_frame_rate
    address = preload.address
    xmcp = ipc.XMIPCamera(address.encode('UTF-8'), 34567, b"admin", b"")
    xmcp.start()

    # =================== FD MODEL ====================
    detector = face_detector.DetectorModel(preload)
    loop = asyncio.get_running_loop()

    predictor = dlib.shape_predictor(preload.face_landmark_path)

    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    # =================== ETERNAL LOOP ====================
    while True:
        start_time = loop.time()

        try:
            head_frame = xmcp.frame(rows=540, cols=960)

            for img, _box, _points in detector.get_boxes_and_landmarks(head_frame):
                # The predictor runs on the face crop yielded by the detector,
                # addressed as a 112x112 rectangle.
                shape = predictor(img, dlib.rectangle(0, 0, 112, 112))
                shape = face_utils.shape_to_np(shape)
                _reprojectdst, euler_angle = get_head_pose(shape)

                # =================== EYES ====================
                shape = shape * 10

                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]

                leftEAR = eye_aspect_ratio(leftEye)
                rightEAR = eye_aspect_ratio(rightEye)

                result_queue.put((euler_angle, shape, leftEAR, rightEAR))
        except Exception as e:
            # Best effort: a bad frame must not kill the detection process.
            print(e)

        print(colored(loop.time()-start_time, 'red'), flush=True)

        restime = reciprocal_of_max_frame_rate - loop.time() + start_time
        if restime > 0:
            await asyncio.sleep(restime)


# =================== ARGS ====================
os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
args = start_up_init()
args.mtcnn_minsize = 168
args.mtcnn_factor = 0.1
args.mtcnn_threshold = [0.5, 0.6, 0.7]
args.address = '10.41.0.198'
args.face_landmark_path = './model/landmarks.dat'

# =================== INIT ====================
# The loops above reach these queues as module globals.
# frame_queue is not referenced by any loop visible in this file.
frame_queue = Queue(maxsize=args.max_frame_rate)
upstream_queue = Queue(maxsize=args.max_frame_rate)
result_queue = Queue(maxsize=args.max_frame_rate)

# =================== Process On ====================
# NOTE(review): lambda targets and global queues rely on the "fork" start
# method; under "spawn" the targets cannot be pickled -- confirm the
# deployment platform before changing this.
Process(target=lambda: asyncio.run(detection_loop(args))).start()
Process(target=lambda: asyncio.run(camera_loop(args))).start()
asyncio.run(upload_loop())