Skip to content
This repository has been archived by the owner on Dec 21, 2023. It is now read-only.

Commit

Permalink
Object Detector export with NMS (#620)
Browse files Browse the repository at this point in the history
Object Detector now supports non-maximum suppression in Core ML export.
  • Loading branch information
znation authored Jun 4, 2018
1 parent 85ace03 commit 38cc536
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 30 deletions.
60 changes: 48 additions & 12 deletions src/unity/python/turicreate/test/test_object_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,31 +143,31 @@ def setUpClass(self):
}
self.fields_ans = self.get_ans.keys()

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_create_with_missing_feature(self):
tc.object_detector.create(self.sf, feature='wrong_feature', annotations=self.annotations)

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_create_with_missing_annotations(self):
tc.object_detector.create(self.sf, feature=self.feature, annotations='wrong_annotations')

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_create_with_invalid_annotations_list_coord(self):
sf = self.sf.head()
sf[self.annotations] = sf[self.annotations].apply(
lambda x: [{'label': _CLASSES[0], 'coordinates': [100, 50, 20, 40]}])

tc.object_detector.create(sf)

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_create_with_invalid_annotations_not_dict(self):
sf = self.sf.head()
sf[self.annotations] = sf[self.annotations].apply(
lambda x: [1])

tc.object_detector.create(sf)

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_create_with_empty_dataset(self):
tc.object_detector.create(self.sf[:0])

Expand Down Expand Up @@ -241,15 +241,15 @@ def test_evaluate(self):
ret = self.model.evaluate(self.sf[:0])
self.assertEqual(ret['mean_average_precision_50'], 0.0)

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_evaluate_invalid_metric(self):
self.model.evaluate(self.sf.head(), metric='not-supported-metric')

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_evaluate_invalid_format(self):
self.model.evaluate(self.sf.head(), output_type='not-supported-format')

@pytest.mark.xfail(rases = _ToolkitError)
@pytest.mark.xfail(raises = _ToolkitError)
def test_evaluate_missing_annotations(self):
sf = self.sf.copy()
del sf[self.annotations]
Expand All @@ -259,17 +259,20 @@ def test_export_coreml(self):
from PIL import Image
import coremltools
filename = tempfile.mkstemp('bingo.mlmodel')[1]
self.model.export_coreml(filename)
self.model.export_coreml(filename,
include_non_maximum_suppression=False)

coreml_model = coremltools.models.MLModel(filename)
img = self.sf[0:1][self.feature][0]
img_fixed = tc.image_analysis.resize(img, 416, 416, 3)
pil_img = Image.fromarray(img_fixed.pixel_data)
if _mac_ver() >= (10, 13):
ret = coreml_model.predict({self.feature: pil_img}, usesCPUOnly = True)
ret = coreml_model.predict({self.feature: pil_img},
usesCPUOnly = True)
self.assertEqual(ret['coordinates'].shape[1], 4)
self.assertEqual(ret['confidence'].shape[1], len(_CLASSES))
self.assertEqual(ret['coordinates'].shape[0], ret['confidence'].shape[0])
self.assertEqual(ret['coordinates'].shape[0],
ret['confidence'].shape[0])
# A numeric comparison of the resulting of top bounding boxes is
# not that meaningful unless the model has converged

Expand All @@ -281,7 +284,40 @@ def test_export_coreml(self):
sf = tc.SFrame({'image': [self.sf[self.feature][0]],
'ann': [self.sf[self.annotations][0][:1]]})
model2 = tc.object_detector.create(sf, max_iterations=1)
model2.export_coreml(filename2)
model2.export_coreml(filename2,
include_non_maximum_suppression=False)

@unittest.skipIf(_mac_ver() < (10, 14),
    "Non-maximum suppression is only supported on MacOS 10.14+.")
def test_export_coreml_with_non_maximum_suppression(self):
    """Export the trained detector to Core ML with built-in NMS enabled,
    reload it, and sanity-check the prediction output shapes.

    Mirrors test_export_coreml, but with
    include_non_maximum_suppression=True.
    """
    # Local imports, matching test_export_coreml: the module does not
    # import PIL or coremltools at top level, so without these the test
    # raises NameError at the MLModel/Image calls below.
    from PIL import Image
    import coremltools

    filename = tempfile.mkstemp('bingo.mlmodel')[1]
    self.model.export_coreml(filename, include_non_maximum_suppression=True)

    coreml_model = coremltools.models.MLModel(filename)
    img = self.sf[0:1][self.feature][0]
    # The exported model takes a fixed-size image input (416x416 RGB).
    img_fixed = tc.image_analysis.resize(img, 416, 416, 3)
    pil_img = Image.fromarray(img_fixed.pixel_data)
    if _mac_ver() >= (10, 13):
        ret = coreml_model.predict({self.feature: pil_img},
                                   usesCPUOnly = True)
        self.assertEqual(ret['coordinates'].shape[1], 4)
        self.assertEqual(ret['confidence'].shape[1], len(_CLASSES))
        self.assertEqual(ret['coordinates'].shape[0],
                         ret['confidence'].shape[0])
        # A numeric comparison of the resulting top bounding boxes is
        # not that meaningful unless the model has converged

    # Also check if we can train a second model and export it (there could
    # be naming issues in mxnet)
    filename2 = tempfile.mkstemp('bingo2.mlmodel')[1]
    # We also test at the same time if we can export a model with a single
    # class
    sf = tc.SFrame({'image': [self.sf[self.feature][0]],
                    'ann': [self.sf[self.annotations][0][:1]]})
    model2 = tc.object_detector.create(sf, max_iterations=1)
    model2.export_coreml(filename2, include_non_maximum_suppression=True)



@unittest.skipIf(sys.platform != 'darwin' or _mac_ver() >= (10, 14),
"GPU selection should fail on macOS 10.13 or below")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,10 @@ def class_dict(aps):

return ret

def export_coreml(self, filename):
def export_coreml(self, filename,
include_non_maximum_suppression = True,
iou_threshold = None,
confidence_threshold = None):
"""
Save the model in Core ML format. The Core ML model takes an image of
fixed size as input and produces two output arrays: `confidence` and
Expand Down Expand Up @@ -1050,6 +1053,29 @@ def export_coreml(self, filename):
--------
save
Parameters
----------
filename : string
The path of the file where we want to save the Core ML model.
include_non_maximum_suppression : bool
Non-maximum suppression is only available in iOS 12+.
A boolean parameter to indicate whether the Core ML model should be
saved with built-in non-maximum suppression or not.
This parameter is set to True by default.
iou_threshold : float
Threshold value for non-maximum suppression. Non-maximum suppression
prevents multiple bounding boxes appearing over a single object.
This threshold, set between 0 and 1, controls how aggressive this
suppression is. A value of 1 means no maximum suppression will
occur, while a value of 0 will maximally suppress neighboring
boxes around a prediction.
confidence_threshold : float
Only return predictions above this level of confidence. The
threshold can range from 0 to 1.
Examples
--------
>>> model.export_coreml('detector.mlmodel')
Expand All @@ -1059,6 +1085,9 @@ def export_coreml(self, filename):
import coremltools
from coremltools.models import datatypes, neural_network

if not iou_threshold: iou_threshold = self.non_maximum_suppression_threshold
if not confidence_threshold: confidence_threshold = 0.25

preds_per_box = 5 + self.num_classes
num_anchors = len(self.anchors)
num_classes = self.num_classes
Expand Down Expand Up @@ -1101,13 +1130,18 @@ def export_coreml(self, filename):
input_features = list(zip(input_names, input_types))

num_spatial = self._grid_shape[0] * self._grid_shape[1]
num_bounding_boxes = num_anchors * num_spatial
CONFIDENCE_STR = ("raw_confidence" if include_non_maximum_suppression
else "confidence")
COORDINATES_STR = ("raw_coordinates" if include_non_maximum_suppression
else "coordinates")
output_names = [
'confidence',
'coordinates',
CONFIDENCE_STR,
COORDINATES_STR
]
output_dims = [
(num_anchors * num_spatial, num_classes),
(num_anchors * num_spatial, 4),
(num_bounding_boxes, num_classes),
(num_bounding_boxes, 4),
]
output_types = [datatypes.Array(*dim) for dim in output_dims]
output_features = list(zip(output_names, output_types))
Expand Down Expand Up @@ -1150,7 +1184,7 @@ def export_coreml(self, filename):

# (1, 2, B*H*W, 1)
builder.add_reshape(name=prefix + 'rel_xy',
target_shape=[batch_size, 2, num_anchors * num_spatial, 1],
target_shape=[batch_size, 2, num_bounding_boxes, 1],
mode=0,
input_name=prefix + 'rel_xy_sp',
output_name=prefix + 'rel_xy')
Expand Down Expand Up @@ -1214,7 +1248,7 @@ def export_coreml(self, filename):

# (1, 2, B*H*W, 1)
builder.add_reshape(name=prefix + 'wh',
target_shape=[1, 2, num_anchors * num_spatial, 1],
target_shape=[1, 2, num_bounding_boxes, 1],
mode=0,
input_name=prefix + 'wh_pre',
output_name=prefix + 'wh')
Expand All @@ -1231,18 +1265,18 @@ def export_coreml(self, filename):
input_name=prefix + 'boxes_out_transposed',
output_name=prefix + 'boxes_out')

scale = _np.zeros((num_anchors * num_spatial, 4, 1))
scale = _np.zeros((num_bounding_boxes, 4, 1))
scale[:, 0::2] = 1.0 / self._grid_shape[1]
scale[:, 1::2] = 1.0 / self._grid_shape[0]

# (1, B*H*W, 4, 1)
builder.add_scale(name='coordinates',
builder.add_scale(name=COORDINATES_STR,
W=scale,
b=0,
has_bias=False,
shape_scale=(num_anchors * num_spatial, 4, 1),
shape_scale=(num_bounding_boxes, 4, 1),
input_name=prefix + 'boxes_out',
output_name='coordinates')
output_name=COORDINATES_STR)

# CLASS PROBABILITIES AND OBJECT CONFIDENCE

Expand Down Expand Up @@ -1280,7 +1314,7 @@ def export_coreml(self, filename):
conf = prefix + 'conf_tiled_sp'
builder.add_elementwise(name=prefix + 'conf_tiled_sp',
mode='CONCAT',
input_names=[prefix + 'conf_sp'] * num_classes,
input_names=[prefix+'conf_sp']*num_classes,
output_name=conf)
else:
conf = prefix + 'conf_sp'
Expand All @@ -1293,18 +1327,19 @@ def export_coreml(self, filename):

# (1, C, B*H*W, 1)
builder.add_reshape(name=prefix + 'confprobs_transposed',
target_shape=[1, num_classes, num_anchors * num_spatial, 1],
target_shape=[1, num_classes, num_bounding_boxes, 1],
mode=0,
input_name=prefix + 'confprobs_sp',
output_name=prefix + 'confprobs_transposed')

# (1, B*H*W, C, 1)
builder.add_permute(name='confidence',
builder.add_permute(name=CONFIDENCE_STR,
dim=[0, 2, 1, 3],
input_name=prefix + 'confprobs_transposed',
output_name='confidence')
output_name=CONFIDENCE_STR)

_mxnet_converter._set_input_output_layers(builder, input_names, output_names)
_mxnet_converter._set_input_output_layers(
builder, input_names, output_names)
builder.set_input(input_names, input_dims)
builder.set_output(output_names, output_dims)
builder.set_pre_processing_parameters(image_input_names=self.feature)
Expand Down Expand Up @@ -1441,8 +1476,17 @@ def export_coreml(self, filename):
' Confidence Threshold override (default: {})')
mlmodel = coremltools.models.MLModel(model)
model_type = 'object detector (%s)' % self.model
mlmodel.short_description = _coreml_utils._mlmodel_short_description(model_type)
mlmodel.short_description = _coreml_utils._mlmodel_short_description(
model_type)
mlmodel.input_description[self.feature] = 'Input image'
if include_non_maximum_suppression:
iouThresholdString = '(optional) IOU Threshold override (default: {})'
mlmodel.input_description['iouThreshold'] = \
iouThresholdString.format(iou_threshold)
confidenceThresholdString = ('(optional)' +
' Confidence Threshold override (default: {})')
mlmodel.input_description['confidenceThreshold'] = \
confidenceThresholdString.format(confidence_threshold)
mlmodel.output_description['confidence'] = \
u'Boxes \xd7 Class confidence (see user-defined metadata "classes")'
mlmodel.output_description['coordinates'] = \
Expand All @@ -1451,7 +1495,12 @@ def export_coreml(self, filename):
'model': self.model,
'max_iterations': str(self.max_iterations),
'training_iterations': str(self.training_iterations),
'non_maximum_suppression_threshold': str(self.non_maximum_suppression_threshold),
'include_non_maximum_suppression': str(
include_non_maximum_suppression),
'non_maximum_suppression_threshold': str(
iou_threshold),
'confidence_threshold': str(confidence_threshold),
'iou_threshold': str(iou_threshold),
'feature': self.feature,
'annotations': self.annotations,
'classes': ','.join(self.classes),
Expand Down

0 comments on commit 38cc536

Please sign in to comment.