From f4a15fb61aacc468d4fc180c613c4851f99b9000 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Dec 2016 16:41:40 -0800 Subject: [PATCH] Add multi-class metrics. Change: 141118040 --- .../learn/python/learn/estimators/head.py | 298 ++++++++++-- .../python/learn/estimators/head_test.py | 451 ++++++++++++------ .../python/learn/estimators/metric_key.py | 6 + .../contrib/learn/python/learn/metric_spec.py | 3 + 4 files changed, 557 insertions(+), 201 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 60515a3fc68778..32c7787a9b803e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -19,6 +19,7 @@ from __future__ import print_function import abc +import functools import six from tensorflow.contrib import losses @@ -31,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.estimators import prediction_key from tensorflow.contrib.session_bundle import exporter from tensorflow.python import summary +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops @@ -78,7 +80,7 @@ def _regression_head(label_name=None, def _multi_class_head(n_classes, label_name=None, weight_column_name=None, enable_centered_bias=False, head_name=None, - thresholds=None): + thresholds=None, metric_class_ids=None): """Creates a _Head for multi class single label classification. The Head uses softmax cross entropy loss. @@ -96,18 +98,24 @@ def _multi_class_head(n_classes, label_name=None, weight_column_name=None, head_name: name of the head. If provided, predictions, summary and metrics keys will be prefixed by the head_name and an underscore. thresholds: thresholds for eval metrics, defaults to [.5] + metric_class_ids: List of class IDs for which we should report per-class + metrics. Must all be in the range `[0, n_classes)`. Invalid if + `n_classes` is 2. Returns: An instance of _MultiClassHead. Raises: - ValueError: if n_classes is < 2 + ValueError: if `n_classes` is < 2, or `metric_class_ids` is provided when + `n_classes` is 2. """ if (n_classes is None) or (n_classes < 2): raise ValueError( "n_classes must be > 1 for classification: %s." % n_classes) if n_classes == 2: + if metric_class_ids: + raise ValueError("metric_class_ids invalid for n_classes==2.") return _BinaryLogisticHead(label_name=label_name, weight_column_name=weight_column_name, enable_centered_bias=enable_centered_bias, @@ -119,7 +127,8 @@ def _multi_class_head(n_classes, label_name=None, weight_column_name=None, weight_column_name=weight_column_name, enable_centered_bias=enable_centered_bias, head_name=head_name, - thresholds=thresholds) + thresholds=thresholds, + metric_class_ids=metric_class_ids) def _binary_svm_head(label_name=None, weight_column_name=None, @@ -155,7 +164,7 @@ def _binary_svm_head(label_name=None, weight_column_name=None, def _multi_label_head(n_classes, label_name=None, weight_column_name=None, enable_centered_bias=False, head_name=None, - thresholds=None): + thresholds=None, metric_class_ids=None): """Creates a _Head for multi label classification. The Head uses softmax cross entropy loss. @@ -173,6 +182,8 @@ def _multi_label_head(n_classes, label_name=None, weight_column_name=None, head_name: name of the head. If provided, predictions, summary and metrics keys will be prefixed by the head_name and an underscore. thresholds: thresholds for eval metrics, defaults to [.5] + metric_class_ids: List of class IDs for which we should report per-class + metrics. Must all be in the range `[0, n_classes)`. Returns: An instance of _MultiClassHead. @@ -187,7 +198,8 @@ def _multi_label_head(n_classes, label_name=None, weight_column_name=None, weight_column_name=weight_column_name, enable_centered_bias=enable_centered_bias, head_name=head_name, - thresholds=thresholds) + thresholds=thresholds, + metric_class_ids=metric_class_ids) # TODO(zakaria): Make the classes public once we are ready for users to subclass @@ -353,7 +365,9 @@ def _logits_to_predictions(self, logits): def _signature_fn(self): """Returns the signature_fn to be used in exporting.""" - def _regression_signature_fn(examples, unused_features, predictions): + def _regression_signature_fn(examples, features, predictions): + # pylint: disable=missing-docstring + del features if isinstance(predictions, dict): score = predictions[prediction_key.PredictionKey.SCORES] else: @@ -485,8 +499,9 @@ def _logits_to_predictions(self, logits): def _signature_fn(self): """Returns the signature_fn to be used in exporting.""" - def _classification_signature_fn(examples, unused_features, predictions): + def _classification_signature_fn(examples, features, predictions): """Servo signature function.""" + del features if isinstance(predictions, dict): default_signature = exporter.classification_signature( input_tensor=examples, @@ -527,12 +542,13 @@ def _add_binary_metric(key, metric_fn): _add_binary_metric( metric_key.MetricKey.PREDICTION_MEAN, _predictions_streaming_mean) _add_binary_metric( - metric_key.MetricKey.LABEL_MEAN, _labels_streaming_mean) + metric_key.MetricKey.LABEL_MEAN, _indicator_labels_streaming_mean) # Also include the streaming mean of the label as an accuracy baseline, as # a reminder to users. _add_binary_metric( - metric_key.MetricKey.ACCURACY_BASELINE, _labels_streaming_mean) + metric_key.MetricKey.ACCURACY_BASELINE, + _indicator_labels_streaming_mean) _add_binary_metric(metric_key.MetricKey.AUC, _streaming_auc) @@ -571,7 +587,8 @@ class _MultiClassHead(_Head): def __init__(self, n_classes, label_name, weight_column_name, enable_centered_bias, head_name, - loss_fn=_softmax_cross_entropy_loss, thresholds=None): + loss_fn=_softmax_cross_entropy_loss, thresholds=None, + metric_class_ids=None): """Base type for all single heads. Args: @@ -589,9 +606,11 @@ def __init__(self, n_classes, label_name, keys will be prefixed by the head_name and an underscore. loss_fn: Loss function. thresholds: thresholds for eval. + metric_class_ids: List of class IDs for which we should report per-class + metrics. Must all be in the range `[0, n_classes)`. Raises: - ValueError: if n_classes is invalid. + ValueError: if `n_classes` or `metric_class_ids` is invalid. """ super(_MultiClassHead, self).__init__(head_name=head_name) @@ -604,6 +623,11 @@ def __init__(self, n_classes, label_name, self._loss_fn = loss_fn self._enable_centered_bias = enable_centered_bias self._problem_type = constants.ProblemType.CLASSIFICATION + self._metric_class_ids = tuple( + [] if metric_class_ids is None else metric_class_ids) + for class_id in self._metric_class_ids: + if (class_id < 0) or (class_id >= n_classes): + raise ValueError("Class ID %s not in [0, %s)." % (class_id, n_classes)) @property def logits_dimension(self): @@ -667,8 +691,9 @@ def _logits_to_predictions(self, logits): def _signature_fn(self): """Returns the signature_fn to be used in exporting.""" - def _classification_signature_fn(examples, unused_features, predictions): + def _classification_signature_fn(examples, features, predictions): """Servo signature function.""" + del features if isinstance(predictions, dict): default_signature = exporter.classification_signature( input_tensor=examples, @@ -684,24 +709,104 @@ def _classification_signature_fn(examples, unused_features, predictions): return default_signature, {} return _classification_signature_fn + def _metric_spec(self, metric_fn, prediction_name): + return metric_spec.MetricSpec( + metric_fn, prediction_name, self._label_name, self._weight_column_name) + def _default_metrics(self): """Returns a dict of `MetricSpec` objects keyed by name.""" - metrics = {_head_prefixed(self._head_name, metric_key.MetricKey.LOSS): - _weighted_average_loss_metric_spec( - self._loss_fn, - prediction_key.PredictionKey.LOGITS, - self._label_name, - self._weight_column_name)} - - # TODO(b/29366811): This currently results in both an "accuracy" and an - # "accuracy/threshold_0.500000_mean" metric for binary classification. - metrics[_head_prefixed(self._head_name, metric_key.MetricKey.ACCURACY)] = ( - metric_spec.MetricSpec(metrics_lib.streaming_accuracy, - prediction_key.PredictionKey.CLASSES, - self._label_name, - self._weight_column_name)) - - # TODO(b/32953199): Add multiclass metrics. + def _streaming_auc_with_class_id_label(predictions, labels, weights=None): + indicator_labels = _class_id_labels_to_indicator( + labels, num_classes=self.logits_dimension) + return _streaming_auc(predictions, indicator_labels, weights) + + loss_key = _head_prefixed(self._head_name, metric_key.MetricKey.LOSS) + accuracy_key = _head_prefixed( + self._head_name, metric_key.MetricKey.ACCURACY) + auc_key = _head_prefixed(self._head_name, metric_key.MetricKey.AUC) + + metrics = { + loss_key: _weighted_average_loss_metric_spec( + self._loss_fn, + prediction_key.PredictionKey.LOGITS, + self._label_name, + self._weight_column_name), + # TODO(b/29366811): This currently results in both an "accuracy" and an + # "accuracy/threshold_0.500000_mean" metric for binary classification. + accuracy_key: self._metric_spec( + metrics_lib.streaming_accuracy, + prediction_key.PredictionKey.CLASSES), + auc_key: self._metric_spec( + _streaming_auc_with_class_id_label, + prediction_key.PredictionKey.PROBABILITIES) + } + + def _class_predictions_streaming_mean( + predictions, labels, weights=None, class_id=None): + del labels + return metrics_lib.streaming_mean( + math_ops.select( + math_ops.equal( + math_ops.to_int32(class_id), + math_ops.to_int32(predictions)), + array_ops.ones_like(predictions), + array_ops.zeros_like(predictions)), + weights=weights) + + def _class_labels_streaming_mean( + predictions, labels, weights=None, class_id=None): + del predictions + assert class_id is not None + return metrics_lib.streaming_mean( + math_ops.select( + math_ops.equal( + math_ops.to_int32(class_id), + math_ops.to_int32(labels)), + array_ops.ones_like(labels), + array_ops.zeros_like(labels)), + weights=weights) + + def _class_streaming_auc(predictions, labels, weights=None, class_id=None): + assert class_id is not None + indicator_labels = _class_id_labels_to_indicator( + labels, num_classes=self.logits_dimension) + return _streaming_auc( + predictions, indicator_labels, weights=weights, class_id=class_id) + + for class_id in self._metric_class_ids: + + # TODO(ptucker): Add per-class accuracy, precision, recall. + + prediction_mean_key = _head_prefixed( + self._head_name, + metric_key.MetricKey.CLASS_PREDICTION_MEAN % class_id) + label_mean_key = _head_prefixed( + self._head_name, metric_key.MetricKey.CLASS_LABEL_MEAN % class_id) + probability_mean_key = _head_prefixed( + self._head_name, + metric_key.MetricKey.CLASS_PROBABILITY_MEAN % class_id) + logits_mean_key = _head_prefixed( + self._head_name, + metric_key.MetricKey.CLASS_LOGITS_MEAN % class_id) + auc_key = _head_prefixed( + self._head_name, metric_key.MetricKey.CLASS_AUC % class_id) + + metrics[prediction_mean_key] = self._metric_spec( + functools.partial( + _class_predictions_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.CLASSES) + metrics[label_mean_key] = self._metric_spec( + functools.partial(_class_labels_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.PROBABILITIES) + metrics[probability_mean_key] = self._metric_spec( + functools.partial(_predictions_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.PROBABILITIES) + metrics[logits_mean_key] = self._metric_spec( + functools.partial(_predictions_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.LOGITS) + metrics[auc_key] = self._metric_spec( + functools.partial(_class_streaming_auc, class_id=class_id), + prediction_key.PredictionKey.LOGITS) return metrics @@ -713,6 +818,12 @@ def _to_labels_tensor(labels, label_name): return labels +def _assert_labels_rank(labels): + return control_flow_ops.Assert( + math_ops.less_equal(array_ops.rank(labels), 2), + ("labels shape should be either [batch_size, 1] or [batch_size]",)) + + class _BinarySvmHead(_BinaryLogisticHead): """_Head for binary classification using SVMs.""" @@ -720,12 +831,8 @@ def __init__(self, label_name, weight_column_name, enable_centered_bias, head_name, thresholds): def _loss_fn(logits, labels): with ops.name_scope(None, "hinge_loss", (logits, labels)) as name: - check_shape_op = control_flow_ops.Assert( - math_ops.less_equal(array_ops.rank(labels), 2), - ("labels shape should be either [batch_size, 1] or [batch_size]",)) - with ops.control_dependencies((check_shape_op,)): - labels = array_ops.reshape( - labels, shape=(array_ops.shape(labels)[0], 1)) + with ops.control_dependencies((_assert_labels_rank(labels),)): + labels = array_ops.reshape(labels, shape=(-1, 1)) return losses.hinge_loss(logits, labels, scope=name) super(_BinarySvmHead, self).__init__( @@ -769,7 +876,7 @@ class _MultiLabelHead(_MultiClassHead): # TODO(zakaria): add signature and metric for multilabel. def __init__(self, n_classes, label_name, weight_column_name, enable_centered_bias, head_name, - thresholds): + thresholds, metric_class_ids=None): super(_MultiLabelHead, self).__init__( n_classes=n_classes, @@ -778,7 +885,8 @@ def __init__(self, n_classes, label_name, enable_centered_bias=enable_centered_bias, head_name=head_name, loss_fn=_sigmoid_cross_entropy_loss, - thresholds=thresholds) + thresholds=thresholds, + metric_class_ids=metric_class_ids) def _logits_to_predictions(self, logits): """See `_MultiClassHead`.""" @@ -792,19 +900,79 @@ def _logits_to_predictions(self, logits): name=prediction_key.PredictionKey.CLASSES) } + def _metric_spec(self, metric_fn, prediction_name): + return metric_spec.MetricSpec( + metric_fn, prediction_name, self._label_name, self._weight_column_name) + + def _default_metrics(self): + """Returns a dict of `MetricSpec` objects keyed by name.""" + loss_key = _head_prefixed(self._head_name, metric_key.MetricKey.LOSS) + accuracy_key = _head_prefixed( + self._head_name, metric_key.MetricKey.ACCURACY) + auc_key = _head_prefixed(self._head_name, metric_key.MetricKey.AUC) + + metrics = { + loss_key: _weighted_average_loss_metric_spec( + self._loss_fn, + prediction_key.PredictionKey.LOGITS, + self._label_name, + self._weight_column_name), + # TODO(b/29366811): This currently results in both an "accuracy" and an + # "accuracy/threshold_0.500000_mean" metric for binary classification. + accuracy_key: self._metric_spec( + metrics_lib.streaming_accuracy, + prediction_key.PredictionKey.CLASSES), + auc_key: self._metric_spec( + _streaming_auc, prediction_key.PredictionKey.PROBABILITIES), + } + + for class_id in self._metric_class_ids: + + # TODO(ptucker): Add per-class accuracy, precision, recall. + + prediction_mean_key = _head_prefixed( + self._head_name, + metric_key.MetricKey.CLASS_PREDICTION_MEAN % class_id) + label_mean_key = _head_prefixed( + self._head_name, metric_key.MetricKey.CLASS_LABEL_MEAN % class_id) + probability_mean_key = _head_prefixed( + self._head_name, + metric_key.MetricKey.CLASS_PROBABILITY_MEAN % class_id) + logits_mean_key = _head_prefixed( + self._head_name, metric_key.MetricKey.CLASS_LOGITS_MEAN % class_id) + auc_key = _head_prefixed( + self._head_name, metric_key.MetricKey.CLASS_AUC % class_id) + + metrics[prediction_mean_key] = self._metric_spec( + functools.partial(_predictions_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.CLASSES) + metrics[label_mean_key] = self._metric_spec( + functools.partial( + _indicator_labels_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.CLASSES) + metrics[probability_mean_key] = self._metric_spec( + functools.partial(_predictions_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.PROBABILITIES) + metrics[logits_mean_key] = self._metric_spec( + functools.partial(_predictions_streaming_mean, class_id=class_id), + prediction_key.PredictionKey.LOGITS) + metrics[auc_key] = self._metric_spec( + functools.partial(_streaming_auc, class_id=class_id), + prediction_key.PredictionKey.LOGITS) + + return metrics + def _weighted_loss(loss, weight): - """Returns cumulative weighted loss.""" + """Returns cumulative weighted loss as 1d `Tensor`.""" with ops.name_scope(None, "weighted_loss", (loss, weight)) as name: - unweighted_loss = array_ops.reshape(loss, shape=(-1,)) - weighted_loss = math_ops.mul(unweighted_loss, - array_ops.reshape( - weight, shape=(-1,)), - name=name) - return weighted_loss + return math_ops.mul(array_ops.reshape(loss, shape=(-1,)), + array_ops.reshape(weight, shape=(-1,)), + name=name) def _weight_tensor(features, weight_column_name): + """Returns weights as 1d `Tensor`.""" if not weight_column_name: return None with ops.name_scope( @@ -982,17 +1150,49 @@ def _streaming_weighted_average_loss(predictions, labels, weights=None): pred_key, label_key, weight_key) -def _labels_streaming_mean(unused_predictions, labels, weights=None): +def _indicator_labels_streaming_mean( + predictions, labels, weights=None, class_id=None): + del predictions + if class_id is not None: + labels = labels[:, class_id] return metrics_lib.streaming_mean(labels, weights=weights) -def _predictions_streaming_mean(predictions, unused_labels, weights=None): +def _predictions_streaming_mean( + predictions, labels, weights=None, class_id=None): + del labels + if class_id is not None: + predictions = predictions[:, class_id] return metrics_lib.streaming_mean(predictions, weights=weights) -def _streaming_auc(predictions, labels, weights=None): - return metrics_lib.streaming_auc(predictions, labels, - weights=_float_weights_or_none(weights)) +# TODO(ptucker): Add support for SparseTensor labels. +def _class_id_labels_to_indicator(labels, num_classes): + if (num_classes is None) or (num_classes < 2): + raise ValueError("Invalid num_classes %s." % num_classes) + with ops.control_dependencies((_assert_labels_rank(labels),)): + labels = array_ops.reshape(labels, (-1,)) + return array_ops.one_hot(labels, depth=num_classes, axis=-1) + + +def _streaming_auc(predictions, labels, weights=None, class_id=None): + if class_id is not None: + predictions = predictions[:, class_id] + labels = labels[:, class_id] + return metrics_lib.streaming_auc( + predictions, math_ops.cast(labels, dtypes.bool), + weights=_float_weights_or_none(weights)) + + +def _assert_class_id(class_id, num_classes=None): + """Average label value for class `class_id`.""" + if (class_id is None) or (class_id < 0): + raise ValueError("Invalid class_id %s." % class_id) + if num_classes is not None: + if num_classes < 2: + raise ValueError("Invalid num_classes %s." % num_classes) + if class_id >= num_classes: + raise ValueError("Invalid class_id %s." % class_id) def _accuracy_at_threshold(threshold): @@ -1013,6 +1213,6 @@ def _streaming_metrics(predictions, labels, weights=None): precision_tensor, update_op = streaming_metrics_fn( predictions, labels=labels, thresholds=(threshold,), weights=_float_weights_or_none(weights)) - return array_ops.squeeze(precision_tensor), update_op + return array_ops.squeeze(precision_tensor), array_ops.squeeze(update_op) return _streaming_metrics diff --git a/tensorflow/contrib/learn/python/learn/estimators/head_test.py b/tensorflow/contrib/learn/python/learn/estimators/head_test.py index 40eb7d17de297a..b84a8ce3c2081a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head_test.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +import math import numpy as np import six import tensorflow as tf @@ -40,77 +41,94 @@ def _assert_variables( def _assert_no_variables(test_case): - _assert_variables(test_case, set([]), set([]), set([])) + _assert_variables(test_case) -class RegressionModelHeadTest(tf.test.TestCase): +# This must be called from within a tf.Session. +def _assert_metrics( + test_case, expected_loss, expected_eval_metrics, model_fn_ops): + test_case.assertAlmostEqual(expected_loss, model_fn_ops.loss.eval(), places=4) + for k in six.iterkeys(expected_eval_metrics): + test_case.assertIn(k, six.iterkeys(model_fn_ops.eval_metric_ops)) + tf.initialize_local_variables().run() + for key, expected_value in six.iteritems(expected_eval_metrics): + value_tensor, update_tensor = model_fn_ops.eval_metric_ops[key] + update = update_tensor.eval() + test_case.assertAlmostEqual( + expected_value, update, places=4, + msg="%s: update, expected %s, got %s." % (key, expected_value, update)) + value = value_tensor.eval() + test_case.assertAlmostEqual( + expected_value, value, places=4, + msg="%s: value, expected %s, got %s." % (key, expected_value, value)) + - def _assert_metrics(self, model_fn_ops): - self.assertItemsEqual(( - "loss", - ), six.iterkeys(model_fn_ops.eval_metric_ops)) +def _sigmoid(x): + return 1. / (1. + math.exp(-1 * x)) - # TODO(zakaria): test multilabel regresssion. + +class RegressionModelHeadTest(tf.test.TestCase): + + # TODO(zakaria): test multilabel regression. def testRegression(self): head = head_lib._regression_head() - with tf.Graph().as_default(), tf.Session() as sess: + with tf.Graph().as_default(), tf.Session(): prediction = tf.constant([[1.], [1.], [3.]]) labels = tf.constant([[0.], [1.], [1.]]) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=prediction) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(5. / 3, sess.run(model_fn_ops.loss)) + _assert_metrics(self, 5. / 3, {"loss": 5. / 3}, model_fn_ops) + def testRegressionEvalMode(self): + head = head_lib._regression_head() + with tf.Graph().as_default(), tf.Session(): + prediction = tf.constant([[1.], [1.], [3.]]) + labels = tf.constant([[0.], [1.], [1.]]) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.EVAL, _noop_train_op, logits=prediction) self.assertIsNone(model_fn_ops.train_op) + _assert_no_variables(self) + _assert_metrics(self, 5. / 3, {"loss": 5. / 3}, model_fn_ops) def testRegressionWithLabelName(self): label_name = "my_label" head = head_lib._regression_head(label_name=label_name) - with tf.Graph().as_default(), tf.Session() as sess: + with tf.Graph().as_default(), tf.Session(): prediction = tf.constant([[1.], [1.], [3.]]) labels = {label_name: tf.constant([[0.], [1.], [1.]])} model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=prediction) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(5. / 3, sess.run(model_fn_ops.loss)) - - model_fn_ops = head.head_ops({}, labels, - tf.contrib.learn.ModeKeys.EVAL, - _noop_train_op, logits=prediction) - self.assertIsNone(model_fn_ops.train_op) + _assert_metrics(self, 5. / 3, {"loss": 5. / 3}, model_fn_ops) def testRegressionWithWeights(self): head = head_lib._regression_head( weight_column_name="label_weight") - with tf.Graph().as_default(), tf.Session() as sess: - features = {"label_weight": tf.constant([[2.], [5.], [0.]])} + with tf.Graph().as_default(), tf.Session(): + weights = ((2.,), (5.,), (0.,)) + features = {"label_weight": tf.constant(weights)} prediction = tf.constant([[1.], [1.], [3.]]) labels = tf.constant([[0.], [1.], [1.]]) model_fn_ops = head.head_ops(features, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=prediction) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(2. / 3, sess.run(model_fn_ops.loss), places=3) + _assert_metrics(self, 2. / len(weights), { + "loss": 2. / np.sum(weights) + }, model_fn_ops) def testRegressionWithCenteredBias(self): - head = head_lib._regression_head( - weight_column_name="label_weight", enable_centered_bias=True) - with tf.Graph().as_default(), tf.Session() as sess: - features = {"label_weight": tf.constant([[2.], [5.], [0.]])} + head = head_lib._regression_head(enable_centered_bias=True) + with tf.Graph().as_default(), tf.Session(): prediction = tf.constant([[1.], [1.], [3.]]) labels = tf.constant([[0.], [1.], [1.]]) - model_fn_ops = head.head_ops(features, labels, + model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=prediction) - self._assert_metrics(model_fn_ops) _assert_variables(self, expected_global=( "centered_bias_weight:0", "centered_bias_weight/Adagrad:0", @@ -118,7 +136,7 @@ def testRegressionWithCenteredBias(self): "centered_bias_weight:0", )) tf.global_variables_initializer().run() - self.assertAlmostEqual(2. / 3, sess.run(model_fn_ops.loss), places=3) + _assert_metrics(self, 5. / 3, {"loss": 5. / 3}, model_fn_ops) def testErrorInSparseTensorLabels(self): head = head_lib._regression_head() @@ -136,70 +154,111 @@ def testErrorInSparseTensorLabels(self): class MultiLabelModelHeadTest(tf.test.TestCase): - def _assert_metrics(self, model_fn_ops): - self.assertItemsEqual(( - "accuracy", - "loss", - ), six.iterkeys(model_fn_ops.eval_metric_ops)) + def setUp(self): + self._logits = ((1., 0., 0.),) + self._labels = ((0, 0, 1),) + + def _expected_eval_metrics(self, expected_loss): + return { + "accuracy": 1. / 3, + "auc": 1. / 4, + "loss": expected_loss, + "auc/class0": 1., + "auc/class1": 1., + "auc/class2": 0., + "labels/actual_label_mean/class0": self._labels[0][0], + "labels/actual_label_mean/class1": self._labels[0][1], + "labels/actual_label_mean/class2": self._labels[0][2], + "labels/logits_mean/class0": self._logits[0][0], + "labels/logits_mean/class1": self._logits[0][1], + "labels/logits_mean/class2": self._logits[0][2], + "labels/prediction_mean/class0": self._logits[0][0], + "labels/prediction_mean/class1": self._logits[0][1], + "labels/prediction_mean/class2": self._logits[0][2], + "labels/probability_mean/class0": _sigmoid(self._logits[0][0]), + "labels/probability_mean/class1": _sigmoid(self._logits[0][1]), + "labels/probability_mean/class2": _sigmoid(self._logits[0][2]), + } def testMultiLabel(self): - head = head_lib._multi_label_head(n_classes=3) - with tf.Graph().as_default(), tf.Session() as sess: - logits = tf.constant([[1., 0., 0.]]) - labels = tf.constant([[0, 0, 1]]) + n_classes = 3 + head = head_lib._multi_label_head( + n_classes=n_classes, metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(0.89985204, sess.run(model_fn_ops.loss)) + expected_loss = .89985204 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) + def testMultiLabelEvalMode(self): + n_classes = 3 + head = head_lib._multi_label_head( + n_classes=n_classes, metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant([[1., 0., 0.]]) + labels = tf.constant([[0, 0, 1]]) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.EVAL, _noop_train_op, logits=logits) self.assertIsNone(model_fn_ops.train_op) + _assert_no_variables(self) + expected_loss = .89985204 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) def testMultiLabelWithLabelName(self): + n_classes = 3 label_name = "my_label" - head = head_lib._multi_label_head(n_classes=3, label_name=label_name) - with tf.Graph().as_default(), tf.Session() as sess: + head = head_lib._multi_label_head( + n_classes=n_classes, label_name=label_name, + metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): logits = tf.constant([[1., 0., 0.]]) labels = {label_name: tf.constant([[0, 0, 1]])} model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(0.89985204, sess.run(model_fn_ops.loss)) - - model_fn_ops = head.head_ops({}, labels, - tf.contrib.learn.ModeKeys.EVAL, - _noop_train_op, logits=logits) - self.assertIsNone(model_fn_ops.train_op) + expected_loss = .89985204 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) def testMultiLabelWithWeight(self): + n_classes = 3 head = head_lib._multi_label_head( - n_classes=3, weight_column_name="label_weight") - with tf.Graph().as_default(), tf.Session() as sess: - features = {"label_weight": tf.constant([0.1])} + n_classes=n_classes, weight_column_name="label_weight", + metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): + features = {"label_weight": tf.constant([.1])} logits = tf.constant([[1., 0., 0.]]) labels = tf.constant([[0, 0, 1]]) model_fn_ops = head.head_ops(features, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(0.089985214, sess.run(model_fn_ops.loss)) + _assert_metrics( + self, .089985214, self._expected_eval_metrics(2.69956), + model_fn_ops) def testMultiLabelWithCenteredBias(self): - head = head_lib._multi_label_head(n_classes=3, enable_centered_bias=True) - with tf.Graph().as_default(), tf.Session() as sess: + n_classes = 3 + head = head_lib._multi_label_head( + n_classes=n_classes, enable_centered_bias=True, + metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): logits = tf.constant([[1., 0., 0.]]) labels = tf.constant([[0, 0, 1]]) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_metrics(model_fn_ops) _assert_variables(self, expected_global=( "centered_bias_weight:0", "centered_bias_weight/Adagrad:0", @@ -207,45 +266,69 @@ def testMultiLabelWithCenteredBias(self): "centered_bias_weight:0", )) tf.global_variables_initializer().run() - self.assertAlmostEqual(0.89985204, sess.run(model_fn_ops.loss)) + expected_loss = .89985204 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) -class MultiClassModelHeadTest(tf.test.TestCase): +class BinaryClassificationModelHeadTest(tf.test.TestCase): - def _assert_binary_metrics(self, model_fn_ops): - self.assertItemsEqual(( - "accuracy", - "accuracy/baseline_label_mean", - "accuracy/threshold_0.500000_mean", - "auc", - "labels/actual_label_mean", - "labels/prediction_mean", - "loss", - "precision/positive_threshold_0.500000_mean", - "recall/positive_threshold_0.500000_mean", - ), six.iterkeys(model_fn_ops.eval_metric_ops)) + def setUp(self): + self._logits = ((1.,), (1.,)) + self._labels = ((1.,), (0.,)) + + def _expected_eval_metrics(self, expected_loss): + return { + "accuracy": 1. / 2, + "accuracy/baseline_label_mean": np.mean(self._labels), + "accuracy/threshold_0.500000_mean": 1. / 2, + "auc": 1. / 2, + "labels/actual_label_mean": np.mean(self._labels), + "labels/prediction_mean": .731059, # softmax + "loss": expected_loss, + "precision/positive_threshold_0.500000_mean": 1. / 2, + "recall/positive_threshold_0.500000_mean": 1. / 1, + } def testBinaryClassification(self): - head = head_lib._multi_class_head(n_classes=2) - with tf.Graph().as_default(), tf.Session() as sess: - logits = tf.constant([[1.], [1.]]) - labels = tf.constant([[1.], [0.]]) + n_classes = 2 + head = head_lib._multi_class_head(n_classes=n_classes) + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) # logloss: z:label, x:logit # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_binary_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(0.81326175, sess.run(model_fn_ops.loss), - delta=1e-6) + expected_loss = .81326175 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) + + def testBinaryClassificationEvalMode(self): + n_classes = 2 + head = head_lib._multi_class_head(n_classes=n_classes) + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.EVAL, _noop_train_op, logits=logits) self.assertIsNone(model_fn_ops.train_op) + _assert_no_variables(self) + expected_loss = .81326175 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) def testErrorInSparseTensorLabels(self): - head = head_lib._multi_class_head(n_classes=2) + n_classes = 2 + head = head_lib._multi_class_head(n_classes=n_classes) with tf.Graph().as_default(): prediction = tf.constant([[1.], [1.], [3.]]) labels = tf.SparseTensor( @@ -260,51 +343,60 @@ def testErrorInSparseTensorLabels(self): def testBinaryClassificationWithLabelName(self): label_name = "my_label" head = head_lib._multi_class_head(n_classes=2, label_name=label_name) - with tf.Graph().as_default(), tf.Session() as sess: - logits = tf.constant([[1.], [1.]]) - labels = {label_name: tf.constant([[1.], [0.]])} + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant(self._logits) + labels = {label_name: tf.constant(self._labels)} # logloss: z:label, x:logit # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_binary_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(0.81326175, sess.run(model_fn_ops.loss), - delta=1e-6) - model_fn_ops = head.head_ops({}, labels, - tf.contrib.learn.ModeKeys.EVAL, - _noop_train_op, logits=logits) - self.assertIsNone(model_fn_ops.train_op) + expected_loss = .81326175 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) def testBinaryClassificationWithWeights(self): + n_classes = 2 head = head_lib._multi_class_head( - n_classes=2, weight_column_name="label_weight") - with tf.Graph().as_default(), tf.Session() as sess: - features = {"label_weight": tf.constant([[1.], [0.]])} - logits = tf.constant([[1.], [1.]]) - labels = tf.constant([[1.], [0.]]) + n_classes=n_classes, weight_column_name="label_weight") + with tf.Graph().as_default(), tf.Session(): + weights = ((1.,), (0.,)) + features = {"label_weight": tf.constant(weights)} + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) # logloss: z:label, x:logit # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops(features, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_binary_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(.31326166 / 2, sess.run(model_fn_ops.loss), - delta=1e-6) + expected_total_loss = .31326166 + _assert_metrics( + self, expected_total_loss / len(weights), { + "accuracy": 1. / 1, + "accuracy/baseline_label_mean": 1. / 1, + "accuracy/threshold_0.500000_mean": 1. / 1, + "auc": 0. / 1, + "labels/actual_label_mean": 1. / 1, + "labels/prediction_mean": .731059, # softmax + # TODO(ptucker): Is this the correct eval loss, sum not average? + "loss": expected_total_loss, + "precision/positive_threshold_0.500000_mean": 1. / 1, + "recall/positive_threshold_0.500000_mean": 1. / 1, + }, model_fn_ops) def testBinaryClassificationWithCenteredBias(self): head = head_lib._multi_class_head(n_classes=2, enable_centered_bias=True) - with tf.Graph().as_default(), tf.Session() as sess: - logits = tf.constant([[1.], [1.]]) - labels = tf.constant([[1.], [0.]]) + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) # logloss: z:label, x:logit # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_binary_metrics(model_fn_ops) _assert_variables(self, expected_global=( "centered_bias_weight:0", "centered_bias_weight/Adagrad:0", @@ -312,50 +404,97 @@ def testBinaryClassificationWithCenteredBias(self): "centered_bias_weight:0", )) tf.global_variables_initializer().run() - self.assertAlmostEqual(0.81326175, sess.run(model_fn_ops.loss), - delta=1e-6) + expected_loss = .81326175 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) - def _assert_multi_class_metrics(self, model_fn_ops): - self.assertItemsEqual(( - "accuracy", - "loss", - ), six.iterkeys(model_fn_ops.eval_metric_ops)) + +class MultiClassModelHeadTest(tf.test.TestCase): + + def setUp(self): + self._logits = ((1., 0., 0.),) + self._labels = (2,) + + def _expected_eval_metrics(self, expected_loss): + return { + "accuracy": 0., + "auc": 1. / 4, + "loss": expected_loss, + "auc/class0": 1., + "auc/class1": 1., + "auc/class2": 0., + "labels/actual_label_mean/class0": 0. / 1, + "labels/actual_label_mean/class1": 0. / 1, + "labels/actual_label_mean/class2": 1. / 1, + "labels/logits_mean/class0": self._logits[0][0], + "labels/logits_mean/class1": self._logits[0][1], + "labels/logits_mean/class2": self._logits[0][2], + "labels/prediction_mean/class0": self._logits[0][0], + "labels/prediction_mean/class1": self._logits[0][1], + "labels/prediction_mean/class2": self._logits[0][2], + "labels/probability_mean/class0": 0.576117, # softmax + "labels/probability_mean/class1": 0.211942, # softmax + "labels/probability_mean/class2": 0.211942, # softmax + } def testMultiClass(self): n_classes = 3 - head = head_lib._multi_class_head(n_classes=n_classes) - with tf.Graph().as_default(), tf.Session() as sess: - logits = tf.constant([[1., 0., 0.]]) - labels = tf.constant([2]) + head = head_lib._multi_class_head( + n_classes=n_classes, metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) # logloss: z:label, x:logit # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_multi_class_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(1.5514446, sess.run(model_fn_ops.loss)) + expected_loss = 1.5514446 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) + + def testMultiClassEvalMode(self): + n_classes = 3 + head = head_lib._multi_class_head( + n_classes=n_classes, metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.EVAL, _noop_train_op, logits=logits) self.assertIsNone(model_fn_ops.train_op) + _assert_no_variables(self) + expected_loss = 1.5514446 + _assert_metrics( + self, expected_loss, self._expected_eval_metrics(expected_loss), + model_fn_ops) def testMultiClassWithWeight(self): n_classes = 3 head = head_lib._multi_class_head( - n_classes=n_classes, weight_column_name="label_weight") - with tf.Graph().as_default(), tf.Session() as sess: - features = {"label_weight": tf.constant([0.1])} - logits = tf.constant([[1., 0., 0.]]) - labels = tf.constant([2]) + n_classes=n_classes, weight_column_name="label_weight", + metric_class_ids=range(n_classes)) + with tf.Graph().as_default(), tf.Session(): + weight = .1 + features = {"label_weight": tf.constant([weight])} + logits = tf.constant(self._logits) + labels = tf.constant(self._labels) # logloss: z:label, x:logit # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) model_fn_ops = head.head_ops(features, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=logits) - self._assert_multi_class_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual(.15514446, sess.run(model_fn_ops.loss)) + expected_loss = 1.5514446 + _assert_metrics( + self, expected_loss * weight, + self._expected_eval_metrics(expected_loss), model_fn_ops) def testInvalidNClasses(self): for n_classes in (None, -1, 0, 1): @@ -370,15 +509,9 @@ def setUp(self): # (i.e., < 0) but it is within the [-1,1] margin. There is a 0.5 loss # incurred by this example. The 2nd prediction is outside the margin so it # incurs no loss at all. - self._predictions = ((-0.5,), (1.2,)) + self._predictions = ((-.5,), (1.2,)) self._labels = (0, 1) - self._expected_losses = (0.5, 0.0) - - def _assert_metrics(self, model_fn_ops): - self.assertItemsEqual(( - "accuracy", - "loss", - ), six.iterkeys(model_fn_ops.eval_metric_ops)) + self._expected_losses = (.5, 0.) def testBinarySVMDefaultWeights(self): head = head_lib._binary_svm_head() @@ -388,15 +521,28 @@ def testBinarySVMDefaultWeights(self): model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=predictions) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual( - np.average(self._expected_losses), model_fn_ops.loss.eval()) + expected_loss = np.average(self._expected_losses) + _assert_metrics(self, expected_loss, { + "accuracy": 1., + "loss": expected_loss, + }, model_fn_ops) - model_fn_ops = head.head_ops({}, labels, - tf.contrib.learn.ModeKeys.EVAL, - _noop_train_op, logits=predictions) - self.assertIsNone(model_fn_ops.train_op) + def testBinarySVMEvalMode(self): + head = head_lib._binary_svm_head() + with tf.Graph().as_default(), tf.Session(): + predictions = tf.constant(self._predictions) + labels = tf.constant(self._labels) + model_fn_ops = head.head_ops({}, labels, + tf.contrib.learn.ModeKeys.EVAL, + _noop_train_op, logits=predictions) + self.assertIsNone(model_fn_ops.train_op) + _assert_no_variables(self) + expected_loss = np.average(self._expected_losses) + _assert_metrics(self, expected_loss, { + "accuracy": 1., + "loss": expected_loss, + }, model_fn_ops) def testBinarySVMWithLabelName(self): label_name = "my_label" @@ -407,31 +553,30 @@ def testBinarySVMWithLabelName(self): model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=predictions) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual( - np.average(self._expected_losses), model_fn_ops.loss.eval()) - - model_fn_ops = head.head_ops({}, labels, - tf.contrib.learn.ModeKeys.EVAL, - _noop_train_op, logits=predictions) - self.assertIsNone(model_fn_ops.train_op) + expected_loss = np.average(self._expected_losses) + _assert_metrics(self, expected_loss, { + "accuracy": 1., + "loss": expected_loss, + }, model_fn_ops) def testBinarySVMWithWeights(self): head = head_lib._binary_svm_head(weight_column_name="weights") with tf.Graph().as_default(), tf.Session(): predictions = tf.constant(self._predictions) labels = tf.constant(self._labels) - weights = (7.0, 11.0) + weights = (7., 11.) features = {"weights": tf.constant(weights)} model_fn_ops = head.head_ops(features, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=predictions) - self._assert_metrics(model_fn_ops) _assert_no_variables(self) - self.assertAlmostEqual( - np.sum(np.multiply(weights, self._expected_losses)) / 2.0, - model_fn_ops.loss.eval()) + expected_weighted_sum = np.sum(np.multiply( + weights, self._expected_losses)) + _assert_metrics(self, expected_weighted_sum / len(weights), { + "accuracy": 1., + "loss": expected_weighted_sum / np.sum(weights), + }, model_fn_ops) def testBinarySVMWithCenteredBias(self): head = head_lib._binary_svm_head(enable_centered_bias=True) @@ -441,7 +586,6 @@ def testBinarySVMWithCenteredBias(self): model_fn_ops = head.head_ops({}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op, logits=predictions) - self._assert_metrics(model_fn_ops) _assert_variables(self, expected_global=( "centered_bias_weight:0", "centered_bias_weight/Adagrad:0", @@ -449,8 +593,11 @@ def testBinarySVMWithCenteredBias(self): "centered_bias_weight:0", )) tf.global_variables_initializer().run() - self.assertAlmostEqual( - np.average(self._expected_losses), model_fn_ops.loss.eval()) + expected_loss = np.average(self._expected_losses) + _assert_metrics(self, expected_loss, { + "accuracy": 1., + "loss": expected_loss, + }, model_fn_ops) def _noop_train_op(unused_loss): diff --git a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py index 8df08e507fed33..10ac888eca7a0f 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py @@ -19,10 +19,16 @@ class MetricKey(object): + """Metric key strings.""" LOSS = "loss" AUC = "auc" + CLASS_AUC = "auc/class%d" PREDICTION_MEAN = "labels/prediction_mean" + CLASS_PREDICTION_MEAN = "labels/prediction_mean/class%d" + CLASS_LOGITS_MEAN = "labels/logits_mean/class%d" + CLASS_PROBABILITY_MEAN = "labels/probability_mean/class%d" LABEL_MEAN = "labels/actual_label_mean" + CLASS_LABEL_MEAN = "labels/actual_label_mean/class%d" ACCURACY = "accuracy" ACCURACY_BASELINE = "accuracy/baseline_label_mean" ACCURACY_MEAN = "accuracy/threshold_%f_mean" diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py index a4df7ba658c724..1c404903e53fc5 100644 --- a/tensorflow/contrib/learn/python/learn/metric_spec.py +++ b/tensorflow/contrib/learn/python/learn/metric_spec.py @@ -194,6 +194,9 @@ def _get_dict(name, dict_or_tensor, key): raise ValueError('MetricSpec with ' + name + '_key specified' ' requires ' + name + 's dict, got %s' % dict_or_tensor) + if key not in dict_or_tensor: + raise KeyError( + 'Key \'%s\' missing from %s.' % (key, dict_or_tensor.keys())) return dict_or_tensor[key] else: if isinstance(dict_or_tensor, dict):