import logging
from typing import Dict, Optional
from collections import defaultdict

import numpy as np
import clearml

from crbr.evaluation.evaluations import Evaluation, AccumulatedEvaluation
from crbr.evaluation.classification_outcome import ClassificationOutcome
from crbr.utils.evaluation_utils import compute_metrics_from_cm

logger = logging.getLogger(__name__)


class ClassificationEvaluation(Evaluation):
    """
    Basic classification evaluation class for comparing two multi-class binary vectors. Returns
    a dictionary with each class label as key and a ClassificationOutcome object (representing
    the FP/FN/TP/TN outcome for that class) as value.
    """

    NAME = "classification"

    def __init__(self, **eval_params):
        """
        :param class_label_map: An optional dictionary mapping class indices to class names.
            This changes the keys in the metrics dict returned by self.evaluate, e.g. for
            easier readability. Example: {0: "no tumour", 1: "tumour"}.
        """
        super().__init__(**eval_params)

    def evaluate(self, true_array: np.ndarray, predicted_array: np.ndarray) -> dict:
        """
        Evaluate a shape [N] multi-class binary vector against a shape [N] multi-class binary
        predicted vector.

        :param true_array: The true multi-class binary array
        :param predicted_array: The predicted multi-class binary array
        :return: A dictionary mapping each class label to its ClassificationOutcome.
        """
        # Check shapes
        if len(true_array) != len(predicted_array):
            raise ValueError(
                "true_array and predicted_array must have the same shape, "
                f"got {true_array.shape} and {predicted_array.shape}"
            )
        # Cast to bool (mostly for input validation purposes)
        true_array = true_array.astype(bool)
        predicted_array = predicted_array.astype(bool)
        # Each index of the binary vector corresponds to one class label
        classes = np.arange(len(true_array))
        metrics_dict = {}
        for class_, true, predicted in zip(classes, true_array, predicted_array):
            outcome = ClassificationOutcome.from_observations(
                observed=true, predicted=predicted
            )
            metrics_dict[class_] = outcome
        return metrics_dict
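
    # Illustrative call (hypothetical arrays, not part of the original module; the outcome
    # values are whatever ClassificationOutcome.from_observations returns):
    #
    #     evaluation = ClassificationEvaluation()
    #     true = np.array([1, 0, 1])   # classes 0 and 2 present
    #     pred = np.array([1, 1, 0])   # class 0: TP, class 1: FP, class 2: FN
    #     outcomes = evaluation.evaluate(true, pred)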


class AccumulatedClassificationEvaluation(
    ClassificationEvaluation, AccumulatedEvaluation
):
    """
    Accumulated classification evaluation class. Mirrors the SegmentationEvaluation class, but
    each call to self.evaluate updates an internal confusion matrix. Metrics are finally
    computed in self.get_accumulated_evaluation, which calls the
    self.compute_metrics_from_binary_cm method on each accumulated CM. The CMs may be reset
    with self.reset_accumulator.

    See AccumulatedEvaluation for more details.
    """

    NAME = "accumulated_classification"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # One binary (2x2) confusion matrix per class label, created lazily on first access
        self.accumulated_cms = defaultdict(lambda: np.zeros((2, 2), dtype=int))
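
    # Because accumulated_cms is a defaultdict, indexing an unseen class label creates a
    # zeroed 2x2 matrix on the fly, e.g. (illustrative, not part of the original module):
    #
    #     acc_eval = AccumulatedClassificationEvaluation()
    #     acc_eval.accumulated_cms[3]   # -> array([[0, 0], [0, 0]])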

    def _reset_accumulator(self):
        """
        Resets the internal confusion matrix counts by filling them with zeros.
        """
        for confusion_matrix in self.accumulated_cms.values():
            confusion_matrix.fill(0)

    def compute_metrics_from_binary_cm(self, confusion_matrix: np.ndarray) -> dict:
        """
        Computes the metrics from a binary confusion matrix.

        :param confusion_matrix: A binary confusion matrix of shape [2, 2], where cm[0, 0] is TN,
            cm[0, 1] is FP, cm[1, 0] is FN, and cm[1, 1] is TP (unless otherwise specified in the
            crbr.evaluation.classification_outcome.ClassificationOutcome class).
        :return: A dictionary of metrics for a single class, Dict[str, float]
        """
        metrics_df = compute_metrics_from_cm(confusion_matrix, as_dict=False, add_avg_column=False)
        metric_names = list(metrics_df.T.columns)
        metric_values = metrics_df.iloc[:, 1].values
        return dict(zip(metric_names, metric_values))
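
    # Illustrative call on a toy accumulated CM (hypothetical counts; the metric names in the
    # result depend entirely on crbr.utils.evaluation_utils.compute_metrics_from_cm):
    #
    #     cm = np.array([[50, 3],    # [TN, FP]
    #                    [2, 45]])   # [FN, TP]
    #     per_class_metrics = acc_eval.compute_metrics_from_binary_cm(cm)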

    def log_accumulated_evaluation(
        self, logger: clearml.Logger, accumulated_metrics: Optional[dict] = None, round: int = 2
    ) -> None:
        """
        Log the accumulated evaluation.

        :param logger: The ClearML logger to log to.
        :param accumulated_metrics: The accumulated metrics to log. If not specified, will call
            self.get_accumulated_evaluation.
        :param round: The number of decimal places to round to.
        """
        accumulated_metrics = accumulated_metrics or self.get_accumulated_evaluation()
        # Report one confusion matrix table per class
        keys = [k for k in accumulated_metrics.keys() if k.startswith('confusion_matrix')]
        classes = [k.split('_')[-1] for k in keys]
        classes = self.safe_apply_label_map(classes)
        for key, class_ in zip(keys, classes):
            # Get the CM as a DataFrame table without applying the label map
            # (these are multi-label binary CMs)
            cm = self.cm_to_table(
                accumulated_metrics[key],
                apply_label_map=False,
                multi_label_binary=True
            ).round(round)
            logger.report_table(
                title="Accumulated Classification Evaluations",
                series=f"Multi-label CM for class: {class_}",
                table_plot=cm,
            )
        # Report the metrics table
        logger.report_table(
            title="Accumulated Classification Evaluations",
            series="Metrics",
            table_plot=self.metrics_to_table(accumulated_metrics).round(round),
        )
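
    # Illustrative ClearML sketch (hypothetical project/task names; assumes a configured
    # ClearML setup, not part of the original module):
    #
    #     task = clearml.Task.init(project_name="CMB-eval", task_name="classification")
    #     acc_eval.log_accumulated_evaluation(logger=task.get_logger())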

    def get_accumulated_evaluation(self) -> dict:
        """
        Computes the metrics from the accumulated confusion matrices.

        :return: A dictionary of metrics for all classes, Dict[str, float].
        """
        class_wise_metrics = {}
        for class_label, confusion_matrix in self.accumulated_cms.items():
            # Compute metrics for the current class
            class_metrics = self.compute_metrics_from_binary_cm(confusion_matrix)
            # Add metrics keyed as {metric_name}_{class_label}
            for metric_name, metric_value in class_metrics.items():
                class_wise_metrics[f"{metric_name}_{class_label}"] = metric_value
            # Add the confusion matrix itself
            class_wise_metrics[f'confusion_matrix_{class_label}'] = confusion_matrix.copy()
        return class_wise_metrics

    def evaluate(self, true_array: np.ndarray, predicted_array: np.ndarray) -> None:
        """
        Evaluate a shape [N] multi-class binary vector against a shape [N] multi-class binary
        predicted vector, but cache the results in the internally stored multi-label confusion
        matrices.

        :param true_array: The true multi-class binary array
        :param predicted_array: The predicted multi-class binary array
        :return: None
        """
        evaluation = super().evaluate(true_array=true_array, predicted_array=predicted_array)
        for class_label, outcome in evaluation.items():
            self.accumulated_cms[class_label][outcome.cm_indices] += 1
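

# A minimal end-to-end sketch (not part of the original module). It assumes the crbr package
# is importable and that the Evaluation base class can be constructed without arguments:
if __name__ == "__main__":
    acc_eval = AccumulatedClassificationEvaluation()
    rng = np.random.default_rng(seed=0)
    for _ in range(10):  # e.g. one shape-[3] multi-class binary vector per case
        true_vec = rng.integers(0, 2, size=3)
        pred_vec = rng.integers(0, 2, size=3)
        acc_eval.evaluate(true_vec, pred_vec)  # updates the per-class 2x2 CMs in place
    metrics = acc_eval.get_accumulated_evaluation()
    print(metrics["confusion_matrix_0"])  # accumulated 2x2 CM for class 0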