-
Notifications
You must be signed in to change notification settings - Fork 9
/
TreeClassificationVoter_scenesegmentation.py
158 lines (138 loc) · 5.45 KB
/
TreeClassificationVoter_scenesegmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
###############################################################################
# modified to account for multidimensional input images
###############################################################################
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils.validation import (
check_X_y,
check_array,
check_is_fitted,
)
from sklearn.utils.multiclass import check_classification_targets
from .base import BaseClassificationVoter
class MultidimensionalTreeClassificationVoter(BaseClassificationVoter):
    """
    A class used to vote on data transformed under a tree, which inherits from
    the BaseClassificationVoter class in base.py.

    Parameters
    ----------
    kappa : float
        coefficient for finite sample correction
        If set to default (``np.inf``), the correction constant is zero, so no
        finite sample correction is performed.
    classes : list, default=[]
        list of all possible output label values
        NOTE(review): posterior columns follow the sorted order returned by
        ``np.unique(y)``; ``classes`` is presumably expected in that same
        order -- confirm against callers.

    Attributes
    ----------
    missing_label_indices_ : list
        a (potentially empty) list of indices into ``classes`` whose label
        values are missing in the latest ``fit`` function call
    uniform_posterior_ : ndarray of shape (n_fit_classes,)
        the uniform posterior over the labels seen in the latest ``fit``
        call; used for leaf ids that were not seen during fitting
    fit_classes_ : ndarray of shape (n_fit_classes,)
        the sorted unique labels seen in the latest ``fit`` call
    leaf_to_posterior_ : dict
        maps each leaf id seen in ``fit`` to its class posterior vector
    """

    # The list default is kept for interface stability; it is never mutated
    # because np.asarray builds a fresh array from it.
    def __init__(self, kappa=np.inf, classes=[]):
        self.kappa = kappa
        self.classes = np.asarray(classes)

    def fit(self, X, y):
        """
        Fits transformed data X given corresponding class labels y.

        Parameters
        ----------
        X : array-like whose first axis indexes samples
            the transformed input data (leaf ids); each sample may itself be
            multidimensional, in which case every entry of a sample that falls
            in a leaf contributes that sample's label once
        y : array-like indexed by sample along its first axis
            the class labels

        Returns
        -------
        self : MultidimensionalTreeClassificationVoter
            The object itself.
        """
        check_classification_targets(y)

        # Coerce once so boolean masks and fancy indexing also work for lists.
        X = np.asarray(X)
        y = np.asarray(y)

        # Sorted unique labels; computed once instead of inside every loop.
        fit_labels = np.unique(y)
        num_fit_classes = len(fit_labels)
        self.fit_classes_ = fit_labels

        self.missing_label_indices_ = []
        if self.classes.size != 0 and num_fit_classes < len(self.classes):
            self.missing_label_indices_ = [
                idx
                for idx, label in enumerate(self.classes)
                if label not in fit_labels
            ]

        self.uniform_posterior_ = np.ones(num_fit_classes) / num_fit_classes

        self.leaf_to_posterior_ = {}
        for leaf_id in np.unique(X):
            # First-axis (sample) indices of every entry that landed in this leaf.
            idxs_in_leaf = np.where(X == leaf_id)[0]
            labels_in_leaf = y[idxs_in_leaf]
            class_counts = np.array(
                [np.count_nonzero(labels_in_leaf == label) for label in fit_labels]
            )
            posteriors = np.nan_to_num(class_counts / np.sum(class_counts))
            posteriors = self._finite_sample_correction(
                posteriors, len(idxs_in_leaf), self.kappa
            )
            self.leaf_to_posterior_[leaf_id] = posteriors

        return self

    def predict_proba(self, X):
        """
        Returns the posterior probabilities of each class for data X.

        Parameters
        ----------
        X : iterable of leaf ids, one per sample
            the transformed input data

        Returns
        -------
        y_proba_hat : ndarray of shape [n_samples, n_classes]
            posteriors per example; leaf ids not seen in ``fit`` receive the
            uniform posterior, and labels listed in ``classes`` but absent
            from ``fit`` receive a zero column

        Raises
        ------
        NotFittedError
            When the model is not fitted.
        """
        check_is_fitted(self)

        leaf_to_posterior = self.leaf_to_posterior_
        uniform = self.uniform_posterior_
        # Single hash lookup per sample instead of scanning a rebuilt key list.
        votes = [leaf_to_posterior.get(x, uniform) for x in X]

        if votes:
            votes_per_example = np.array(votes)
        else:
            # Keep the result 2-D for empty input so the column insertion
            # below and argmax(axis=1) in predict still work.
            votes_per_example = np.empty((0, len(uniform)))

        # Indices are ascending, so inserting one column at a time places each
        # zero column at its final position in the ``classes`` ordering.
        for i in self.missing_label_indices_:
            new_col = np.zeros(votes_per_example.shape[0])
            votes_per_example = np.insert(votes_per_example, i, new_col, axis=1)

        return votes_per_example

    def predict(self, X):
        """
        Returns the predicted class labels for data X.

        Parameters
        ----------
        X : iterable of leaf ids, one per sample
            the transformed input data

        Returns
        -------
        y_hat : ndarray of shape [n_samples]
            predicted class label per example

        Raises
        ------
        NotFittedError
            When the model is not fitted.
        """
        proba = self.predict_proba(X)
        # With the default empty ``classes`` there is nothing to index into,
        # so fall back to the labels observed during fit (same column order).
        labels = self.classes if self.classes.size != 0 else self.fit_classes_
        return labels[np.argmax(proba, axis=1)]

    def _finite_sample_correction(self, posteriors, num_points_in_partition, kappa):
        """
        Encourage posteriors to approach uniform when there is low data through a finite sample correction.

        Zero entries are replaced by ``1 / (kappa * num_points_in_partition)``
        and the vector is renormalised to sum to one. The input array is
        modified in place and also returned.

        Parameters
        ----------
        posteriors : float ndarray of shape [n_classes]
            posterior of each class for a single partition (leaf)
        num_points_in_partition : int
            number of samples in this particular partition
        kappa : float
            coefficient for finite sample correction

        Returns
        -------
        y_proba_hat : ndarray of shape [n_classes]
            corrected, renormalised posteriors
        """
        correction_constant = 1 / (kappa * num_points_in_partition)
        posteriors[posteriors == 0] = correction_constant
        posteriors /= posteriors.sum()
        return posteriors