single_softmax_cost.m
function [cost, grad] = single_softmax_cost(theta, x, y, lambda)
% Takes parameters theta, a data point x with its label y, and computes the
% cost and gradient of the softmax classifier at that point:
%   cost = -log p(y | x; theta) + 0.5 * lambda * ||theta||_2^2
%
% In:
% theta - parameters, unrolled into a vector of length
%   numClasses * numFeatures; reshaped internally so that
%   theta \in R[numClasses, numFeatures]
% x - data point (column vector of features)
% y - label of x
% lambda - weight decay parameter
%
% Out:
% cost - cost of the softmax regression at the given point theta
% grad - gradient of the softmax regression at the given point theta
%
% Written by: Mateusz Malinowski
% Email: [email protected]
%
nfeatures = length(x);
% roll the parameters back into a [numClasses, numFeatures] matrix
theta = reshape(theta, [], nfeatures);
% take the parameter row corresponding to the current class
classTheta = theta(y, :);
% compute classProb = p(y | x; theta); subtracting the class score
% classTheta * x from every score before exponentiating keeps the
% exponentials numerically stable without changing the ratio
thetaTimesX = theta * x;
sumexptheta = sum(exp(thetaTimesX - classTheta * x));
classProb = 1.0 / sumexptheta;
regularizationTerm = 0.5 * lambda * norm(theta(:), 2)^2;
dataTerm = -log(classProb);
cost = dataTerm + regularizationTerm;
if nargout < 2
% only the cost was requested, so skip the gradient computation
return;
else
% class probabilities p(j | x; theta) for all classes j; subtracting the
% maximum score before exponentiating prevents overflow
mymaxi = max(thetaTimesX);
sumexptheta = sum(exp(thetaTimesX - mymaxi));
probs = exp(thetaTimesX - mymaxi) / sumexptheta;
% gradient of the data term is (p(j | x; theta) - 1{j = y}) * x'
thetagrad = -probs;
thetagrad(y) = 1 - probs(y);
thetagrad = -thetagrad * x' + lambda * theta;
% roll the gradient matrix into a vector for minFunc
grad = thetagrad(:);
end
end
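
A minimal usage sketch follows, assuming the column-vector input and 1-based integer label the function expects. All names and values here (numClasses, numFeatures, lambda, the finite-difference step) are illustrative, not taken from the repository:

% --- usage sketch (illustrative values, not from the repository) ---
numClasses = 4; numFeatures = 10;
x = randn(numFeatures, 1);       % one data point as a column vector
y = 3;                           % its label, an integer in 1..numClasses
lambda = 1e-4;                   % weight decay strength
theta0 = 0.005 * randn(numClasses * numFeatures, 1);  % unrolled parameters

[cost, grad] = single_softmax_cost(theta0, x, y, lambda);

% finite-difference check of a single gradient component; each call below
% requests only the cost, so the gradient branch is skipped
epsilon = 1e-6; i = 1;
e = zeros(size(theta0)); e(i) = epsilon;
numGrad = (single_softmax_cost(theta0 + e, x, y, lambda) - ...
           single_softmax_cost(theta0 - e, x, y, lambda)) / (2 * epsilon);
% numGrad should agree with grad(i) to several decimal places

Such a check is the usual way to validate a hand-written cost/gradient pair before handing it to an optimizer like minFunc, which the final comment in the function suggests is the intended consumer.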