function [ euclideanDistance, squaredDistance, CI, CIDistribution ] = cvDistance( class1, class2, subtractMean, CIMode, CIAlpha, CIResamples )
    %This function estimates the distance between the means of two
    %distributions using cross-validation.
    %
    %inputs:
    %  class1 and class2 are N x D matrices, where D is the number of
    %  dimensions and N is the number of samples. If class1 and class2 have
    %  different numbers of samples, this function will use a slower method
    %  that considers all possible pairings from each class for the
    %  held-out set.
    %  If subtractMean is true, each difference vector is centered before
    %  computing the size of the difference (default is false).
    %  CIMode can be 'none', 'bootCentered', 'bootPercentile', or 'jackknife'.
    %  CIAlpha sets the coverage of the confidence interval to
    %  100*(1-CIAlpha) percent.
    %  CIResamples sets the number of bootstrap resamples, if using a
    %  bootstrap mode (as opposed to jackknife).
    %
    %outputs:
    %  euclideanDistance and squaredDistance are cross-validated estimates
    %  of the euclidean and squared distance between the two class means.
    %  The first column of CI corresponds to euclidean distance, the second
    %  column corresponds to squared distance.
    %  CIDistribution is the distribution of bootstrap statistics or
    %  jackknife leave-one-out statistics.
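    %
    %example usage (a minimal sketch; the data and parameter values below
    %are illustrative placeholders, not part of the original file):
    %   class1 = randn(50,10) + 0.5;   %50 samples in 10 dimensions
    %   class2 = randn(50,10);
    %   [eucDist, sqDist, CI] = cvDistance(class1, class2, false, 'bootPercentile', 0.05, 1000);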
    if nargin<3
        subtractMean = false;
    end
    if nargin<4
        CIMode = 'none';
    end
    if nargin<5
        CIAlpha = 0.05;
    end
    if nargin<6
        CIResamples = 10000;
    end
    classSizes = [size(class1,1), size(class2,1)];
    if classSizes(1)==classSizes(2)
        %if class sizes are equal, run a special fast implementation:
        %hold out one paired sample at a time and dot its difference vector
        %with the mean difference of all remaining samples, so the two
        %factors are computed from disjoint data
        squaredDistEstimates = zeros(size(class1,1),1);
        for x=1:size(class1,1)
            bigSetIdx = [1:(x-1),(x+1):size(class1,1)];
            smallSetIdx = x;
            meanDiff_bigSet = mean(class1(bigSetIdx,:)-class2(bigSetIdx,:));
            meanDiff_smallSet = class1(smallSetIdx,:)-class2(smallSetIdx,:);
            if subtractMean
                squaredDistEstimates(x) = (meanDiff_bigSet-mean(meanDiff_bigSet))*(meanDiff_smallSet-mean(meanDiff_smallSet))';
            else
                squaredDistEstimates(x) = meanDiff_bigSet*meanDiff_smallSet';
            end
        end
    else
        %if class sizes are unequal, we have to split the data into unequal
        %folds and hold out one fold at a time
        nFolds = min(classSizes);
        foldIdxPerClass = getFoldedIdx( classSizes, nFolds );
        squaredDistEstimates = zeros(nFolds,1);
        for x=1:nFolds
            bigSetIdx_1 = horzcat(foldIdxPerClass{1,[1:(x-1), (x+1):nFolds]});
            smallSetIdx_1 = foldIdxPerClass{1,x};
            bigSetIdx_2 = horzcat(foldIdxPerClass{2,[1:(x-1), (x+1):nFolds]});
            smallSetIdx_2 = foldIdxPerClass{2,x};
            meanDiff_bigSet = mean(class1(bigSetIdx_1,:),1) - mean(class2(bigSetIdx_2,:),1);
            meanDiff_smallSet = mean(class1(smallSetIdx_1,:),1) - mean(class2(smallSetIdx_2,:),1);
            if subtractMean
                squaredDistEstimates(x) = (meanDiff_bigSet-mean(meanDiff_bigSet))*(meanDiff_smallSet-mean(meanDiff_smallSet))';
            else
                squaredDistEstimates(x) = meanDiff_bigSet*meanDiff_smallSet';
            end
        end
    end
    %average the per-fold estimates; the cross-validated squared distance
    %estimate can be negative, so the euclidean distance keeps its sign
    squaredDistance = mean(squaredDistEstimates);
    euclideanDistance = sign(squaredDistance)*sqrt(abs(squaredDistance));

    %compute confidence interval if requested
    if ~strcmp(CIMode, 'none')
        wrapperFun = @(x,y)(ciWrapper(x,y,subtractMean));
        [CI, CIDistribution] = cvCI([euclideanDistance, squaredDistance], wrapperFun, {class1, class2}, CIMode, CIAlpha, CIResamples);
    else
        CI = [];
        CIDistribution = [];
    end
end

function output = ciWrapper(class1, class2, subtractMean)
    %helper passed to cvCI so the statistic can be recomputed on resampled data
    [ euclideanDistance, squaredDistance ] = cvDistance( class1, class2, subtractMean );
    output = [euclideanDistance, squaredDistance];
end