-
Notifications
You must be signed in to change notification settings - Fork 15
/
Hcs.m
267 lines (237 loc) · 7.55 KB
/
Hcs.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
function Hcs = Hcs(A, Pjoint)
% HCS  Redundant entropy between a set of sources, calculated from
% pointwise common surprisal.
%
%   H = Hcs(A, Pjoint)
%
%   A      - cell array of sources (1 to 3 supported); each source is a
%            row vector of variable indices, i.e. axis numbers of Pjoint
%   Pjoint - full joint distribution (S last axis); every axis of Pjoint
%            is treated as a variable that a source may reference
%
% For three sources the pointwise terms are evaluated on the distribution
% returned by marg_maxent2 (max ent distribution preserving pairwise
% marginals). Three sources with six variable references in total are
% delegated to Hcs_fulljoint instead.
%
% The result is the sum, over all joint source symbols, of p .* cs where
% cs is the local co-information of the source surprisals when that is
% positive and zero otherwise (for a single source cs is the surprisal
% itself). NaN terms such as 0 * Inf are skipped.
%
% Raises an error when A is empty or holds more than 3 sources.
%
% NOTE(review): marginals are formed with squeeze, so a variable with a
% single symbol would lose its axis - presumably every variable has at
% least two symbols; confirm against callers.

vars = 1:ndims(Pjoint);   % one variable per axis of Pjoint
NA = length(A);
if NA<1
    % previously failed with an obscure index-0 error on PA(NA)
    error('Hcs: at least one source required')
end
if NA>3
    error('Hcs: only 3 sources supported')
end

if NA==3 && sum(cellfun(@length,A))==6
    % don't need to solve pairwise maxent - no third order interactions
    % possible because of structure of multivariate atoms
    Hcs = Hcs_fulljoint(A, Pjoint);
    return
end

PA(NA).Pa = [];     % initialize struct
Am = zeros(1,NA);   % number of symbols in each source

% sort variables within each source
A = cellfun(@sort, A, 'UniformOutput',false);

% build distributions for each source
for ai=1:NA
    % marginalise out every variable that is not part of this source
    sumover = setdiff(vars, A{ai});
    Pa = Pjoint;
    for ii=1:length(sumover)
        Pa = sum(Pa, sumover(ii));
    end
    % distribution P(a), flattened to a column over joint source symbols
    Pa = squeeze(Pa);
    Pa = Pa(:);
    PA(ai).Pa = Pa;
    Am(ai) = size(Pa,1);
end

% build pairwise joint source distributions
if NA>1
    pairs = nchoosek(1:NA,2);
    Npair = size(pairs,1);
    Ppair(Npair).Paa = []; % initialize struct
    for pidx=1:Npair
        thsA = [A{pairs(pidx,1)} A{pairs(pidx,2)}];
        Nv = length(thsA);
        Nv1 = length(A{pairs(pidx,1)});

        % collapse variables we don't need
        sumover = setdiff(vars, thsA);
        Paa = Pjoint;
        for ii=1:length(sumover)
            Paa = sum(Paa, sumover(ii));
        end
        Paa = squeeze(Paa);

        % reorder axes to match order of unique variables in this pair of
        % sources
        % order we want (first appearance in the concatenated sources)
        Aunq = unique(thsA,'stable');
        % order we have (axes are in ascending variable order)
        [~, Aunqsrtidx] = sort(Aunq);
        % invert order
        [~, Aidx] = sort(Aunqsrtidx);
        Paa = permute(Paa, Aidx);

        % relabel variables by their axis position 1..length(Aunq);
        % ismember's index output is the position of each entry in Aunq
        [~, thsA] = ismember(thsA, Aunq);
        Aunq = unique(thsA, 'stable');

        % copy duplicate variables as required
        uniquevar_i = 1;
        for allvar_i=1:Nv
            if (uniquevar_i>length(Aunq)) || (thsA(allvar_i) ~= Aunq(uniquevar_i))
                % need to insert a duplicate variable
                var_needed = thsA(allvar_i);
                copy_from = find(thsA==var_needed,1);
                Paa = copy_var(Paa, copy_from, allvar_i);
            else
                % axis order is correct
                uniquevar_i = uniquevar_i + 1;
            end
        end

        % joint distribution over all variables in both sources,
        % now should have correct variable axes in correct order
        % collapse A1
        sz = size(Paa);
        Paa = reshape(Paa, [prod(sz(1:Nv1)) sz(Nv1+1:end)]);
        % collapse A2
        sz = size(Paa);
        Paa = reshape(Paa, [sz(1) prod(sz(2:end))]);
        Ppair(pidx).Paa = Paa;
    end
end

% build triplewise joint source distribution
if NA==3
    thsA = [A{1} A{2} A{3}];
    Nv = length(thsA);
    Nv1 = length(A{1});
    Nv2 = length(A{2});

    % collapse variables we don't need
    sumover = setdiff(vars, thsA);
    Paaa = Pjoint;
    for ii=1:length(sumover)
        Paaa = sum(Paaa, sumover(ii));
    end
    Paaa = squeeze(Paaa);

    % reorder axes to match order of unique variables in this triple of
    % sources
    % order we want
    Aunq = unique(thsA,'stable');
    % order we have
    [~, Aunqsrtidx] = sort(Aunq);
    % invert order
    [~, Aidx] = sort(Aunqsrtidx);
    % The permuted array must be stored back into Paaa: the duplicate
    % copying and reshapes below assume the axes are in Aunq order.
    % (Previously the result went to an unused variable, leaving the axes
    % in sorted order.) The extra trailing index only adds a singleton
    % dimension and keeps the order vector long enough for permute.
    Paaa = permute(Paaa, [Aidx length(Aunq)+1]);

    % relabel variables by their axis position 1..length(Aunq)
    [~, thsA] = ismember(thsA, Aunq);
    Aunq = unique(thsA, 'stable');

    % copy duplicate variables as required
    uniquevar_i = 1;
    for allvar_i=1:Nv
        if (uniquevar_i>length(Aunq)) || (thsA(allvar_i) ~= Aunq(uniquevar_i))
            % need to insert a duplicate variable
            var_needed = thsA(allvar_i);
            copy_from = find(thsA==var_needed,1);
            Paaa = copy_var(Paaa, copy_from, allvar_i);
        else
            % axis order is correct
            uniquevar_i = uniquevar_i + 1;
        end
    end

    % joint distribution over all variables
    % now should have correct variable axes in correct order
    % collapse A1
    sz = size(Paaa);
    Paaa = reshape(Paaa, [prod(sz(1:Nv1)) sz(Nv1+1:end)]);
    % collapse A2
    sz = size(Paaa);
    Paaa = reshape(Paaa, [sz(1) prod(sz(2:Nv2+1)) sz(Nv2+2:end)]);
    % collapse A3
    sz = size(Paaa);
    Paaa = reshape(Paaa, [sz(1:2) prod(sz(3:end))]);

    P2 = marg_maxent2(Paaa);

    % overwrite pairwise and single distributions to be consistent with
    % the maxent solution
    PA(1).Pa = squeeze(sum(sum(P2,2),3));
    PA(2).Pa = squeeze(sum(sum(P2,1),3));
    PA(3).Pa = squeeze(sum(sum(P2,1),2));
    for pidx=1:Npair
        sumover = setdiff(1:3, [pairs(pidx,1) pairs(pidx,2)]);
        Paa = P2;
        for ii=1:length(sumover)
            Paa = sum(Paa, sumover(ii));
        end
        Ppair(pidx).Paa = squeeze(Paa);
    end
end

% pointwise common surprisal
cs = zeros([Am 1]);
if NA==1
    for a1=1:Am(1)
        cs(a1) = -log2( PA(1).Pa(a1) );
    end
    cs = PA(1).Pa .* cs;
elseif NA==2
    for a1=1:Am(1)
        s1 = -log2( PA(1).Pa(a1) );
        for a2=1:Am(2)
            s2 = -log2( PA(2).Pa(a2) );
            sj = -log2( Ppair(1).Paa(a1,a2) );
            % local co-information (entropy overlap)
            coinfo = (s1 + s2 - sj);
            % local entropy always positive
            % if local information positive then have overlap;
            % otherwise it is misinformation, not counted as overlapping
            % entropy (entry stays zero)
            if coinfo>0
                cs(a1,a2) = coinfo;
            end
        end
    end
    cs = Ppair(1).Paa .* cs;
elseif NA==3
    for a1=1:Am(1)
        s1 = -log2( PA(1).Pa(a1) );
        for a2=1:Am(2)
            s2 = -log2( PA(2).Pa(a2) );
            sj12 = -log2( Ppair(1).Paa(a1,a2) );
            for a3=1:Am(3)
                s3 = -log2( PA(3).Pa(a3) );
                sj13 = -log2( Ppair(2).Paa(a1,a3) );
                sj23 = -log2( Ppair(3).Paa(a2,a3) );
                sj123 = -log2( P2(a1,a2,a3) );
                % local co-information (entropy overlap)
                coinfo = (sj123 + s1 + s2 + s3 - sj12 - sj13 - sj23);
                % local entropy always positive
                % if local co-information positive then have overlap;
                % otherwise not counted as overlapping entropy
                if coinfo>0
                    cs(a1,a2,a3) = coinfo;
                end
            end
        end
    end
    cs = P2 .* cs;
end

% sum all terms, skipping NaNs (e.g. 0 * Inf from zero-probability symbols)
cs = cs(:);
Hcs = sum(cs(~isnan(cs)));
function Pnew = copy_var(P, var, newpos)
% COPY_VAR  Duplicate one axis of a joint distribution at a given position.
%
%   Pnew = copy_var(P, var, newpos) builds an array in which a copy of
%   axis VAR of P is inserted at axis position NEWPOS; the axes of P from
%   NEWPOS onwards move one place to the right. Each entry of P is stored
%   where the inserted subscript equals its subscript along VAR; every
%   other entry of Pnew is zero.
dimsIn = size(P);
nIn = numel(dimsIn);
lead = 1:newpos-1;      % axes that keep their position
trail = newpos:nIn;     % axes shifted right by the inserted one

% allocate the output with the duplicated axis spliced in
Pnew = zeros([dimsIn(lead) dimsIn(var) dimsIn(trail)]);

% subscripts of every element of P, one cell per input axis
subsIn = cell(1, nIn);
[subsIn{:}] = ind2sub(dimsIn, 1:numel(P));

% the inserted axis reuses the subscripts of axis VAR
subsOut = subsIn([lead var trail]);
linOut = sub2ind(size(Pnew), subsOut{:});
Pnew(linOut) = P(:);