-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_golden.m
37 lines (35 loc) · 917 Bytes
/
load_golden.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
function WTrueNeighbor=golden_matrix(golden_file)
% GOLDEN_MATRIX  Build a 0/1 query-by-sentence matrix from a golden file.
%
%   WTrueNeighbor = golden_matrix(golden_file)
%
% Input:
%   golden_file - path to a text file with one line per query. Each line
%                 holds space-separated sentence indices, e.g.
%                     * *
%                     * *
%                     ...
%                 where each * is the index of a sentence that has more than
%                 80% similarity to the corresponding query. The file is
%                 generated by gen_golden.py in the LSH program.
%
% Output:
%   WTrueNeighbor - matrix initialised to zeros(query_num, data_num);
%                   WTrueNeighbor(q, s) is set to 1 when index s appears on
%                   line q of the file. If the file cannot be opened, the
%                   open error message is displayed and the all-zero matrix
%                   is returned.
%
% NOTE(review): indices are used directly as MATLAB (1-based) column
% subscripts; confirm that gen_golden.py does not emit 0-based indices.
data_num = 9901;
query_num = 2045;
WTrueNeighbor = zeros(query_num, data_num);
whos WTrueNeighbor
[fid, message] = fopen(golden_file, 'r');
if fid == -1
display(message)
return
end
% Close the file on every exit path, including errors raised while parsing.
closeFile = onCleanup(@() fclose(fid));
idx = 0;
while ~feof(fid)
aline = fgetl(fid);
% fgetl returns numeric -1 (not a char array) when nothing is left to read,
% e.g. for an empty file; stop instead of passing -1 to regexp.
if ~ischar(aline)
break
end
idx = idx+1;
if mod(idx, 1000) == 0
display([num2str(idx),' queries finished.'])
end
tokens = regexp(aline, ' ', 'split');
% The last token is discarded, as in the original implementation.
% NOTE(review): this presumably accounts for a trailing space written by
% the generator; confirm, since a line without one loses its last index.
tokens = tokens(1:end-1);
% Skip empty tokens produced by consecutive spaces.
tokens = tokens(~cellfun('isempty', tokens));
if isempty(tokens)
continue
end
% str2double does not evaluate its input (unlike str2num) and yields NaN
% for anything that is not a number.
sen_idx = str2double(tokens);
if any(isnan(sen_idx))
error('golden_matrix:badToken', ...
'Non-numeric sentence index on line %d of %s.', idx, golden_file);
end
WTrueNeighbor(idx, sen_idx) = 1;
end