-
Notifications
You must be signed in to change notification settings - Fork 1
/
oracles.py
68 lines (47 loc) · 2.19 KB
/
oracles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
This file contains oracle models for dataset generation described in Section 4.1
"""
import numpy as np
from scipy.spatial.distance import pdist, squareform
from itertools import permutations
import random
def data_oracle_generator(true_points, failure_prob=0.1):
    """
    Build a noiseless oracle for tuplewise ranking queries.

    A query is a sequence (a, b_1, ..., b_n) of indices into true_points.
    The oracle answers "which of b_1, ..., b_n is closer to a" by ordering
    the b_i from nearest to farthest from a in Euclidean distance.

    Input:
        true_points: ``true'' embedding coordinates for an object set,
            indexable by the entries of a query
        failure_prob: accepted for signature compatibility; it is not read
            by this generator, so the returned oracle never adds noise
    Returns:
        oracle: A function that accepts a query and returns a list whose
            first entry is a, followed by the b_i ranked by distance to a
    """
    def oracle(query):
        anchor = query[0]

        def distance_to_anchor(candidate):
            offset = true_points[anchor] - true_points[candidate]
            return np.sqrt(np.sum(offset ** 2))

        # list.sort is stable, so equidistant candidates keep query order.
        ranked = list(query[1:])
        ranked.sort(key=distance_to_anchor)
        return [anchor] + ranked

    return oracle
def plackett_luce_data_oracle_generator(true_points, P=0.95, failure_prob=0.05):
    """
    This method returns an oracle function that responds to tuplewise ranking queries
    of the form "which of b_1, ..., b_n is closer to a in the true embedding space"
    according to the Plackett-Luce model specified in Section 4.1

    Each candidate b_i is weighted by a Pareto-style density of its Euclidean
    distance d to a, alpha / (d + 1)**(alpha + 1). The ranking is drawn by
    repeatedly sampling one remaining candidate with probability proportional
    to its weight and removing it from the pool.

    Inputs:
        true_points: ``true'' embedding coordinates for an object set
        P: percentile encompassing all distances in true_points. Only used
            when failure_prob is None.
        failure_prob: when not None (the default is 0.05), P is replaced by
            1 - failure_prob. Pass failure_prob=None to supply P directly.
    Returns:
        oracle: A function that accepts a tuple and returns an oracle response
            ranking as a tuple (a, b_(1), ..., b_(n))

    NOTE: if every pairwise distance is zero, log(max distance + 1) is 0 and
    alpha is not finite; the oracle cannot sample in that case.
    """
    # Identity check: `!= None` would compare elementwise on numpy inputs.
    if failure_prob is not None:
        P = 1. - failure_prob

    all_distances = pdist(true_points)
    # Shape parameter chosen so that a fraction P of the distribution's mass
    # lies below the largest pairwise distance.
    alpha = -np.log(1 - P) / float(np.log(max(all_distances) + 1))

    def pareto(distance):
        return alpha / float((distance + 1) ** (alpha + 1))

    def probabilistic_oracle(x):
        anchor = x[0]
        candidates = list(x[1:])
        weights = [
            pareto(np.sqrt(np.sum((true_points[anchor] - true_points[c]) ** 2)))
            for c in candidates
        ]

        response = [anchor]
        while candidates:
            # Renormalize over the candidates still in the pool.
            pmf = np.array(weights) / float(sum(weights))
            close_idx = np.random.choice(len(candidates), p=pmf)
            response.append(candidates.pop(close_idx))
            del weights[close_idx]
        return tuple(response)

    return probabilistic_oracle