forked from AllenDowney/BayesMadeSimple
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lincoln.py
128 lines (97 loc) · 3.64 KB
/
lincoln.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function, division
import thinkbayes
import thinkplot
import numpy
"""
Bayesian solution to the Lincoln index, described in a blog
article at Probably Overthinking It.
Last year my occasional correspondent John D. Cook wrote an excellent
blog post about the Lincoln index, which is a way to estimate the
number of errors in a document (or program) by comparing results from
two independent testers. Here's his presentation of the problem:
"Suppose you have a tester who finds 20 bugs in your program. You
want to estimate how many bugs are really in the program. You know
there are at least 20 bugs, and if you have supreme confidence in your
tester, you may suppose there are around 20 bugs. But maybe your
tester isn't very good. Maybe there are hundreds of bugs. How can you
have any idea how many bugs there are? There's no way to know with one
tester. But if you have two testers, you can get a good idea, even if
you don't know how skilled the testers are."
Then he presents the Lincoln index, an estimator "described by
Frederick Charles Lincoln in 1930," where Wikipedia's use of
"described" is a hint that the index is another example of Stigler's
law of eponymy.
"Suppose two testers independently search for bugs. Let k1 be the
number of errors the first tester finds and k2 the number of errors
the second tester finds. Let c be the number of errors both testers
find. The Lincoln Index estimates the total number of errors as k1 k2
/ c [I changed his notation to be consistent with mine]."
So if the first tester finds 20 bugs, the second finds 15, and they
find 3 in common, we estimate that there are about 100 bugs.
Of course, whenever I see something like this, the idea that pops into
my head is that there must be a (better) Bayesian solution! And there
is.
"""
def choose(n, k, d={}):
    """The binomial coefficient "n choose k", memoized.

    Computed via Pascal's rule: C(n, k) = C(n-1, k) + C(n-1, k-1).

    The mutable default `d` is intentional: it acts as a cache shared
    across calls so repeated coefficients are computed only once.

    Args:
        n: number of trials
        k: number of successes
        d: map from (n, k) tuples to cached results

    Returns:
        int
    """
    if k == 0:
        return 1
    if n == 0:
        return 0
    try:
        return d[n, k]
    except KeyError:
        # Bug fix: pass `d` through the recursion. Previously the
        # recursive calls omitted it, so a caller-supplied cache was
        # never filled below the top level (sub-results leaked into
        # the shared default instead).
        res = choose(n - 1, k, d) + choose(n - 1, k - 1, d)
        d[n, k] = res
        return res
def binom(k, n, p):
    """Computes the rest of the binomial PMF (without the coefficient).

    That is, p**k * (1-p)**(n-k).

    k: number of hits
    n: number of attempts
    p: probability of a hit
    """
    hit_term = p ** k
    miss_term = (1 - p) ** (n - k)
    return hit_term * miss_term
class Lincoln(thinkbayes.Suite, thinkbayes.Joint):
    """Represents hypotheses about the number of errors."""

    def Likelihood(self, data, hypo):
        """Computes the likelihood of the data under the hypothesis.

        hypo: n, p1, p2 -- total errors and each tester's hit probability
        data: k1, k2, c -- counts found by tester 1, tester 2, and both
        """
        n, p1, p2 = hypo
        k1, k2, c = data

        # Probability tester 1 finds exactly k1 of the n errors.
        like1 = choose(n, k1) * binom(k1, n, p1)

        # Ways tester 2's finds split into c overlapping and k2-c new,
        # times the probability of tester 2's hit pattern.
        overlap_ways = choose(k1, c) * choose(n - k1, k2 - c)
        like2 = overlap_ways * binom(k2, n, p2)

        return like1 * like2
def main():
    """Run the Lincoln-index example.

    Builds the joint hypothesis suite over (n, p1, p2), updates it with
    the observed bug counts, then plots and prints the marginal over n.
    """
    # Observed data: tester 1 found 20 bugs, tester 2 found 15, 3 shared.
    observed = 20, 15, 3

    # Grid of candidate hit probabilities for each tester.
    grid = numpy.linspace(0, 1, 101)

    # Joint hypothesis space: total bug count crossed with both rates.
    hypos = [(n, p1, p2)
             for n in range(32, 350)
             for p1 in grid
             for p2 in grid]

    suite = Lincoln(hypos)
    suite.Update(observed)

    # Marginal posterior over n (index 0 of the joint hypothesis tuple).
    n_marginal = suite.Marginal(0)

    thinkplot.Pmf(n_marginal, label='n')
    thinkplot.Save(root='lincoln1',
                   xlabel='number of bugs',
                   ylabel='PMF',
                   formats=['pdf', 'png'])

    print(n_marginal.Mean())
    print(n_marginal.MaximumLikelihood())


if __name__ == '__main__':
    main()