diff --git a/code/brfss.py b/code/brfss.py index c3865a1..d4fcd70 100644 --- a/code/brfss.py +++ b/code/brfss.py @@ -4,6 +4,7 @@ Copyright 2010 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import math import sys @@ -58,7 +59,7 @@ def Recode(self): """Recode variables that need cleaning.""" def CleanWeight(weight): - if weight in [7777, 9999]: + if weight in [7777, 9999, 'NA']: return 'NA' elif weight < 1000: return weight / 2.2 @@ -95,13 +96,13 @@ def SummarizeHeight(self): [d[r.sex].append(r.htm3) for r in self.records if r.htm3 != 'NA'] [d['all'].append(r.htm3) for r in self.records if r.htm3 != 'NA'] - print 'Height (cm):' - print 'key n mean var sigma cv' - for key, t in d.iteritems(): + print('Height (cm):') + print('key n mean var sigma cv') + for key, t in d.items(): mu, var = thinkstats.TrimmedMeanVar(t) sigma = math.sqrt(var) cv = sigma / mu - print key, len(t), mu, var, sigma, cv + print(key, len(t), mu, var, sigma, cv) return d @@ -113,13 +114,13 @@ def SummarizeWeight(self): [d[r.sex].append(r.weight2) for r in self.records if r.weight2 != 'NA'] [d['all'].append(r.weight2) for r in self.records if r.weight2 != 'NA'] - print 'Weight (kg):' - print 'key n mean var sigma cv' - for key, t in d.iteritems(): + print('Weight (kg):') + print('key n mean var sigma cv') + for key, t in d.items(): mu, var = thinkstats.TrimmedMeanVar(t) sigma = math.sqrt(var) cv = sigma / mu - print key, len(t), mu, var, sigma, cv + print(key, len(t), mu, var, sigma, cv) def SummarizeWeightChange(self): @@ -130,7 +131,7 @@ def SummarizeWeightChange(self): changes = [(curr - prev) for curr, prev in data] - print 'Mean change', thinkstats.Mean(changes) + print('Mean change', thinkstats.Mean(changes)) def main(name, data_dir='.'): diff --git a/code/columns.py b/code/columns.py index d720228..0dae1f0 100644 --- a/code/columns.py +++ b/code/columns.py @@ -4,6 +4,8 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function + import csv @@ -16,7 +18,7 @@ def read_csv(filename, constructor): fp = open(filename) reader = csv.reader(fp) - header = reader.next() + header = next(reader) names = [s.lower() for s in header] objs = [make_object(t, names, constructor) for t in reader] @@ -47,7 +49,7 @@ def print_cols(cols): cols: list of columns """ for i, col in enumerate(cols): - print i, col[0], col[1] + print(i, col[0], col[1]) def make_col_dict(cols, names): diff --git a/code/cookie.py b/code/cookie.py index 85f778a..1661076 100644 --- a/code/cookie.py +++ b/code/cookie.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function from thinkbayes import Pmf @@ -16,4 +17,4 @@ pmf.Normalize() -print pmf.Prob('Bowl 1') +print(pmf.Prob('Bowl 1')) diff --git a/code/cookie2.py b/code/cookie2.py index 019f05c..e81578c 100644 --- a/code/cookie2.py +++ b/code/cookie2.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function from thinkbayes import Pmf @@ -55,7 +56,7 @@ def main(): pmf.Update('vanilla') for hypo, prob in pmf.Items(): - print hypo, prob + print(hypo, prob) if __name__ == '__main__': diff --git a/code/correlation.py b/code/correlation.py index b6d8ccb..458667d 100644 --- a/code/correlation.py +++ b/code/correlation.py @@ -4,6 +4,7 @@ Copyright 2010 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import math import random @@ -171,7 +172,7 @@ def CorrelatedGenerator(rho): x = random.gauss(0, 1) yield x - sigma = math.sqrt(1 - rho**2); + sigma = math.sqrt(1 - rho**2) while True: x = random.gauss(x * rho, sigma) yield x diff --git a/code/dice.py b/code/dice.py index 6984b43..9935b0a 100644 --- a/code/dice.py +++ b/code/dice.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function from thinkbayes import Suite @@ -27,13 +28,13 @@ def main(): suite = Dice([4, 6, 8, 12, 20]) suite.Update(6) - print 'After one 6' + print('After one 6') suite.Print() for roll in [4, 8, 7, 7, 2]: suite.Update(roll) - print 'After more rolls' + print('After more rolls') suite.Print() diff --git a/code/dungeons.py b/code/dungeons.py index 0df9ed0..bcb7d07 100644 --- a/code/dungeons.py +++ b/code/dungeons.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import random @@ -23,7 +24,7 @@ def __init__(self, sides, name=''): name: string """ thinkbayes.Pmf.__init__(self, name=name) - for x in xrange(1, sides+1): + for x in range(1, sides+1): self.Set(x, 1) self.Normalize() diff --git a/code/euro.py b/code/euro.py index 629a62b..ca68eb6 100644 --- a/code/euro.py +++ b/code/euro.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function """This file contains a partial solution to a problem from MacKay, "Information Theory, Inference, and Learning Algorithms." @@ -59,7 +60,7 @@ def Likelihood(self, data, hypo): def UniformPrior(): """Makes a Suite with a uniform prior.""" - suite = Euro(xrange(0, 101)) + suite = Euro(range(0, 101)) return suite @@ -89,17 +90,17 @@ def RunUpdate(suite, heads=140, tails=110): def Summarize(suite): """Prints summary statistics for the suite.""" - print suite.Prob(50) + print(suite.Prob(50)) - print 'MLE', suite.MaximumLikelihood() + print('MLE', suite.MaximumLikelihood()) - print 'Mean', suite.Mean() - print 'Median', thinkbayes.Percentile(suite, 50) + print('Mean', suite.Mean()) + print('Median', thinkbayes.Percentile(suite, 50)) - print '5th %ile', thinkbayes.Percentile(suite, 5) - print '95th %ile', thinkbayes.Percentile(suite, 95) + print('5th %ile', thinkbayes.Percentile(suite, 5)) + print('95th %ile', thinkbayes.Percentile(suite, 95)) - print 'CI', thinkbayes.CredibleInterval(suite, 90) + print('CI', thinkbayes.CredibleInterval(suite, 90)) def PlotSuites(suites, root): diff --git a/code/euro2.py b/code/euro2.py index 3f9420a..af2fa04 100644 --- a/code/euro2.py +++ b/code/euro2.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function """This file contains a partial solution to a problem from MacKay, "Information Theory, Inference, and Learning Algorithms." @@ -58,7 +59,7 @@ def Likelihood(self, data, hypo): def Version1(): - suite = Euro(xrange(0, 101)) + suite = Euro(range(0, 101)) heads, tails = 140, 110 dataset = 'H' * heads + 'T' * tails @@ -69,7 +70,7 @@ def Version1(): def Version2(): - suite = Euro(xrange(0, 101)) + suite = Euro(range(0, 101)) heads, tails = 140, 110 dataset = 'H' * heads + 'T' * tails @@ -78,7 +79,7 @@ def Version2(): def Version3(): - suite = Euro2(xrange(0, 101)) + suite = Euro2(range(0, 101)) heads, tails = 140, 110 suite.Update((heads, tails)) @@ -88,7 +89,7 @@ def Version3(): def main(): suite = Version3() - print suite.Mean() + print(suite.Mean()) thinkplot.Pmf(suite) thinkplot.Show() diff --git a/code/euro3.py b/code/euro3.py index 7fea862..878f7d0 100644 --- a/code/euro3.py +++ b/code/euro3.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function """This file contains a partial solution to a problem from MacKay, "Information Theory, Inference, and Learning Algorithms." @@ -72,32 +73,32 @@ def Main(): suite = Euro() like_f = suite.Likelihood(data, 50) - print 'p(D|F)', like_f + print('p(D|F)', like_f) actual_percent = 100.0 * 140 / 250 likelihood = suite.Likelihood(data, actual_percent) - print 'p(D|B_cheat)', likelihood - print 'p(D|B_cheat) / p(D|F)', likelihood / like_f + print('p(D|B_cheat)', likelihood) + print('p(D|B_cheat) / p(D|F)', likelihood / like_f) like40 = suite.Likelihood(data, 40) like60 = suite.Likelihood(data, 60) likelihood = 0.5 * like40 + 0.5 * like60 - print 'p(D|B_two)', likelihood - print 'p(D|B_two) / p(D|F)', likelihood / like_f + print('p(D|B_two)', likelihood) + print('p(D|B_two) / p(D|F)', likelihood / like_f) - b_uniform = Euro(xrange(0, 101)) + b_uniform = Euro(range(0, 101)) b_uniform.Remove(50) b_uniform.Normalize() likelihood = SuiteLikelihood(b_uniform, data) - print 'p(D|B_uniform)', likelihood - print 'p(D|B_uniform) / p(D|F)', likelihood / like_f + print('p(D|B_uniform)', likelihood) + print('p(D|B_uniform) / p(D|F)', likelihood / like_f) b_tri = TrianglePrior() b_tri.Remove(50) b_tri.Normalize() likelihood = b_tri.Update(data) - print 'p(D|B_tri)', likelihood - print 'p(D|B_tri) / p(D|F)', likelihood / like_f + print('p(D|B_tri)', likelihood) + print('p(D|B_tri) / p(D|F)', likelihood / like_f) if __name__ == '__main__': diff --git a/code/hockey.py b/code/hockey.py index 4a92191..91a2e28 100644 --- a/code/hockey.py +++ b/code/hockey.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import math @@ -112,7 +113,7 @@ def ReadHockeyData(filename='hockey_data.csv'): # map from (team1, team2) to (score1, score2) pairs = {} - for key, pair in games.iteritems(): + for key, pair in games.items(): t1, t2 = pair key = t1.team, t2.team entry = t1.total, t2.total @@ -129,7 +130,7 @@ def ProcessScoresPairwise(pairs): """ # map from (team1, team2) to list of goals scored goals_scored = {} - for key, entries in pairs.iteritems(): + for key, entries in pairs.items(): t1, t2 = key for entry in entries: g1, g2 = entry @@ -138,7 +139,7 @@ def ProcessScoresPairwise(pairs): # make a list of average goals scored lams = [] - for key, goals in goals_scored.iteritems(): + for key, goals in goals_scored.items(): if len(goals) < 3: continue lam = thinkstats.Mean(goals) @@ -150,9 +151,9 @@ def ProcessScoresPairwise(pairs): thinkplot.Show() mu, var = thinkstats.MeanVar(lams) - print 'mu, sig', mu, math.sqrt(var) + print('mu, sig', mu, math.sqrt(var)) - print 'BOS v VAN', pairs['BOS', 'VAN'] + print('BOS v VAN', pairs['BOS', 'VAN']) def ProcessScoresTeamwise(pairs): @@ -162,7 +163,7 @@ def ProcessScoresTeamwise(pairs): """ # map from team to list of goals scored goals_scored = {} - for key, entries in pairs.iteritems(): + for key, entries in pairs.items(): t1, t2 = key for entry in entries: g1, g2 = entry @@ -171,7 +172,7 @@ def ProcessScoresTeamwise(pairs): # make a list of average goals scored lams = [] - for key, goals in goals_scored.iteritems(): + for key, goals in goals_scored.items(): lam = thinkstats.Mean(goals) lams.append(lam) @@ -181,7 +182,7 @@ def ProcessScoresTeamwise(pairs): thinkplot.Show() mu, var = thinkstats.MeanVar(lams) - print 'mu, sig', mu, math.sqrt(var) + print('mu, sig', mu, math.sqrt(var)) def main(): @@ -230,8 +231,8 @@ def main(): time_dist1 = MakeGoalTimePmf(suite1) time_dist2 = MakeGoalTimePmf(suite2) - print 'MLE bruins', suite1.MaximumLikelihood() - print 'MLE canucks', suite2.MaximumLikelihood() + print('MLE bruins', suite1.MaximumLikelihood()) + print('MLE canucks', suite2.MaximumLikelihood()) thinkplot.Clf() thinkplot.PrePlot(num=2) @@ -247,16 +248,16 @@ def main(): p_loss = diff.ProbLess(0) p_tie = diff.Prob(0) - print p_win, p_loss, p_tie + print(p_win, p_loss, p_tie) p_overtime = thinkbayes.PmfProbLess(time_dist1, time_dist2) p_adjust = thinkbayes.PmfProbEqual(time_dist1, time_dist2) p_overtime += p_adjust / 2 - print 'p_overtime', p_overtime + print('p_overtime', p_overtime) - print p_overtime * p_tie + print(p_overtime * p_tie) p_win += p_overtime * p_tie - print 'p_win', p_win + print('p_win', p_win) # win the next two p_series = p_win**2 @@ -264,7 +265,7 @@ def main(): # split the next two, win the third p_series += 2 * p_win * (1-p_win) * p_win - print 'p_series', p_series + print('p_series', p_series) if __name__ == '__main__': diff --git a/code/jaynes.py b/code/jaynes.py index ff77a48..3c5e977 100644 --- a/code/jaynes.py +++ b/code/jaynes.py @@ -4,6 +4,7 @@ Copyright 2013 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import thinkbayes import thinkplot @@ -168,7 +169,7 @@ def main(): suite = Detector(r, f, step=1) suite.Update(k) thinkplot.Pmf(suite) - print suite.MaximumLikelihood() + print(suite.MaximumLikelihood()) thinkplot.Save(root='jaynes1', xlabel='Number of particles (n)', diff --git a/code/kidney.py b/code/kidney.py index 6419cbd..cd6a32d 100644 --- a/code/kidney.py +++ b/code/kidney.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import math import numpy @@ -44,12 +45,12 @@ def SimpleModel(): d1 = 15.5 d0 = d1 / 2.0 ** doublings - print 'interval (days)', interval - print 'interval (years)', interval / 365 - print 'dt', dt - print 'doublings', doublings - print 'd1', d1 - print 'd0', d0 + print('interval (days)', interval) + print('interval (years)', interval / 365) + print('dt', dt) + print('doublings', doublings) + print('d1', d1) + print('d0', d0) # assume an initial linear measure of 0.1 cm d0 = 0.1 @@ -61,19 +62,19 @@ def SimpleModel(): # what linear doubling time does that imply? dt = interval / doublings - print 'doublings', doublings - print 'dt', dt + print('doublings', doublings) + print('dt', dt) # compute the volumetric doubling time and RDT vdt = dt / 3 rdt = 365 / vdt - print 'vdt', vdt - print 'rdt', rdt + print('vdt', vdt) + print('rdt', rdt) cdf = MakeCdf() p = cdf.Prob(rdt) - print 'Prob{RDT > 2.4}', 1-p + print('Prob{RDT > 2.4}', 1-p) def MakeCdf(): @@ -231,7 +232,7 @@ def GenerateSample(n, pc, lam1, lam2): Returns: list of random variates """ - xs = [GenerateRdt(pc, lam1, lam2) for _ in xrange(n)] + xs = [GenerateRdt(pc, lam1, lam2) for _ in range(n)] return xs @@ -321,7 +322,7 @@ def __init__(self): def GetBuckets(self): """Returns an iterator for the keys in the cache.""" - return self.sequences.iterkeys() + return self.sequences.keys() def GetSequence(self, bucket): """Looks up a bucket in the cache.""" @@ -389,7 +390,7 @@ def Print(self): for bucket in sorted(self.GetBuckets()): ss = self.GetSequence(bucket) diameter = BucketToCm(bucket) - print diameter, len(ss) + print(diameter, len(ss)) def Correlation(self): """Computes the correlation between log volumes and rdts.""" @@ -421,7 +422,7 @@ def MakeSequences(self, n, rho, cdf): sequences.append(seq) if i % 100 == 0: - print i + print(i) return sequences @@ -665,7 +666,7 @@ def PrintTable(fp, xs, ts): fp.write(r'\hline' '\n') for i, (cm, ps) in enumerate(zip(xs, ts)): - #print cm, ps + #print(cm, ps) if i % 3 == 0: PrintCI(fp, cm, ps) @@ -724,10 +725,10 @@ def TestCorrelation(cdf): rho = 0.4 rdt_seq = CorrelatedGenerator(cdf, rho) - xs = [rdt_seq.next() for _ in range(n)] + xs = [next(rdt_seq) for _ in range(n)] rho2 = correlation.SerialCorr(xs) - print rho, rho2 + print(rho, rho2) cdf2 = thinkbayes.MakeCdfFromList(xs) thinkplot.Cdfs([cdf, cdf2]) @@ -737,7 +738,7 @@ def TestCorrelation(cdf): def main(script): for size in [1, 5, 10]: bucket = CmToBucket(size) - print 'Size, bucket', size, bucket + print('Size, bucket', size, bucket) SimpleModel() @@ -761,10 +762,10 @@ def main(script): calc.PlotBuckets() _ = calc.MakeSequences(1900, rho, fit) - print 'V0-RDT correlation', calc.cache.Correlation() + print('V0-RDT correlation', calc.cache.Correlation()) - print '15.5 Probability age > 8 year', calc.cache.ProbOlder(15.5, 8) - print '6.0 Probability age > 8 year', calc.cache.ProbOlder(6.0, 8) + print('15.5 Probability age > 8 year', calc.cache.ProbOlder(15.5, 8)) + print('6.0 Probability age > 8 year', calc.cache.ProbOlder(6.0, 8)) calc.PlotConditionalCdfs() diff --git a/code/monty.py b/code/monty.py index 6e90034..1c08dcc 100644 --- a/code/monty.py +++ b/code/monty.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function from thinkbayes import Pmf @@ -53,7 +54,7 @@ def main(): pmf.Update(data) for hypo, prob in sorted(pmf.Items()): - print hypo, prob + print(hypo, prob) if __name__ == '__main__': diff --git a/code/myplot.py b/code/myplot.py index 3a10e92..0ce663d 100644 --- a/code/myplot.py +++ b/code/myplot.py @@ -4,6 +4,7 @@ Copyright 2010 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import math import matplotlib @@ -69,7 +70,6 @@ def ColorGenerator(cls, n): """ for i in cls.which_colors[n]: yield cls.colors[i] - raise StopIteration('Ran out of colors in Brewer.ColorGenerator') @classmethod def InitializeIter(cls, num): @@ -141,7 +141,7 @@ def Underride(d, **options): if d is None: d = {} - for key, val in options.iteritems(): + for key, val in options.items(): d.setdefault(key, val) return d @@ -172,9 +172,9 @@ def Plot(xs, ys, style='', **options): if color_iter: try: - options = Underride(options, color=color_iter.next()) + options = Underride(options, color=next(color_iter)) except StopIteration: - print 'Warning: Brewer ran out of colors.' + print('Warning: Brewer ran out of colors.') Brewer.ClearIter() options = Underride(options, linewidth=3, alpha=0.8) @@ -345,7 +345,7 @@ def Contour(obj, pcolor=False, contour=True, imshow=False, **options): Underride(options, linewidth=3, cmap=matplotlib.cm.Blues) - xs, ys = zip(*d.iterkeys()) + xs, ys = zip(*d.keys()) xs = sorted(set(xs)) ys = sorted(set(ys)) @@ -473,7 +473,7 @@ def SaveFormat(root, fmt='eps'): fmt: string format """ filename = '%s.%s' % (root, fmt) - print 'Writing', filename + print('Writing', filename) pyplot.savefig(filename, format=fmt, dpi=300) @@ -501,7 +501,7 @@ def SaveFormat(root, fmt='eps'): def main(): color_iter = Brewer.ColorGenerator(7) for color in color_iter: - print color + print(color) if __name__ == '__main__': main() diff --git a/code/paintball.py b/code/paintball.py index 2e22012..03f229e 100644 --- a/code/paintball.py +++ b/code/paintball.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import thinkbayes @@ -114,8 +115,8 @@ def MakePosteriorPlot(suite): marginal_beta = suite.Marginal(1) marginal_beta.name = 'beta' - print 'alpha CI', marginal_alpha.CredibleInterval(50) - print 'beta CI', marginal_beta.CredibleInterval(50) + print('alpha CI', marginal_alpha.CredibleInterval(50)) + print('beta CI', marginal_beta.CredibleInterval(50)) thinkplot.PrePlot(num=2) diff --git a/code/price.py b/code/price.py index 449397c..e2d6c41 100644 --- a/code/price.py +++ b/code/price.py @@ -4,6 +4,7 @@ Copyright 2013 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import csv import numpy @@ -33,13 +34,13 @@ def ReadData(filename='showcases.2011.csv'): data = t[1:] try: data = [int(x) for x in data] - # print heading, data[0], len(data) + # print(heading, data[0], len(data)) res.append(data) except ValueError: pass fp.close() - return zip(*res) + return list(zip(*res)) class Price(thinkbayes.Suite): @@ -259,11 +260,11 @@ def MakePlots(player1, player2): cdf2 = player2.CdfDiff() cdf2.name = 'player 2' - print 'Player median', cdf1.Percentile(50) - print 'Player median', cdf2.Percentile(50) + print('Player median', cdf1.Percentile(50)) + print('Player median', cdf2.Percentile(50)) - print 'Player 1 overbids', player1.ProbOverbid() - print 'Player 2 overbids', player2.ProbOverbid() + print('Player 1 overbids', player1.ProbOverbid()) + print('Player 2 overbids', player2.ProbOverbid()) thinkplot.Cdfs([cdf1, cdf2]) thinkplot.Save(root='price2', @@ -280,8 +281,8 @@ def MakePlayers(): cols = zip(*data) price1, price2, bid1, bid2, diff1, diff2 = cols - # print list(sorted(price1)) - # print len(price1) + # print(list(sorted(price1))) + # print(len(price1)) player1 = Player(price1, bid1, diff1) player2 = Player(price2, bid2, diff2) @@ -301,12 +302,12 @@ def PlotExpectedGains(guess1=20000, guess2=40000): player1.MakeBeliefs(guess1) player2.MakeBeliefs(guess2) - print 'Player 1 prior mle', player1.prior.MaximumLikelihood() - print 'Player 2 prior mle', player2.prior.MaximumLikelihood() - print 'Player 1 mean', player1.posterior.Mean() - print 'Player 2 mean', player2.posterior.Mean() - print 'Player 1 mle', player1.posterior.MaximumLikelihood() - print 'Player 2 mle', player2.posterior.MaximumLikelihood() + print('Player 1 prior mle', player1.prior.MaximumLikelihood()) + print('Player 2 prior mle', player2.prior.MaximumLikelihood()) + print('Player 1 mean', player1.posterior.Mean()) + print('Player 2 mean', player2.posterior.Mean()) + print('Player 1 mle', player1.posterior.MaximumLikelihood()) + print('Player 2 mle', player2.posterior.MaximumLikelihood()) player1.PlotBeliefs('price3') player2.PlotBeliefs('price4') @@ -319,11 +320,11 @@ def PlotExpectedGains(guess1=20000, guess2=40000): bids, gains = calc1.ExpectedGains() thinkplot.Plot(bids, gains, label='Player 1') - print 'Player 1 optimal bid', max(zip(gains, bids)) + print('Player 1 optimal bid', max(zip(gains, bids))) bids, gains = calc2.ExpectedGains() thinkplot.Plot(bids, gains, label='Player 2') - print 'Player 2 optimal bid', max(zip(gains, bids)) + print('Player 2 optimal bid', max(zip(gains, bids))) thinkplot.Save(root='price5', xlabel='bid ($)', @@ -370,13 +371,13 @@ def TestCode(calc): """ # test ProbWin for diff in [0, 100, 1000, 10000, 20000]: - print diff, calc.ProbWin(diff) + print(diff, calc.ProbWin(diff)) print # test Return price = 20000 for bid in [17000, 18000, 19000, 19500, 19800, 20001]: - print bid, calc.Gain(bid, price) + print(bid, calc.Gain(bid, price)) print diff --git a/code/redline.py b/code/redline.py index 8fc4f85..a90aa7e 100644 --- a/code/redline.py +++ b/code/redline.py @@ -4,6 +4,7 @@ Copyright 2013 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import thinkbayes @@ -211,9 +212,9 @@ def MakePlot(self, root='redline2'): root: string """ - print 'Mean z', self.pmf_z.Mean() / 60 - print 'Mean zb', self.pmf_zb.Mean() / 60 - print 'Mean y', self.pmf_y.Mean() / 60 + print('Mean z', self.pmf_z.Mean() / 60) + print('Mean zb', self.pmf_zb.Mean() / 60) + print('Mean y', self.pmf_y.Mean() / 60) cdf_z = self.pmf_z.MakeCdf() cdf_zb = self.pmf_zb.MakeCdf() @@ -347,7 +348,7 @@ def __init__(self, passenger_data): for _k1, y, k2 in passenger_data: self.post_lam.Update((y, k2)) - print 'Mean posterior lambda', self.post_lam.Mean() + print('Mean posterior lambda', self.post_lam.Mean()) def MakePlot(self, root='redline1'): """Plot the prior and posterior CDF of passengers arrival rate. @@ -406,7 +407,7 @@ def RemoveNegatives(pmf): pmf: Pmf """ - for val in pmf.Values(): + for val in list(pmf.Values()): if val < 0: pmf.Remove(val) pmf.Normalize() @@ -469,7 +470,7 @@ def Update(self, data): """ k, y = data - print k, y + print(k, y) prior = self.PredictivePmf(self.xs) gaps = Gaps(prior) gaps.Update(y) @@ -508,7 +509,7 @@ def Update(self, data): mean_zb = obs_zb.Mean() self.mean_zbs.append(mean_zb) - print k, y, mean_zb + print(k, y, mean_zb) # use observed z to update beliefs about pmf_z self.params += numpy.array(probs) @@ -543,9 +544,9 @@ def __init__(self, xs, pcounts, passenger_data): def PlotPmfs(self): """Plot the PMFs.""" - print 'Mean y', self.pmf_y.Mean() - print 'Mean z', self.post_z.Mean() - print 'Mean zb', self.post_zb.Mean() + print('Mean y', self.pmf_y.Mean()) + print('Mean z', self.post_z.Mean()) + print('Mean zb', self.post_zb.Mean()) thinkplot.Pmf(self.pmf_y) thinkplot.Pmf(self.post_z) @@ -684,7 +685,7 @@ def RunSimpleProcess(gap_times, lam=0.0333, num_passengers=15, plot=True): UPPER_BOUND = 1200 cdf_z = thinkbayes.MakeCdfFromList(gap_times).Scale(1.0/60) - print 'CI z', cdf_z.CredibleInterval(90) + print('CI z', cdf_z.CredibleInterval(90)) xs = MakeRange(low=10) @@ -726,11 +727,11 @@ def RunMixProcess(gap_times, lam=0.0333, num_passengers=15, plot=True): total_y = 0 total_k2 = 0 for k1, y, k2 in passenger_data: - print k1, y/60, k2 + print(k1, y/60, k2) total_y += y/60 total_k2 += k2 - print total_k2, total_y - print 'Average arrival rate', total_k2 / total_y + print(total_k2, total_y) + print('Average arrival rate', total_k2 / total_y) are = ArrivalRateEstimator(passenger_data) diff --git a/code/redline_data.py b/code/redline_data.py index 795f188..d0522fc 100644 --- a/code/redline_data.py +++ b/code/redline_data.py @@ -6,6 +6,7 @@ Copyright 2013 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import csv import json @@ -13,7 +14,10 @@ import os import sys import redis -import urllib2 +try: + import urllib2 +except ImportError: + import urllib.request as urllib2 from datetime import datetime, time @@ -30,7 +34,7 @@ def __init__(self): try: password = os.environ['REDIS_AUTH'] except KeyError: - print 'Environment variable REDIS_AUTH is not set.' + print('Environment variable REDIS_AUTH is not set.') sys.exit() self.r = redis.StrictRedis(host=self.host, @@ -49,7 +53,7 @@ def WriteTrainSpotting(self, timestamp, tripid, seconds, live=True): dt = datetime.fromtimestamp(timestamp) day = dt.date().isoformat() - print dt, tripid, seconds, timestamp + print(dt, tripid, seconds, timestamp) if live: self.r.sadd('days', day) @@ -65,7 +69,7 @@ def FindArrivals(self, start_hour=16, end_hour=18): Returns: map from string day to unsorted list of arrival datetimes """ days = self.r.smembers('days') - print days + print(days) start_time = time(hour=start_hour) end_time = time(hour=end_hour) @@ -128,7 +132,7 @@ def ReadJson(): url = 'http://developer.mbta.com/lib/rthr/red.json' json_text = urllib2.urlopen(url).read() json_obj = json.loads(json_text) - print json_obj + print(json_obj) def ReadAndStore(red): @@ -151,11 +155,11 @@ def Loop(red, start_time, end_time, delay=60): """ if datetime.now() < start_time: diff = start_time - datetime.now() - print 'Sleeping', diff + print('Sleeping', diff) sleep(diff.total_seconds()) while datetime.now() < end_time: - print 'Collecting' + print('Collecting') ReadAndStore(red) sleep(delay) @@ -177,8 +181,8 @@ def GetInterarrivals(arrival_map): Returns: list of float interarrival times in seconds """ interarrival_seconds = [] - for day, arrivals in sorted(arrival_map.iteritems()): - print day, len(arrivals) + for day, arrivals in sorted(arrival_map.items()): + print(day, len(arrivals)) arrivals.sort() diffs = numpy.diff(arrivals) diffs = [diff.total_seconds() for diff in diffs] @@ -194,13 +198,13 @@ def main(script, command='collect'): start = TodayAt(16) end = TodayAt(18) - print start, end + print(start, end) Loop(red, start, end) elif command == 'report': arrival_map = red.FindArrivals() interarrivals = GetInterarrivals(arrival_map) - print repr(interarrivals) + print(repr(interarrivals)) if __name__ == '__main__': diff --git a/code/sat.py b/code/sat.py index 9a9af93..23237e0 100644 --- a/code/sat.py +++ b/code/sat.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import csv import math @@ -130,10 +131,10 @@ def CompareScores(self, a_score, b_score, constructor): ratio = top.Prob('A') / top.Prob('B') - print 'Likelihood ratio', ratio + print('Likelihood ratio', ratio) posterior = ratio / (ratio + 1) - print 'Posterior', posterior + print('Posterior', posterior) if constructor is Sat2: ComparePosteriorPredictive(a_sat, b_sat) @@ -341,10 +342,10 @@ def ComparePosteriorPredictive(a_sat, b_sat): b_like = thinkbayes.PmfProbLess(a_pred, b_pred) c_like = thinkbayes.PmfProbEqual(a_pred, b_pred) - print 'Posterior predictive' - print 'A', a_like - print 'B', b_like - print 'C', c_like + print('Posterior predictive') + print('A', a_like) + print('B', b_like) + print('C', c_like) def PlotPriorDist(pmf): @@ -441,11 +442,11 @@ def ProbCorrectTable(): difficulties = [-1.85, -0.05, 1.75] for eff in efficacies: - print '%0.2f & ' % eff, + print('%0.2f & ' % eff, end="") for diff in difficulties: p = ProbCorrect(eff, diff) - print '%0.2f & ' % p, - print r'\\' + print('%0.2f & ' % p, end="") + print(r'\\') def main(script): diff --git a/code/species.py b/code/species.py index 576cb50..e1d7403 100644 --- a/code/species.py +++ b/code/species.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import matplotlib.pyplot as pyplot import thinkplot @@ -46,7 +47,7 @@ def Lookup(self, key): def Keys(self): """Returns an iterator of keys.""" - return self.shelf.iterkeys() + return self.shelf.keys() def Read(self): """Returns the contents of the shelf as a map.""" @@ -107,7 +108,7 @@ def prob_bogus(k, r): p = (1-q) ** k return p - print self.code, clean_param + print(self.code, clean_param) counts = self.GetCounts() r = 1.0 * sum(counts) @@ -145,7 +146,7 @@ def GetNames(self): def PrintCounts(self): """Prints the counts and species names.""" for count, name in reversed(self.species): - print count, name + print(count, name) def GetSpecies(self, index): """Gets the count and name of the indicated species. @@ -191,7 +192,7 @@ def Process(self, low=None, high=500, conc=1, iters=100): self.suite.Update(counts) #end = time.time() - #print 'Processing time' end-start + #print('Processing time' end-start) def MakePrediction(self, num_sims=100): """Make predictions for the given subject. @@ -243,7 +244,7 @@ def MakeFigures(self): def PlotDistN(self): """Plots distribution of n.""" pmf = self.suite.DistN() - print '90% CI for N:', pmf.CredibleInterval(90) + print('90% CI for N:', pmf.CredibleInterval(90)) pmf.name = self.code thinkplot.Clf() @@ -290,8 +291,8 @@ def PlotPrevalence(self, rank=1, cdf_flag=True): count, _ = self.GetSpecies(index) mix.name = '%d (%d)' % (rank, count) - print '90%% CI for prevalence of species %d:' % rank, - print mix.CredibleInterval(90) + print('90%% CI for prevalence of species %d:' % rank, end="") + print(mix.CredibleInterval(90)) if cdf_flag: cdf = mix.MakeCdf() @@ -307,8 +308,8 @@ def PlotMixture(self, rank=1): # convert rank to index index = self.GetM() - rank - print self.GetSpecies(index) - print self.GetCounts()[index] + print(self.GetSpecies(index)) + print(self.GetCounts()[index]) metapmf, mix = self.suite.DistOfPrevalence(index) @@ -354,7 +355,7 @@ def GenerateObservations(self, num_reads): observations = cdf.Sample(num_reads) #for ob in observations: - # print ob + # print(ob) return n, observations @@ -478,8 +479,8 @@ def MakeConditionals(curves, ks): pmf.name = 'k=%d' % k cdf = pmf.MakeCdf() cdfs.append(cdf) - print '90%% credible interval for %d' % k, - print cdf.CredibleInterval(90) + print('90%% credible interval for %d' % k, end="") + print(cdf.CredibleInterval(90)) return cdfs @@ -512,7 +513,7 @@ def MakeFracCdfs(curves, ks): d.setdefault(k, []).append(frac) cdfs = {} - for k, fracs in d.iteritems(): + for k, fracs in d.items(): cdf = thinkbayes.MakeCdfFromList(fracs) cdfs[k] = cdf @@ -552,8 +553,7 @@ def ReadRarefactedData(filename='journal.pone.0047712.s001.csv', """ fp = open(filename) reader = csv.reader(fp) - _ = reader.next() - + _ = next(reader) subject = Subject('') subject_map = {} @@ -573,7 +573,7 @@ def ReadRarefactedData(filename='journal.pone.0047712.s001.csv', count = int(t[2]) subject.Add(species, count) - for code, subject in subject_map.iteritems(): + for code, subject in subject_map.items(): subject.Done(clean_param=clean_param) return subject_map @@ -592,8 +592,9 @@ def ReadCompleteDataset(filename='BBB_data_from_Rob.csv', clean_param=0): """ fp = open(filename) reader = csv.reader(fp) - header = reader.next() - header = reader.next() + + header = next(reader) + header = next(reader) subject_codes = header[1:-1] subject_codes = ['B'+code for code in subject_codes] @@ -620,7 +621,7 @@ def ReadCompleteDataset(filename='BBB_data_from_Rob.csv', clean_param=0): counts = [int(x) for x in t[1:-1]] - # print otu_code, species + # print(otu_code, species) for code, count in zip(subject_codes, counts): if count > 0: @@ -628,7 +629,7 @@ def ReadCompleteDataset(filename='BBB_data_from_Rob.csv', clean_param=0): uber_subject.Add(species, count) uber_subject.Done(clean_param=clean_param) - for code, subject in subject_map.iteritems(): + for code, subject in subject_map.items(): subject.Done(clean_param=clean_param) return subject_map, uber_subject @@ -652,7 +653,7 @@ def JoinSubjects(): # read the complete dataset all_subjects, _ = ReadCompleteDataset() - for code, subject in sampled_subjects.iteritems(): + for code, subject in sampled_subjects.items(): if code in all_subjects: match = all_subjects[code] subject.Match(match) @@ -730,7 +731,7 @@ def PlotFracCdfs(cdfs, root='species-frac'): thinkplot.Clf() color = '#225EA8' - for k, cdf in cdfs.iteritems(): + for k, cdf in cdfs.items(): xs, ys = cdf.Render() ys = [1-y for y in ys] thinkplot.Plot(xs, ys, color=color, linewidth=1) @@ -825,12 +826,12 @@ def Preload(self, data): m = len(data) singletons = data.count(1) num = m - singletons - print m, singletons, num + print(m, singletons, num) addend = numpy.ones(num, dtype=numpy.float) * 1 - print len(addend) - print len(self.params[singletons:m]) + print(len(addend)) + print(len(self.params[singletons:m])) self.params[singletons:m] += addend - print 'Preload', num + print('Preload', num) def Update(self, data): """Updates the distribution based on data. @@ -972,7 +973,7 @@ def SamplePosterior(self): n = self.RandomN() prevalences = self.SamplePrevalences(n) - #print 'Peeking at n_cheat' + #print('Peeking at n_cheat') #n = n_cheat return n, prevalences @@ -1011,7 +1012,7 @@ def Unbias(self, n, m, q_desired): x = sum(params[:m]) y = sum(params[m:]) a = x + y - #print x, y, a, x/a, y/a + #print(x, y, a, x/a, y/a) g = q_desired * a / y f = (a - g * y) / x @@ -1207,11 +1208,11 @@ def MakePosterior(constructor, data, ns, conc=1, iters=1000): """ suite = constructor(ns, conc=conc, iters=iters) - # print constructor.__name__ + # print(constructor.__name__) start = time.time() suite.Update(data) end = time.time() - print 'Processing time', end-start + print('Processing time', end-start) return suite @@ -1264,12 +1265,12 @@ def SimpleDirichletExample(): dirichlet = thinkbayes.Dirichlet(3) for i in range(3): beta = dirichlet.MarginalBeta(i) - print 'mean', names[i], beta.Mean() + print('mean', names[i], beta.Mean()) dirichlet.Update(data) for i in range(3): beta = dirichlet.MarginalBeta(i) - print 'mean', names[i], beta.Mean() + print('mean', names[i], beta.Mean()) pmf = beta.MakePmf(name=names[i]) thinkplot.Pmf(pmf) @@ -1341,8 +1342,8 @@ def ProcessSubjects(codes): pmfs.append(pmf) - print 'ProbGreater', thinkbayes.PmfProbGreater(pmfs[0], pmfs[1]) - print 'ProbLess', thinkbayes.PmfProbLess(pmfs[0], pmfs[1]) + print('ProbGreater', thinkbayes.PmfProbGreater(pmfs[0], pmfs[1])) + print('ProbLess', thinkbayes.PmfProbLess(pmfs[0], pmfs[1])) thinkplot.Save(root='species4', xlabel='Number of species', @@ -1394,15 +1395,13 @@ def PrintSummary(subject): subject: Subject """ - print subject.code - print 'found %d species in %d reads' % (subject.num_species, - subject.num_reads) + print(subject.code) + print('found %d species in %d reads' % (subject.num_species, subject.num_reads)) - print 'total %d species in %d reads' % (subject.total_species, - subject.total_reads) + print('total %d species in %d reads' % (subject.total_species, subject.total_reads)) cdf = subject.suite.DistN().MakeCdf() - print 'n' + print('n') PrintPrediction(cdf, 'unknown') @@ -1415,8 +1414,8 @@ def PrintPrediction(cdf, actual): median = cdf.Percentile(50) low, high = cdf.CredibleInterval(75) - print 'predicted %0.2f (%0.2f %0.2f)' % (median, low, high) - print 'actual', actual + print('predicted %0.2f (%0.2f %0.2f)' % (median, low, high)) + print('actual', actual) def RandomSeed(x): @@ -1483,14 +1482,14 @@ def PlotActualPrevalences(): # concentration parameter used in the simulation conc = 0.06 - for code, subject in subject_map.iteritems(): + for code, subject in subject_map.items(): prevalences = subject.GetPrevalences() m = len(prevalences) if m < 2: continue actual_max = max(prevalences) - print code, m, actual_max + print(code, m, actual_max) # incr the PMFs if m > 50: @@ -1588,12 +1587,12 @@ def Validate(self, num_runs=100, clean_param=0): subject_map, _ = ReadCompleteDataset(clean_param=clean_param) i = 0 - for match in subject_map.itervalues(): + for match in subject_map.values(): if match.num_reads < 400: continue num_reads = 100 - print 'Validate', match.code + print('Validate', match.code) subject = match.Resample(num_reads) subject.Match(match) @@ -1647,9 +1646,9 @@ def PlotL(self, root='species-n'): def PlotCalibrationCurves(self, root='species5'): """Plots calibration curves""" - print self.total_n - print self.total_q - print self.total_l + print(self.total_n) + print(self.total_q) + print(self.total_l) thinkplot.Plot([0, 100], [0, 100], color='gray', alpha=0.2) @@ -1689,14 +1688,14 @@ def RunCalibration(self, seed, n_low, n_high, r, tr): self.conc) l_actual = len(hist) - len(subhist) - print 'Run low, high, conc', n_low, n_high, self.conc - print 'Run r, tr', r, tr - print 'Run n, q, l', n_actual, q_actual, l_actual + print('Run low, high, conc', n_low, n_high, self.conc) + print('Run r, tr', r, tr) + print('Run n, q, l', n_actual, q_actual, l_actual) # extract the data data = [count for species, count in subhist.Items()] data.sort() - print 'data', data + print('data', data) # make a Subject and process subject = Subject('simulated') @@ -1726,17 +1725,17 @@ def RunSubject(self, subject, n_actual, q_actual, l_actual): # check the distribution of n pmf_n = suite.DistN() - print 'n' + print('n') self.total_n += self.CheckDistribution(pmf_n, n_actual, self.n_seq) # check the distribution of q pmf_q = suite.DistQ() - print 'q' + print('q') self.total_q += self.CheckDistribution(pmf_q, q_actual, self.q_seq) # check the distribution of additional species pmf_l = subject.DistL() - print 'l' + print('l') self.total_l += self.CheckDistribution(pmf_l, l_actual, self.l_seq) def CheckDistribution(self, pmf, actual, seq): @@ -1847,12 +1846,12 @@ def PlotSubjectCdf(code=None, clean_param=0): else: subject = subject_map[code] - print subject.code + print(subject.code) m = subject.GetM() subject.Process(high=m, conc=0.1, iters=0) - print subject.suite.params[:m] + print(subject.suite.params[:m]) # plot the cdf options = dict(linewidth=3, color='blue', alpha=0.5) diff --git a/code/survey.py b/code/survey.py index 09e07e2..c8f90bf 100644 --- a/code/survey.py +++ b/code/survey.py @@ -4,6 +4,7 @@ Copyright 2010 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import sys import gzip @@ -73,8 +74,8 @@ def MakeRecord(self, line, fields, constructor): s = line[start-1:end] val = cast(s) except ValueError: - #print line - #print field, start, end, s + # print(line) + # print(field, start, end, s) val = 'NA' setattr(obj, field, val) return obj @@ -184,11 +185,11 @@ def Recode(self): def main(name, data_dir='.'): resp = Respondents() resp.ReadRecords(data_dir) - print 'Number of respondents', len(resp.records) + print('Number of respondents', len(resp.records)) preg = Pregnancies() preg.ReadRecords(data_dir) - print 'Number of pregnancies', len(preg.records) + print('Number of pregnancies', len(preg.records)) if __name__ == '__main__': diff --git a/code/thinkbayes.py b/code/thinkbayes.py index db8222f..1484200 100644 --- a/code/thinkbayes.py +++ b/code/thinkbayes.py @@ -161,7 +161,7 @@ def InitMapping(self, values): values: map from value to probability """ - for value, prob in values.iteritems(): + for value, prob in values.items(): self.Set(value, prob) def InitPmf(self, values): @@ -182,7 +182,7 @@ def __len__(self): def __iter__(self): return iter(self.d) - def iterkeys(self): + def keys(self): return iter(self.d) def __contains__(self, value): @@ -230,7 +230,7 @@ def Log(self, m=None): if m is None: m = self.MaxLike() - for x, p in self.d.iteritems(): + for x, p in self.d.items(): if p: self.Set(x, math.log(p / m)) else: @@ -250,7 +250,7 @@ def Exp(self, m=None): if m is None: m = self.MaxLike() - for x, p in self.d.iteritems(): + for x, p in self.d.items(): self.Set(x, math.exp(p - m)) def GetDict(self): @@ -284,7 +284,7 @@ def Render(self): def Print(self): """Prints the values and freqs/probs in ascending order.""" - for val, prob in sorted(self.d.iteritems()): + for val, prob in sorted(self.d.items()): print(val, prob) def Set(self, x, y=0): @@ -326,12 +326,12 @@ def Remove(self, x): def Total(self): """Returns the total of the frequencies/probabilities in the map.""" - total = sum(self.d.itervalues()) + total = sum(self.d.values()) return total def MaxLike(self): """Returns the largest frequency/probability in the map.""" - return max(self.d.itervalues()) + return max(self.d.values()) class Hist(_DictWrapper): @@ -403,7 +403,7 @@ def ProbGreater(self, x): returns: float probability """ - t = [prob for (val, prob) in self.d.iteritems() if val > x] + t = [prob for (val, prob) in self.d.items() if val > x] return sum(t) def ProbLess(self, x): @@ -413,7 +413,7 @@ def ProbLess(self, x): returns: float probability """ - t = [prob for (val, prob) in self.d.iteritems() if val < x] + t = [prob for (val, prob) in self.d.items() if val < x] return sum(t) def __lt__(self, obj): @@ -513,7 +513,7 @@ def Random(self): target = random.random() total = 0.0 - for x, p in self.d.iteritems(): + for x, p in self.d.items(): total += p if total >= target: return x @@ -528,7 +528,7 @@ def Mean(self): float mean """ mu = 0.0 - for x, p in self.d.iteritems(): + for x, p in self.d.items(): mu += p * x return mu @@ -546,7 +546,7 @@ def Var(self, mu=None): mu = self.Mean() var = 0.0 - for x, p in self.d.iteritems(): + for x, p in self.d.items(): var += p * (x - mu) ** 2 return var @@ -633,6 +633,17 @@ def Max(self, k): cdf.ps = [p ** k for p in cdf.ps] return cdf + def __hash__(self): + # FIXME + # This imitates python2 implicit behaviour, which was removed in python3 + + # Some problems with an id based hash: + # looking up different pmfs with the same contents will give different values + # looking up a new Pmf will always produce a keyerror + + # A solution might be to make a "FrozenPmf" immutable class (like frozenset) + # and base a hash on a tuple of the items of self.d + return id(self) class Joint(Pmf): """Represents a joint distribution. @@ -1095,7 +1106,7 @@ def MakeCdfFromDict(d, name=''): Returns: Cdf object """ - return MakeCdfFromItems(d.iteritems(), name) + return MakeCdfFromItems(d.items(), name) def MakeCdfFromHist(hist, name=''): @@ -1154,7 +1165,7 @@ def Update(self, data): returns: the normalizing constant """ - for hypo in self.Values(): + for hypo in list(self.Values()): like = self.Likelihood(data, hypo) self.Mult(hypo, like) return self.Normalize() @@ -1487,7 +1498,7 @@ def SampleSum(dists, n): returns: new Pmf of sums """ - pmf = MakePmfFromList(RandomSum(dists) for i in xrange(n)) + pmf = MakePmfFromList(RandomSum(dists) for i in range(n)) return pmf @@ -1552,7 +1563,7 @@ def MakePoissonPmf(lam, high, step=1): returns: normalized Pmf """ pmf = Pmf() - for k in xrange(0, high + 1, step): + for k in range(0, high + 1, step): p = EvalPoissonPmf(k, lam) pmf.Set(k, p) pmf.Normalize() @@ -1699,14 +1710,14 @@ def MakePmf(self, steps=101, name=''): pmf = cdf.MakePmf() return pmf - xs = [i / (steps - 1.0) for i in xrange(steps)] + xs = [i / (steps - 1.0) for i in range(steps)] probs = [self.EvalPdf(x) for x in xs] pmf = MakePmfFromDict(dict(zip(xs, probs)), name) return pmf def MakeCdf(self, steps=101): """Returns the CDF of this distribution.""" - xs = [i / (steps - 1.0) for i in xrange(steps)] + xs = [i / (steps - 1.0) for i in range(steps)] ps = [scipy.special.betainc(self.alpha, self.beta, x) for x in xs] cdf = Cdf(xs, ps) return cdf diff --git a/code/thinkplot.py b/code/thinkplot.py index 1af0db8..05d6cc6 100644 --- a/code/thinkplot.py +++ b/code/thinkplot.py @@ -73,7 +73,6 @@ def ColorGenerator(cls, n): """ for i in cls.which_colors[n]: yield cls.colors[i] - raise StopIteration('Ran out of colors in _Brewer.ColorGenerator') @classmethod def InitializeIter(cls, num): diff --git a/code/train.py b/code/train.py index e6ea01d..745baed 100644 --- a/code/train.py +++ b/code/train.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function from dice import Dice import thinkplot @@ -18,11 +19,11 @@ class Train(Dice): def main(): - hypos = xrange(1, 1001) + hypos = range(1, 1001) suite = Train(hypos) suite.Update(60) - print suite.Mean() + print(suite.Mean()) thinkplot.PrePlot(1) thinkplot.Pmf(suite) diff --git a/code/train2.py b/code/train2.py index c951683..8394ef6 100644 --- a/code/train2.py +++ b/code/train2.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function from dice import Dice import thinkplot @@ -21,7 +22,7 @@ def Mean(suite): def MakePosterior(high, dataset): - hypos = xrange(1, high+1) + hypos = range(1, high+1) suite = Train(hypos) suite.name = str(high) @@ -37,7 +38,7 @@ def main(): for high in [500, 1000, 2000]: suite = MakePosterior(high, dataset) - print high, suite.Mean() + print(high, suite.Mean()) thinkplot.Save(root='train2', xlabel='Number of trains', diff --git a/code/train3.py b/code/train3.py index 73cd534..b3c8452 100644 --- a/code/train3.py +++ b/code/train3.py @@ -4,6 +4,7 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import thinkbayes import thinkplot @@ -40,7 +41,7 @@ def MakePosterior(high, dataset, constructor): Returns: posterior Suite """ - hypos = xrange(1, high+1) + hypos = range(1, high+1) suite = constructor(hypos) suite.name = str(high) @@ -80,18 +81,18 @@ def main(): for high in [500, 1000, 2000]: suite = MakePosterior(high, dataset, Train2) - print high, suite.Mean() + print(high, suite.Mean()) thinkplot.Save(root='train3', xlabel='Number of trains', ylabel='Probability') interval = Percentile(suite, 5), Percentile(suite, 95) - print interval + print(interval) cdf = thinkbayes.MakeCdfFromPmf(suite) interval = cdf.Percentile(5), cdf.Percentile(95) - print interval + print(interval) if __name__ == '__main__': diff --git a/code/variability.py b/code/variability.py index 282f70d..3f63019 100644 --- a/code/variability.py +++ b/code/variability.py @@ -4,11 +4,11 @@ Copyright 2012 Allen B. Downey License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html """ +from __future__ import print_function import math import numpy -import cPickle -import numpy +import pickle import random import scipy @@ -111,7 +111,7 @@ def LogUpdateSetMedianIPR(self, data): # compute summary stats median, s = MedianS(xs, num_sigmas=NUM_SIGMAS) - print 'median, s', median, s + print('median, s', median, s) self.LogUpdateSetABC(n, median, s) @@ -176,7 +176,7 @@ def MakeRange(estimate, stderr): m = numpy.mean(xs) s = numpy.std(xs) - print 'classical estimators', m, s + print('classical estimators', m, s) # compute ranges for m and s stderr_m = s / math.sqrt(n) @@ -228,7 +228,7 @@ def PlotCdfs(d, labels): labels: map from key to string label """ thinkplot.Clf() - for key, xs in d.iteritems(): + for key, xs in d.items(): mu = thinkstats.Mean(xs) xs = thinkstats.Jitter(xs, 1.3) xs = [x-mu for x in xs] @@ -260,9 +260,9 @@ def PlotCoefVariation(suites): thinkplot.PrePlot(num=2) pmfs = {} - for label, suite in suites.iteritems(): + for label, suite in suites.items(): pmf = CoefVariation(suite) - print 'CV posterior mean', pmf.Mean() + print('CV posterior mean', pmf.Mean()) cdf = thinkbayes.MakeCdfFromPmf(pmf, label) thinkplot.Cdf(cdf) @@ -272,16 +272,14 @@ def PlotCoefVariation(suites): xlabel='Coefficient of variation', ylabel='Probability') - print 'female bigger', thinkbayes.PmfProbGreater(pmfs['female'], - pmfs['male']) - print 'male bigger', thinkbayes.PmfProbGreater(pmfs['male'], - pmfs['female']) + print('female bigger', thinkbayes.PmfProbGreater(pmfs['female'], pmfs['male'])) + print('male bigger', thinkbayes.PmfProbGreater(pmfs['male'], pmfs['female'])) def PlotOutliers(samples): """Make CDFs showing the distribution of outliers.""" cdfs = [] - for label, sample in samples.iteritems(): + for label, sample in samples.items(): outliers = [x for x in sample if x < 150] cdf = thinkbayes.MakeCdfFromList(outliers, label) @@ -324,7 +322,7 @@ def DumpHeights(data_dir='.', n=10000): [d[r.sex].append(r.htm3) for r in resp.records if r.htm3 != 'NA'] fp = open('variability_data.pkl', 'wb') - cPickle.dump(d, fp) + pickle.dump(d, fp) fp.close() @@ -333,8 +331,8 @@ def LoadHeights(): returns: map from sex code to list of heights. """ - fp = open('variability_data.pkl', 'r') - d = cPickle.load(fp) + fp = open('variability_data.pkl', 'rb') + d = pickle.load(fp) fp.close() return d @@ -440,14 +438,14 @@ def Summarize(xs): xs: sequence of values """ - # print smallest and largest + # print(smallest and largest) xs.sort() - print 'smallest', xs[:10] - print 'largest', xs[-10:] + print('smallest', xs[:10]) + print('largest', xs[-10:]) - # print median and interquartile range + # print(median and interquartile range) cdf = thinkbayes.MakeCdfFromList(xs) - print cdf.Percentile(25), cdf.Percentile(50), cdf.Percentile(75) + print(cdf.Percentile(25), cdf.Percentile(50), cdf.Percentile(75)) def RunEstimate(update_func, num_points=31, median_flag=False): @@ -463,9 +461,9 @@ def RunEstimate(update_func, num_points=31, median_flag=False): # PlotCdfs(d, labels) suites = {} - for key, xs in d.iteritems(): + for key, xs in d.items(): name = labels[key] - print name, len(xs) + print(name, len(xs)) Summarize(xs) xs = thinkstats.Jitter(xs, 1.3) @@ -474,14 +472,14 @@ def RunEstimate(update_func, num_points=31, median_flag=False): suite = Height(mus, sigmas, name) suites[name] = suite update_func(suite, xs) - print 'MLE', suite.MaximumLikelihood() + print('MLE', suite.MaximumLikelihood()) PlotPosterior(suite) pmf_m = suite.Marginal(0) pmf_s = suite.Marginal(1) - print 'marginal mu', pmf_m.Mean(), pmf_m.Var() - print 'marginal sigma', pmf_s.Mean(), pmf_s.Var() + print('marginal mu', pmf_m.Mean(), pmf_m.Var()) + print('marginal sigma', pmf_s.Mean(), pmf_s.Var()) # PlotMarginals(suite)