diff --git a/gold_presentation.pdf b/gold_presentation.pdf new file mode 100644 index 0000000..d45f827 Binary files /dev/null and b/gold_presentation.pdf differ diff --git a/golf.Rmd b/golf.Rmd index eab557b..36572f0 100644 --- a/golf.Rmd +++ b/golf.Rmd @@ -1,30 +1,143 @@ --- -title: "A Probability Model for Golf Putting" -author: "Eric Novik" +title: "A Probability Model for Golf Putting (paper by Gelman and Nolan)" +author: "Eric Novik and Daniel Lee" date: "28 April 2016" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) +library(needs) +needs(readr) +needs(ggplot2) +needs(dplyr) +needs(magrittr) +needs(rstan) +needs(tidyr) ``` -## R Markdown +## Set up the Dataset +```{r} +N <- 19 +tries <- c(1443, 694, 455, 353, 272, 256, 240, 217, 200, 237, 202, 192, 174, + 167, 201, 195, 191, 147, 152) +successes <- c(1346, 577, 337, 208, 149, 136, 111, 69, 67, 75, 52, 46, 54, + 28, 27, 31, 33, 20, 24) +dist <- 2:20 * 12 # converting to inches +golf <- data.frame(tries, successes, dist) -This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . +# The golf ball has diameter 2r = 1.68 inches +r <- 1.68/2 +# and the hole has diameter 2R = 4.25 inches +R <- 4.25/2 -When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: +# estimated parameter from the paper +sigma <- 0.026 +``` + +## Helper Function for the Threshold Angle +```{r} +theta0 <- function(x) { + asin((R - r) / x) +} +``` + +## Replicating the Graph from the Paper +```{r} +golf %<>% + mutate( + p = successes / tries, + error_sd = sqrt((p * (1 - p)) / tries), + lower = p - 2 * error_sd, + upper = p + 2 * error_sd, + fit = 2 * pnorm(theta0(dist) / sigma) - 1 + ) -```{r cars} -summary(cars) +limits <- with(golf, aes(ymax = upper, ymin = lower)) +p <- ggplot(golf, aes(x = dist, y = p)) +p <- p + geom_pointrange(limits) +p <- p + geom_line(aes(y = fit), colour = "red") +p <- p + xlab("Distance (feet)") + + ylab("Proportion of Success") + + theme_bw() +p ``` -## Including Plots +## Stan Model +This model is defined in ```golf.stan``` file +```{r, eval=FALSE} +functions { + real theta0(real x, real R, real r) { + return asin((R - r) / x); + } +} + +data { + int N; + int tries[N]; + int successes[N]; + real dist[N]; +} + +transformed data { + real R; + real r; + R <- 4.25 / 2; + r <- 1.68 / 2; +} + +parameters { + real sigma; +} + +model { + real p[N]; + + for (n in 1:N) { + p[n] <- 2 * Phi(theta0(dist[n], R, r) / sigma) - 1; + } + + sigma ~ cauchy(0, 2.5); + successes ~ binomial(tries, p); +} -You can also embed plots, for example: +generated quantities { + real sigma_degrees; + sigma_degrees <- 180/pi() * sigma; +} +``` + +## Fitting the Model in Stan +Data is passed directly from the R environment to Stan. +```{r, cache=TRUE} +golf_fit <- stan(file = "golf.stan", iter = 200) +golf_fit +``` + +## Extracting Parameter Values and Plotting +```{r} +sigma <- rstan::extract(golf_fit, pars = 'sigma') $sigma +sigma_deg <- rstan::extract(golf_fit, pars = 'sigma_degrees')$sigma_degrees + +plot_dens <- function(param) { + ggplot(as.data.frame(param), aes(x = param)) + + geom_line(stat = "density") + theme_bw() +} + +plot_dens(sigma) + xlab("Sigma") +plot_dens(sigma_deg) + xlab("Sigma Degrees") +``` -```{r pressure, echo=FALSE} -plot(pressure) +## Plotting Sigma from Multiple Draws +```{r} +fits <- sapply(sigma, FUN = function(x, y) 2 * pnorm(theta0(y), sd = x) - 1, y = golf$dist) +dim(fits) +head(fits)[, 1:6] +fits <- data.frame(fits, dist = golf$dist) +fits <- gather(fits, key = fit, value = measurement, -dist) +dim(fits) +head(fits) +p + geom_line(data = fits, aes(x = dist, y = measurement, group = fit), alpha = 1/150) + theme_bw() ``` -Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. + diff --git a/golf.html b/golf.html new file mode 100644 index 0000000..9296e8a --- /dev/null +++ b/golf.html @@ -0,0 +1,375 @@ + + + + + + + + + + + + + + + +A Probability Model for Golf Putting (paper by Gelman and Nolan) + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+

Set up the Dataset

+
N <- 19
+tries <- c(1443, 694, 455, 353,  272, 256, 240, 217, 200, 237, 202, 192, 174, 
+           167, 201, 195, 191, 147, 152)
+successes <- c(1346, 577, 337,  208, 149, 136, 111, 69, 67, 75, 52, 46, 54,
+               28, 27, 31, 33, 20,  24)
+dist <- 2:20 * 12 # converting to inches
+golf <- data.frame(tries, successes, dist)
+
+# The golf ball has diameter 2r = 1.68 inches 
+r <- 1.68/2
+# and the hole has diameter 2R = 4.25 inches
+R <- 4.25/2
+
+# estimated parameter from the paper
+sigma <- 0.026
+
+
+

Helper Function for the Threshold Angle

+
theta0 <- function(x) {
+  asin((R - r) / x)
+}
+
+
+

Replicating the Graph from the Paper

+
golf %<>%
+  mutate(
+    p = successes / tries,
+    error_sd = sqrt((p  * (1 - p)) / tries),
+    lower = p - 2 * error_sd,
+    upper = p + 2 * error_sd,
+    fit = 2 * pnorm(theta0(dist) / sigma) - 1
+  )
+
+limits <- with(golf, aes(ymax = upper, ymin = lower))
+p <- ggplot(golf, aes(x = dist, y = p))
+p <- p + geom_pointrange(limits)
+p <- p + geom_line(aes(y = fit), colour = "red")
+p <- p +  xlab("Distance (feet)") + 
+  ylab("Proportion of Success") +
+  theme_bw()
+p
+

+
+
+

Stan Model

+

This model is defined in golf.stan file

+
functions {
+  real theta0(real x, real R, real r) {
+    return asin((R - r) / x);
+  }
+}
+
+data {
+  int N;
+  int<lower = 0> tries[N];
+  int<lower = 0> successes[N];
+  real<lower = 0> dist[N];
+}
+
+transformed data {
+  real R;
+  real r;
+  R <- 4.25 / 2;
+  r <- 1.68 / 2;
+}
+
+parameters {
+  real<lower = 0> sigma;
+}
+
+model {
+  real p[N];
+  
+  for (n in 1:N) {
+    p[n] <- 2 * Phi(theta0(dist[n], R, r) / sigma) - 1;
+  }
+  
+  sigma ~ cauchy(0, 2.5);
+  successes ~ binomial(tries, p);
+}
+
+generated quantities {
+  real sigma_degrees;
+  sigma_degrees <- 180/pi() * sigma;
+}
+
+
+

Fitting the Model in Stan

+

Data is passed directly from the R environment to Stan.

+
golf_fit <- stan(file = "golf.stan", iter = 200)
+
## 
+## SAMPLING FOR MODEL 'golf' NOW (CHAIN 1).
+## 
+## Chain 1, Iteration:   1 / 200 [  0%]  (Warmup)
+## Chain 1, Iteration:  20 / 200 [ 10%]  (Warmup)
+## Chain 1, Iteration:  40 / 200 [ 20%]  (Warmup)
+## Chain 1, Iteration:  60 / 200 [ 30%]  (Warmup)
+## Chain 1, Iteration:  80 / 200 [ 40%]  (Warmup)
+## Chain 1, Iteration: 100 / 200 [ 50%]  (Warmup)
+## Chain 1, Iteration: 101 / 200 [ 50%]  (Sampling)
+## Chain 1, Iteration: 120 / 200 [ 60%]  (Sampling)
+## Chain 1, Iteration: 140 / 200 [ 70%]  (Sampling)
+## Chain 1, Iteration: 160 / 200 [ 80%]  (Sampling)
+## Chain 1, Iteration: 180 / 200 [ 90%]  (Sampling)
+## Chain 1, Iteration: 200 / 200 [100%]  (Sampling)# 
+## #  Elapsed Time: 0.002103 seconds (Warm-up)
+## #                0.002134 seconds (Sampling)
+## #                0.004237 seconds (Total)
+## # 
+## 
+## SAMPLING FOR MODEL 'golf' NOW (CHAIN 2).
+## 
+## Chain 2, Iteration:   1 / 200 [  0%]  (Warmup)
+## Chain 2, Iteration:  20 / 200 [ 10%]  (Warmup)
+## Chain 2, Iteration:  40 / 200 [ 20%]  (Warmup)
+## Chain 2, Iteration:  60 / 200 [ 30%]  (Warmup)
+## Chain 2, Iteration:  80 / 200 [ 40%]  (Warmup)
+## Chain 2, Iteration: 100 / 200 [ 50%]  (Warmup)
+## Chain 2, Iteration: 101 / 200 [ 50%]  (Sampling)
+## Chain 2, Iteration: 120 / 200 [ 60%]  (Sampling)
+## Chain 2, Iteration: 140 / 200 [ 70%]  (Sampling)
+## Chain 2, Iteration: 160 / 200 [ 80%]  (Sampling)
+## Chain 2, Iteration: 180 / 200 [ 90%]  (Sampling)
+## Chain 2, Iteration: 200 / 200 [100%]  (Sampling)# 
+## #  Elapsed Time: 0.00274 seconds (Warm-up)
+## #                0.002709 seconds (Sampling)
+## #                0.005449 seconds (Total)
+## # 
+## 
+## SAMPLING FOR MODEL 'golf' NOW (CHAIN 3).
+## 
+## Chain 3, Iteration:   1 / 200 [  0%]  (Warmup)
+## Chain 3, Iteration:  20 / 200 [ 10%]  (Warmup)
+## Chain 3, Iteration:  40 / 200 [ 20%]  (Warmup)
+## Chain 3, Iteration:  60 / 200 [ 30%]  (Warmup)
+## Chain 3, Iteration:  80 / 200 [ 40%]  (Warmup)
+## Chain 3, Iteration: 100 / 200 [ 50%]  (Warmup)
+## Chain 3, Iteration: 101 / 200 [ 50%]  (Sampling)
+## Chain 3, Iteration: 120 / 200 [ 60%]  (Sampling)
+## Chain 3, Iteration: 140 / 200 [ 70%]  (Sampling)
+## Chain 3, Iteration: 160 / 200 [ 80%]  (Sampling)
+## Chain 3, Iteration: 180 / 200 [ 90%]  (Sampling)
+## Chain 3, Iteration: 200 / 200 [100%]  (Sampling)# 
+## #  Elapsed Time: 0.002454 seconds (Warm-up)
+## #                0.002107 seconds (Sampling)
+## #                0.004561 seconds (Total)
+## # 
+## 
+## SAMPLING FOR MODEL 'golf' NOW (CHAIN 4).
+## 
+## Chain 4, Iteration:   1 / 200 [  0%]  (Warmup)
+## Chain 4, Iteration:  20 / 200 [ 10%]  (Warmup)
+## Chain 4, Iteration:  40 / 200 [ 20%]  (Warmup)
+## Chain 4, Iteration:  60 / 200 [ 30%]  (Warmup)
+## Chain 4, Iteration:  80 / 200 [ 40%]  (Warmup)
+## Chain 4, Iteration: 100 / 200 [ 50%]  (Warmup)
+## Chain 4, Iteration: 101 / 200 [ 50%]  (Sampling)
+## Chain 4, Iteration: 120 / 200 [ 60%]  (Sampling)
+## Chain 4, Iteration: 140 / 200 [ 70%]  (Sampling)
+## Chain 4, Iteration: 160 / 200 [ 80%]  (Sampling)
+## Chain 4, Iteration: 180 / 200 [ 90%]  (Sampling)
+## Chain 4, Iteration: 200 / 200 [100%]  (Sampling)# 
+## #  Elapsed Time: 0.002247 seconds (Warm-up)
+## #                0.002186 seconds (Sampling)
+## #                0.004433 seconds (Total)
+## #
+
golf_fit
+
## Inference for Stan model: golf.
+## 4 chains, each with iter=200; warmup=100; thin=1; 
+## post-warmup draws per chain=100, total post-warmup draws=400.
+## 
+##                   mean se_mean   sd     2.5%      25%      50%      75%
+## sigma             0.03    0.00 0.00     0.03     0.03     0.03     0.03
+## sigma_degrees     1.53    0.00 0.02     1.48     1.52     1.53     1.55
+## lp__          -2926.80    0.05 0.77 -2929.06 -2926.93 -2926.53 -2926.33
+##                  97.5% n_eff Rhat
+## sigma             0.03   172 1.00
+## sigma_degrees     1.57   172 1.00
+## lp__          -2926.26   203 1.01
+## 
+## Samples were drawn using NUTS(diag_e) at Thu Apr 28 21:03:59 2016.
+## For each parameter, n_eff is a crude measure of effective sample size,
+## and Rhat is the potential scale reduction factor on split chains (at 
+## convergence, Rhat=1).
+
+
+

Extracting Parameter Values and Plotting

+
sigma <- rstan::extract(golf_fit, pars = 'sigma') $sigma
+sigma_deg <- rstan::extract(golf_fit, pars = 'sigma_degrees')$sigma_degrees
+
+plot_dens <- function(param) {
+  ggplot(as.data.frame(param), aes(x = param)) + 
+    geom_line(stat = "density") + theme_bw()
+}
+
+plot_dens(sigma) + xlab("Sigma")
+

+
plot_dens(sigma_deg) + xlab("Sigma Degrees")
+

+
+
+

Plotting Sigma from Multiple Draws

+
fits <- sapply(sigma, FUN = function(x, y) 2 * pnorm(theta0(y), sd = x) - 1, y = golf$dist)
+dim(fits)
+
## [1]  19 400
+
head(fits)[, 1:6]
+
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]
+## [1,] 0.9516929 0.9527677 0.9500655 0.9570835 0.9596918 0.9514495
+## [2,] 0.8118555 0.8139836 0.8086748 0.8227666 0.8282768 0.8113766
+## [3,] 0.6763448 0.6786804 0.6728697 0.6884131 0.6945994 0.6758203
+## [4,] 0.5702067 0.5724352 0.5668981 0.5817629 0.5877276 0.5697069
+## [5,] 0.4894228 0.4914663 0.4863922 0.5000407 0.5055417 0.4889647
+## [6,] 0.4272050 0.4290606 0.4244550 0.4368579 0.4418703 0.4267891
+
fits <- data.frame(fits, dist = golf$dist)
+fits <- gather(fits, key = fit, value = measurement, -dist)
+dim(fits)
+
## [1] 7600    3
+
head(fits)
+
##   dist fit measurement
+## 1   24  X1   0.9516929
+## 2   36  X1   0.8118555
+## 3   48  X1   0.6763448
+## 4   60  X1   0.5702067
+## 5   72  X1   0.4894228
+## 6   84  X1   0.4272050
+
p + geom_line(data = fits, aes(x = dist, y = measurement, group = fit), alpha = 1/150) + theme_bw()
+

+
+ + + + +
+ + + + + + + + diff --git a/golf.stan b/golf.stan new file mode 100644 index 0000000..74c7904 --- /dev/null +++ b/golf.stan @@ -0,0 +1,39 @@ +functions { + real theta0(real x, real R, real r) { + return asin((R - r) / x); + } +} + +data { + int N; + int tries[N]; + int successes[N]; + real dist[N]; +} + +transformed data { + real R; + real r; + R <- 4.25 / 2; + r <- 1.68 / 2; +} + +parameters { + real sigma; +} + +model { + real p[N]; + + for (n in 1:N) { + p[n] <- 2 * Phi(theta0(dist[n], R, r) / sigma) - 1; + } + + sigma ~ cauchy(0, 2.5); + successes ~ binomial(tries, p); +} + +generated quantities { + real sigma_degrees; + sigma_degrees <- 180/pi() * sigma; +}