Skip to content

Latest commit

 

History

History
110 lines (89 loc) · 3.66 KB

clumped-bootstrap-calibration.org

File metadata and controls

110 lines (89 loc) · 3.66 KB

example workflow

load libraries

## library(tidyverse) # dplyr, readr, purrr, glue, ggplot2, <3
library(dplyr)
library(ggplot2)

## library(boot)      # bootstrapping
## library(bfsl)      # best fit straight line, for the York regression
library(ggdist)    # for calculating averages and visualising distributions

theme_set(theme_bw() + theme(text = element_text(size = 24)))

# this sloppy package
library(clumpedcalib)

load calibration data and calculate bootstrapped York regression

A very very limited subset some calibration data.

raw <- readr::read_csv("dat/example_calib.csv",
                col_names = c("X", "D47", "sd_X", "sd_D47"))

# this is very much a toy example with only 100 bootstraps!
calib <- clumped_calib_boot(raw, Nsim = 100) |>
  # for the real deal, increase Nsim to something like 1e5 and save the results
  # for future re-use
  readr::write_csv(glue::glue("out/{lubridate::today()}_calib_clumped_boot.csv"))

calib <- readr::read_csv("out/2023-06-22_calib_clumped_boot.csv")

make a plot

raw |>
  ggplot(aes(x = X, y = D47)) +
  geom_point() +
  geom_errorbar(aes(xmin = X - sd_X, xmax = X + sd_X)) +
  geom_errorbar(aes(ymin = D47 - sd_D47, ymax = D47 + sd_D47)) +
  # for now just draw all the lines since we're only sampling a few
  # setting alpha lower allows you to overplot the draws
  # don't forget to subset to just a few 100 though, otherwise it will be slow
  geom_abline(aes(slope = slope, intercept = intercept),
              alpha = .2, data = calib)

imgs/calib_plot.png

load sample data

I’ve come up with some really silly sample data.

dat <- readr::read_csv("dat/fake_data.csv")

pl_raw <- dat |>
  ggplot(aes(x = age, y = D47_final, colour = factor(bins))) +
  geom_point()
pl_raw

imgs/data_raw.png

calculate bootstrapped means and apply the temperature calibration

# calculate other "normal" summary stats if desired, like N
oth <- dat |>
  group_by(bins) |>
  summarize(n = n())

# calculate d18Osw and temp
sum <- apply_calib_and_d18O_boot(data = dat,
                                 calib = calib,
                                 group = bins,
                                 Nsim = 100) |>
  left_join(oth)

# make a plot
sum |>
  ggplot(aes(x = age, y = temp)) +
  ggdist::geom_pointinterval(aes(ymin = temp.lower, ymax = temp.upper,
                                 linewidth = factor(.width))) +
  scale_linewidth_manual(values = c("0.68" = 9, "0.95" = 2), guide = "none") +
  geom_text(aes(label = paste("N =", n)), nudge_x = 1.5)

imgs/data_plot.png