rcode.qmd

---
title: "R code from PRACTICE sections"
execute:
  eval: false
---

::: {.callout-note}
This Notebook contains only the R code from the PRACTICE sections of the manuscript. The number of samples and repetitions for all simulations have been greatly reduced to ensure that the code runs quickly and with limited computational resources.
:::

## Simulate the data generating process

```{r}
simulate <- function(n_subjects = 100, n_items = 50,
  b_0 = 0.847, b_e = 1.350, b_a = -1.253, b_c = 2.603,
  b_ea = 0.790, b_ec = -1.393,
  sd_u0s = 0.5, sd_u0i = 0.5, ...){
  require(dplyr)
  require(faux)
  # simulate design
  dat <- add_random(subject = n_subjects, item = n_items) %>%
    add_between("subject", expert = c(1, 0), .prob = c(0.25, 0.75)) %>%
    mutate(advice_present = rbinom(n(), 1, prob = 2/3)) %>%
    mutate(advice_correct = if_else(advice_present == 1L, 
                                    rbinom(n(), 1L, prob = 0.8), 0L)) %>%
    # add random effects
    add_ranef("subject", u0s = sd_u0s) %>%
    add_ranef("item", u0i = sd_u0i) %>%
    # compute dependent variable
    mutate(linpred = b_0 + u0i + u0s +
      b_e * expert + b_a * advice_present + b_c * advice_correct +
      b_ea * expert * advice_present + b_ec * expert * advice_correct) %>%
    mutate(y_prob = plogis(linpred)) %>%
    mutate(y_bin = rbinom(n = n(), size = 1, prob = y_prob))
  dat
}
```

## Specify the population parameters

```{r}
b_0 <- qlogis(0.7)
b_e <- qlogis(0.9) - b_0
b_a <- qlogis(0.4) - b_0
b_ea <- qlogis(0.85) - b_0 - b_e - b_a
b_c <- qlogis(0.9) - b_0 - b_a
b_ec <- qlogis(0.95) - b_0 - b_e - b_a - b_c - b_ea
c(b_0 = b_0, b_e = b_e, b_a = b_a, b_c = b_c, b_ea = b_ea, b_ec = b_ec)
```

```{r}
plogis(b_0 + b_e + b_a + b_c + b_ea + b_ec)
```

### Insightful descriptive statistics

```{r}
library(tidyverse)
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500,
  sd_u0s = 0.5, sd_u0i = 0.5)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
 "student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
 "student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
 "student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  group_by(condition) %>%
  summarize(relative_frequency = sum(y_bin) / n())
```

### Insightful model based quantities

```{r}
#| label: fig-margdist1
#| fig-cap: "Marginal distributions including means, 66% and 95% confidence intervals for all experimental conditions."

library(ggdist)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
```

```{r}
#| label: fig-margdist2
#| fig-cap: "Marginal distributions including means, 66% and 95% confidence intervals for all experimental conditions while setting the standard deviation of item random intercepts to 0.01."
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0i = 0.01)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
```

```{r}
#| include: false
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0s = 0.01)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
```


```{r}
#| label: fig-margdist3
#| echo: false
#| fig-cap: "Marginal distributions including means, 66% and 95% confidence intervals for all experimental conditions while setting the standard deviation of subject and item random intercepts to 3."
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0s = 3, sd_u0i = 3)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
```

## Estimate the statistical model

```{r}
#| message: false
library(tidyverse)
library(lme4)
set.seed(1)
dat <- simulate(n_subjects = 100, n_items = 50)
f <- y_bin ~ 1 + expert + advice_present + advice_correct + 
  expert:advice_present + expert:advice_correct +
  (1|subject) + (1|item)
fit <- glmer(f, data = dat, family = "binomial")
summary(fit)
```

## Compute the estimate

```{r}
grid1 <- data.frame(advice_present = c(1, 0), advice_correct = c(1, 0), 
  expert = c(1, 1))
grid1
pred <- predict(fit, newdata = grid1, type = "response", re.form = NA)
pred
pred[1] - pred[2]
```

```{r}
library(marginaleffects)
library(tinytable)
grid2 <- expand_grid(advice_present = 0:1, 
  advice_correct = 0:1, expert = 0:1)
grid2
preds <- predictions(fit, newdata = grid2, 
  type = "response", re.form = NA)
print(preds, style = "tinytable") %>% theme_tt(theme = "resize")
```

```{r}
contrasts <- preds %>% 
  hypotheses(hypothesis = c(
    "b8 = b2",  # (correct advice, expert) - (no advice, expert)
    "b2 = b6",  # (no advice, expert) - (incorrect advice, expert) 
    "b7 = b1",  # (correct advice, student) - (no advice, student)
    "b1 = b5"), # (no advice, student) - (incorrect advice, student)
    equivalence = c(0, 0))
print(contrasts, style = "tinytable") %>% theme_tt(theme = "resize")
```

## Perform repeated simulations

```{r}
sim_and_analyse <- function(
  formula_chr = "y_bin ~ 1 + expert + advice_present + advice_correct + 
    expert:advice_present + expert:advice_correct + (1|subject) + (1|item)",
  contrasts = c("b8 = b2", "b2 = b6", "b7 = b1", "b1 = b5"), ...){
  require(lme4)
  require(marginaleffects)
  require(tidyr)
  # simulate data
  dat <- simulate(...)
  # fit model
  model <- glmer(as.formula(formula_chr), data = dat, family = "binomial")
  # compute contrasts
  contr_df <- expand_grid(advice_present = 0:1, advice_correct = 0:1,
    expert = 0:1)
  predictions(model, newdata = contr_df, type = "response", re.form = NA) %>%
    hypotheses(hypothesis = contrasts, equivalence = c(0, 0)) %>%
    data.frame()
}
```

```{r}
#| message: false
library(future)
plan("multisession", workers = 4)
set.seed(2)
```

```{r}
#| message: false
library(furrr)
sim_result <- crossing(
  rep = 1:5,
  n_subjects = c(100, 150, 200, 250),
  n_items = c(10, 30, 50, 70)
) %>%
  mutate(res = future_pmap(., sim_and_analyse, 
    .options = furrr_options(seed = TRUE))) %>%
  unnest(col = res)
```

#### Power results

```{r}
#| label: fig-finalpwr
#| fig-cap: "Simulation-based power estimates including 95% confidence interval of the case study for different numbers of subjects and items, based on a significance level of 0.05."
library(binom)
alpha <- 0.05
power <- sim_result %>%
  pivot_wider(names_from = term, names_sep = "_", 
    values_from = estimate:p.value.equiv) %>%
  group_by(n_subjects, n_items) %>% 
  summarise(
    power = mean(`p.value.noninf_b1=b5` < alpha & 
        `p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha & 
        `p.value.noninf_b7=b1` < alpha), 
    n_sig = sum(`p.value.noninf_b1=b5` < alpha & 
        `p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha & 
        `p.value.noninf_b7=b1` < alpha),
    n = n(),
    ci.lwr = binom.confint(n_sig, n, method = "wilson")$lower,
    ci.upr = binom.confint(n_sig, n, method = "wilson")$upper, 
    .groups = "drop")
power %>%
  mutate(across(c(n_subjects, n_items), factor)) %>%
  ggplot(aes(n_subjects, n_items, fill = power)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]", 
                                power, ci.lwr, ci.upr)), 
    color = "white", size = 4) +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlab("number of subjects") + ylab("number of items")
```

#### Precision results

```{r}
#| label: fig-finalprecision
#| fig-cap: "Simulation-based precision estimates (expected width of confidence intervals) including 95% confidence interval of the case study for different numbers of subjects and items, based on a confidence level of 0.95."
precision <- sim_result %>%
  pivot_wider(names_from = term, names_sep = "_", 
    values_from = estimate:p.value.equiv) %>%
  group_by(n_subjects, n_items) %>%
  mutate(width = `conf.high_b8=b2` - `conf.low_b8=b2`) %>%
  summarise(precision = mean(width),
    ci.lwr = t.test(width)$conf.int[1],
    ci.upr = t.test(width)$conf.int[2], 
    .groups = "drop")
precision %>%
  mutate(across(c(n_subjects, n_items), factor)) %>%
  ggplot(aes(n_subjects, n_items, fill = precision)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]", 
                                precision, ci.lwr, ci.upr)), 
    color = "white", size = 4) +
  scale_fill_viridis_c(limits = c(0, 0.3), direction = -1) +
  guides(fill = guide_legend(reverse=FALSE))
```