Introduce some repeatedly used IRT-related functions #102

netique · 2023-11-20T21:07:57Z

We often copy this portion into our shared.R. These functions seem pretty stable but may struggle with unidimensional models and data not obtained from LS.

make_mirt_data <- function(.data, codebook, na_level = NULL,
                           ignore_case = TRUE, negate = FALSE, plot = TRUE) {
  # make a "backup" of original factor variables for barplots etc.
  .data <- .data |>
    mutate(across(all_of(codebook$item), .names = ".{.col}"))

  common_lvls <- .data %>%
    select(all_of(codebook$item)) |>
    map(levels) |>
    unique()

  if (length(common_lvls) != 1L) rlang::abort("levels of items differ")

  common_lvls <- common_lvls[[1L]]


  if (!is.null(na_level)) {
    match <- str_subset(common_lvls, regex(na_level, ignore_case = ignore_case),
      negate = negate
    )

    .data <- .data |>
      mutate(
        across(
          all_of(codebook$item),
          \(item) fct_nanify(item,
            level = na_level,
            ignore_case = ignore_case, negate = negate
          ) |>
            fct_drop(match) # remove the level after data are removed
        )
      )
  }
  new_common_lvls <- .data %>%
    select(all_of(codebook$item)) |>
    map(levels) |>
    unique()

  if (length(new_common_lvls) != 1L) rlang::abort("new levels of items differ")

  new_common_lvls <- new_common_lvls[[1L]]

  message("check new intergers (for nonreversed items only):")
  names(new_common_lvls) <- seq_along(new_common_lvls)

  print(new_common_lvls)


  # now reversed items
  rev_items <- codebook |>
    filter(rev) |>
    pull(item)
  max_val <- length(new_common_lvls)

  .data <- .data |>
    mutate(
      across(all_of(codebook$item), \(item) as.integer(item)),
      across(any_of(rev_items), \(item) (max_val + 1L) - item)
    )

  if (plot) {
    .data |>
      select(any_of(codebook$item)) |>
      ShinyItemAnalysis::plot_corr("polychoric", shape = "square") |>
      print()
  }

  .data
}


# fun for building mirt model out of codebook
mod_from_codebook <- function(codebook, cov_terms = TRUE) {
  out <- codebook %>%
    mutate(id = row_number(), dim = fct_inorder(dim)) %>%
    group_by(dim) %>%
    summarise(item = str_flatten(id, collapse = ",")) %>%
    glue::glue_data("{dim} = {item}") %>%
    str_flatten(collapse = "\n")

  if (cov_terms) {
    cov_terms <- codebook$dim %>%
      unique() %>%
      str_flatten(collapse = "*")

    out <- str_c(out, "\nCOV = ", cov_terms)
  }

  out
}


# fun for fitting GRM on W1 data accoring to codebook and model specified above
fit_grm <- function(.data, codebook, drop_empty = TRUE, ...) {
  mod <- codebook %>% mod_from_codebook()

  # ensure the order is the same as in the codebook which defines
  # item indices mirt acts upon
  .data <- .data %>% select(all_of(codebook$item))

  if (drop_empty) {
    # refactor: .data |> drop_na()
    .data <- .data |>
      filter(if_any(everything(), ~ !is.na(.x)))
  }

  .data |>
    mirt( itemtype = "graded", method = "MHRM", ...)
}



bind_thetas <- function(.data, fit_object, codebook, restore_items = TRUE, plausible_draw = FALSE, ...) {
  resp_patt <- .data %>% select(all_of(codebook$item))

  # if plausible vals, mirt return raw matrix without names
  factor_names <- fit_object |> extract.mirt("factorNames")

  method <- if (length(factor_names) > 1L) "MAP" else "EAP"
  method <- if (plausible_draw) "plausible" else method

  qmc <- if (length(factor_names) > 1L) TRUE else FALSE

  set.seed(123)
  res <- fscores(fit_object,
    response.pattern = resp_patt, method = method, QMC = qmc, ...
  )


  # we have to treat the output diferently if using PVs (its a mistake of mirt to some extent)
  if (plausible_draw) {
    # get indices of empty rows
    empty_case <- rowMeans(is.na(resp_patt)) == 1

    # make a form to cast the results - the same dims as the resp_patt
    out <- matrix(nrow = nrow(resp_patt), ncol = length(factor_names))

    # fill non-empty rows with the results - other rows are NAs, as they would be in normal fscores output
    out[!empty_case, ] <- res

    # name cols
    colnames(out) <- str_c(factor_names, "_pv")
  }

  # restore original variables, as the mirt ones are not needed anymore
  # (we have the thetas)
  if (restore_items) {
    .data <- .data |>
      select(-any_of(codebook$item)) |> # drop mirt vars
      rename_with(
        \(item) str_remove(item, "\\."),
        any_of(str_c(".", codebook$item))
      )
  }

  if (plausible_draw) {
    bind_cols(.data, out)
  } else {
    bind_cols(.data, res)
  }
}





# codebook ----------------------------------------------------------------

# https://www.oecd.org/education/talis/TALIS_2018_Technical_Report.pdf
# page 285, item "M" is added only in the Czech version
# and does not really correlate with the rest

se_codebook <- tribble(
  ~item, ~dim, ~rev,
  "se_a", "eng", FALSE,
  "se_b", "eng", FALSE,
  "se_c", "ins", FALSE,
  "se_d", "cls", FALSE,
  "se_e", "eng", FALSE,
  "se_f", "cls", FALSE,
  "se_g", "eng", FALSE,
  "se_h", "cls", FALSE,
  "se_i", "cls", FALSE,
  "se_j", "ins", FALSE,
  "se_k", "ins", FALSE,
  "se_l", "ins", FALSE
)```

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Introduce some repeatedly used IRT-related functions #102

Introduce some repeatedly used IRT-related functions #102

netique commented Nov 20, 2023

Introduce some repeatedly used IRT-related functions #102

Introduce some repeatedly used IRT-related functions #102

Comments

netique commented Nov 20, 2023