Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce some repeatedly used IRT-related functions #102

Open
netique opened this issue Nov 20, 2023 · 0 comments
Open

Introduce some repeatedly used IRT-related functions #102

netique opened this issue Nov 20, 2023 · 0 comments

Comments

@netique
Copy link
Member

netique commented Nov 20, 2023

We often copy this portion into our shared.R. These functions seem pretty stable but may struggle with unidimensional models and data not obtained from LS.

make_mirt_data <- function(.data, codebook, na_level = NULL,
                           ignore_case = TRUE, negate = FALSE, plot = TRUE) {
  # make a "backup" of original factor variables for barplots etc.
  .data <- .data |>
    mutate(across(all_of(codebook$item), .names = ".{.col}"))

  common_lvls <- .data %>%
    select(all_of(codebook$item)) |>
    map(levels) |>
    unique()

  if (length(common_lvls) != 1L) rlang::abort("levels of items differ")

  common_lvls <- common_lvls[[1L]]


  if (!is.null(na_level)) {
    match <- str_subset(common_lvls, regex(na_level, ignore_case = ignore_case),
      negate = negate
    )

    .data <- .data |>
      mutate(
        across(
          all_of(codebook$item),
          \(item) fct_nanify(item,
            level = na_level,
            ignore_case = ignore_case, negate = negate
          ) |>
            fct_drop(match) # remove the level after data are removed
        )
      )
  }
  new_common_lvls <- .data %>%
    select(all_of(codebook$item)) |>
    map(levels) |>
    unique()

  if (length(new_common_lvls) != 1L) rlang::abort("new levels of items differ")

  new_common_lvls <- new_common_lvls[[1L]]

  message("check new intergers (for nonreversed items only):")
  names(new_common_lvls) <- seq_along(new_common_lvls)

  print(new_common_lvls)


  # now reversed items
  rev_items <- codebook |>
    filter(rev) |>
    pull(item)
  max_val <- length(new_common_lvls)

  .data <- .data |>
    mutate(
      across(all_of(codebook$item), \(item) as.integer(item)),
      across(any_of(rev_items), \(item) (max_val + 1L) - item)
    )

  if (plot) {
    .data |>
      select(any_of(codebook$item)) |>
      ShinyItemAnalysis::plot_corr("polychoric", shape = "square") |>
      print()
  }

  .data
}


# fun for building mirt model out of codebook
mod_from_codebook <- function(codebook, cov_terms = TRUE) {
  out <- codebook %>%
    mutate(id = row_number(), dim = fct_inorder(dim)) %>%
    group_by(dim) %>%
    summarise(item = str_flatten(id, collapse = ",")) %>%
    glue::glue_data("{dim} = {item}") %>%
    str_flatten(collapse = "\n")

  if (cov_terms) {
    cov_terms <- codebook$dim %>%
      unique() %>%
      str_flatten(collapse = "*")

    out <- str_c(out, "\nCOV = ", cov_terms)
  }

  out
}


# fun for fitting GRM on W1 data accoring to codebook and model specified above
fit_grm <- function(.data, codebook, drop_empty = TRUE, ...) {
  mod <- codebook %>% mod_from_codebook()

  # ensure the order is the same as in the codebook which defines
  # item indices mirt acts upon
  .data <- .data %>% select(all_of(codebook$item))

  if (drop_empty) {
    # refactor: .data |> drop_na()
    .data <- .data |>
      filter(if_any(everything(), ~ !is.na(.x)))
  }

  .data |>
    mirt( itemtype = "graded", method = "MHRM", ...)
}



bind_thetas <- function(.data, fit_object, codebook, restore_items = TRUE, plausible_draw = FALSE, ...) {
  resp_patt <- .data %>% select(all_of(codebook$item))

  # if plausible vals, mirt return raw matrix without names
  factor_names <- fit_object |> extract.mirt("factorNames")

  method <- if (length(factor_names) > 1L) "MAP" else "EAP"
  method <- if (plausible_draw) "plausible" else method

  qmc <- if (length(factor_names) > 1L) TRUE else FALSE

  set.seed(123)
  res <- fscores(fit_object,
    response.pattern = resp_patt, method = method, QMC = qmc, ...
  )


  # we have to treat the output diferently if using PVs (its a mistake of mirt to some extent)
  if (plausible_draw) {
    # get indices of empty rows
    empty_case <- rowMeans(is.na(resp_patt)) == 1

    # make a form to cast the results - the same dims as the resp_patt
    out <- matrix(nrow = nrow(resp_patt), ncol = length(factor_names))

    # fill non-empty rows with the results - other rows are NAs, as they would be in normal fscores output
    out[!empty_case, ] <- res

    # name cols
    colnames(out) <- str_c(factor_names, "_pv")
  }

  # restore original variables, as the mirt ones are not needed anymore
  # (we have the thetas)
  if (restore_items) {
    .data <- .data |>
      select(-any_of(codebook$item)) |> # drop mirt vars
      rename_with(
        \(item) str_remove(item, "\\."),
        any_of(str_c(".", codebook$item))
      )
  }

  if (plausible_draw) {
    bind_cols(.data, out)
  } else {
    bind_cols(.data, res)
  }
}





# codebook ----------------------------------------------------------------

# https://www.oecd.org/education/talis/TALIS_2018_Technical_Report.pdf
# page 285, item "M" is added only in the Czech version
# and does not really correlate with the rest

se_codebook <- tribble(
  ~item, ~dim, ~rev,
  "se_a", "eng", FALSE,
  "se_b", "eng", FALSE,
  "se_c", "ins", FALSE,
  "se_d", "cls", FALSE,
  "se_e", "eng", FALSE,
  "se_f", "cls", FALSE,
  "se_g", "eng", FALSE,
  "se_h", "cls", FALSE,
  "se_i", "cls", FALSE,
  "se_j", "ins", FALSE,
  "se_k", "ins", FALSE,
  "se_l", "ins", FALSE
)```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant