From ce5d2d4053523cae5cf1abf0f0c3324c351267e4 Mon Sep 17 00:00:00 2001 From: Sascha Meyen Date: Fri, 25 Oct 2024 11:03:19 +0200 Subject: [PATCH] Updated handling of extreme hit and false alarm rates. Also unified debiasing simulation so that not each measure is simulated separately (for speedup). --- R/estimateMetaI.R | 44 ++++++++----------- R/int_estimate_RMI.R | 6 ++- ...ier.R => int_estimate_contingency_table.R} | 12 ++--- R/int_estimate_meta_I.R | 10 +++-- R/int_estimate_meta_Ir1.R | 16 ++++--- R/int_estimate_meta_Ir1_acc.R | 6 ++- R/int_estimate_meta_Ir2.R | 5 ++- R/int_estimate_sensitivity.R | 11 +++++ R/int_get_analytic_information_bounds.R | 1 + R/int_get_bias_reduced_meta_measure.R | 41 ++++++++--------- 10 files changed, 89 insertions(+), 63 deletions(-) rename R/{int_estimate_classifier.R => int_estimate_contingency_table.R} (78%) diff --git a/R/estimateMetaI.R b/R/estimateMetaI.R index 548d667..4a4eb7d 100644 --- a/R/estimateMetaI.R +++ b/R/estimateMetaI.R @@ -123,34 +123,19 @@ estimateMetaI <- function(data, bias_reduction = TRUE) { s <- participant == data$participant part_data <- data[s, ] - estimated_classifier <- estimate_classifier(part_data) - number_of_stimuli <- table(part_data$stimulus) - - re <- data.frame(participant = participant , - meta_I = estimate_meta_I(estimated_classifier) , - meta_Ir1 = estimate_meta_Ir1(estimated_classifier) , - meta_Ir1_acc = estimate_meta_Ir1_acc(estimated_classifier), - meta_Ir2 = estimate_meta_Ir2(estimated_classifier) , - RMI = estimate_RMI(estimated_classifier) ) + estimated_table <- estimate_contingency_table(part_data) + + re <- data.frame(participant = participant, + get_meta_I_measures(estimated_table)) + if (bias_reduction) { - rb <- data.frame(meta_I_debiased = get_bias_reduced_meta_measure(estimated_classifier, - number_of_stimuli , - estimate_meta_I ) , - meta_Ir1_debiased = get_bias_reduced_meta_measure(estimated_classifier, - number_of_stimuli , - estimate_meta_Ir1 ) , - meta_Ir1_acc_debiased = get_bias_reduced_meta_measure(estimated_classifier , - number_of_stimuli , - estimate_meta_Ir1_acc), - meta_Ir2_debiased = get_bias_reduced_meta_measure(estimated_classifier, - number_of_stimuli , - estimate_meta_Ir2 ) , - RMI_debiased = get_bias_reduced_meta_measure(estimated_classifier, - number_of_stimuli , - estimate_RMI ) ) - re <- cbind(re, rb) + cat("Bias reduction for participant ", participant, " (speed up with 'bias_reduction = FALSE')\n") + re <- data.frame(participant = participant, + get_meta_I_measures(estimated_table), + debiased = get_bias_reduced_meta_I_measures(estimated_table)) } + res <- rbind(res, re) } @@ -159,3 +144,12 @@ estimateMetaI <- function(data, bias_reduction = TRUE) { res } + +get_meta_I_measures <- function(estimated_table) { + meta_I_measures <- data.frame(meta_I = estimate_meta_I(estimated_table) , + meta_Ir1 = estimate_meta_Ir1(estimated_table) , + meta_Ir1_acc = estimate_meta_Ir1_acc(estimated_table), + meta_Ir2 = estimate_meta_Ir2(estimated_table) , + RMI = estimate_RMI(estimated_table) ) + meta_I_measures +} diff --git a/R/int_estimate_RMI.R b/R/int_estimate_RMI.R index dd71173..836b38f 100644 --- a/R/int_estimate_RMI.R +++ b/R/int_estimate_RMI.R @@ -1,4 +1,8 @@ -estimate_RMI <- function(estimated_classifier) { +estimate_RMI <- function(estimated_table) { + + # Normalize + estimated_classifier <- estimated_table/sum(estimated_table) + prior <- rowSums(estimated_classifier) # Prior accuracy <- get_accuracy(estimated_classifier) # Accuracy information <- get_information(estimated_classifier) # Information diff --git a/R/int_estimate_classifier.R b/R/int_estimate_contingency_table.R similarity index 78% rename from R/int_estimate_classifier.R rename to R/int_estimate_contingency_table.R index 43dd736..58b18c3 100644 --- a/R/int_estimate_classifier.R +++ b/R/int_estimate_contingency_table.R @@ -2,15 +2,15 @@ # rating) combinations in a contingency matrix with one row per stimulus and # one column per response. Columnwise responses are sorted from "certaintly # the first stimulus" (left) to "certainly the second stimulus" (right). -estimate_classifier <- function(data) +estimate_contingency_table <- function(data) { number_of_confidence_steps <- length(levels(data$rating)) confidence_steps <- seq_len(number_of_confidence_steps) response_steps <- c(-rev(confidence_steps), confidence_steps) - estimated_classifier <- matrix(0, nrow = 2, ncol = 2*length(confidence_steps)) - rownames(estimated_classifier) <- paste0("Stimulus = ", c("-1", "+1")) - colnames(estimated_classifier) <- paste0("Response = ", response_steps) + estimated_table <- matrix(0, nrow = 2, ncol = 2*length(confidence_steps)) + rownames(estimated_table) <- paste0("Stimulus = ", c("-1", "+1")) + colnames(estimated_table) <- paste0("Response = ", response_steps) tab <- table(data$stimulus, data$rating, data$correct) @@ -30,10 +30,10 @@ estimate_classifier <- function(data) which_response <- which(response == response_steps) - estimated_classifier[which_stimulus, which_response] <- mean(s) + estimated_table[which_stimulus, which_response] <- sum(s) } } } - estimated_classifier + estimated_table } diff --git a/R/int_estimate_meta_I.R b/R/int_estimate_meta_I.R index 042b86c..167443d 100644 --- a/R/int_estimate_meta_I.R +++ b/R/int_estimate_meta_I.R @@ -1,6 +1,10 @@ -estimate_meta_I <- function(estimated_classifier) { - p <- rowSums(estimated_classifier) # Prior - a <- get_accuracy(estimated_classifier) # Accuracy +estimate_meta_I <- function(estimated_table) { + + # Normalize + estimated_classifier <- estimated_table/sum(estimated_table) + + p <- rowSums(estimated_classifier) # Prior + a <- get_accuracy(estimated_classifier) # Accuracy info <- get_information(estimated_classifier) # Transmitted information information_bounds <- get_analytic_information_bounds(p, a) diff --git a/R/int_estimate_meta_Ir1.R b/R/int_estimate_meta_Ir1.R index e271ef8..b1ed922 100644 --- a/R/int_estimate_meta_Ir1.R +++ b/R/int_estimate_meta_Ir1.R @@ -1,9 +1,13 @@ -estimate_meta_Ir1 <- function(estimated_classifier) { - p <- rowSums(estimated_classifier) # Prior - d <- estimate_sensitivity(estimated_classifier) # Sensitivity - a <- pnorm(abs(d)/2) # Accuracy - - meta_I <- estimate_meta_I(estimated_classifier) # Meta-I +estimate_meta_Ir1 <- function(estimated_table) { + + # Normalize + estimated_classifier <- estimated_table/sum(estimated_table) + + p <- rowSums(estimated_classifier) # Prior + d <- estimate_sensitivity(estimated_table) # Sensitivity + a <- pnorm(abs(d)/2) # Accuracy + + meta_I <- estimate_meta_I(estimated_classifier) # Meta-I information_bounds <- get_analytic_information_bounds(p, a) lower_bound <- information_bounds$lowest # Lower bound on trans. information diff --git a/R/int_estimate_meta_Ir1_acc.R b/R/int_estimate_meta_Ir1_acc.R index 6087434..31ed83f 100644 --- a/R/int_estimate_meta_Ir1_acc.R +++ b/R/int_estimate_meta_Ir1_acc.R @@ -1,4 +1,8 @@ -estimate_meta_Ir1_acc <- function(estimated_classifier) { +estimate_meta_Ir1_acc <- function(estimated_table) { + + # Normalize + estimated_classifier <- estimated_table/sum(estimated_table) + p <- rowSums(estimated_classifier) # Prior a <- get_accuracy(estimated_classifier) # Sensitivity d <- 2*qnorm(a) # Accuracy diff --git a/R/int_estimate_meta_Ir2.R b/R/int_estimate_meta_Ir2.R index 6f3c72c..5b2bc28 100644 --- a/R/int_estimate_meta_Ir2.R +++ b/R/int_estimate_meta_Ir2.R @@ -1,4 +1,7 @@ -estimate_meta_Ir2 <- function(estimated_classifier){ +estimate_meta_Ir2 <- function(estimated_table) { + + # Normalize + estimated_classifier <- estimated_table/sum(estimated_table) meta_I <- estimate_meta_I(estimated_classifier) # Meta-I diff --git a/R/int_estimate_sensitivity.R b/R/int_estimate_sensitivity.R index 9993c03..7660de1 100644 --- a/R/int_estimate_sensitivity.R +++ b/R/int_estimate_sensitivity.R @@ -13,6 +13,17 @@ estimate_sensitivity <- function(estimated_classifier){ n_correctly_predict_second_row <- sum(estimated_classifier[2, idx_predicting_second_row]) n_predict_second_row <- sum(estimated_classifier[ , idx_predicting_second_row]) + # Avoid infinite values by using Miller's (1996) 0.5-convention + # (originally suggested by Murdock and Ogilvie, 1968). + if (n_correctly_predict_first_row == n_predict_first_row) + n_correctly_predict_first_row <- n_correctly_predict_first_row - 0.5 + if (n_correctly_predict_first_row == 0) + n_correctly_predict_first_row <- 0.5 + if (n_correctly_predict_second_row == n_predict_second_row) + n_correctly_predict_second_row <- n_correctly_predict_second_row - 0.5 + if (n_correctly_predict_second_row == 0) + n_correctly_predict_second_row <- 0.5 + # Calculcate sensitivity hit_rate <- n_correctly_predict_first_row / n_predict_first_row false_alarm_rate <- 1 - ( n_correctly_predict_second_row / n_predict_second_row ) diff --git a/R/int_get_analytic_information_bounds.R b/R/int_get_analytic_information_bounds.R index b31d5e5..8e11a34 100644 --- a/R/int_get_analytic_information_bounds.R +++ b/R/int_get_analytic_information_bounds.R @@ -45,6 +45,7 @@ get_lower_info_for_one <- function(prior, accuracy) { # Determine which labels Y occur frequently enough (1:m3) s <- p >= (cumsum(p) - a)/(1:L - 1) m3 <- tail(which(s), 1) + if (length(m3) == 0) return(NaN) q <- sum(p[1:m3]) pl <- p[1:m3] diff --git a/R/int_get_bias_reduced_meta_measure.R b/R/int_get_bias_reduced_meta_measure.R index 4bb3a60..0b51612 100644 --- a/R/int_get_bias_reduced_meta_measure.R +++ b/R/int_get_bias_reduced_meta_measure.R @@ -1,41 +1,42 @@ # Because Meta-I measures are inherently biased, simulate data to estimate and # then subtract this bias. -get_bias_reduced_meta_measure <- function(estimated_classifier, ns, get_meta_measure) +get_bias_reduced_meta_I_measures <- function(estimated_table) { - meta_I_measure <- get_meta_measure(estimated_classifier) # Baseline estimate + meta_I_measures <- get_meta_I_measures(estimated_table) # Baseline estimate # Simulations based on the observed frequencies nsim <- 1000 - simulated_meta_measures <- c() + simulated_meta_measures <- data.frame() for (i in 1:nsim) { # Simulate one data row-wise - counts <- estimated_classifier*0 - for (j in 1:nrow(estimated_classifier)) + simulated_table <- estimated_table*0 + for (j in 1:nrow(estimated_table)) { - counts[j, ] <- rmultinom(1, ns[j], estimated_classifier[j, ]) + n <- sum(estimated_table[j, ]) + simulated_table[j, ] <- rmultinom(1, n, estimated_table[j, ]/n) } - simulated_classifier <- counts/sum(counts) # Skip a simulation if accuracy is 50% or 100% - a <- get_accuracy(simulated_classifier) + estimated_classifier <- simulated_table/sum(simulated_table) + a <- get_accuracy(estimated_classifier) if (round(a - 1, 6) == 0) next; if (round(a - 0, 6) == 0) next; - simulated_meta_measures[i] <- get_meta_measure(simulated_classifier) - } + simulated_meta_measures <- rbind(simulated_meta_measures, + get_meta_I_measures(simulated_table)) - # If simulations did not work, return the baseline estimate - if (length(simulated_meta_measures) < 1) - { - simulated_meta_measures <- na.omit(simulated_meta_measures) - simulated_meta_measures <- get_meta_measure(estimated_classifier) + # Loading bar + cat(sprintf('|%s%s|\r', + paste0(rep('=', round(i/nsim*20)), collapse = ''), + paste0(rep(' ', 20-round(i/nsim*20)), collapse = ''))) + if (i == nsim) cat("\n") } - + # Reduce bias - expected_meta_measure <- mean(simulated_meta_measures, na.rm = TRUE) - estimated_bias <- meta_I_measure - expected_meta_measure - bias_reduced_meta_I_measure <- meta_I_measure - estimated_bias + expected_meta_I_measures <- colMeans(simulated_meta_measures, na.rm = TRUE) + estimated_bias <- meta_I_measures - expected_meta_I_measures + bias_reduced_meta_I_measures <- meta_I_measures - estimated_bias - bias_reduced_meta_I_measure + bias_reduced_meta_I_measures }