-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add first version of cdi utilities (percentile benchmark, cleanup, re…
…lative scores)
- Loading branch information
1 parent
e774122
commit ae7164d
Showing
17 changed files
with
533 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Package: peekbankr | ||
Type: Package | ||
Title: Accessing the Peekbank Database and working with Peekbank data | ||
Version: 0.2.1.0 | ||
Version: 0.2.3.1 | ||
Authors@R: c( | ||
person("Mika", "Braginsky", email = "[email protected]", role = c("aut", "cre")), | ||
person("Kyle", "MacDonald", email = "[email protected]", role = "aut"), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
library(dplyr) | ||
library(tidyr) | ||
library(here) | ||
library(purrr) | ||
|
||
#' Checks cdi data for inconsistencies, warns about them, and fixes them | ||
#' | ||
#' @param cdi_data a subjects table with unnested cdi data, needs columns "subject_id", "language", "instrument_type", "age", "sex", "measure", "rawscore" | ||
#' | ||
#' @return a cleaned up version of the cdi data | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \donttest{ | ||
#' clean_cdi_data <- all_subjects %>% | ||
#' unnest(subject_aux_data) %>% | ||
#' filter(!is.na(cdi_responses)) %>% | ||
#' unnest(cdi_responses) %>% | ||
#' peekbankr::cleanup_cdi_data() | ||
#' } | ||
cleanup_cdi_data <- function(cdi_data) { | ||
# TODO: what else do we need to check here? Ask the others about this | ||
|
||
required_columns <- c( | ||
"subject_id", | ||
"language", | ||
"instrument_type", | ||
"age", | ||
"sex", | ||
"measure", | ||
"rawscore" | ||
) | ||
|
||
missing_cols <- setdiff(required_columns, colnames(cdi_data)) | ||
if (length(missing_cols) > 0) { | ||
stop("Missing required columns: ", paste(missing_cols, collapse = ", ")) | ||
} | ||
|
||
# Check if there are multiple scores for the same cdi administration within a single participant | ||
duplicate_removed_rows <- cdi_data %>% | ||
dplyr::group_by(subject_id, instrument_type, measure, age, language) %>% | ||
dplyr::filter(rawscore != max(rawscore, na.rm = TRUE)) | ||
|
||
if (nrow(duplicate_removed_rows) > 0) { | ||
print("Warning: there are some duplicate cdi values in your data. These were removed, but you should check the input data.") | ||
print(duplicate_removed_rows) | ||
|
||
# fix the duplicates for analysis until the input data is fixed | ||
cdi_data <- cdi_data %>% | ||
dplyr::group_by(subject_id, instrument_type, measure, age, language) %>% | ||
dplyr::filter(rawscore == max(rawscore, na.rm = TRUE)) | ||
} | ||
|
||
return(cdi_data) | ||
} | ||
|
||
#' Populate the provided cdi data with percentile values for that specific age, instrument_type, measure and language. Loosely based on the work from this repo https://github.com/kachergis/cdi-percentiles/tree/main by George Kachergis and Jess Mankewitz with advice from Virginia Marchman. | ||
#' | ||
#' @param subjects_table a subjects table with unnested cdi data, needs columns "subject_id", "language", "instrument_type", "age", "sex", "measure", "rawscore" | ||
#' | ||
#' @return the input table with added columns containing the reference age used, the reference year used, and both gender specific and general percentile values for the cdi score | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \donttest{ | ||
#' full_cdi_data <- all_subjects %>% | ||
#' unnest(subject_aux_data) %>% | ||
#' filter(!is.na(cdi_responses)) %>% | ||
#' unnest(cdi_responses) %>% | ||
#' peekbankr::cleanup_cdi_data() %>% | ||
#' peekbankr::populate_cdi_percentiles() | ||
#' } | ||
populate_cdi_percentiles <- function(subjects_table) { | ||
required_columns <- c( | ||
"subject_id", | ||
"language", | ||
"instrument_type", | ||
"age", | ||
"sex", | ||
"measure", | ||
"rawscore" | ||
) | ||
|
||
output_columns <- c( | ||
"reference_age", | ||
"reference_year", | ||
"percentile_all", | ||
"percentile_sex", | ||
"norm_score_all", | ||
"norm_score_sex" | ||
) | ||
|
||
missing_cols <- setdiff(required_columns, colnames(subjects_table)) | ||
if (length(missing_cols) > 0) { | ||
stop("Missing required columns: ", paste(missing_cols, collapse = ", ")) | ||
} | ||
|
||
existing_output_cols <- intersect(output_columns, colnames(subjects_table)) | ||
if (length(existing_output_cols) > 0) { | ||
stop("Output columns already exist: ", paste(existing_output_cols, collapse = ", ")) | ||
} | ||
|
||
norms_tables <- readRDS("data/cdi_benchmarks_2022/norms_tables.rds") | ||
|
||
cdi_norms_long <- norms_tables %>% | ||
purrr::imap(\(table, name){ | ||
table %>% | ||
as_tibble() %>% | ||
tidyr::pivot_longer(cols = c(-age), names_to = "head", values_to = "score") %>% | ||
dplyr::rename(norm_percentile = age, reference_age = head) %>% | ||
dplyr::mutate(name = gsub(".csv", "", name, fixed = T)) %>% | ||
tidyr::separate(name, | ||
into = c("language", "instrument_type", "measure", "norm_sex"), | ||
sep = "_" | ||
) %>% | ||
# Add zero rows in one step | ||
dplyr::bind_rows(., dplyr::distinct(., reference_age, language, instrument_type, measure, norm_sex) %>% | ||
dplyr::mutate(norm_percentile = 1, score = 0)) | ||
}) %>% | ||
dplyr::bind_rows() %>% | ||
# TODO: create a wordbank/iso lookup for all languages | ||
dplyr::mutate( | ||
language = ifelse(language == "eng", "English (American)", NA), | ||
reference_age = as.numeric(reference_age) | ||
) | ||
|
||
# find reference age for each participants entry | ||
subject_table_with_ref_age <- subjects_table %>% | ||
dplyr::inner_join( | ||
cdi_norms_long %>% dplyr::distinct(reference_age, instrument_type, measure, language), | ||
by = c( | ||
"instrument_type", | ||
"measure", "language" | ||
), | ||
relationship = "many-to-many" | ||
) %>% | ||
dplyr::mutate(age_diff = abs(age - reference_age)) %>% | ||
dplyr::group_by(across(!c(age_diff, reference_age))) %>% | ||
dplyr::slice_min(abs(age_diff), n = 1, with_ties = FALSE) %>% | ||
dplyr::ungroup() %>% | ||
dplyr::select(-age_diff) | ||
|
||
subject_table_with_cdi_percentiles <- subject_table_with_ref_age %>% | ||
dplyr::inner_join( | ||
cdi_norms_long, | ||
by = c("instrument_type", "measure", "reference_age", "language"), | ||
relationship = "many-to-many" | ||
) %>% | ||
dplyr::filter(score < rawscore) %>% | ||
dplyr::group_by(across(!c(score, norm_percentile))) %>% | ||
dplyr::slice_max(score, n = 1, with_ties = FALSE) %>% | ||
dplyr::ungroup() %>% | ||
tidyr::pivot_wider(names_from = "norm_sex", values_from = c("norm_percentile", "score")) %>% | ||
dplyr::mutate( | ||
percentile_all = norm_percentile_both, | ||
percentile_sex = case_when( | ||
sex == "male" ~ norm_percentile_m, | ||
sex == "female" ~ norm_percentile_f, | ||
T ~ NA | ||
), | ||
norm_score_all = score_both, | ||
norm_score_sex = case_when( | ||
sex == "male" ~ score_m, | ||
sex == "female" ~ score_f, | ||
T ~ NA | ||
) | ||
) %>% | ||
dplyr::select(subject_id, instrument_type, measure, age, language, reference_age, percentile_all, percentile_sex, norm_score_all, norm_score_sex) %>% | ||
dplyr::mutate(reference_year = "2022") | ||
|
||
return(subjects_table %>% dplyr::left_join( | ||
subject_table_with_cdi_percentiles, | ||
by = c("subject_id", "instrument_type", "measure", "age", "language") | ||
)) | ||
} | ||
|
||
|
||
#' Adds a relative cdi score indicating the percentage of total achievable points the subject got on each given measure | ||
#' | ||
#' @param subjects_table a subjects table with unnested cdi data, needs columns "subject_id", "language", "instrument_type", "measure", "rawscore" | ||
#' | ||
#' @return the input table with an added "cdi_relative" column that contains the percentage of total points gained in the given administrations | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \donttest{ | ||
#' cdi_data <- all_subjects %>% | ||
#' unnest(subject_aux_data) %>% | ||
#' filter(!is.na(cdi_responses)) %>% | ||
#' unnest(cdi_responses) %>% | ||
#' append_relative_cdi_scores() | ||
#' } | ||
append_relative_cdi_scores <- function(subjects_table) { | ||
required_columns <- c( | ||
"subject_id", | ||
"instrument_type", | ||
"language", | ||
"measure", | ||
"rawscore" | ||
) | ||
|
||
output_columns <- c( | ||
"cdi_relative" | ||
) | ||
|
||
missing_cols <- setdiff(required_columns, colnames(subjects_table)) | ||
if (length(missing_cols) > 0) { | ||
stop("Missing required columns: ", paste(missing_cols, collapse = ", ")) | ||
} | ||
|
||
existing_output_cols <- intersect(output_columns, colnames(subjects_table)) | ||
if (length(existing_output_cols) > 0) { | ||
stop("Output columns already exist: ", paste(existing_output_cols, collapse = ", ")) | ||
} | ||
|
||
# TODO: find instrument_length values for all languages | ||
subjects_table %>% | ||
dplyr::mutate( | ||
instrument_length = case_when(instrument_type == "ws" ~ 680, | ||
instrument_type == "wg" & language == "English (American)" ~ 396, | ||
instrument_type == "wsshort" ~ 100, | ||
instrument_type == "wg" & language == "Spanish (Mexican)" ~ 428, # TODO: double-check Spanish WG length.. | ||
.default = NA | ||
), | ||
CDI_percent = rawscore / instrument_length | ||
) %>% | ||
dplyr::select(-instrument_length) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11" | ||
"age",8,9,10,11,12,13,14,15,16,17,18 | ||
"5",1,1,2,3,4,6,9,14,20,30,41 | ||
"10",2,3,5,7,10,14,20,29,41,55,73 | ||
"15",4,6,8,12,17,24,33,45,61,80,101 | ||
"20",6,9,13,18,25,34,46,62,82,104,128 | ||
"25",8,12,17,24,33,45,60,80,102,126,152 | ||
"30",11,16,23,31,42,57,75,97,122,149,176 | ||
"35",14,20,28,39,52,70,91,115,142,170,198 | ||
"40",18,25,35,47,63,83,107,134,162,191,220 | ||
"45",22,30,42,56,75,98,124,153,182,212,240 | ||
"50",26,36,49,66,87,113,142,172,202,232,259 | ||
"55",31,43,58,77,101,129,160,192,222,251,278 | ||
"60",37,50,68,89,116,146,179,212,242,270,296 | ||
"65",43,59,78,102,132,164,199,232,262,289,312 | ||
"70",51,68,90,117,149,184,220,253,282,307,328 | ||
"75",60,80,104,134,169,206,242,274,301,324,343 | ||
"80",71,93,121,154,191,229,265,296,321,341,357 | ||
"85",85,110,141,177,216,255,290,319,340,357,369 | ||
"90",103,132,167,206,247,286,318,342,359,371,380 | ||
"95",134,168,206,248,289,324,350,367,378,385,390 | ||
"99",194,233,275,314,347,369,382,389,393,394,395 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11" | ||
"age",8,9,10,11,12,13,14,15,16,17,18 | ||
"5",1,1,2,4,5,8,12,18,26,36,49 | ||
"10",2,4,6,9,12,18,25,36,49,65,84 | ||
"15",4,6,10,14,20,28,39,53,71,91,114 | ||
"20",6,10,14,20,28,39,53,71,92,116,142 | ||
"25",9,13,19,27,37,51,67,88,112,139,168 | ||
"30",12,18,25,35,47,63,82,105,132,161,192 | ||
"35",16,22,31,42,57,75,97,123,152,183,214 | ||
"40",19,27,38,51,68,88,113,141,171,203,235 | ||
"45",24,33,45,60,79,102,129,159,191,223,255 | ||
"50",28,39,53,70,91,117,146,177,210,243,274 | ||
"55",34,46,62,81,105,132,163,196,229,261,292 | ||
"60",40,54,71,93,119,148,181,214,248,279,308 | ||
"65",47,63,82,106,134,166,199,234,266,297,324 | ||
"70",55,73,94,121,151,184,219,253,285,314,338 | ||
"75",65,84,108,137,170,204,240,273,304,330,352 | ||
"80",76,98,125,156,191,227,262,294,322,345,364 | ||
"85",91,116,145,179,215,252,286,316,340,360,375 | ||
"90",111,139,171,207,245,281,312,339,359,374,384 | ||
"95",143,175,210,248,285,318,344,364,377,386,391 | ||
"99",205,241,278,312,342,364,379,387,392,395,396 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11" | ||
"age",8,9,10,11,12,13,14,15,16,17,18 | ||
"5",1,1,1,2,3,5,7,11,16,23,31 | ||
"10",2,3,4,6,8,12,17,24,34,46,61 | ||
"15",3,5,7,10,14,20,28,39,53,69,88 | ||
"20",5,8,11,15,22,30,41,55,72,92,115 | ||
"25",8,11,16,22,30,40,54,71,92,115,141 | ||
"30",11,15,21,28,38,52,68,89,112,138,165 | ||
"35",14,19,26,36,48,64,83,106,132,160,189 | ||
"40",17,24,33,44,59,77,99,125,153,182,212 | ||
"45",22,30,40,53,70,91,116,144,173,204,234 | ||
"50",26,36,48,63,83,106,133,163,194,225,255 | ||
"55",32,43,57,74,96,122,152,183,215,246,275 | ||
"60",38,50,66,86,111,139,171,204,236,266,294 | ||
"65",45,59,77,100,127,158,191,225,256,286,312 | ||
"70",53,70,90,115,145,178,212,246,277,305,329 | ||
"75",63,82,105,132,165,200,235,268,298,323,344 | ||
"80",75,96,122,153,187,224,259,291,318,341,359 | ||
"85",90,114,143,177,213,250,285,315,339,357,371 | ||
"90",111,139,171,207,245,282,314,340,359,373,382 | ||
"95",144,176,212,251,288,321,347,366,378,386,391 | ||
"99",209,246,283,318,347,368,381,389,393,395,396 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11" | ||
"age",8,9,10,11,12,13,14,15,16,17,18 | ||
"5",0,0,0,0,0,0,0,0,0,1,2 | ||
"10",0,0,0,0,0,0,0,1,2,3,5 | ||
"15",0,0,0,0,0,1,1,2,4,6,10 | ||
"20",0,0,0,1,1,1,2,4,6,10,15 | ||
"25",0,0,1,1,2,2,4,6,9,14,21 | ||
"30",1,1,1,2,2,4,5,8,13,20,29 | ||
"35",1,1,2,2,3,5,8,12,17,26,37 | ||
"40",1,2,2,3,5,7,10,15,23,33,46 | ||
"45",2,2,3,5,7,10,14,20,29,40,56 | ||
"50",2,3,4,6,9,12,18,25,36,49,67 | ||
"55",3,4,6,8,11,16,22,31,43,59,79 | ||
"60",4,6,8,11,15,20,28,39,53,70,92 | ||
"65",5,7,10,13,18,25,35,47,63,83,107 | ||
"70",7,9,12,17,23,32,43,57,75,98,124 | ||
"75",9,12,16,21,29,39,52,69,90,114,143 | ||
"80",11,15,20,27,37,49,64,84,107,134,165 | ||
"85",15,20,26,35,47,62,80,103,129,158,191 | ||
"90",20,26,35,47,61,80,102,128,158,190,224 | ||
"95",29,38,51,67,86,110,138,169,201,235,269 | ||
"99",51,67,88,112,141,174,209,244,277,307,334 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11" | ||
"age",8,9,10,11,12,13,14,15,16,17,18 | ||
"5",0,0,0,0,0,0,0,1,1,2,3 | ||
"10",0,0,0,0,0,1,1,2,3,5,8 | ||
"15",0,0,0,0,1,1,2,3,5,9,13 | ||
"20",0,0,0,1,1,2,3,5,8,13,19 | ||
"25",0,0,1,1,2,3,5,8,12,18,26 | ||
"30",0,1,1,2,3,5,7,11,16,23,33 | ||
"35",1,1,2,3,4,6,9,14,20,29,41 | ||
"40",1,2,3,4,6,8,12,17,25,36,50 | ||
"45",2,2,4,5,7,11,15,22,31,43,59 | ||
"50",2,3,5,7,10,13,19,26,37,51,69 | ||
"55",3,4,6,9,12,17,23,32,44,60,80 | ||
"60",4,6,8,11,15,20,28,38,52,70,92 | ||
"65",5,7,10,14,18,25,33,45,61,81,105 | ||
"70",7,9,13,17,22,30,40,53,71,93,120 | ||
"75",9,12,16,21,28,36,48,63,83,107,137 | ||
"80",12,15,20,26,34,44,58,75,97,124,156 | ||
"85",16,20,26,33,43,55,71,91,115,145,179 | ||
"90",21,27,34,43,55,70,88,112,140,172,208 | ||
"95",32,40,49,61,76,94,117,145,177,212,250 | ||
"99",57,69,84,101,122,147,177,210,245,281,315 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11" | ||
"age",8,9,10,11,12,13,14,15,16,17,18 | ||
"5",0,0,0,0,0,0,0,0,0,0,1 | ||
"10",0,0,0,0,0,0,0,1,1,2,3 | ||
"15",0,0,0,0,0,0,1,1,2,4,7 | ||
"20",0,0,0,0,1,1,2,3,4,7,11 | ||
"25",0,0,1,1,1,2,3,4,7,11,18 | ||
"30",1,1,1,1,2,3,4,7,11,17,25 | ||
"35",1,1,2,2,3,4,6,10,15,23,33 | ||
"40",1,2,2,3,4,6,9,14,21,30,43 | ||
"45",2,2,3,4,6,8,12,18,27,39,55 | ||
"50",2,3,4,6,8,11,16,24,34,49,68 | ||
"55",3,4,5,7,10,15,21,31,43,60,82 | ||
"60",4,5,7,10,14,19,27,38,54,73,99 | ||
"65",5,7,9,12,17,25,34,48,66,89,117 | ||
"70",6,8,12,16,22,31,43,59,80,106,138 | ||
"75",8,11,15,20,28,39,54,73,97,126,161 | ||
"80",10,14,19,26,36,50,68,90,117,150,188 | ||
"85",13,18,25,34,47,64,85,112,143,179,219 | ||
"90",17,24,33,46,63,84,110,141,176,215,256 | ||
"95",24,34,48,66,89,118,151,187,226,265,303 | ||
"99",43,60,83,113,148,187,229,269,305,337,362 |
Oops, something went wrong.