-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOPTIMUM-Neuro_tracking_2018-08-02.Rmd
579 lines (459 loc) · 23.4 KB
/
OPTIMUM-Neuro_tracking_2018-08-02.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
---
title: "OPTIMUM-Neuro conference call 2018-08-02"
output: html_document
always_allow_html: yes
---
```{r setup, include = FALSE, warning = FALSE}
# load graphics libraries
library(plyr)
library(ggplot2)
library(reshape2)
library(tidyverse)
library(knitr)
library(kableExtra)
library(plotly)
library(stringr)
library(bsselectR)
library(lubridate)
library(textclean)
```
```{r data cleaning, include = FALSE, warning = FALSE}
#read in data
df <- read.csv('data/OPTIMUMMainDatabaseF_DATA_2018-08-02_0935.csv', na.strings=c("","NA")) #contains all but MRI
mri <- read.csv('data/OPTIMUMMainDatabaseF_DATA_2018-08-02_0933.csv', na.strings=c("","NA")) #contains just MRI
targets <- read.csv('data/OPT_recruitTargets.csv')
#for the purposes of this report - only consider the first session of the baseline scan - might need to modify later if want to count followup
mri <- subset(mri, mri$redcap_repeat_instrument =="optimum_neuro_mri")
mri <- subset(mri, mri$redcap_event_name =="baseline_arm_6")
#merge mri and df dataframes
df <- merge(df, mri, by=c('record_id', 'redcap_event_name'), all.x=TRUE)
#pull out site code from subject IDs
df$site <- substring(df$record_id, 1, 2)
#make sure that site data is a factor
df$site <- as.factor(df$site)
sites <- factor(x = c('CU', 'LA', 'UP', 'UT', 'WU'), levels = c('CU', 'LA', 'UP', 'UT', 'WU'))
#make sure target data is in proper format
targets$month <- as.character(targets$month)
#make sure all site codes are uppercase
df$site <- toupper(df$site)
#remove any sites that don't match expected site codes
df <- df[(df$site == 'CU' | df$site == 'LA' | df$site == 'UP' | df$site == 'UT' | df$site == 'WU'),]
#rename timepoint variable
names(df)[names(df)=="redcap_event_name"] <- "timepoint"
#rename factors in timepoint variable
levels(df$timepoint) <- c(levels(df$timepoint), "baseline", "6 mth FU", "24 mth FU")
df$timepoint[df$timepoint == "baseline_arm_6"] <- "baseline"
df$timepoint[df$timepoint == "6_mth_fu_arm_6"] <- "6 mth FU"
df$timepoint[df$timepoint == "24_mth_fu_arm_6"] <- "24 mth FU"
df$timepoint <- droplevels(df$timepoint, exclude = c("baseline_arm_6", "6_mth_fu_arm_6", "24_mth_fu_arm_6"))
```
>We are currently in month `r targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'month_count']` of the study.
This report is meant to provide a quick overview of the OPTIMUM-Neuro study, to facilitate discussion on its bi-weekly conference calls. All data summarized here derives from the 'tracking' arm of the WashU REDCap database (arm 6), and thus is only as accurate as the data captured there.
<br>
------
<br>
###Recruitment
```{r recruitment, echo = FALSE, include = FALSE, warning = FALSE}
#make a smaller dataframe
recruit_df <- df[, c(
"record_id",
"site",
"timepoint",
"meta_consent_date",
"meta_terminate_date",
"meta_terminate_reason",
"plasma_blood_status",
"plasma_blood_date",
"mr_t1",
"mr_date",
"dkefs_complete",
"dkefs_date_admin",
"rbans_complete",
"rbans_date_admin")]
#make sure important dates are in date format
recruit_df$dkefs_date <- as.Date(recruit_df$dkefs_date_admin, format = "%Y-%m-%d", origin = "1970-01-01") #convert date to date format
recruit_df$rbans_date <- as.Date(recruit_df$rbans_date_admin, format = "%Y-%m-%d", origin = "1970-01-01") #convert date to date format
#get current date and add to dataframe
recruit_df$currentYm <- Sys.Date() #add current sysdate to df
recruit_df$currentYm_str <- as.character(substr(recruit_df$currentYm, 1, 7)) #store month and year, as character
#add column that indicates if consented, consent date, and if this month
recruit_df$consented[is.na(recruit_df$meta_consent_date)] <- 0
recruit_df$consented[is.na(recruit_df$consented)] <- 1
recruit_df$meta_consent_date <- as.Date(recruit_df$meta_consent_date, format = "%Y-%m-%d", origin = "1970-01-01") #convert date to date format
recruit_df$meta_consent_mth <- as.character(substr(recruit_df$meta_consent_date, 1, 7)) #store month and year, as character
recruit_df$meta_consent_mthT <- recruit_df$meta_consent_mth %in% recruit_df$currentYm_str #logical T or F- was consent this month?
#add column that indicates if completed, complete date, and if this month
recruit_df$completed <- ifelse(recruit_df$meta_terminate_reason == 3, 1, 0)
recruit_df$meta_terminate_date <- as.Date(recruit_df$meta_terminate_date, format = "%Y-%m-%d", origin = "1970-01-01") #convert date to date format
recruit_df$meta_terminate_mth <- as.character(substr(recruit_df$meta_terminate_date, 1, 7)) #store month and year, as character
recruit_df$meta_terminate_mthT <- recruit_df$meta_terminate_mth %in% recruit_df$currentYm_str #logical T or F- was complete this month?
recruit_df$meta_complete_mthT <- ifelse(recruit_df$completed == 1 & recruit_df$meta_terminate_mthT == TRUE, TRUE, FALSE)
#add column that indicates if terminated
recruit_df$terminated <- ifelse(recruit_df$meta_terminate_reason == 3, 0, 1)
#add columns that indicate timepoint at which terminated (for reasons other than completion)
recruit_df$terminate_baseline <- ifelse(recruit_df$meta_terminate_reason != 3 & recruit_df$timepoint == 'baseline', 1, NA)
recruit_df$terminate_FU_06_MTH <- ifelse(recruit_df$meta_terminate_reason != 3 & recruit_df$timepoint == '6 mth FU', 1, NA)
recruit_df$terminate_FU_24_MTH <- ifelse(recruit_df$meta_terminate_reason != 3 & recruit_df$timepoint == '24 mth FU', 1, NA)
####################################################
#add column that indicates if enrolled (where enrollment is 2 of 3 mri, blood, neuropsych)
enroll_df <- recruit_df #make a new dataframe (did this because want new DF to consider only baseline)
#take only baseline
enroll_df <- enroll_df[(enroll_df$timepoint == 'baseline'),]
#first, check if T1 complete, and if date is this month
#note - here, 'complete' means just T1 complete
names(enroll_df)[names(enroll_df) == "mr_t1"] <- "enroll_mri" #change name of status column
names(enroll_df)[names(enroll_df) == "mr_date"] <- "enroll_mri_date" #change name of date column
enroll_df$enroll_mri <- ifelse(enroll_df$enroll_mri == 1, 1, NA) #turn no to NA
enroll_df$enroll_mri_date <- as.Date(enroll_df$enroll_mri_date, format = "%Y-%m-%d", origin = "1970-01-01") #convert date to datetime format
enroll_df$enroll_mri_mth <- as.character(substr(enroll_df$enroll_mri_date, 1, 7)) #store month and year, as character
enroll_df$enroll_mri_mthT <- enroll_df$enroll_mri_mth %in% enroll_df$currentYm_str #logical T or F- was MRI this month?
#second, check if blood complete, and if date is this month
#note - here, 'complete' means that just the plasma is complete (don't care about serum)
enroll_df$enroll_bld <- ifelse(enroll_df$plasma_blood_status == 1, 1, NA) #turn no and partial to NA
enroll_df$enroll_bld_date <- substr(enroll_df$plasma_blood_date, 1, 11)#cut out the time contained in datetime
enroll_df$enroll_bld_date <- as.Date(enroll_df$enroll_bld_date, format = "%Y-%m-%d", origin = "1970-01-01") #convert to datetime
enroll_df$enroll_bld_mth <- as.character(substr(enroll_df$enroll_bld_date, 1, 7)) #store month and year, as character
enroll_df$enroll_bld_mthT <- enroll_df$enroll_bld_mth %in% enroll_df$currentYm_str #logical T or F - was blood this month?
#third, check if neuropsych complete, and if data is this month
#note - here, 'complete' means rbans and dkefs done in entirity
#make new columns with completion and date data for 2 assessments we care about - dkefs and rbans
enroll_df$dkefs <- ifelse(enroll_df$dkefs_complete == 1, 1, NA) #turn no and partial to NA
enroll_df$enroll_dkefs_date <- as.Date(enroll_df$dkefs_date_admin, format = "%Y-%m-%d", origin = "1970-01-01") #convert date to datetime format
enroll_df$enroll_dkefs_mth <- as.character(substr(enroll_df$enroll_dkefs_date, 1, 7)) #store month and year, as character
enroll_df$enroll_dkefs_mthT <- enroll_df$enroll_dkefs_mth %in% enroll_df$currentYm_str #logical T or F- was dkefs this month?
enroll_df$rbans <- ifelse(enroll_df$rbans_complete == 1, 1, NA) #turn no and partial to NA
enroll_df$enroll_rbans_date <- as.Date(enroll_df$rbans_date_admin, format = "%Y-%m-%d", origin = "1970-01-01") #convert date to datetime format
enroll_df$enroll_rbans_mth <- as.character(substr(enroll_df$enroll_rbans_date, 1, 7)) #store month and year, as character
enroll_df$enroll_rbans_mthT <- enroll_df$enroll_rbans_mth %in% enroll_df$currentYm_str #logical T or F- was rbans this month?
#take the sum of columns - want both of the central neuropsych assessments (dkefs, rbans) to be complete
enroll_df[, c('dkefs', 'rbans')] <- sapply(enroll_df[, c('dkefs', 'rbans')], as.numeric) #change to numeric
enroll_df$np_count <- rowSums(enroll_df[,c('dkefs', 'rbans')]) #sum
#make an enrolled column for neuropsych - if sum is 2
enroll_df$enroll_np <- ifelse(enroll_df$np_count == 2, 1, 0)
#identify the second date in ordered dates - i.e., when enrollment for np criteria is met
np_dates <- c('dkefs_date', 'rbans_date')
enroll_df$enroll_np_date <- NA
for (n in 1:nrow(enroll_df)){
dates <- enroll_df[n, np_dates] # pull out NP assessment dates
dates <- dates[order(dates)] # order dates
if (length(dates[!is.na(dates)]) == 2) { # if there are 2 assesments
enroll_df$enroll_np_date[n] <- dates[[2]] # pull out the second assessment date
} else {
enroll_df$enroll_np_date[n] <- NA # otherwise print 'NA'
}
}
#determine if np criteria was met this month
enroll_df$enroll_np_date <- as.Date(enroll_df$enroll_np_date, origin = "1970-01-01") #turn back into date format
enroll_df$enroll_np_mth <- as.character(substr(enroll_df$enroll_np_date, 1, 7)) #store month and year, as character
enroll_df$enroll_np_mthT <- enroll_df$enroll_np_mth %in% enroll_df$currentYm_str #logical T or F- was criteria met this month?
#determine if enrollment criteria is met, i.e., at least 2/3 of mri, blood, and np completed
enroll_df$enroll <- rowSums(enroll_df[,c('enroll_mri', 'enroll_bld', 'enroll_np')], na.rm = T) #calculate sum
enroll_df$enroll <- ifelse(enroll_df$enroll >= 2, 1, 0)
#determine date of 2/3 of mri, blood, and psy completed
enroll_dates <- c('enroll_mri_date','enroll_bld_date', 'enroll_np_date')
enroll_df$enroll_date <- NA
for (n in 1:nrow(enroll_df)){
dates <- enroll_df[n, enroll_dates] # pull out component dates
dates <- dates[order(dates)] # order dates
if (length(dates[!is.na(dates)]) >= 2) { # if there are 2 or more components
enroll_df$enroll_date[n] <- dates[[2]] # pull out the second component dates
} else {
enroll_df$enroll_date[n] <- NA # otherwise print 'NA'
}
}
enroll_df$enroll_date <- as.Date(enroll_df$enroll_date, origin = "1970-01-01") #turn back into date format
#determine if enrollment was met this month
enroll_df$enroll_mth <- as.character(substr(enroll_df$enroll_date, 1, 7)) #store month and year, as character
enroll_df$enroll_mthT <- enroll_df$enroll_mth %in% enroll_df$currentYm_str #logical T or F- was criteria met this month?
#merge back dataframes
recruit_df <- merge(recruit_df, enroll_df, by = c('record_id', 'timepoint'), all.x = TRUE)
```
```{r recruit_table, include = FALSE, warning = FALSE}
# create vector of recruitment variables
recruit_vars <- c(
"meta_consent_mthT.x", #consented current month
"consented.x", #consented to date
"enroll_mthT", #enrolled current month no
"enroll", #enrolled to date no
"meta_complete_mthT.x", #completed current month
"completed.x", #completed to date
"terminate_baseline.x", #terminated baseline
"terminate_FU_06_MTH.x", #terminated 6 mth FU
"terminate_FU_24_MTH.x") #terminated 24 mth FU
#turn all NAs into 0
recruit_df[, recruit_vars] <- apply(recruit_df[, recruit_vars], 2, function(x){replace(x, is.na(x), 0)})
#initialize dataframe and names columns and rows
recruit_table <- data.frame(matrix(ncol=11, nrow=length(recruit_vars)))
#names of columns and rows on demo_table
names(recruit_table) <- c(paste0(c('n', '%', 'n', '%', 'n', '%', 'n', '%', 'n', '%', 'total')))
row.names(recruit_table) <- c(
"consented current month",
"consented to date",
"enrolled current month",
"enrolled to date",
"completed current month",
"completed to date",
"terminated during baseline",
"terminated during 6 mth FU",
"terminated during 24 mth FU")
# initialize counters (j = row, k = column)
j <- 1
k <- 1
for (var in recruit_vars) {
for (site in sites) {
recruit_table[j,k] <- sum(recruit_df[recruit_df$site.x == site, var], na.rm = TRUE)
k <- k + 2
}
k <- 1
j <- j + 1
}
# add in totals
j <- 1
k <- 1
for (var in recruit_vars) {
recruit_table[j,11] <- sum(recruit_df[var], na.rm = TRUE)
j <- j + 1
}
# add in percentages
# consented current month
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site, 'meta_consent_mthT.x'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'per_month']
recruit_table[1,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
# consented total
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site, 'consented.x'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'target']
recruit_table[2,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
# enrollment current month
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site, 'enroll_mthT'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'per_month']
recruit_table[3,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
# enrollement total
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site, 'enroll'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'target']
recruit_table[4,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
```
```{r recruitment_table, echo = FALSE, warning = FALSE}
#add row names to make like demo table
recruit_table <- cbind(metric = rownames(recruit_table), recruit_table)
rownames(recruit_table) <- NULL
#turns NAs into dashes
options(knitr.kable.NA = '--')
#table
kable(recruit_table, "html", escape = F, align = c('l', 'c', 'c', 'c', 'c', 'c', 'c')) %>%
kable_styling("hover", "condensed", full_width = T) %>%
add_header_above(c(" ", "CU" = 2, "LA" = 2, "UP" = 2, "UT" = 2, "WU" = 2, " ")) %>%
group_rows('Consent[note]', 1, 2, label_row_css = "background-color: #ddd; color: 999;") %>%
group_rows("Enrollment[note]", 3, 4, label_row_css = "background-color: #ddd; color: 999;") %>%
group_rows("Completion[note]", 5, 6, label_row_css = "background-color: #ddd; color: 999;") %>%
group_rows("Termination[note]", 7, 9, label_row_css = "background-color: #ddd; color: 999;") %>%
column_spec(1, width = "20em", background = '#f5f5f5') %>%
add_footnote(c("`Consent` counts all participants who signed an OPTIMUM-Neuro consent form.",
"`Enrollment` counts all participants who completed ≥2/3 of the MRI, bloodwork, and neuropsych at the baseline visit.",
"`Completion` counts all enrolled participants who completed the 24 mth FU visit.",
"`Termination` counts participants coded as: 'not eligible for randomization', 'withdrew', 'lost to follow-up', 'death', 'investigator discretion', and 'other'. Termination does not capture participants who completed the study, i.e., this category is independent from `Completion`."),
notation = "number") %>%
footnote(general = "Current month captures calendar month. The `%` columns calculate percentage of target, with all sites having common targets.")
rm(recruit_table)
```
------
<br>
###Enrollment breakdown
```{r enrollment_breakdown, echo = FALSE, warning = FALSE}
# create vector of enrollment variables
enroll_vars <- c(
"enroll_np_mthT",
"enroll_np",
"enroll_bld_mthT",
"enroll_bld",
"enroll_mri_mthT",
"enroll_mri"
)
#turn all NAs into 0
recruit_df[, enroll_vars] <- apply(recruit_df[, enroll_vars], 2, function(x){replace(x, is.na(x), 0)})
#initialize dataframe and names columns and rows
enroll_table <- data.frame(matrix(ncol=11, nrow=length(enroll_vars)))
#names of columns and rows on table
names(enroll_table) <- c(paste0(c('n', '%', 'n', '%', 'n', '%', 'n', '%', 'n', '%', 'total')))
row.names(enroll_table) <- c(
"completed neuropsych current month",
"completed neuropsych to date",
"completed blood current month",
"completed blood to date",
"completed MRI current month",
"completed MRI to date")
# initialize counters (j = row, k = column)
j <- 1
k <- 1
for (var in enroll_vars) {
for (site in sites) {
enroll_table[j,k] <- sum(recruit_df[recruit_df$site.x == site & recruit_df$enroll == 1, var], na.rm = TRUE)
k <- k + 2
}
k <- 1
j <- j + 1
}
# add in totals
j <- 1
k <- 1
for (var in enroll_vars) {
enroll_table[j,11] <- sum(recruit_df[recruit_df$enroll == 1, var], na.rm = TRUE)
j <- j + 1
}
#add in percentages - neuropsych current month
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site & recruit_df$enroll == 1, 'enroll_np_mthT'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'per_month']
enroll_table[1,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
#add in percentages - neuropsych to date
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site & recruit_df$enroll == 1, 'enroll_np'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'target']
enroll_table[2,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
#add in percentages - blood current month
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site & recruit_df$enroll == 1, 'enroll_bld_mthT'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'per_month']
enroll_table[3,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
#add in percentages - blood to date
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site & recruit_df$enroll == 1, 'enroll_bld'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'target']
enroll_table[4,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
#add in percentages - mri current month
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site & recruit_df$enroll == 1, 'enroll_mri_mthT'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'per_month']
enroll_table[5,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
#add in percentages - mri to date
j <- 2
for (site in sites) {
n = sum(recruit_df[recruit_df$site.x == site & recruit_df$enroll == 1, 'enroll_mri'])
d = targets[targets$month == as.character(substr(Sys.Date(), 1, 7)), 'target']
enroll_table[6,j] <- sprintf("%1.0f%%", 100*round(n/d, 2))
j <- j + 2
}
```
```{r enrollment_table, echo = FALSE, warning = FALSE}
#add row names to make like demo table
enroll_table <- cbind(metric = rownames(enroll_table), enroll_table)
rownames(enroll_table) <- NULL
#turns NAs into dashes
options(knitr.kable.NA = '--')
#table
kable(enroll_table, "html", escape = F, align = c('l', 'c', 'c', 'c', 'c', 'c', 'c')) %>%
kable_styling("hover", "condensed", full_width = T) %>%
add_header_above(c(" ", "CU" = 2, "LA" = 2, "UP" = 2, "UT" = 2, "WU" = 2, " ")) %>%
group_rows('Neuropsych[note]', 1, 2, label_row_css = "background-color: #ddd; color: 999;") %>%
group_rows("Blood[note]", 3, 4, label_row_css = "background-color: #ddd; color: 999;") %>%
group_rows("MRI[note]", 5, 6, label_row_css = "background-color: #ddd; color: 999;") %>%
column_spec(1, width = "20em", background = '#f5f5f5') %>%
add_footnote(c(
"`Neuropsych` counts all participants who completed both the RBANS and D-KEFS in full.",
"`Blood` counts all participants who submitted one aliquot of plasma.",
"`MRI` counts all participants who completed the T1."),
notation = "number") %>%
footnote(general = "Current month captures calendar month. The `%` columns calculate percentage of target, with all sites having common targets.")
rm(enroll_table)
```
------
<br>
###Demographics
```{r demographics, echo = FALSE, warning = FALSE}
# create a separate dataframe of data from just baseline timepoint
demo_df <- df[(df$timepoint == 'baseline'),]
# modify race variable to have 2 levels
demo_df$demo_race <- ifelse(demo_df$demo_race != 2, 1, 2)
# create vector of demographic variables
demo_vars <- c(
'demo_sex',
'demo_ethnicity',
'demo_race',
'demo_age',
'demo_edu')
# initialize dataframe
demo_table <- data.frame(matrix(ncol=6, nrow=length(demo_vars)))
# names of columns and rows on demo_table
names(demo_table) <- c(paste0(c('CU', 'LA', 'UP', 'UT', 'WU', 'p')))
row.names(demo_table) <- c('sex (M:F:O)', 'ethnicity (H:NH)', 'race (minority:non-minority)', 'age', 'education')
# initialize counters (j = row, k = column)
j <- 1
k <- 1
# for loop for categorical
for (var in demo_vars[1:3]) {
# count observations for each cluster
for (site in sites) {
out <- table(demo_df$site, demo_df[[var]])
demo_table[j,k] <- paste(out[site,], collapse = ':')
k <- k + 1}
# run chi-square tests
chi_sq <- chisq.test(out)
demo_table[j,k] <- sub("^(-?)0.", "\\1.", sprintf("%.3f", chi_sq$p.value))
k <- 1
j <- j + 1}
# initialize counters (j = row, k = column)
j <- 4
k <- 1
# for loop for continuous variables
for (var in demo_vars[4:5]) {
# calculate means and SDs for each site
for (site in sites) {
M <- sprintf('%.02f', mean(demo_df[demo_df$site == site, var], na.rm = TRUE))
SD <- sprintf('%.02f', sd(demo_df[demo_df$site == site, var], na.rm = TRUE))
demo_table[j,k] <- paste( M,' (',SD,')', sep='')
k <- k + 1}
# run one-way ANOVA with cluster as between-subjects variable
F_test <- aov(demo_df[[var]] ~ demo_df$site, na.action=na.omit)
# extract p-value
F_test.p.value <- summary(F_test)[[1]][["Pr(>F)"]][[1]]
# rounded p-value to 3 decimals and without leading zero
F_test.p <- sub("^(-?)0.", "\\1.", sprintf("%.3f", F_test.p.value))
demo_table[j,k] <- F_test.p
k <- 1
j <- j + 1}
```
```{r demographics_table, echo = FALSE, warning = FALSE}
demo_table$p <- as.numeric(demo_table$p)
#table
demo_table %>%
mutate(
metric = row.names(demo_table),
p = cell_spec(p, "html", color = ifelse(p < 0.05, "red", "black"))
) %>%
select(metric, CU, LA, UP, UT, WU, p) %>%
kable('html', escape = F, align = c('l', 'c', 'c', 'c', 'c', 'c', 'c')) %>%
kable_styling("hover", full_width = T) %>%
column_spec(1, width = "20em", background = '#f5f5f5') %>%
footnote(general = "These values represent all participants consented at the OPTIMUM-Neuro baseline visit. It does not take into account the participants who consented but did not meet enrollment criteria, and/or who later terminated. Values in brackets are standard deviation. The _p_ values result from Chi-squared for categorical variables and omnibus ANOVA for continuous variables.")
rm(demo_df, demo_table)
```