From 9d81b4e7d86d6fcf7972d3f1835a379305807567 Mon Sep 17 00:00:00 2001 From: JoachimGoedhart Date: Tue, 31 Jan 2023 17:04:40 +0100 Subject: [PATCH] Added repeatability stats -Added calculation and reporting of repeatability stats -Repeatability stats are available in Table 4 -Fixed a bug that prevented calculation of p-value for multiple conditions --- app.R | 69 ++++++++++++++++++++++++++++++----- repeatability.R | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 9 deletions(-) create mode 100644 repeatability.R diff --git a/app.R b/app.R index a8f555c..4b1c5b9 100644 --- a/app.R +++ b/app.R @@ -25,6 +25,11 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# Improvements: +# display of p-values (use of scientific notation if smaller than 0.001 +# Needs fixing, since doesn't work for multiple conditions) +# Implement measures of reproducibility/repeatability + library(shiny) library(tidyverse) @@ -39,6 +44,7 @@ library(broom) source("geom_flat_violin.R") source("themes.R") source("function_tidy_df.R") +source("repeatability.R") ###### Functions ########## @@ -450,6 +456,7 @@ conditionalPanel(condition = "input.show_table == true", h3("Difference with the h3("Table 1: Statistics for individual replicates"),dataTableOutput('data_summary'), h3("Table 2: Statistics for conditions"),dataTableOutput('data_summary_condition') , h3("Table 3: Statistics for comparison of means between conditions"),dataTableOutput('data_difference'), + h3("Table 4: Statistics for repeatability"),dataTableOutput('data_repeats'), NULL ), tabPanel("About", includeHTML("about.html") @@ -480,9 +487,9 @@ df_upload <- reactive({ if (input$data_input == 2) { data <- df_tidy_example2 - x_var.selected <<- "Method" + x_var.selected <<- "Condition" y_var.selected <<- "BP" - g_var.selected <<- "Replicate" + g_var.selected <<- "N" } else if (input$data_input == 1) { data <- df_tidy_example x_var.selected <<- "Treatment" @@ -1494,12 +1501,13 @@ df_difference <- reactive({ df_difference <- df_difference %>% mutate_at(c(2:4), round, input$digits) %>% mutate_at(c(5), round, 8) #Use scientific notation if smaller than 0.001 - if (df_difference$p.value<0.001 && df_difference$p.value>=1e-10) { - df_difference$p.value <- formatC(df_difference$p.value, format = "e", digits = 2) - #Any p-value lower than 1e-10 is noted as <1e-10 - } else if (df_difference$p.value<1e-10) { - df_difference$p.value <- "<1e-10" - } + # Needs fixing, since doesn't work for multiple conditions + # if (df_difference$p.value<0.001 && df_difference$p.value>=1e-10) { + # df_difference$p.value <- formatC(df_difference$p.value, format = "e", digits = 2) + # #Any p-value lower than 1e-10 is noted as <1e-10 + # } else if (df_difference$p.value<1e-10) { + # df_difference$p.value <- "<1e-10" + # } return(df_difference) @@ -1525,6 +1533,17 @@ df_summary_condition <- reactive({ `95%CI_hi` = mean - qt((1-Confidence_level)/2, n - 1) * sem, NULL) + + # ## Need to add the repeats for each subject + # df_id <- df_selected() %>% group_by(Condition, Replica) %>% mutate(replicates=row_number()) %>% ungroup() + # ## Calculate measures of repeatability + # df_repeatability <- df_id %>% repeatability(values=Value, replicates=replicates , groups=Condition) + # df_repeatability <- df_repeatability %>% + # select(-c(Replica, replicates, TSS, SSw, SSb, MSw, MSb)) + # + # observe({print(head(df_repeatability))}) + + return(df) }) @@ -1532,6 +1551,19 @@ df_summary_condition_rounded <- reactive({ df <- df_summary_condition() %>% mutate_at(c(3:7), round, input$digits) }) +df_repeats_rounded <- reactive({ + + ## Need to add the repeats for each subject + df <- df_selected() %>% group_by(Condition, Replica) %>% mutate(replicates=row_number()) %>% ungroup() + + ## Calculate paramaters + df <- df %>% repeatability(values=Value, replicates=replicates , groups=Condition) %>% + select(-c(Replica, replicates, TSS, SSw, SSb, MSw, MSb)) + + + df <- df %>% mutate_at(c(4:9), round, input$digits) +}) + #### A predefined selection of stats for the table ########### observeEvent(input$summary_replicate, { @@ -1599,6 +1631,21 @@ output$data_difference <- renderDataTable( ) ) +#### Render the data summary as a table ########### + +output$data_repeats <- renderDataTable( + datatable( + df_repeats_rounded(), + # colnames = c(ID = 1), + selection = 'none', + extensions = c('Buttons', 'ColReorder'), + options = list(dom = 'Bfrtip', pageLength = 100, + buttons = c('copy', 'csv','excel', 'pdf'), + editable=FALSE, colReorder = list(realtime = FALSE), columnDefs = list(list(className = 'dt-center', targets = '_all')) + ) + ) +) + ############## Render the data summary as a table ########### @@ -1624,8 +1671,9 @@ output$legend <- renderText({ HTML_Legend <- append(HTML_Legend, paste('

Table 3: Statistics for the comparison of the conditions to "',input$zero,'" based on the mean of the replicates.
', sep="")) - HTML_Legend <- append(HTML_Legend, paste('The difference is a point estimate of the size of the effect and the and 95% confidence interval is an interval estimate. ', sep="")) + HTML_Legend <- append(HTML_Legend, paste('The difference is a point estimate of the size of the effect and the 95% confidence interval is an interval estimate. ', sep="")) + if (input$connect!='blank') { HTML_Legend <- append(HTML_Legend, paste('The replicates are paired between conditions and a paired t-test is used to calculate the p-value. ', sep="")) @@ -1633,6 +1681,9 @@ output$legend <- renderText({ HTML_Legend <- append(HTML_Legend, paste("The replicates are not paired and Welch's t-test is performed to calculate the p-value. ", sep="")) } + HTML_Legend <- append(HTML_Legend, paste('

Table 4: Statistics for the repeatability for each of the conditions. For explanation of the parameters see Barnhart & Barboriak, 2009

', sep="")) + + if (length(df$Condition)>1) { HTML_Legend <- append(HTML_Legend, paste("
The p-values are not corrected for multiple comparisons. Consider alternative statistical analyses.", sep="")) diff --git a/repeatability.R b/repeatability.R new file mode 100644 index 0000000..e6b5647 --- /dev/null +++ b/repeatability.R @@ -0,0 +1,96 @@ +## Function to calculate measures of repeatability +## Code for testing the function at the bottom + +library(tidyverse) + +# Function to calculate the 'sum of squares' +sos <- function(x) { + sum((mean(x)-x)^2) +} + +# Function that takes a dataframe with at least two columns. +# One column which has (measurement) values and another column that indicates the repeats +# Note, these repeats or the 'observational units' or replicates (Not the subject or 'experimental unit') +# Optional: a third column when multiple conditions/groups are present. +# The result is a dataframe with the repeatability coefficient (RC) and IntraClass Correlation (ICC) +# When groups are defined, the result will be shown for each group + +repeatability <- function(df, values, replicates, groups) { + + #This is necessary to deal with arguments in tidyverse functions + replicate <- enquo(replicates) + group <- enquo(groups) + value <- enquo(values) + + if (rlang::quo_is_missing(replicate)) { + print("Warning: Replicates not identifed") + } + + #Add IDs for the subjects + df_id <- df %>% group_by(!! group, !! replicate) %>% mutate(Subject=row_number()) %>% ungroup() + + #Calculate n and k + # n is the number of Experimental Units + # k is the number of repeated measurements, or Observational Units + df_id <- df_id %>% group_by(!! replicate, !! group) %>% mutate(n=n()) %>% ungroup() + df_id <- df_id %>% group_by(Subject, !! group) %>% mutate(k=n()) %>% ungroup() + + #Calculate 'total sum of squares' for each condition TSS + df_id <- df_id %>% group_by(!! group) %>% mutate(TSS = sos(!! value)) %>% ungroup() + + #Simplify the dataframe, will depend on presence of 'groups' argument + if (rlang::quo_is_missing(group)) { + print("Single group") + df_result <- df_id %>% distinct(n, k, .keep_all = TRUE) + } else { + print("Multiple groups") + df_result <- df_id %>% distinct(!! group, .keep_all = TRUE) + } + + + #Calculate 'within groups sum of squares' SSw for each condition + df_result$SSw <- df_id %>% + group_by(!! group, Subject) %>% + summarize(sos = sos(!! value), .groups = 'drop') %>% + group_by(!! group) %>% summarize(SSw=sum(sos)) %>% pull(SSw) + + #Simplify the dataframe by removing irrelevant columns + df_result <- df_result %>% select(-c(Subject, quo_name(value))) + + #Calculate the ICC and RC + df_result <- df_result %>% + mutate(SSb = TSS-SSw) %>% + mutate(MSw = SSw / (n*(k-1))) %>% + mutate(MSb = SSb / (n-1)) %>% + mutate(ICC = (MSb - MSw) / (MSb + ((k-1)*MSw))) %>% + # mutate(VR = (MSw * k) / (MSb + ((k-1)*MSw))) %>% + mutate(`total SD` = sqrt(MSb + ((k-1)*MSw)/k)) %>% + #### Add Effective Sample Size = n*k/(1+(k-1)*ICC) + #### DOI: 10.1093/cvr/cvx151 + mutate(`Effective N` = (n*k)/(1+(k-1)*ICC)) %>% + mutate(RC = 1.96*sqrt(2) * sqrt(MSw)) %>% + mutate(`RC (95%CI_lo)` = 1.96*sqrt(2)*sqrt(SSw/qchisq(0.975, (n*(k-1)))), + `RC (95%CI_hi)` = 1.96*sqrt(2)*sqrt(SSw/qchisq(0.025, (n*(k-1)))) + ) + + return(df_result) +} + + +########################################## +## For testing, load this dataframe: +## df <- read.csv("Table_5_physiotherapy_tidy.csv") +## Run test: df %>% repeatability(values=Value, replicates=Measurement) + +## For testing with multiple groups/conditions, load this dataframe: +## df <- read.csv("SystBloodPressure_tidy.csv") +## Run the function like this: +## repeatability(df, values=BP, replicates=Replicate , groups=Method) +## or: +## df %>% repeatability(values=BP, replicates=n , groups=Condition) + +## synthetic <- read.csv("synthetic.csv") +## synthetic %>% repeatability(values=Values, replicates=expUnit , groups=Condition) + + +