From 9d81b4e7d86d6fcf7972d3f1835a379305807567 Mon Sep 17 00:00:00 2001
From: JoachimGoedhart <j.goedhart@uva.nl>
Date: Tue, 31 Jan 2023 17:04:40 +0100
Subject: [PATCH] Added repeatability stats

-Added calculation and reporting of repeatability stats
-Repeatability stats are available in Table 4
-Fixed a bug that prevented calculation of p-value for multiple conditions
---
 app.R           | 69 ++++++++++++++++++++++++++++++-----
 repeatability.R | 96 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 156 insertions(+), 9 deletions(-)
 create mode 100644 repeatability.R

diff --git a/app.R b/app.R
index a8f555c..4b1c5b9 100644
--- a/app.R
+++ b/app.R
@@ -25,6 +25,11 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# Improvements:
+# display of p-values (use of scientific notation if smaller than 0.001
+# Needs fixing, since doesn't work for multiple conditions)
+# Implement measures of reproducibility/repeatability
+
 
 library(shiny)
 library(tidyverse)
@@ -39,6 +44,7 @@ library(broom)
 source("geom_flat_violin.R")
 source("themes.R")
 source("function_tidy_df.R")
+source("repeatability.R")
 
 ###### Functions ##########
 
@@ -450,6 +456,7 @@ conditionalPanel(condition = "input.show_table == true", h3("Difference with the
                            h3("Table 1: Statistics for individual replicates"),dataTableOutput('data_summary'),
                            h3("Table 2: Statistics for conditions"),dataTableOutput('data_summary_condition') , 
                            h3("Table 3: Statistics for comparison of means between conditions"),dataTableOutput('data_difference'),
+                           h3("Table 4: Statistics for repeatability"),dataTableOutput('data_repeats'),
                            NULL
                            ),
                   tabPanel("About", includeHTML("about.html")
@@ -480,9 +487,9 @@ df_upload <- reactive({
     
     if (input$data_input == 2) {
       data <- df_tidy_example2
-      x_var.selected <<- "Method"
+      x_var.selected <<- "Condition"
       y_var.selected <<- "BP"
-      g_var.selected <<- "Replicate" 
+      g_var.selected <<- "N" 
     }  else if (input$data_input == 1) {
         data <- df_tidy_example
         x_var.selected <<- "Treatment"
@@ -1494,12 +1501,13 @@ df_difference <- reactive({
   df_difference <- df_difference %>% mutate_at(c(2:4), round, input$digits)  %>% mutate_at(c(5), round, 8)
   
   #Use scientific notation if smaller than 0.001
-  if (df_difference$p.value<0.001 && df_difference$p.value>=1e-10) {
-  df_difference$p.value  <- formatC(df_difference$p.value, format = "e", digits = 2)
-  #Any p-value lower than 1e-10 is noted as <1e-10
-  } else if (df_difference$p.value<1e-10) {
-    df_difference$p.value <- "<1e-10"
-  }
+  # Needs fixing, since doesn't work for multiple conditions
+  # if (df_difference$p.value<0.001 && df_difference$p.value>=1e-10) {
+  # df_difference$p.value  <- formatC(df_difference$p.value, format = "e", digits = 2)
+  # #Any p-value lower than 1e-10 is noted as <1e-10
+  # } else if (df_difference$p.value<1e-10) {
+  #   df_difference$p.value <- "<1e-10"
+  # }
 
   return(df_difference)
   
@@ -1525,6 +1533,17 @@ df_summary_condition <- reactive({
            `95%CI_hi` = mean - qt((1-Confidence_level)/2, n - 1) * sem,
            NULL)
   
+  
+  # ## Need to add the repeats for each subject
+  # df_id <- df_selected() %>% group_by(Condition, Replica) %>% mutate(replicates=row_number()) %>% ungroup()
+  # ## Calculate measures of repeatability
+  # df_repeatability <- df_id %>% repeatability(values=Value, replicates=replicates , groups=Condition)
+  # df_repeatability <- df_repeatability %>%
+  #   select(-c(Replica, replicates, TSS, SSw, SSb, MSw, MSb))
+  # 
+  # observe({print(head(df_repeatability))})
+  
+  
   return(df)
 })
 
@@ -1532,6 +1551,19 @@ df_summary_condition_rounded <- reactive({
   df <- df_summary_condition() %>% mutate_at(c(3:7), round, input$digits)
 })
 
+df_repeats_rounded <- reactive({
+  
+  ## Need to add the repeats for each subject
+  df <- df_selected() %>% group_by(Condition, Replica) %>% mutate(replicates=row_number()) %>% ungroup()
+  
+  ## Calculate paramaters
+  df <- df %>% repeatability(values=Value, replicates=replicates , groups=Condition) %>%
+    select(-c(Replica, replicates, TSS, SSw, SSb, MSw, MSb))
+  
+  
+  df <- df %>% mutate_at(c(4:9), round, input$digits)
+})
+
 #### A predefined selection of stats for the table  ###########
 
 observeEvent(input$summary_replicate, {
@@ -1599,6 +1631,21 @@ output$data_difference <- renderDataTable(
   ) 
 ) 
 
+#### Render the data summary as a table ###########
+
+output$data_repeats <- renderDataTable(
+  datatable(
+    df_repeats_rounded(),
+    #  colnames = c(ID = 1),
+    selection = 'none',
+    extensions = c('Buttons', 'ColReorder'),
+    options = list(dom = 'Bfrtip', pageLength = 100,
+                   buttons = c('copy', 'csv','excel', 'pdf'),
+                   editable=FALSE, colReorder = list(realtime = FALSE), columnDefs = list(list(className = 'dt-center', targets = '_all'))
+    ) 
+  ) 
+) 
+
 
 ############## Render the data summary as a table ###########
 
@@ -1624,8 +1671,9 @@ output$legend <- renderText({
     
     HTML_Legend <- append(HTML_Legend, paste('<p><u>Table 3</u>: Statistics for the comparison of the conditions to "',input$zero,'" based on the <b>mean</b> of the replicates.</br>', sep=""))
     
-    HTML_Legend <- append(HTML_Legend, paste('The difference is a point estimate of the size of the effect and the and 95% confidence interval is an interval estimate. ', sep=""))
     
+    HTML_Legend <- append(HTML_Legend, paste('The difference is a point estimate of the size of the effect and the 95% confidence interval is an interval estimate. ', sep=""))
+     
     
     if (input$connect!='blank') {
       HTML_Legend <- append(HTML_Legend, paste('The replicates are paired between conditions and a paired t-test is used to calculate the p-value. ', sep=""))
@@ -1633,6 +1681,9 @@ output$legend <- renderText({
       HTML_Legend <- append(HTML_Legend, paste("The replicates are <b>not</b> paired and Welch's t-test is performed to calculate the p-value. ", sep=""))
     }
     
+    HTML_Legend <- append(HTML_Legend, paste('<p><u>Table 4</u>: Statistics for the repeatability for each of the conditions. For explanation of the parameters see <a href="https://doi.org/10.1593/tlo.09268">Barnhart & Barboriak, 2009</a></br></p>', sep=""))
+    
+    
     if (length(df$Condition)>1) {
       HTML_Legend <- append(HTML_Legend, paste("</br>The p-values are <b>not corrected</b> for multiple comparisons. Consider alternative statistical analyses.", sep=""))
       
diff --git a/repeatability.R b/repeatability.R
new file mode 100644
index 0000000..e6b5647
--- /dev/null
+++ b/repeatability.R
@@ -0,0 +1,96 @@
+## Function to calculate measures of repeatability
+## Code for testing the function at the bottom
+
+library(tidyverse)
+
+# Function to calculate the 'sum of squares'
+sos <- function(x) {
+  sum((mean(x)-x)^2)
+}
+
+# Function that takes a dataframe with at least two columns.
+# One column which has (measurement) values and another column that indicates the repeats
+# Note, these repeats or the 'observational units' or replicates (Not the subject or 'experimental unit')
+# Optional: a third column when multiple conditions/groups are present.
+# The result is a dataframe with the repeatability coefficient (RC) and IntraClass Correlation (ICC)
+# When groups are defined, the result will be shown for each group
+
+repeatability <- function(df, values, replicates, groups) {
+  
+  #This is necessary to deal with arguments in tidyverse functions
+  replicate <- enquo(replicates)
+  group <- enquo(groups)
+  value <- enquo(values)
+  
+  if (rlang::quo_is_missing(replicate)) {
+    print("Warning: Replicates not identifed")
+  }
+  
+  #Add IDs for the subjects
+  df_id <- df %>% group_by(!! group, !! replicate) %>% mutate(Subject=row_number()) %>% ungroup()
+  
+  #Calculate n and k
+  # n is the number of Experimental Units
+  # k is the number of repeated measurements, or Observational Units
+  df_id <- df_id %>% group_by(!! replicate, !! group) %>% mutate(n=n()) %>% ungroup()
+  df_id <- df_id %>% group_by(Subject, !! group) %>% mutate(k=n()) %>% ungroup()
+  
+  #Calculate 'total sum of squares' for each condition TSS
+  df_id <- df_id %>% group_by(!! group) %>% mutate(TSS = sos(!! value)) %>% ungroup()
+  
+  #Simplify the dataframe, will depend on presence of 'groups' argument
+  if (rlang::quo_is_missing(group)) {
+    print("Single group")
+    df_result <- df_id %>% distinct(n, k, .keep_all = TRUE)
+  } else {
+    print("Multiple groups")
+    df_result <- df_id %>% distinct(!! group, .keep_all = TRUE)
+  }
+  
+
+  #Calculate 'within groups sum of squares' SSw for each condition
+  df_result$SSw <- df_id %>%
+    group_by(!! group, Subject) %>%
+    summarize(sos = sos(!! value), .groups = 'drop') %>%
+    group_by(!! group) %>% summarize(SSw=sum(sos)) %>% pull(SSw)
+  
+  #Simplify the dataframe by removing irrelevant columns
+  df_result <- df_result %>% select(-c(Subject, quo_name(value)))
+
+  #Calculate the ICC and RC
+  df_result <- df_result %>%
+    mutate(SSb = TSS-SSw) %>%
+    mutate(MSw = SSw / (n*(k-1))) %>%
+    mutate(MSb = SSb / (n-1)) %>%
+    mutate(ICC =  (MSb - MSw) / (MSb + ((k-1)*MSw))) %>%
+    # mutate(VR =  (MSw * k) / (MSb + ((k-1)*MSw))) %>%
+    mutate(`total SD` =  sqrt(MSb + ((k-1)*MSw)/k)) %>%
+    #### Add Effective Sample Size = n*k/(1+(k-1)*ICC)
+    #### DOI: 10.1093/cvr/cvx151
+    mutate(`Effective N` = (n*k)/(1+(k-1)*ICC)) %>%
+    mutate(RC = 1.96*sqrt(2) * sqrt(MSw)) %>%
+    mutate(`RC (95%CI_lo)` = 1.96*sqrt(2)*sqrt(SSw/qchisq(0.975, (n*(k-1)))),
+           `RC (95%CI_hi)` = 1.96*sqrt(2)*sqrt(SSw/qchisq(0.025, (n*(k-1))))
+           )
+  
+  return(df_result)
+}
+
+
+##########################################
+## For testing, load this dataframe:
+## df <- read.csv("Table_5_physiotherapy_tidy.csv")
+## Run test: df %>% repeatability(values=Value, replicates=Measurement)
+
+## For testing with multiple groups/conditions, load this dataframe:
+## df <- read.csv("SystBloodPressure_tidy.csv")
+## Run the function like this:
+## repeatability(df, values=BP, replicates=Replicate , groups=Method)
+## or:
+## df %>% repeatability(values=BP, replicates=n , groups=Condition)
+
+## synthetic <- read.csv("synthetic.csv")
+## synthetic %>% repeatability(values=Values, replicates=expUnit , groups=Condition)
+
+
+