From 8d0253d7f4dff665dde66f68154325776f25988c Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 24 Jan 2024 08:42:50 -0500
Subject: [PATCH] Files supportive of sparse AUC standard error calculation
 (for integration into the main PKNCA code base later)

---
 inst/sparse/corsatt.R        | 132 +++++++++++++++++++++++++++++++++++
 inst/sparse/corsatt_holder.R | 132 +++++++++++++++++++++++++++++++++++
 2 files changed, 264 insertions(+)
 create mode 100644 inst/sparse/corsatt.R
 create mode 100644 inst/sparse/corsatt_holder.R

diff --git a/inst/sparse/corsatt.R b/inst/sparse/corsatt.R
new file mode 100644
index 00000000..845f77ef
--- /dev/null
+++ b/inst/sparse/corsatt.R
@@ -0,0 +1,132 @@
+#' Calculate Satterwaithe correlation for sparse AUC using Bailer's method to
+#' provide AUC, SE, and degrees of freedom.
+#' 
+#' @param m number of times
+#' @param a number of animals
+#' @param D design matrix
+#' @param w vector of trapezoidal weights
+#' @param y data structured as D
+#' @return auc = estimated AUC
+#' se.auc = estimated standard error of estimated AUC
+#' df = estimated degrees of freedom
+#' width = half-width of the 95% confidence interval
+#' lower = lower bound of the 95% confidence interval
+#' upper = upper bound of the 95% confidence interval
+#' @references The function here is copied from the appendix of
+#' Nedelman JR, Jia X. An extension of Satterthwaite’s approximation applied to
+#' pharmacokinetics. Journal of Biopharmaceutical Statistics. 1998;8(2):317-328.
+#' doi:10.1080/10543409808835241
+corsatt <- function(m, a, D, w, y) {
+  # Create identity matrices and matrices of ones
+  Ia <- diag(rep(1, a))
+  Im <- diag(rep(1, m))
+  Ima <- diag(rep(1, m*a))
+  Ja <- matrix(1, ncol=a, nrow=a)
+  Jma <- matrix(1, nrow=m, ncol=a)
+  
+  R <- D %*% t(D)
+  
+  Delta <- diag(c(D), nrow=m*a, ncol=m*a)
+  DeltaStar <- diag(c(D)/kronecker(rep(1, a), diag(R)))
+  
+  A <- diag(w/diag(R)) %*% (R/(R - 1)) %*% diag(w/diag(R))
+  
+  P <- kronecker(Ja, Im) %*% DeltaStar
+  
+  M <- t(Ima - P) %*% Delta %*% kronecker(Ia, A) %*% Delta %*% (Ima - P)
+  
+  # Convert zeros where animals are not observed to NA
+  y[D == 0] <- NA
+  
+  # Compute vector of means
+  y_mean <- matrix(rowMeans(x=y, na.rm=TRUE), nrow=m, ncol=1)
+  
+  # Compute AUC from the data using the linear trapezoidal rule
+  auc <- t(w) %*% y_mean
+  
+  # Estimate covariance matrix from the data
+  sigest <- matrix(NA_real_, nrow=m, ncol=m)
+  y_dif <- y - kronecker(matrix(1, nrow=1, ncol=a), y_mean)
+  # first compute variances (diagonals of cov matrix)
+  for (j1 in seq_len(nrow(y))) {
+    r <- R[j1, j1]
+    sigest[j1, j1] <- mean(y_dif[j1,] * y_dif[j1,], na.rm=TRUE) * r/(r - 1)
+  }
+  # now compute above the diagonal
+  for (j1 in 1:(nrow(y) - 1)) {
+    for (j2 in (j1 + 1):nrow(y)) {
+      r <- R[j1, j2]
+      cov <- mean(y_dif[j1,] * y_dif[j2,], na.rm=TRUE) * r/(r - 1)
+      # prepare to enforce the Cauchy-Schwartz inequality
+      cs_cov <- sign(cov)*sqrt(sigest[j1, j1]*sigest[j2, j2])
+      if (is.na(cov)) {
+        sigest[j1, j2] <- 0
+      } else if (abs(cov) <= abs(cs_cov)) {
+        sigest[j1, j2] <- cov
+      } else {
+        # Must enforce the Cauchy-Schwartz inequality
+        sigest[j1, j2] <- cs_cov
+      }
+      # Now fill in below the diagonal
+      sigest[j2, j1] <- sigest[j1, j2]
+    }
+  }
+  
+  # Compute df and confidence interval information
+  EV <- M %*% kronecker(Ia, sigest)
+  E <- sum(diag(EV))
+  V <- 2*sum(diag(EV %*% EV))
+  df <- 2*E^2/V
+  
+  data.frame(
+    auc=auc,
+    se_auc=sqrt(E),
+    df=df
+  )
+}
+
+# Example 1a from Yeh, C. Estimation and significant tests of area under the
+# curve derived from incomplete sampling. ASA Proceedings of the
+# Biopharmaceutical Section, 1990, pp. 74-81.
+
+# Number of times is
+mm <- 9
+
+# Number of animals is
+aa <- 9
+
+# D matrix
+DD <- matrix(rep(c(1, 0, 0,
+                   0, 1, 0,
+                   1, 0, 0,
+                   0, 1, 0,
+                   1, 0, 0,
+                   0, 1, 0,
+                   0, 0, 1,
+                   0, 0, 1,
+                   0, 0, 1), each=3),
+             nrow=mm, ncol=aa, byrow=TRUE)
+
+time <- c(0, 0.5, 1, 2, 4, 6, 8, 12, 24)
+# AUC weights
+ww_prep <- time[-1] - time[-length(time)]
+ww <- c(0, ww_prep/2) + c(ww_prep/2, 0)
+conc_raw <-
+  c(
+    0, 0, 0,
+    4, 1.3, 3.2,
+    4.69, 2.07, 6.45,
+    6.68, 3.83, 6.08,
+    4.69, 4.06, 6.45, # Accurate relative to the source paper; duplicated data relative to two rows ago suggests possible transcription error
+    8.13, 9.54, 6.29,
+    9.36, 13, 5.48,
+    5.18, 5.18, 2.79,
+    1.06, 2.15, 0.827
+  )
+conc <- t(DD)
+conc[t(DD) == 1] <- conc_raw
+conc <- t(conc)
+
+# According to source paper, should return
+# data.frame(auc=110.1015, se_auc=14.78964, df=2.144318)
+corsatt(mm, aa, DD, ww, conc)
diff --git a/inst/sparse/corsatt_holder.R b/inst/sparse/corsatt_holder.R
new file mode 100644
index 00000000..845f77ef
--- /dev/null
+++ b/inst/sparse/corsatt_holder.R
@@ -0,0 +1,132 @@
+#' Calculate Satterwaithe correlation for sparse AUC using Bailer's method to
+#' provide AUC, SE, and degrees of freedom.
+#' 
+#' @param m number of times
+#' @param a number of animals
+#' @param D design matrix
+#' @param w vector of trapezoidal weights
+#' @param y data structured as D
+#' @return auc = estimated AUC
+#' se.auc = estimated standard error of estimated AUC
+#' df = estimated degrees of freedom
+#' width = half-width of the 95% confidence interval
+#' lower = lower bound of the 95% confidence interval
+#' upper = upper bound of the 95% confidence interval
+#' @references The function here is copied from the appendix of
+#' Nedelman JR, Jia X. An extension of Satterthwaite’s approximation applied to
+#' pharmacokinetics. Journal of Biopharmaceutical Statistics. 1998;8(2):317-328.
+#' doi:10.1080/10543409808835241
+corsatt <- function(m, a, D, w, y) {
+  # Create identity matrices and matrices of ones
+  Ia <- diag(rep(1, a))
+  Im <- diag(rep(1, m))
+  Ima <- diag(rep(1, m*a))
+  Ja <- matrix(1, ncol=a, nrow=a)
+  Jma <- matrix(1, nrow=m, ncol=a)
+  
+  R <- D %*% t(D)
+  
+  Delta <- diag(c(D), nrow=m*a, ncol=m*a)
+  DeltaStar <- diag(c(D)/kronecker(rep(1, a), diag(R)))
+  
+  A <- diag(w/diag(R)) %*% (R/(R - 1)) %*% diag(w/diag(R))
+  
+  P <- kronecker(Ja, Im) %*% DeltaStar
+  
+  M <- t(Ima - P) %*% Delta %*% kronecker(Ia, A) %*% Delta %*% (Ima - P)
+  
+  # Convert zeros where animals are not observed to NA
+  y[D == 0] <- NA
+  
+  # Compute vector of means
+  y_mean <- matrix(rowMeans(x=y, na.rm=TRUE), nrow=m, ncol=1)
+  
+  # Compute AUC from the data using the linear trapezoidal rule
+  auc <- t(w) %*% y_mean
+  
+  # Estimate covariance matrix from the data
+  sigest <- matrix(NA_real_, nrow=m, ncol=m)
+  y_dif <- y - kronecker(matrix(1, nrow=1, ncol=a), y_mean)
+  # first compute variances (diagonals of cov matrix)
+  for (j1 in seq_len(nrow(y))) {
+    r <- R[j1, j1]
+    sigest[j1, j1] <- mean(y_dif[j1,] * y_dif[j1,], na.rm=TRUE) * r/(r - 1)
+  }
+  # now compute above the diagonal
+  for (j1 in 1:(nrow(y) - 1)) {
+    for (j2 in (j1 + 1):nrow(y)) {
+      r <- R[j1, j2]
+      cov <- mean(y_dif[j1,] * y_dif[j2,], na.rm=TRUE) * r/(r - 1)
+      # prepare to enforce the Cauchy-Schwartz inequality
+      cs_cov <- sign(cov)*sqrt(sigest[j1, j1]*sigest[j2, j2])
+      if (is.na(cov)) {
+        sigest[j1, j2] <- 0
+      } else if (abs(cov) <= abs(cs_cov)) {
+        sigest[j1, j2] <- cov
+      } else {
+        # Must enforce the Cauchy-Schwartz inequality
+        sigest[j1, j2] <- cs_cov
+      }
+      # Now fill in below the diagonal
+      sigest[j2, j1] <- sigest[j1, j2]
+    }
+  }
+  
+  # Compute df and confidence interval information
+  EV <- M %*% kronecker(Ia, sigest)
+  E <- sum(diag(EV))
+  V <- 2*sum(diag(EV %*% EV))
+  df <- 2*E^2/V
+  
+  data.frame(
+    auc=auc,
+    se_auc=sqrt(E),
+    df=df
+  )
+}
+
+# Example 1a from Yeh, C. Estimation and significant tests of area under the
+# curve derived from incomplete sampling. ASA Proceedings of the
+# Biopharmaceutical Section, 1990, pp. 74-81.
+
+# Number of times is
+mm <- 9
+
+# Number of animals is
+aa <- 9
+
+# D matrix
+DD <- matrix(rep(c(1, 0, 0,
+                   0, 1, 0,
+                   1, 0, 0,
+                   0, 1, 0,
+                   1, 0, 0,
+                   0, 1, 0,
+                   0, 0, 1,
+                   0, 0, 1,
+                   0, 0, 1), each=3),
+             nrow=mm, ncol=aa, byrow=TRUE)
+
+time <- c(0, 0.5, 1, 2, 4, 6, 8, 12, 24)
+# AUC weights
+ww_prep <- time[-1] - time[-length(time)]
+ww <- c(0, ww_prep/2) + c(ww_prep/2, 0)
+conc_raw <-
+  c(
+    0, 0, 0,
+    4, 1.3, 3.2,
+    4.69, 2.07, 6.45,
+    6.68, 3.83, 6.08,
+    4.69, 4.06, 6.45, # Accurate relative to the source paper; duplicated data relative to two rows ago suggests possible transcription error
+    8.13, 9.54, 6.29,
+    9.36, 13, 5.48,
+    5.18, 5.18, 2.79,
+    1.06, 2.15, 0.827
+  )
+conc <- t(DD)
+conc[t(DD) == 1] <- conc_raw
+conc <- t(conc)
+
+# According to source paper, should return
+# data.frame(auc=110.1015, se_auc=14.78964, df=2.144318)
+corsatt(mm, aa, DD, ww, conc)