Skip to content

Vertex wise best model

Gabriel A. Devenyi edited this page Apr 9, 2021 · 6 revisions

This wiki is to select and statistically test vertex wise models. In this example, the code was used to test the age relationship of the surface area (SA) at the vertex level. Therefore, poly(age,1) was used to model a linear relationship, poly(age,2) was used to model a second order relationship and poly(age,3) for a third order relationship.

poly stands for orthogonal polynomials which generates a polynomial basis function for age across the entire age range.

Load necessary packages

library(RMINC)
library(lme4)
library(ggplot2)
library(effects)
library(lmerTest)

Load csv files

sf <- read.csv('../../../../../../../../data/chamal/projects/aurelie/Camcan/Replication_morph/Camcan-MAGeT_morph_good_QC.csv', header = T) #Load demographic files
sf$right_SA <- paste0("../../../../../../../../data/chamal/projects/aurelie/Camcan/Replication_morph/SA/Right/",sf$Subjects,"_T1w.mnc.n4correct.cutneckapplyautocrop.beastextract_vorn_SA_5mm_blur.txt", sep="") #load vertex-wise data, here RSA

1- Create your models and run AIC

# Create AIC_summary to obtain AIC for vertexLmer
AIC_summary <-
  function(mmod)
    c(RMINC:::fixef_summary(mmod), AIC = extractAIC(mmod)[2])

vlm1 <-
  vertexLm(right_SA ~ poly(Age,1)+ Sex , data = sf)

vlm2 <-
  vertexLm(right_SA ~ poly(Age,2)+ Sex , data = sf)

vlm3 <-
  vertexLm(right_SA ~ poly(Age,3)+ Sex , data = sf)

vlm<-cbind(vlm1,vlm2,vlm3)

2- Select best model with AIC

matrix<-data.frame(matrix(ncol = 5, nrow = 1215))

colnames(matrix) <- c("AIC1","AIC2", "AIC3", "minAIC","AIC")
voxels<-(1:1215)

for (i in voxels) {
  
  matrix[,1]<-AIC(vlm1) # vlm[i,9]=AIC1
  matrix[,2]<-AIC(vlm2) # vlm[i,20]=AIC2
  matrix[,3]<-AIC(vlm3) # vlm[i,33]=AIC3
  
  matrix[i,4]<-min(matrix[i,1:3])
  if (matrix[i,4]==matrix[i,1]) {matrix[i,5]<-"1"}
  if (matrix[i,4]==matrix[i,2]) {matrix[i,5]<-"2"}
  if (matrix[i,4]==matrix[i,3]) {matrix[i,5]<-"3"}
  
}

write.table(matrix, file='../../../../data/chamal/projects/aurelie/Camcan/Replication_morph/SA/RSA_bestAIC.csv',col.names=F, row.names=F, sep=",")

3- Gather and tranform the t-value in p-value

tvalue<-cbind(vlm[,7:8],vlm[,17:19],vlm[,29:32])

#transform tvalue in pvalue

p.value = dt(tvalue, df=length(tvalue) - 1)

p.value_best<-data.frame(matrix(ncol = 5, nrow = 1215))

colnames(p.value_best) <- c( "AIC","pvalue-poly(Age, 1)", "pvalue-poly(Age, 2)", "pvalue-poly(Age, 3)", "pvalue-Sex")

4- Gather pvalue's best model in one simplified dataframe

for (j in 1:1215) {
  p.value_best[j,1]<-matrix[j,5]
  if (p.value_best[j,1]==1) {p.value_best[j,2]<-p.value[j,1]}
  if (p.value_best[j,1]==1) {p.value_best[j,5]<-p.value[j,2]}
  if (p.value_best[j,1]==2) {p.value_best[j,2]<-p.value[j,3]}
  if (p.value_best[j,1]==2) {p.value_best[j,3]<-p.value[j,4]}
  if (p.value_best[j,1]==2) {p.value_best[j,5]<-p.value[j,5]}
  if (p.value_best[j,1]==3) {p.value_best[j,2]<-p.value[j,6]}
  if (p.value_best[j,1]==3) {p.value_best[j,3]<-p.value[j,7]}
  if (p.value_best[j,1]==3) {p.value_best[j,4]<-p.value[j,8]}
  if (p.value_best[j,1]==3) {p.value_best[j,5]<-p.value[j,9]}
  p.value_best[p.value_best == 0] <- NA
}

5- Run FDR correction on p.valuebest

newmydata<-stack(p.value_best, select=c("pvalue-poly(Age, 1)", "pvalue-poly(Age, 2)", "pvalue-poly(Age, 3)", "pvalue-Sex"))
datafdr<-newmydata[1]
datafdrr<-as.numeric(unlist(datafdr))
datacorrected<-p.adjust(datafdrr,method = "fdr")
datacorrected<-data.frame(datacorrected)
Unstack<-cbind(datacorrected,newmydata[,2])
pvaluecorrected<-unstack(Unstack)
pvalueTRUE_FALSE<-pvaluecorrected<=0.05

6- Select t-value corresponding to significant pvalue

matrixtvalue<-data.frame(matrix(ncol = 7, nrow = 1215))

colnames(matrixtvalue) <- c( "tvalue-poly(Age, 1)1", "tvalue-poly(Age, 2)1", "tvalue-poly(Age, 3)1","tvalue-poly(Age, 2)2", "tvalue-poly(Age, 3)2","tvalue-poly(Age, 3)3", "tvalue-Sex")

for (j in 1:1215) {
  if (pvalueTRUE_FALSE[j,1]=="TRUE" && p.value_best[j,1]==1 ) {matrixtvalue[j,1]<-vlm[j,7]}
  if (pvalueTRUE_FALSE[j,4]=="TRUE" && p.value_best[j,1]==1 ) {matrixtvalue[j,7]<-vlm[j,8]}
  if (pvalueTRUE_FALSE[j,2]=="TRUE" && p.value_best[j,1]==2 ) {matrixtvalue[j,2]<-vlm[j,17]}
  if (pvalueTRUE_FALSE[j,2]=="TRUE" && p.value_best[j,1]==2 ) {matrixtvalue[j,4]<-vlm[j,18]}
  if (pvalueTRUE_FALSE[j,4]=="TRUE" && p.value_best[j,1]==2 ) {matrixtvalue[j,7]<-vlm[j,19]}
  if (pvalueTRUE_FALSE[j,3]=="TRUE" && p.value_best[j,1]==3 ) {matrixtvalue[j,3]<-vlm[j,29]}
  if (pvalueTRUE_FALSE[j,3]=="TRUE" && p.value_best[j,1]==3 ) {matrixtvalue[j,5]<-vlm[j,30]}
  if (pvalueTRUE_FALSE[j,3]=="TRUE" && p.value_best[j,1]==3 ) {matrixtvalue[j,6]<-vlm[j,31]}
  if (pvalueTRUE_FALSE[j,4]=="TRUE" && p.value_best[j,1]==3 ) {matrixtvalue[j,7]<-vlm[j,32]}
}
matrixtvalue[is.na(matrixtvalue)] <- 0

write.table(matrixtvalue, file='../../../../data/chamal/projects/aurelie/Camcan/Replication_morph/RSA-vertexfdr-poly.csv',col.names=F, row.names=F, sep=",")

7- Min and Max prediction

dataSA_voxels <- read.csv('../../../../../../../../../data/chamal/projects/aurelie/Camcan/Replication_morph/RSA_Camcan.csv', header=T) # Load csv where each column = SA for one participant
datademographic<-read.csv('../../../../../../../../data/chamal/projects/aurelie/Camcan/Replication_morph/Camcan-MAGeT_morph_good_QC.csv', header = T) #Load demographic files

dataSA_voxels<-dataSA_voxels[,1:350]
dataSA_voxels <- t(dataSA_voxels)
dataok<-cbind(dataSA_voxels, datademographic)

minmax<-data.frame(matrix(ncol = 3, nrow = 1215))
colnames(minmax) <- c("AIC", "min","max")

for (i in 1:1215) {
  
  gmvolume_model1 = lm(dataok[,i] ~ poly(Age, 1) + Sex   , data = dataok)
  gmvolume_model2 = lm(dataok[,i] ~ poly(Age, 2) + Sex   , data = dataok)
  gmvolume_model3 = lm(dataok[,i] ~ poly(Age, 3) + Sex  , data = dataok)
  
  fitdata_1 = as.data.frame(Effect(c( "Age"),gmvolume_model1, xlevels=list(Age=seq(18,81,1))))
  fitdata_2 = as.data.frame(Effect(c( "Age"),gmvolume_model2, xlevels=list(Age=seq(18,81,1))))
  fitdata_3 = as.data.frame(Effect(c( "Age"),gmvolume_model3, xlevels=list(Age=seq(18,81,1))))
  
  minmax[i,1]<-matrix[i,5]
  
  maxpos1<-which.max(fitdata_1$fit)
  minpos1<-which.min(fitdata_1$fit)
  max1<-fitdata_1$Age[maxpos1]
  min1<-fitdata_1$Age[minpos1]
  
  maxpos2<-which.max(fitdata_2$fit)
  minpos2<-which.min(fitdata_2$fit)
  max2<-fitdata_2$Age[maxpos2]
  min2<-fitdata_2$Age[minpos2]
  
  maxpos3<-which.max(fitdata_3$fit)
  minpos3<-which.min(fitdata_3$fit)
  max3<-fitdata_3$Age[maxpos3]
  min3<-fitdata_3$Age[minpos3]
  
  if (minmax[i,1]==1) {minmax[i,2]<-min1}
  if (minmax[i,1]==2) {minmax[i,2]<-min2}
  if (minmax[i,1]==3) {minmax[i,2]<-min3}
  if (minmax[i,1]==1) {minmax[i,3]<-max1}
  if (minmax[i,1]==2) {minmax[i,3]<-max2}
  if (minmax[i,1]==3) {minmax[i,3]<-max3}
  
}

write.csv(minmax, '../../../../../data/chamal/projects/aurelie/Camcan/Replication_morph/SA/RSA-max-min.csv')

8- Plot one peak voxel

gmvolume_model3 = lm(dataok[,360] ~ poly(Age, 1) + Sex  , data = dataok)
fitdata_gmvolume3 = as.data.frame(Effect(c( "Age"),gmvolume_model3, xlevels=list(Age=seq(18,81,1))))

ggplot(data = dataok, aes(y=dataok[,360],x=Age)) +
  geom_point(color="darkorchid4", lwd=2) +
  geom_line(data = fitdata_gmvolume3, aes(y=fit), color="darkorchid4",lwd=2) +
  geom_ribbon(data = fitdata_gmvolume3, aes(y=fit, ymin=lower, ymax=upper), color="darkorchid4", fill="darkorchid4", alpha=0.4) +
  geom_point(color="darkorchid4", lwd=2) + theme_bw()+
  ggtitle("poly(Age,1)") +  
  ylab("SA") +
  ylim(c(0.5,1.5))+
  theme(plot.title = element_text(hjust = 0.5))+
  theme(plot.title = element_text(size = 15, face = "bold"), legend.title=element_text(size=12), legend.text=element_text(size=12)) +
  theme(axis.text.x = element_text(color = "grey20", size = 15, angle = 0, hjust = .5, vjust = .5, face = "bold"),
        axis.text.y = element_text(color = "grey20", size = 15, angle = 0, hjust = 1, vjust = 0, face = "bold"),  
        axis.title.x = element_text(color = "grey20", size = 15, angle = 0, hjust = .5, vjust = 0, face = "bold"),
        axis.title.y = element_text(color = "grey20", size = 15, angle = 90, hjust = .5, vjust = .5, face = "bold"))+
  theme(plot.margin = margin(0.2,.2,.2,.2, "cm"))

9- Example of results

AIC_example

Figure 3: A) SA vertex-wise best fit age-relationship models as determined by AIC; purple for linear, cyan for second order and yellow for third order relationships with age; S: superior view; I: inferior view. B) Representation of significant age effects using the best model at each vertex (only higher order predictor shown) and the location of 6 significant peak vertices. t-value maps correspond to significant p-values (corrected at FDR 5%). C) Using the best model at each vertex, representation of the age for which SA was at its maximum or minimum. D) Plots of the SA of 6 peak vertices with age.

Clone this wiki locally