From 72c6ef8512fd8da2588df60dd4d4abf15ec71f0e Mon Sep 17 00:00:00 2001 From: "Jason D. Yeatman" Date: Fri, 29 Mar 2024 18:43:54 -0700 Subject: [PATCH 1/3] initial commit of mgcv GAM tract profile tutorial --- .../gam-tract-profiles-example.Rmd | 66 +++ .../gam-tract-profiles-example.html | 453 ++++++++++++++++++ 2 files changed, 519 insertions(+) create mode 100644 examples/howto_examples/gam-tract-profiles-example.Rmd create mode 100644 examples/howto_examples/gam-tract-profiles-example.html diff --git a/examples/howto_examples/gam-tract-profiles-example.Rmd b/examples/howto_examples/gam-tract-profiles-example.Rmd new file mode 100644 index 000000000..31ca1e485 --- /dev/null +++ b/examples/howto_examples/gam-tract-profiles-example.Rmd @@ -0,0 +1,66 @@ +--- +title: "AFQ-GAMS-Tutorial" +author: "Jason Yeatman" +date: "2024-03-30" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## GAMs Tutorial + +First we'll pull the Sarica data and merge nodes.csv (diffusion data) with subjects.csv (participant meta data) + +```{r, warning=FALSE, message=FALSE} +library(dplyr) +library(mgcv) +library(gratia) +library(ggplot2) +df <- read.csv('https://raw.githubusercontent.com/yeatmanlab/Sarica_2017/gh-pages/data/nodes.csv') +df <- inner_join(df, read.csv('https://raw.githubusercontent.com/yeatmanlab/Sarica_2017/gh-pages/data/subjects.csv')) +mutate(df, class = factor(class), subjectID = factor(subjectID))-> df + +``` + +```{r, message=FALSE, warning=FALSE} +# Fit a simple GAM with a random effect of subject and smoothers that +# differ by class (control or ALS) +g1 <- gam(md ~ s(nodeID,class, bs='fs') + s(subjectID, bs= 're'), data=df) +# Get smooth estimates +dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68) + +# Remove random effects and plot +filter(dfs, type != 'Random effect') %>% +ggplot(aes(x=nodeID,y=est, color=class)) + + geom_line() + + geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 ) + +``` + +```{r, message=FALSE, warning=FALSE} +# The above plot looks good but now let's let the curves vary across # subjects too. What I notice here is that it dramatically effects the standard error +g1 <- gam(md ~ s(nodeID,class, bs='fs') + s(nodeID, subjectID, bs= 're'), data=df) +# Get smooth estimates +dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68) + +# Remove random effects and plot +filter(dfs, type != 'Random effect') %>% +ggplot(aes(x=nodeID,y=est, color=class)) + + geom_line() + + geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 ) +``` + +```{r, message=FALSE, warning=FALSE} +# Yet another way to do the same thing and I don't get the difference +g1 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df) +# Get smooth estimates +dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68) + +# Remove random effects and plot +filter(dfs, type != 'Random effect') %>% +ggplot(aes(x=nodeID,y=est, color=class)) + + geom_line() + + geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 ) +``` diff --git a/examples/howto_examples/gam-tract-profiles-example.html b/examples/howto_examples/gam-tract-profiles-example.html new file mode 100644 index 000000000..01d41d9c2 --- /dev/null +++ b/examples/howto_examples/gam-tract-profiles-example.html @@ -0,0 +1,453 @@ + + + + + + + + + + + + + + + +AFQ-GAMS-Tutorial + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

GAMs Tutorial

+

First we’ll pull the Sarica data and merge nodes.csv (diffusion data) +with subjects.csv (participant meta data)

+
library(dplyr)
+library(mgcv)
+library(gratia)
+library(ggplot2)
+df <- read.csv('https://raw.githubusercontent.com/yeatmanlab/Sarica_2017/gh-pages/data/nodes.csv')
+df <- inner_join(df, read.csv('https://raw.githubusercontent.com/yeatmanlab/Sarica_2017/gh-pages/data/subjects.csv'))
+mutate(df, class = factor(class), subjectID = factor(subjectID))-> df
+
# Fit a simple GAM with a random effect of subject and smoothers that 
+# differ by class (control or ALS)
+g1 <- gam(md ~ s(nodeID,class, bs='fs') + s(subjectID, bs= 're'), data=df)
+# Get smooth estimates
+dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68)
+
+# Remove random effects and plot
+filter(dfs, type != 'Random effect') %>%
+ggplot(aes(x=nodeID,y=est, color=class)) +
+  geom_line()  +
+  geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )
+

+
# The above plot looks good but now let's let the curves vary across # subjects too. What I notice here is that it dramatically effects the standard error
+g1 <- gam(md ~ s(nodeID,class, bs='fs') + s(nodeID, subjectID, bs= 're'), data=df)
+# Get smooth estimates
+dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68)
+
+# Remove random effects and plot
+filter(dfs, type != 'Random effect') %>%
+ggplot(aes(x=nodeID,y=est, color=class)) +
+  geom_line()  +
+  geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )
+

+
# Yet another way to do the same thing and I don't get the difference
+g1 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df)
+# Get smooth estimates
+dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68)
+
+# Remove random effects and plot
+filter(dfs, type != 'Random effect') %>%
+ggplot(aes(x=nodeID,y=est, color=class)) +
+  geom_line()  +
+  geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )
+

+
+ + + + +
+ + + + + + + + + + + + + + + From 81200ff368c25474c26dec309486b18545a54ef9 Mon Sep 17 00:00:00 2001 From: "Jason D. Yeatman" Date: Fri, 29 Mar 2024 19:48:39 -0700 Subject: [PATCH 2/3] added difference and CI --- .../gam-tract-profiles-example.Rmd | 18 ++++++++++++++---- .../gam-tract-profiles-example.html | 16 ++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/examples/howto_examples/gam-tract-profiles-example.Rmd b/examples/howto_examples/gam-tract-profiles-example.Rmd index 31ca1e485..25db99aac 100644 --- a/examples/howto_examples/gam-tract-profiles-example.Rmd +++ b/examples/howto_examples/gam-tract-profiles-example.Rmd @@ -41,9 +41,9 @@ ggplot(aes(x=nodeID,y=est, color=class)) + ```{r, message=FALSE, warning=FALSE} # The above plot looks good but now let's let the curves vary across # subjects too. What I notice here is that it dramatically effects the standard error -g1 <- gam(md ~ s(nodeID,class, bs='fs') + s(nodeID, subjectID, bs= 're'), data=df) +g2 <- gam(md ~ s(nodeID,class, bs='fs') + s(nodeID, subjectID, bs= 're'), data=df) # Get smooth estimates -dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68) +dfs <- smooth_estimates(g2) %>% add_confint(coverage=.68) # Remove random effects and plot filter(dfs, type != 'Random effect') %>% @@ -54,9 +54,9 @@ ggplot(aes(x=nodeID,y=est, color=class)) + ```{r, message=FALSE, warning=FALSE} # Yet another way to do the same thing and I don't get the difference -g1 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df) +g3 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df) # Get smooth estimates -dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68) +dfs <- smooth_estimates(g3) %>% add_confint(coverage=.68) # Remove random effects and plot filter(dfs, type != 'Random effect') %>% @@ -64,3 +64,13 @@ ggplot(aes(x=nodeID,y=est, color=class)) + geom_line() + geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 ) ``` + +Now calculate differences between the smooths for the different factors and plot out the difference an 95% CI + +```{r} +ds<-difference_smooths(g3, smooth="s(nodeID)") +ggplot(ds,aes(x=nodeID,y=diff)) + + geom_line() + + geom_ribbon(aes(ymin = lower, ymax=upper),alpha = 0.2 ) + + geom_hline(yintercept=0) +``` diff --git a/examples/howto_examples/gam-tract-profiles-example.html b/examples/howto_examples/gam-tract-profiles-example.html index 01d41d9c2..395c1cd3e 100644 --- a/examples/howto_examples/gam-tract-profiles-example.html +++ b/examples/howto_examples/gam-tract-profiles-example.html @@ -381,9 +381,9 @@

GAMs Tutorial

geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )

# The above plot looks good but now let's let the curves vary across # subjects too. What I notice here is that it dramatically effects the standard error
-g1 <- gam(md ~ s(nodeID,class, bs='fs') + s(nodeID, subjectID, bs= 're'), data=df)
+g2 <- gam(md ~ s(nodeID,class, bs='fs') + s(nodeID, subjectID, bs= 're'), data=df)
 # Get smooth estimates
-dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68)
+dfs <- smooth_estimates(g2) %>% add_confint(coverage=.68)
 
 # Remove random effects and plot
 filter(dfs, type != 'Random effect') %>%
@@ -392,9 +392,9 @@ 

GAMs Tutorial

geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )

# Yet another way to do the same thing and I don't get the difference
-g1 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df)
+g3 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df)
 # Get smooth estimates
-dfs <- smooth_estimates(g1) %>% add_confint(coverage=.68)
+dfs <- smooth_estimates(g3) %>% add_confint(coverage=.68)
 
 # Remove random effects and plot
 filter(dfs, type != 'Random effect') %>%
@@ -402,6 +402,14 @@ 

GAMs Tutorial

geom_line() + geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )

+

Now calculate differences between the smooths for the different +factors and plot out the difference an 95% CI

+
ds<-difference_smooths(g3, smooth="s(nodeID)")
+ggplot(ds,aes(x=nodeID,y=diff)) +
+  geom_line()  +
+  geom_ribbon(aes(ymin = lower, ymax=upper),alpha = 0.2 ) +
+  geom_hline(yintercept=0)
+

From 8f4be3b6025c0ab7fe24ce7a0df757f2e744708e Mon Sep 17 00:00:00 2001 From: "Jason D. Yeatman" Date: Sat, 30 Mar 2024 07:35:11 -0700 Subject: [PATCH 3/3] added continuous example --- .../gam-tract-profiles-example.Rmd | 27 ++++++++++++++++++- .../gam-tract-profiles-example.html | 23 +++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/examples/howto_examples/gam-tract-profiles-example.Rmd b/examples/howto_examples/gam-tract-profiles-example.Rmd index 25db99aac..5088a8f50 100644 --- a/examples/howto_examples/gam-tract-profiles-example.Rmd +++ b/examples/howto_examples/gam-tract-profiles-example.Rmd @@ -54,7 +54,7 @@ ggplot(aes(x=nodeID,y=est, color=class)) + ```{r, message=FALSE, warning=FALSE} # Yet another way to do the same thing and I don't get the difference -g3 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df) +g3 <- gam(md ~ s(nodeID,by=class, bs='fs', k=10) + s(subjectID, bs= 're'), data=df) # Get smooth estimates dfs <- smooth_estimates(g3) %>% add_confint(coverage=.68) @@ -74,3 +74,28 @@ ggplot(ds,aes(x=nodeID,y=diff)) + geom_ribbon(aes(ymin = lower, ymax=upper),alpha = 0.2 ) + geom_hline(yintercept=0) ``` + +Now look at continuous measures along the tract profile + +```{r} +# Yet another way to do the same thing and I don't get the difference +g4 <- gam(md ~ s(nodeID,ALSFRSbulbar, bs='fs', k=10) + s(subjectID, bs= 're'), data=df) +dfs <- smooth_estimates(g4) %>% add_confint(coverage=.68) +#filter(dfs,smooth=='s(nodeID)') %>% +# ggplot(aes(x=nodeID,y=est)) + +# geom_line() + +# geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 ) + +# 2d plot showing how tract profiles vary as a function of AFS bulbar +filter(dfs,smooth=='s(nodeID,ALSFRSbulbar)') %>% +ggplot(aes(x=nodeID, y=ALSFRSbulbar))+ + geom_raster(aes(fill=est)) + +# Now plot separate tract profiles by bulbar +dfs %>% mutate(ALSFRSbulbar.factor=factor(ALSFRSbulbar)) %>% filter(ALSFRSbulbar== sort(unique(dfs$ALSFRSbulbar))[2] | ALSFRSbulbar== sort(unique(dfs$ALSFRSbulbar))[99]) %>% + filter(smooth=='s(nodeID,ALSFRSbulbar)') %>% + ggplot(aes(x=nodeID,y=est,color=ALSFRSbulbar.factor)) + + geom_line(show.legend = FALSE) + + geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 ) + +``` diff --git a/examples/howto_examples/gam-tract-profiles-example.html b/examples/howto_examples/gam-tract-profiles-example.html index 395c1cd3e..2579c29f0 100644 --- a/examples/howto_examples/gam-tract-profiles-example.html +++ b/examples/howto_examples/gam-tract-profiles-example.html @@ -392,7 +392,7 @@

GAMs Tutorial

geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )

# Yet another way to do the same thing and I don't get the difference
-g3 <- gam(md ~ s(nodeID,by=class, bs='fs') + s(subjectID, bs= 're'), data=df)
+g3 <- gam(md ~ s(nodeID,by=class, bs='fs', k=10) + s(subjectID, bs= 're'), data=df)
 # Get smooth estimates
 dfs <- smooth_estimates(g3) %>% add_confint(coverage=.68)
 
@@ -410,6 +410,27 @@ 

GAMs Tutorial

geom_ribbon(aes(ymin = lower, ymax=upper),alpha = 0.2 ) + geom_hline(yintercept=0)

+

Now look at continuous measures along the tract profile

+
# Yet another way to do the same thing and I don't get the difference
+g4 <- gam(md ~  s(nodeID,ALSFRSbulbar, bs='fs', k=10) + s(subjectID, bs= 're'), data=df)
+dfs <- smooth_estimates(g4) %>% add_confint(coverage=.68)
+#filter(dfs,smooth=='s(nodeID)') %>%
+#  ggplot(aes(x=nodeID,y=est)) +
+#  geom_line()  +
+#  geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )
+
+# 2d plot showing how tract profiles vary as a function of AFS bulbar
+filter(dfs,smooth=='s(nodeID,ALSFRSbulbar)') %>%
+ggplot(aes(x=nodeID, y=ALSFRSbulbar))+
+  geom_raster(aes(fill=est))
+

+
# Now plot separate tract profiles by bulbar
+dfs %>% mutate(ALSFRSbulbar.factor=factor(ALSFRSbulbar)) %>% filter(ALSFRSbulbar== sort(unique(dfs$ALSFRSbulbar))[2] | ALSFRSbulbar== sort(unique(dfs$ALSFRSbulbar))[99]) %>% 
+  filter(smooth=='s(nodeID,ALSFRSbulbar)') %>%
+  ggplot(aes(x=nodeID,y=est,color=ALSFRSbulbar.factor)) +
+  geom_line(show.legend = FALSE) +
+  geom_ribbon(aes(ymin = lower_ci, ymax=upper_ci),alpha = 0.2 )
+