diff --git a/README.md b/README.md
index 58972b6..c5552bc 100644
--- a/README.md
+++ b/README.md
@@ -5,23 +5,26 @@
# Setup
Install [R](https://www.r-project.org/) to run our code.
-# Code Availability
-Our codebase is available on GitHub at [mindds.github.io/apoe-glia](https://mindds.github.io/apoe-glia/) and below.
+# Documentation
+To read our documented code, please visit [mindds.github.io/apoe-glia](https://mindds.github.io/apoe-glia) or see below.
### Functions
-* [Models](https://mindds.github.io/apoe-glia/models.html)
-* [Spectral Clustering](https://mindds.github.io/apoe-glia/spectral-clustering.html)
+* [Models](https://mindds.github.io/apoe-glia/models)
+* [Spectral Clustering](https://mindds.github.io/apoe-glia/spectral-clustering)
### ROSMAP
-* [Define Variables](https://mindds.github.io/apoe-glia/ROSMAP-variables.html)
-* [Pre-Process Data](https://mindds.github.io/apoe-glia/ROSMAP-preprocess.html)
-* [Microglia Analysis](https://mindds.github.io/apoe-glia/ROSMAP-microglia.html)
-* [Astrocyte Analysis](https://mindds.github.io/apoe-glia/ROSMAP-astrocyte.html)
+* [Define Variables](https://mindds.github.io/apoe-glia/ROSMAP-variables)
+* [Pre-Process Data](https://mindds.github.io/apoe-glia/ROSMAP-preprocess)
+* [Microglia Analysis](https://mindds.github.io/apoe-glia/ROSMAP-microglia)
+* [Astrocyte Analysis](https://mindds.github.io/apoe-glia/ROSMAP-astrocyte)
### MSBB
-* [Define Variables](https://mindds.github.io/apoe-glia/MSBB-variables.html)
-* [Merge Data](https://mindds.github.io/apoe-glia/MSBB-merge.html)
-* [Microglia Analysis](https://mindds.github.io/apoe-glia/MSBB-microglia.html)
+* [Define Variables](https://mindds.github.io/apoe-glia/MSBB-variables)
+* [Merge Data](https://mindds.github.io/apoe-glia/MSBB-merge)
+* [Microglia Analysis](https://mindds.github.io/apoe-glia/MSBB-microglia)
### qPCR Analysis
-* [qPCR Analysis](https://mindds.github.io/apoe-glia/qPCR-analysis.html)
\ No newline at end of file
+* [qPCR Analysis](https://mindds.github.io/apoe-glia/qPCR-analysis)
+
+# Code Availability
+Our full codebase is available for download on [GitHub](https://github.com/mindds/apoe-glia).
\ No newline at end of file
diff --git a/_site.yml b/_site.yml
index d3da166..5c28706 100644
--- a/_site.yml
+++ b/_site.yml
@@ -16,30 +16,30 @@ navbar:
- text: "Functions"
menu:
- text: "Models"
- href: models.html
+ href: models
- text: "Spectral Clustering"
- href: spectral-clustering.html
+ href: spectral-clustering
- text: "ROSMAP"
menu:
- text: "Define Variables"
- href: ROSMAP-variables.html
+ href: ROSMAP-variables
- text: "Pre-Process Data"
- href: ROSMAP-preprocess.html
+ href: ROSMAP-preprocess
- text: "---"
- text: "Microglia Analysis"
- href: ROSMAP-microglia.html
+ href: ROSMAP-microglia
- text: "---"
- text: "Astrocyte Analysis"
- href: ROSMAP-astrocyte.html
+ href: ROSMAP-astrocyte
- text: "MSBB"
menu:
- text: "Define Variables"
- href: MSBB-variables.html
+ href: MSBB-variables
- text: "Merge Data"
- href: MSBB-merge.html
+ href: MSBB-merge
- text: "---"
- text: "Microglia Analysis"
- href: MSBB-microglia.html
+ href: MSBB-microglia
- text: "qPCR"
- href: qPCR-analysis.html
+ href: qPCR-analysis
output: distill::distill_article
diff --git a/docs/404.html b/docs/404.html
index 030c301..321318c 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -2153,7 +2153,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2192,8 +2192,8 @@
▾
@@ -2203,12 +2203,12 @@
▾
@@ -2218,13 +2218,13 @@
▾
-qPCR
+qPCR
diff --git a/docs/MSBB-merge.html b/docs/MSBB-merge.html
index 7213a93..f4c6df2 100644
--- a/docs/MSBB-merge.html
+++ b/docs/MSBB-merge.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2263,7 +2263,7 @@ Dependencies
Load requisite packages.
diff --git a/docs/MSBB-microglia.html b/docs/MSBB-microglia.html
index 3f86ced..8e1b0a3 100644
--- a/docs/MSBB-microglia.html
+++ b/docs/MSBB-microglia.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2281,10 +2281,10 @@ Dependencies
rm ( list = ls ( ) )
require ( limma )
require ( ComplexHeatmap )
-require ( data.table )
+require ( data.table )
require ( circlize )
require ( openxlsx )
-require ( tidyverse )
+require ( tidyverse )
require ( SNFtool )
require ( pheatmap )
require ( ggprism )
@@ -2313,17 +2313,17 @@ Select Microglia Genes
genes = readLines ( "../Data/Microglia Genes.txt" )
# select microglia genes that exist in MSBB
-annot = fread ( "../Data/ENSEMBL GRCh38.p7.csv" )
-annot = unique ( annot [ GeneSymbol %in% genes ,] )
-ensembl_ids = intersect ( rownames ( mData ) , annot $ EnsemblID )
-annot = unique ( annot [ EnsemblID %in% ensembl_ids ,] )
+annot = fread ( "../Data/ENSEMBL GRCh38.p7.csv" )
+annot = unique ( annot [ GeneSymbol %in% genes ,] )
+ensembl_ids = intersect ( rownames ( mData ) , annot $ EnsemblID )
+annot = unique ( annot [ EnsemblID %in% ensembl_ids ,] )
# remove duplicated genes
-annot = annot [ ! duplicated ( GeneSymbol ) , ]
+annot = annot [ ! duplicated ( GeneSymbol ) , ]
ensembl_ids = annot $ EnsemblID
# check results
-any ( duplicated ( ensembl_ids ) ) # no duplicated ids
+any ( duplicated ( ensembl_ids ) ) # no duplicated ids
any ( is.na ( ensembl_ids ) ) # no NA
length ( ensembl_ids )
@@ -2369,7 +2369,7 @@ Model 1
# Table S2
-TableS2 = fread ( "../results/TableS2.csv" )
+TableS2 = fread ( "../results/TableS2.csv" )
TableS2 [ tT_CERAD , on= "EnsemblID" ,
c ( "MSBB_E4vsE3_logFC" , "MSBB_E4vsE3_CI.L" , "MSBB_E4vsE3_CI.R" ,
"MSBB_E4vsE3_P.Value" , "MSBB_E4vsE3_adj.P.Val" ,
@@ -2379,8 +2379,8 @@ Model 1
i.E4vsE3_P.Value , i.E4vsE3_adj.P.Val ,
i.E2vsE3_logFC , i.E2vsE3_CI.L , i.E2vsE3_CI.R ,
i.E2vsE3_P.Value , i.E2vsE3_adj.P.Val ) ]
-TableS2 = TableS2 [ order ( GeneSymbol ) , ]
-fwrite ( TableS2 , "../results/TableS2.csv" )
+TableS2 = TableS2 [ order ( GeneSymbol ) , ]
+fwrite ( TableS2 , "../results/TableS2.csv" )
@@ -2391,17 +2391,17 @@ Model 1.5
# run Model 1.5 with E4 and E2 copies as numeric
tT_CERAD_ENum = run_model1.5 ( mData , cov $ C , cov $ E4num , cov $ E2num , annot )
-e4_num_genes = intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 ,GeneSymbol ] )
+e4_num_genes = intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 ,GeneSymbol ] )
length ( e4_num_genes )
-e2_num_genes = intersect ( tT_CERAD_ENum [ e2_P.Value < 0.05 & e2_logFC < 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 , GeneSymbol ] )
+e2_num_genes = intersect ( tT_CERAD_ENum [ e2_P.Value < 0.05 & e2_logFC < 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 , GeneSymbol ] )
length ( e2_num_genes )
-intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] ,
+intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] ,
tT_CERAD_ENum [ e2_P.Value < 0.05 & e2_logFC < 0 , GeneSymbol ] )
# Table S3
-TableS3 = fread ( "../results/TableS3.csv" )
+TableS3 = fread ( "../results/TableS3.csv" )
TableS3 = TableS3 [ tT_CERAD_ENum , on = . ( EnsemblID , GeneSymbol , Description ) ,
c ( "MSBB_e4_logFC" , "MSBB_e4_CI.L" , "MSBB_e4_CI.R" ,
"MSBB_e4_P.Value" , "MSBB_e4_adj.P.Val" ) := . (
@@ -2410,7 +2410,7 @@ Model 1.5
c ( "MSBB_e2_logFC" , "MSBB_e2_CI.L" , "MSBB_e2_CI.R" ,
"MSBB_e2_P.Value" , "MSBB_e2_adj.P.Val" ) := . (
i.e2_logFC , i.e2_CI.L , i.e2_CI.R , i.e2_P.Value , i.e2_adj.P.Val ) ]
-fwrite ( TableS3 , "../results/TableS3.csv" )
+fwrite ( TableS3 , "../results/TableS3.csv" )
@@ -2450,8 +2450,8 @@ C0
# visualization
annot_row = data.frame ( cluster = as.character ( clust.C0 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.C0 $ zMtx )
-pheatmap ( clust.C0 $ zMtx [ order ( clust.C0 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.C0 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.C0 $ zMtx [ order ( clust.C0 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.C0 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2475,8 +2475,8 @@ C3
# visualization
annot_row = data.frame ( cluster = as.character ( clust.C3 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.C3 $ zMtx )
-pheatmap ( clust.C3 $ zMtx [ order ( clust.C3 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.C3 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.C3 $ zMtx [ order ( clust.C3 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.C3 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2500,8 +2500,8 @@ B1
# visualization
annot_row = data.frame ( cluster = as.character ( clust.B1 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.B1 $ zMtx )
-pheatmap ( clust.B1 $ zMtx [ order ( clust.B1 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.B1 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.B1 $ zMtx [ order ( clust.B1 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.B1 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2525,8 +2525,8 @@ B3
# visualization
annot_row = data.frame ( cluster = as.character ( clust.B3 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.B3 $ zMtx )
-pheatmap ( clust.B3 $ zMtx [ order ( clust.B3 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.B3 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.B3 $ zMtx [ order ( clust.B3 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.B3 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2552,7 +2552,7 @@ Statistical Testing
# ROSMAP C0 genes
-TableS1 = fread ( "../results/TableS1.csv" )
+TableS1 = fread ( "../results/TableS1.csv" )
rosmap.C0.genes = TableS1 $ GeneSymbol
# C0 test
@@ -2561,19 +2561,19 @@ Statistical Testing
test_avez ( z.C0 $ z , C0.genes , cov_C0 )
# C0 test based on ROSMAP genes
-test_avez ( z.C0 $ z , intersect ( rownames ( z.C0 $ z ) , rosmap.C0.genes ) , cov_C0 )
+test_avez ( z.C0 $ z , intersect ( rownames ( z.C0 $ z ) , rosmap.C0.genes ) , cov_C0 )
# adjusted by donor
-test_avez ( z.C0 $ z , intersect ( rownames ( z.C0 $ z ) , rosmap.C0.genes ) , cov_C0 , adj.donor = T , donor = colnames ( z.C0 $ z ) )
+test_avez ( z.C0 $ z , intersect ( rownames ( z.C0 $ z ) , rosmap.C0.genes ) , cov_C0 , adj.donor = T , donor = colnames ( z.C0 $ z ) )
# add to Table S1
-MSBB_aveZ = copy ( avez.C0 )
-setnames ( MSBB_aveZ , c ( "Genes" , "E2" , "E3" , "E4" ) ,
+MSBB_aveZ = copy ( avez.C0 )
+setnames ( MSBB_aveZ , c ( "Genes" , "E2" , "E3" , "E4" ) ,
c ( "GeneSymbol" , paste ( "MSBB" , c ( "E2" , "E3" , "E4" ) , sep = "_" ) ) )
TableS1 = MSBB_aveZ [ GeneSymbol %in% rosmap.C0.genes , ] [ TableS1 , on = . ( GeneSymbol ) ]
TableS1 = TableS1 [ , . ( GeneSymbol , EnsemblID , Description , ROSMAP_E2 , ROSMAP_E3 , ROSMAP_E4 ,
MSBB_E2 , MSBB_E3 , MSBB_E4 ) ]
-fwrite ( TableS1 , "../results/TableS1.csv" )
+fwrite ( TableS1 , "../results/TableS1.csv" )
# C3 test
cov_C3 = cov [ C == 3 ,]
@@ -2596,7 +2596,7 @@ Statistical Testing
# read ROSMAP C0 genes
-TableS3 = fread ( "../results/TableS3.csv" )
+TableS3 = fread ( "../results/TableS3.csv" )
rosmap_C0.genes = TableS3 $ GeneSymbol
mDataZ.rosmap_C0.genes = z.C0 $ z [ rownames ( z.C0 $ z ) %in% rosmap_C0.genes ,]
mDataZ.rosmap_C0.genes.e2 = mDataZ.rosmap_C0.genes [ ,z.C0 $ E == "E2" ]
@@ -2610,7 +2610,7 @@ Statistical Testing
E4 = ncol ( mDataZ.rosmap_C0.genes.e4 ) )
# compute intersection
-intersect ( rosmap_C0.genes , C0.genes )
+intersect ( rosmap_C0.genes , C0.genes )
@@ -2620,7 +2620,7 @@ Statistical Testing
# test on ROSMAP C0 genes
cov_C0 = cov [ C == 0 ,]
all ( cov_C0 $ individualIdentifier == colnames ( z.C0 $ z ) )
-test_avez ( z.C0 $ z , intersect ( rosmap_C0.genes , rownames ( mData ) ) , cov_C0 )
+test_avez ( z.C0 $ z , intersect ( rosmap_C0.genes , rownames ( mData ) ) , cov_C0 )
# read ROSMAP C3 genes
rosmap_C3.genes = read.xlsx ( "../results/ROSMAP_miroglia_cluster_genes_remove.low.expressed.genes.xlsx" , sheet = "C3(k=3)" , colNames = F )
@@ -2629,7 +2629,7 @@ Statistical Testing
# test on ROSMAP C3 genes
cov_C3 = cov [ C == 3 ,]
all ( cov_C3 $ individualIdentifier == colnames ( z.C3 $ z ) )
-test_avez ( z.C3 $ z , intersect ( rosmap_C3.genes , rownames ( mData ) ) , cov_C3 )
+test_avez ( z.C3 $ z , intersect ( rosmap_C3.genes , rownames ( mData ) ) , cov_C3 )
diff --git a/docs/MSBB-variables.html b/docs/MSBB-variables.html
index 4c5e65f..2c1e969 100644
--- a/docs/MSBB-variables.html
+++ b/docs/MSBB-variables.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2275,7 +2275,7 @@ Dependencies
Load requisite packages.
@@ -2319,7 +2319,7 @@ Process Expression Data
# process expression data
cat ( "processing" , brain_region , "...\n" )
data_file = unlist ( data_files [ brain_region ] )
-msbb = suppressWarnings ( { fread ( data_file , sep = "\t" ) } )
+msbb = suppressWarnings ( { fread ( data_file , sep = "\t" ) } )
gene_ids = msbb $ V1
msbb = as.data.frame ( msbb [ , - 1 , with = F ] )
rownames ( msbb ) = gene_ids
@@ -2334,13 +2334,13 @@ Process Expression Data
# extract subject IDs
key_file = "../Data/MSBB_RNAseq_covariates_November2018Update.csv"
-key_map = fread ( key_file )
-key_map = unique ( key_map [ , . ( sampleIdentifier , individualIdentifier ) ] )
+key_map = fread ( key_file )
+key_map = unique ( key_map [ , . ( sampleIdentifier , individualIdentifier ) ] )
# check for samples without an ID match
cat ( "any samples don't have id match? " )
cat ( any ( ! colnames ( msbb ) %in% key_map $ sampleIdentifier ) , "\n" )
-sub_key_map = key_map [ data.table ( sampleIdentifier = colnames ( msbb ) ) , on = . ( sampleIdentifier ) ]
+sub_key_map = key_map [ data.table ( sampleIdentifier = colnames ( msbb ) ) , on = . ( sampleIdentifier ) ]
all ( sub_key_map $ sampleIdentifier == colnames ( msbb ) )
colnames ( msbb ) = sub_key_map $ individualIdentifier
@@ -2350,9 +2350,9 @@
Process Expression Data
@@ -2362,8 +2362,8 @@
Process Covariate Data
cov_file = "../Data/MSBB_clinical.csv"
-cov = fread ( cov_file )
-setnames ( cov , c ( "NP.1" , "bbscore" ) , c ( "CERAD" , "Braak" ) )
+cov = fread ( cov_file )
+setnames ( cov , c ( "NP.1" , "bbscore" ) , c ( "CERAD" , "Braak" ) )
cov [ Braak > 6 , Braak := NA ]
@@ -2371,7 +2371,7 @@
Process Covariate Data
Extract clinical data in the same order as in the data.
-
cov = cov [ data.table ( individualIdentifier = colnames ( msbb ) ) , on = . ( individualIdentifier ) ]
+cov = cov [ data.table ( individualIdentifier = colnames ( msbb ) ) , on = . ( individualIdentifier ) ]
all ( cov $ individualIdentifier == colnames ( msbb ) )
diff --git a/docs/ROSMAP-astrocyte.html b/docs/ROSMAP-astrocyte.html
index 6b230e2..f97e639 100644
--- a/docs/ROSMAP-astrocyte.html
+++ b/docs/ROSMAP-astrocyte.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-
qPCR
+
qPCR
@@ -2287,9 +2287,9 @@
Dependencies
rm ( list = ls ( ) )
require ( ggVennDiagram )
require ( limma )
-require ( latex2exp )
+require ( latex2exp )
require ( ComplexHeatmap )
-require ( data.table )
+require ( data.table )
require ( pheatmap )
require ( ggplot2 )
require ( gridExtra )
@@ -2322,16 +2322,16 @@ Select Astrocyte Genes
genes = readLines ( "../Data/Astrocyte Genes.txt" )
# select astrocyte genes that exist in ROSMAP
-annot = fread ( "../Data/ENSEMBL GRCh38.p7.csv" )
-annot = unique ( annot [ GeneSymbol %in% genes ,] )
-ensembl_ids = intersect ( rownames ( mData ) , annot $ EnsemblID )
+annot = fread ( "../Data/ENSEMBL GRCh38.p7.csv" )
+annot = unique ( annot [ GeneSymbol %in% genes ,] )
+ensembl_ids = intersect ( rownames ( mData ) , annot $ EnsemblID )
-annot = unique ( annot [ EnsemblID %in% ensembl_ids ,] )
+annot = unique ( annot [ EnsemblID %in% ensembl_ids ,] )
# remove duplicated genes
-annot = annot [ ! duplicated ( GeneSymbol ) , ]
+annot = annot [ ! duplicated ( GeneSymbol ) , ]
ensembl_ids = annot $ EnsemblID
-any ( duplicated ( ensembl_ids ) ) # no duplicated ids
+any ( duplicated ( ensembl_ids ) ) # no duplicated ids
any ( is.na ( ensembl_ids ) ) # no NA
length ( ensembl_ids )
@@ -2452,8 +2452,8 @@
C0
# visualization
annot_row = data.frame ( cluster
= as.character ( clust.C0 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.C0 $ zMtx )
-
pheatmap ( clust.C0 $ zMtx [ order ( clust.C0 $ clustA ) ,
] ,
- annotation_row
= annot_row [ order ( clust.C0 $ clustA ) , , drop
= F ] ,
+
pheatmap ( clust.C0 $ zMtx [ order ( clust.C0 $ clustA ) ,
] ,
+ annotation_row
= annot_row [ order ( clust.C0 $ clustA ) , , drop
= F ] ,
cluster_rows
= F , cluster_cols
= F ,
show_rownames
= F )
@@ -2479,8 +2479,8 @@
C3
# visualization
annot_row = data.frame ( cluster
= as.character ( clust.C3 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.C3 $ zMtx )
-
pheatmap ( clust.C3 $ zMtx [ order ( clust.C3 $ clustA ) ,
] ,
- annotation_row
= annot_row [ order ( clust.C3 $ clustA ) , , drop
= F ] ,
+
pheatmap ( clust.C3 $ zMtx [ order ( clust.C3 $ clustA ) ,
] ,
+ annotation_row
= annot_row [ order ( clust.C3 $ clustA ) , , drop
= F ] ,
cluster_rows
= F , cluster_cols
= F ,
show_rownames
= F )
@@ -2504,8 +2504,8 @@
B1
# visualization
annot_row = data.frame ( cluster
= as.character ( clust.B1 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.B1 $ zMtx )
-
pheatmap ( clust.B1 $ zMtx [ order ( clust.B1 $ clustA ) ,
] ,
- annotation_row
= annot_row [ order ( clust.B1 $ clustA ) , , drop
= F ] ,
+
pheatmap ( clust.B1 $ zMtx [ order ( clust.B1 $ clustA ) ,
] ,
+ annotation_row
= annot_row [ order ( clust.B1 $ clustA ) , , drop
= F ] ,
cluster_rows
= F , cluster_cols
= F ,
show_rownames
= F )
@@ -2529,8 +2529,8 @@
B3
# visualization
annot_row = data.frame ( cluster
= as.character ( clust.B3 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.B3 $ zMtx )
-
pheatmap ( clust.B3 $ zMtx [ order ( clust.B3 $ clustA ) ,
] ,
- annotation_row
= annot_row [ order ( clust.B3 $ clustA ) , , drop
= F ] ,
+
pheatmap ( clust.B3 $ zMtx [ order ( clust.B3 $ clustA ) ,
] ,
+ annotation_row
= annot_row [ order ( clust.B3 $ clustA ) , , drop
= F ] ,
cluster_rows
= F , cluster_cols
= F ,
show_rownames
= F )
@@ -2570,8 +2570,8 @@
Save Results
sheetName
= "C3_clust1(k=5)" , append
= TRUE , row.names
= F ,
col.names
= F )
-
length ( intersect ( C0_c4.genes ,
C3.genes ) )
-
xlsx :: write.xlsx ( intersect ( C0_c4.genes ,
C3.genes ) ,
+
length ( intersect ( C0_c4.genes ,
C3.genes ) )
+
xlsx :: write.xlsx ( intersect ( C0_c4.genes ,
C3.genes ) ,
"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx" ,
sheetName
= "intersect C0_cluster3&C4" , append
= TRUE , row.names
= F , col.names
= F )
@@ -2586,8 +2586,8 @@
Save Results
sheetName
= "B3_clust1(k=5)" , append
= TRUE , row.names
= F ,
col.names
= F )
-
length ( intersect ( B1.genes ,
B3.genes ) )
-
xlsx :: write.xlsx ( intersect ( B1.genes ,
B3.genes ) ,
+
length ( intersect ( B1.genes ,
B3.genes ) )
+
xlsx :: write.xlsx ( intersect ( B1.genes ,
B3.genes ) ,
"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx" ,
sheetName
= "intersect B1&B3" , append
= TRUE , row.names
= F , col.names
= F )
@@ -2648,7 +2648,7 @@
# prepare data
zC0 = z.C0 $ z
-zC0 = zC0 [ order ( clust.C0 $ clustA ) ,]
+zC0 = zC0 [ order ( clust.C0 $ clustA ) ,]
zC0.genes = zC0 [ rownames ( zC0 ) %in% C0_c4.genes ,]
# color function
@@ -2736,7 +2736,7 @@
# Figure 4A
pdf ( "../results/Figure4AB.cluster1.pdf" , width = 11 )
-Heatmap ( as.matrix ( avez.C0 [ order ( clust.C0 $ clustA ) , c ( "E2" , "E3" , "E4" ) ] ) ,
+Heatmap ( as.matrix ( avez.C0 [ order ( clust.C0 $ clustA ) , c ( "E2" , "E3" , "E4" ) ] ) ,
col = colFun1 ,
cluster_columns = FALSE ,
cluster_rows = FALSE ,
@@ -2759,7 +2759,7 @@
# Figure 4B
pdf ( "../results/Figure4AB_cluster4.pdf" , width = 11 )
-Heatmap ( as.matrix ( avez.C0 [ order ( clust.C0 $ clustA ) , c ( "E2" , "E3" , "E4" ) ] ) ,
+Heatmap ( as.matrix ( avez.C0 [ order ( clust.C0 $ clustA ) , c ( "E2" , "E3" , "E4" ) ] ) ,
col = colFun1 ,
cluster_columns = FALSE ,
cluster_rows = FALSE ,
@@ -2864,7 +2864,7 @@
EC = cov $ EC [ sel ]
# average z-score
-aveZ = data.table ( Genes = rownames ( mDataZ ) ,
+aveZ = data.table ( Genes = rownames ( mDataZ ) ,
C0E2 = rowMeans ( mDataZ [ , EC == "C0:E2" ] ) ,
C0E3 = rowMeans ( mDataZ [ , EC == "C0:E3" ] ) ,
C0E4 = rowMeans ( mDataZ [ , EC == "C0:E4" ] ) ,
@@ -2881,7 +2881,7 @@
C3E3 = rowMeans ( mDataZ [ , EC == "C3:E3" ] ) ,
C3E4 = rowMeans ( mDataZ [ , EC == "C3:E4" ] ) )
-aveZ2 = melt ( aveZ , id.vars = c ( "Genes" ) )
+aveZ2 = melt ( aveZ , id.vars = c ( "Genes" ) )
colnames ( aveZ2 ) = c ( "Genes" , "EC" , "aveZ" )
aveZ2 [ , CERAD := gsub ( "E[234]$" , "" , aveZ2 $ EC ) ]
aveZ2 [ , APOE := gsub ( "C[0123]" , "" , aveZ2 $ EC ) ]
diff --git a/docs/ROSMAP-microglia.html b/docs/ROSMAP-microglia.html
index 628ebad..283a1c1 100644
--- a/docs/ROSMAP-microglia.html
+++ b/docs/ROSMAP-microglia.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-
qPCR
+
qPCR
@@ -2288,9 +2288,9 @@
Dependencies
rm ( list = ls ( ) )
require ( limma )
-require ( latex2exp )
+require ( latex2exp )
require ( ComplexHeatmap )
-require ( data.table )
+require ( data.table )
require ( pheatmap )
require ( ggplot2 )
require ( gridExtra )
@@ -2325,17 +2325,17 @@ Select Microglia Genes
genes = readLines ( "../Data/Microglia Genes.txt" )
# select microglia genes that exist in ROSMAP
-annot = fread ( "../Data/ENSEMBL GRCh38.p7.csv" )
-annot = unique ( annot [ GeneSymbol %in% genes ,] )
-ensembl_ids = intersect ( rownames ( mData ) , annot $ EnsemblID )
-annot = unique ( annot [ EnsemblID %in% ensembl_ids ,] )
+annot = fread ( "../Data/ENSEMBL GRCh38.p7.csv" )
+annot = unique ( annot [ GeneSymbol %in% genes ,] )
+ensembl_ids = intersect ( rownames ( mData ) , annot $ EnsemblID )
+annot = unique ( annot [ EnsemblID %in% ensembl_ids ,] )
# remove duplicated genes
-annot = annot [ ! duplicated ( GeneSymbol ) , ]
+annot = annot [ ! duplicated ( GeneSymbol ) , ]
ensembl_ids = annot $ EnsemblID
# check results
-any ( duplicated ( ensembl_ids ) ) # no duplicated ids
+any ( duplicated ( ensembl_ids ) ) # no duplicated ids
any ( is.na ( ensembl_ids ) ) # no NA
length ( ensembl_ids )
@@ -2385,7 +2385,7 @@
Model 1
length ( tT_Braak [ E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0 ,
] $ GeneSymbol )
# Table S2
-
TableS2 = copy ( annot [ order ( GeneSymbol ) ,
] )
+
TableS2 = copy ( annot [ order ( GeneSymbol ) ,
] )
TableS2 [ tT_CERAD , on
= "EnsemblID" ,
c ( "ROSMAP_E4vsE3_logFC" ,
"ROSMAP_E4vsE3_CI.L" ,
"ROSMAP_E4vsE3_CI.R" ,
"ROSMAP_E4vsE3_P.Value" ,
"ROSMAP_E4vsE3_adj.P.Val" ,
@@ -2395,7 +2395,7 @@
Model 1
i.E4vsE3_P.Value ,
i.E4vsE3_adj.P.Val ,
i.E2vsE3_logFC ,
i.E2vsE3_CI.L ,
i.E2vsE3_CI.R ,
i.E2vsE3_P.Value ,
i.E2vsE3_adj.P.Val ) ]
-
fwrite ( TableS2 ,
"../results/TableS2.csv" )
+
fwrite ( TableS2 ,
"../results/TableS2.csv" )
@@ -2406,17 +2406,17 @@
Model 1.5
# run Model 1.5 with E4 and E2 copies as numeric
tT_CERAD_ENum = run_model1.5 ( mData , cov $ C , cov $ E4num , cov $ E2num , annot )
-e4_num_genes = intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 ,GeneSymbol ] )
+e4_num_genes = intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 ,GeneSymbol ] )
length ( e4_num_genes )
-e2_num_genes = intersect ( tT_CERAD_ENum [ e2_P.Value < 0.05 & e2_logFC < 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 , GeneSymbol ] )
+e2_num_genes = intersect ( tT_CERAD_ENum [ e2_P.Value < 0.05 & e2_logFC < 0 , GeneSymbol ] , tT_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 , GeneSymbol ] )
length ( e2_num_genes )
-intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] ,
+intersect ( tT_CERAD_ENum [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] ,
tT_CERAD_ENum [ e2_P.Value < 0.05 & e2_logFC < 0 , GeneSymbol ] )
# Table S3
-TableS3 = annot [ order ( GeneSymbol ) , ]
+TableS3 = annot [ order ( GeneSymbol ) , ]
TableS3 = TableS3 [ tT_CERAD_ENum , on = . ( EnsemblID , GeneSymbol , Description ) ,
c ( "ROSMAP_e4_logFC" , "ROSMAP_e4_CI.L" , "ROSMAP_e4_CI.R" ,
"ROSMAP_e4_P.Value" , "ROSMAP_e4_adj.P.Val" ) := . (
@@ -2425,7 +2425,7 @@ Model 1.5
c ( "ROSMAP_e2_logFC" , "ROSMAP_e2_CI.L" , "ROSMAP_e2_CI.R" ,
"ROSMAP_e2_P.Value" , "ROSMAP_e2_adj.P.Val" ) := . (
i.e2_logFC , i.e2_CI.L , i.e2_CI.R , i.e2_P.Value , i.e2_adj.P.Val ) ]
-fwrite ( TableS3 , "../results/TableS3.csv" )
+fwrite ( TableS3 , "../results/TableS3.csv" )
@@ -2467,14 +2467,14 @@ Model 2.5
nrow ( tT_CERAD_ENum_C0 [ e4_P.Value < 0.05 , ] )
nrow ( tT_CERAD_ENum_C0 [ e2_P.Value < 0.05 , ] )
-e4Num_genes_C0 = intersect (
+e4Num_genes_C0 = intersect (
tT_CERAD_ENum_C0 [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] ,
tT2_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 , GeneSymbol ] )
-e2Num_genes_C0 = intersect (
+e2Num_genes_C0 = intersect (
tT_CERAD_ENum_C0 [ e2_P.Value < 0.05 & e2_logFC < 0 ,GeneSymbol ] ,
tT2_CERAD [ E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0 , GeneSymbol ] )
-intersect ( tT_CERAD_ENum_C0 [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] ,
+intersect ( tT_CERAD_ENum_C0 [ e4_P.Value < 0.05 & e4_logFC > 0 , GeneSymbol ] ,
tT_CERAD_ENum_C0 [ e2_P.Value < 0.05 & e2_logFC > 0 , GeneSymbol ] )
@@ -2567,8 +2567,8 @@ C0
# visualization
annot_row = data.frame ( cluster = as.character ( clust.C0 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.C0 $ zMtx )
-pheatmap ( clust.C0 $ zMtx [ order ( clust.C0 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.C0 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.C0 $ zMtx [ order ( clust.C0 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.C0 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2578,16 +2578,16 @@ C0
# Table S1
TableS1 = as.data.frame ( cbind ( clust.C0 $ clustA , clust.C0 $ zMtx ) )
-setDT ( TableS1 )
+setDT ( TableS1 )
TableS1 [ , GeneSymbol := rownames ( clust.C0 $ zMtx ) ]
TableS1 = annot [ GeneSymbol %in% TableS1 $ GeneSymbol ] [ TableS1 , on = . ( GeneSymbol ) ]
colnames ( TableS1 ) = c ( "EnsemblID" , "GeneSymbol" , "Description" , "Cluster" , "ROSMAP_E2" , "ROSMAP_E3" , "ROSMAP_E4" )
TableS1 = TableS1 [ Cluster == c ,]
TableS1 = TableS1 [ ,Cluster := NULL ]
-TableS1 = TableS1 [ ! duplicated ( GeneSymbol ) , ]
-TableS1 = TableS1 [ order ( GeneSymbol ) , ]
-fwrite ( TableS1 , "../results/TableS1.csv" , row.names = F )
+TableS1 = TableS1 [ ! duplicated ( GeneSymbol ) , ]
+TableS1 = TableS1 [ order ( GeneSymbol ) , ]
+fwrite ( TableS1 , "../results/TableS1.csv" , row.names = F )
@@ -2605,8 +2605,8 @@ C3
# visualization
annot_row = data.frame ( cluster = as.character ( clust.C3 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.C3 $ zMtx )
-pheatmap ( clust.C3 $ zMtx [ order ( clust.C3 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.C3 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.C3 $ zMtx [ order ( clust.C3 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.C3 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2630,8 +2630,8 @@ B1
# visualization
annot_row = data.frame ( cluster = as.character ( clust.B1 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.B1 $ zMtx )
-pheatmap ( clust.B1 $ zMtx [ order ( clust.B1 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.B1 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.B1 $ zMtx [ order ( clust.B1 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.B1 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2657,8 +2657,8 @@ B3
# visualization
annot_row = data.frame ( cluster = as.character ( clust.B3 $ clustA ) )
rownames ( annot_row ) = rownames ( clust.B3 $ zMtx )
-pheatmap ( clust.B3 $ zMtx [ order ( clust.B3 $ clustA ) ,] ,
- annotation_row = annot_row [ order ( clust.B3 $ clustA ) , , drop = F ] ,
+pheatmap ( clust.B3 $ zMtx [ order ( clust.B3 $ clustA ) ,] ,
+ annotation_row = annot_row [ order ( clust.B3 $ clustA ) , , drop = F ] ,
cluster_rows = F , cluster_cols = F ,
show_rownames = F )
@@ -2716,7 +2716,7 @@
# prepare data
zC0 = z.C0 $ z
-zC0 = zC0 [ order ( clust.C0 $ clustA ) ,]
+zC0 = zC0 [ order ( clust.C0 $ clustA ) ,]
zC0.genes = zC0 [ rownames ( zC0 ) %in% C0.genes ,]
# color function
@@ -2801,7 +2801,7 @@
# Figure 2A
# heatmap for average z-scores
pdf ( "../results/Figure2AB.pdf" , width = 11 )
-Heatmap ( as.matrix ( avez.C0 [ order ( clust.C0 $ clustA ) , c ( "E2" , "E3" , "E4" ) ] ) ,
+Heatmap ( as.matrix ( avez.C0 [ order ( clust.C0 $ clustA ) , c ( "E2" , "E3" , "E4" ) ] ) ,
col = colFun1 ,
cluster_columns = FALSE ,
cluster_rows = FALSE ,
@@ -2882,7 +2882,7 @@
EC = cov $ EC [ sel ]
# average z-score
-aveZ = data.table ( Genes = rownames ( mDataZ ) ,
+aveZ = data.table ( Genes = rownames ( mDataZ ) ,
C0E2 = rowMeans ( mDataZ [ , EC == "C0:E2" ] ) ,
C0E3 = rowMeans ( mDataZ [ , EC == "C0:E3" ] ) ,
C0E4 = rowMeans ( mDataZ [ , EC == "C0:E4" ] ) ,
@@ -2899,7 +2899,7 @@
C3E3 = rowMeans ( mDataZ [ , EC == "C3:E3" ] ) ,
C3E4 = rowMeans ( mDataZ [ , EC == "C3:E4" ] ) )
-aveZ2 = melt ( aveZ , id.vars = c ( "Genes" ) )
+aveZ2 = melt ( aveZ , id.vars = c ( "Genes" ) )
colnames ( aveZ2 ) = c ( "Genes" , "EC" , "aveZ" )
aveZ2 [ , CERAD := gsub ( "E[234]$" , "" , aveZ2 $ EC ) ]
aveZ2 [ , APOE := gsub ( "C[0123]" , "" , aveZ2 $ EC ) ]
@@ -2933,10 +2933,10 @@
# prepare data for downstream analysis
-fig3_tT2_CERAD = copy ( tT2_CERAD )
+fig3_tT2_CERAD = copy ( tT2_CERAD )
colnames ( fig3_tT2_CERAD ) = gsub ( "E4vsE3" , "C0E4vsC0E3" ,colnames ( fig3_tT2_CERAD ) )
colnames ( fig3_tT2_CERAD ) = gsub ( "E2vsE3" , "C0E2vsC0E3" ,colnames ( fig3_tT2_CERAD ) )
-fig3_tT3_CERAD = copy ( tT3_CERAD )
+fig3_tT3_CERAD = copy ( tT3_CERAD )
colnames ( fig3_tT3_CERAD ) = gsub ( "E4vsE3" , "C3E4vsC3E3" ,colnames ( fig3_tT3_CERAD ) )
colnames ( fig3_tT3_CERAD ) = gsub ( "E2vsE3" , "C3E2vsC3E3" ,colnames ( fig3_tT3_CERAD ) )
@@ -2950,7 +2950,7 @@
colnames ( fig3.1 ) = c ( "C0" , "C1" , "C2" , "C3" )
ggthemr ( "fresh" )
fig3.c = tidyr :: gather ( fig3.1 , "CERAD" , "logFC" )
-setDT ( fig3.c )
+setDT ( fig3.c )
# axis labels
axisLow = list ( "C0" = c ( "C1" , "C2" , "C3" ) ,
diff --git a/docs/ROSMAP-preprocess.html b/docs/ROSMAP-preprocess.html
index 5a02a82..7c71969 100644
--- a/docs/ROSMAP-preprocess.html
+++ b/docs/ROSMAP-preprocess.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-
qPCR
+
qPCR
@@ -2264,7 +2264,7 @@
Dependencies
Load requisite packages.
diff --git a/docs/ROSMAP-variables.html b/docs/ROSMAP-variables.html
index a99217a..b9e1e7f 100644
--- a/docs/ROSMAP-variables.html
+++ b/docs/ROSMAP-variables.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2273,7 +2273,7 @@ Dependencies
Load requisite packages.
@@ -2283,7 +2283,7 @@ Read FPKM Data
# read FPKM data
fpkm_file = "../Data/ROSMAP_RNAseq_FPKM_gene.tsv"
-rosmap_fpkm = fread ( fpkm_file , stringsAsFactors = FALSE , header = T , sep = "\t" )
+rosmap_fpkm = fread ( fpkm_file , stringsAsFactors = FALSE , header = T , sep = "\t" )
all ( rosmap_fpkm $ tracking_id == rosmap_fpkm $ gene_id )
gene_id = rosmap_fpkm $ gene_id
rosmap_fpkm = rosmap_fpkm [ , c ( - 1 ,- 2 ) , with = F ]
@@ -2304,8 +2304,8 @@ Read Covariate Data
# read ROSMAP ID map
key_map_file = "../Data/ROSMAP_IDkey.csv"
-key_map = fread ( key_map_file )
-key_map = unique ( key_map [ , . ( projid , mrna_id ) ] )
+key_map = fread ( key_map_file )
+key_map = unique ( key_map [ , . ( projid , mrna_id ) ] )
@@ -2314,10 +2314,10 @@ Read Covariate Data
# read and process covariate data
cov_file = "../Data/ROSMAP_clinical.csv"
-cov = fread ( cov_file )
+cov = fread ( cov_file )
# rename covariate data and merge with ROSMAP IDs
-setnames ( cov , c ( "projid" , "cts_mmse30_lv" , "braaksc" , "ceradsc" , "cogdx" , "apoe_genotype" ) ,
+setnames ( cov , c ( "projid" , "cts_mmse30_lv" , "braaksc" , "ceradsc" , "cogdx" , "apoe_genotype" ) ,
c ( "projid" , "MMSE" , "Braak" , "CERAD" , "ClinicalDiagnosis" , "RawAPOE" ) )
cov [ key_map , on = . ( projid ) , mrna_id := mrna_id ]
@@ -2336,7 +2336,7 @@ Reorder Data
colnames ( rosmap_fpkm ) [ ! colnames ( rosmap_fpkm ) %in% c ( cov $ mrna_id_nov ) ]
rosmap_fpkm = rosmap_fpkm [ , colnames ( rosmap_fpkm ) %in% cov $ mrna_id_nov ]
all ( colnames ( rosmap_fpkm ) %in% cov $ mrna_id_nov )
-cov = cov [ data.table ( mrna_id_nov = colnames ( rosmap_fpkm ) ) , on = . ( mrna_id_nov ) ]
+cov = cov [ data.table ( mrna_id_nov = colnames ( rosmap_fpkm ) ) , on = . ( mrna_id_nov ) ]
# check that cov and rosmap_fpkm have same order
all ( cov $ mrna_id_nov == colnames ( rosmap_fpkm ) )
diff --git a/docs/index.html b/docs/index.html
index d8812ac..b46c9f7 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2253,42 +2253,45 @@ About
Contents
Setup
Install R to run our code.
-Code Availability
-Our codebase is available on GitHub at mindds.github.io/apoe-glia and below.
+Documentation
+To read our documented code, please visit mindds.github.io/apoe-glia or see below.
Functions
ROSMAP
MSBB
qPCR Analysis
+Code Availability
+Our full codebase is available for download on GitHub .
diff --git a/docs/models.html b/docs/models.html
index 8131429..d51b69d 100644
--- a/docs/models.html
+++ b/docs/models.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2303,8 +2303,8 @@ Model 1
colnames ( tT1.sfE ) [ 4 : 11 ] = paste0 ( "E4vsE3_" , colnames ( tT1.sfE ) [ 4 : 11 ] )
colnames ( tT2.sfE ) [ 4 : 11 ] = paste0 ( "E2vsE3_" , colnames ( tT2.sfE ) [ 4 : 11 ] )
- setDT ( tT1.sfE )
- setDT ( tT2.sfE )
+ setDT ( tT1.sfE )
+ setDT ( tT2.sfE )
tT.sfE = tT1.sfE [ tT2.sfE , on= c ( "GeneSymbol" , "EnsemblID" , "Description" ) ]
return ( tT.sfE )
@@ -2351,10 +2351,10 @@ Model 1.5
tT_CERAD_E4Num = topTable ( fit.eb.sfE , sort.by= "none" , number= Inf , coef= 2 , confint= TRUE )
tT_CERAD_E2Num = topTable ( fit.eb.sfE , sort.by= "none" , number= Inf , coef= 3 , confint= TRUE )
- setDT ( tT_CERAD_E4Num )
- setDT ( tT_CERAD_E2Num )
+ setDT ( tT_CERAD_E4Num )
+ setDT ( tT_CERAD_E2Num )
- tT = copy ( annot )
+ tT = copy ( annot )
tT = tT [ tT_CERAD_E4Num , on = . ( EnsemblID , GeneSymbol , Description ) ,
c ( "e4_logFC" , "e4_CI.L" , "e4_CI.R" ,
"e4_P.Value" , "e4_adj.P.Val" ) := . (
@@ -2425,8 +2425,8 @@ Model 2
colnames ( tT1.EBase ) [ 4 : 11 ] = paste0 ( "E4vsE3_" , colnames ( tT1.EBase ) [ 4 : 11 ] )
colnames ( tT2.EBase ) [ 4 : 11 ] = paste0 ( "E2vsE3_" , colnames ( tT2.EBase ) [ 4 : 11 ] )
- setDT ( tT1.EBase )
- setDT ( tT2.EBase )
+ setDT ( tT1.EBase )
+ setDT ( tT2.EBase )
tT.EBase = tT1.EBase [ tT2.EBase , on= c ( "GeneSymbol" , "EnsemblID" , "Description" ) ]
return ( tT.EBase )
@@ -2472,10 +2472,10 @@ Model 2.5
tT_CERAD_E4Num = topTable ( fit.eb.sfE , sort.by= "none" , number= Inf , coef= 2 , confint= TRUE )
tT_CERAD_E2Num = topTable ( fit.eb.sfE , sort.by= "none" , number= Inf , coef= 3 , confint= TRUE )
- setDT ( tT_CERAD_E4Num )
- setDT ( tT_CERAD_E2Num )
+ setDT ( tT_CERAD_E4Num )
+ setDT ( tT_CERAD_E2Num )
- tT = copy ( annot )
+ tT = copy ( annot )
tT = tT [ tT_CERAD_E4Num , on = . ( EnsemblID , GeneSymbol , Description ) ,
c ( "e4_logFC" , "e4_CI.L" , "e4_CI.R" ,
"e4_P.Value" , "e4_adj.P.Val" ) := . (
@@ -2547,8 +2547,8 @@ Model 3
colnames ( tT1.EBase ) [ 4 : 11 ] = paste0 ( "E4vsE3_" , colnames ( tT1.EBase ) [ 4 : 11 ] )
colnames ( tT2.EBase ) [ 4 : 11 ] = paste0 ( "E2vsE3_" , colnames ( tT2.EBase ) [ 4 : 11 ] )
- setDT ( tT1.EBase )
- setDT ( tT2.EBase )
+ setDT ( tT1.EBase )
+ setDT ( tT2.EBase )
tT.EBase = tT1.EBase [ tT2.EBase , on= c ( "GeneSymbol" , "EnsemblID" , "Description" ) ]
@@ -2601,8 +2601,8 @@ Model 4
colnames ( tT1.EBase ) [ 4 : 11 ] = paste0 ( "E4vsE3_" , colnames ( tT1.EBase ) [ 4 : 11 ] )
colnames ( tT2.EBase ) [ 4 : 11 ] = paste0 ( "E2vsE3_" , colnames ( tT2.EBase ) [ 4 : 11 ] )
- setDT ( tT1.EBase )
- setDT ( tT2.EBase )
+ setDT ( tT1.EBase )
+ setDT ( tT2.EBase )
tT.EBase = tT1.EBase [ tT2.EBase , on= c ( "GeneSymbol" , "EnsemblID" , "Description" ) ]
@@ -2660,10 +2660,10 @@ Model 5
colnames ( tT3.EBase ) [ 4 : 11 ] = paste0 ( "C2E4vsC2E3_" , colnames ( tT3.EBase ) [ 4 : 11 ] )
colnames ( tT4.EBase ) [ 4 : 11 ] = paste0 ( "C2E2vsC2E3_" , colnames ( tT4.EBase ) [ 4 : 11 ] )
- setDT ( tT1.EBase )
- setDT ( tT2.EBase )
- setDT ( tT3.EBase )
- setDT ( tT4.EBase )
+ setDT ( tT1.EBase )
+ setDT ( tT2.EBase )
+ setDT ( tT3.EBase )
+ setDT ( tT4.EBase )
tT.EBase = tT1.EBase [ tT2.EBase , on= c ( "GeneSymbol" , "EnsemblID" , "Description" ) ]
tT.EBase = tT.EBase [ tT3.EBase , on= c ( "GeneSymbol" , "EnsemblID" , "Description" ) ]
diff --git a/docs/qPCR-analysis.html b/docs/qPCR-analysis.html
index 6538d71..3fe9243 100644
--- a/docs/qPCR-analysis.html
+++ b/docs/qPCR-analysis.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2267,7 +2267,7 @@ Dependencies
# data manipulation
-library ( data.table )
+library ( data.table )
library ( purrr )
library ( magrittr )
library ( stringr )
@@ -2289,7 +2289,7 @@ Dependencies
# caption text and display significance
library ( ggtext )
-library ( ggsignif )
+library ( ggsignif )
# heatmap
library ( pheatmap )
@@ -2331,7 +2331,7 @@ Read Data
# define function to read data
read_data = function ( sheet_number ) {
read.xlsx ( file.path ( ddir , "qPCR Data.xlsx" ) , sheet = sheet_number ) %>%
- as.data.table ( ) %>%
+ as.data.table ( ) %>%
return ( )
}
@@ -2352,7 +2352,7 @@ Process Data
fac = list ( "Genotype" , "Genotype" , c ( "Genotype" , "Treatment" ) ) %>% purrr :: set_names ( opt ) %>% . [[ nm ] ]
# extract gene names
-lab = dCT [ , ! c ( "Mouse" , "Sex" , ..fac ) ] %>% colnames ( ) %>% . [ order ( . ) ]
+lab = dCT [ , ! c ( "Mouse" , "Sex" , ..fac ) ] %>% colnames ( ) %>% . [ order ( . ) ]
# calculate average of reference group (i.e., APOE3 OR APOE3/PBS)
ctrl = dCT [ , map ( .SD , ~ mean ( .x , na.rm = TRUE ) ) , .SDcols = lab , by = fac ]
@@ -2361,10 +2361,10 @@ Process Data
ctrl = if ( nm == "LPS" ) ctrl [ Genotype == "APOE3" & Treatment == "PBS" , ..lab ] else ctrl [ Genotype == "APOE3" , ..lab ]
# calculate ddCT
-ddCT = copy ( dCT ) [ , ( lab ) := map2 ( .SD , ctrl , ~ .x - .y ) , .SDcols = lab ]
+ddCT = copy ( dCT ) [ , ( lab ) := map2 ( .SD , ctrl , ~ .x - .y ) , .SDcols = lab ]
# calculate RQ
-RQ = copy ( ddCT ) [ , ( lab ) := 2 ^ - .SD , .SDcols = lab ]
+RQ = copy ( ddCT ) [ , ( lab ) := 2 ^ - .SD , .SDcols = lab ]
@@ -2427,9 +2427,9 @@ Statistical Analysis
# create posthoc table
posthoc = stat_tukey %>%
- as.data.table ( keep.rownames = TRUE ) %>%
- setnames ( c ( "Comparison" , "Difference" , "Lower CI" , "Upper CI" , "p" ) ) %>%
- . [ , c ( "Start" , "End" ) := strsplit ( Comparison , "-" ) %>% { . ( map_chr ( . , 2 ) , map_chr ( . , 1 ) ) } ]
+ as.data.table ( keep.rownames = TRUE ) %>%
+ setnames ( c ( "Comparison" , "Difference" , "Lower CI" , "Upper CI" , "p" ) ) %>%
+ . [ , c ( "Start" , "End" ) := strsplit ( Comparison , "-" ) %>% { . ( map_chr ( . , 2 ) , map_chr ( . , 1 ) ) } ]
# reorder for APOE
if ( nm == "APOE" ) { posthoc = posthoc [ c ( 1 , 3 , 2 ) , ] } # E2/E3, E3/E4, E2/E4
@@ -2437,15 +2437,15 @@ Statistical Analysis
# rename for LPS
if ( nm == "LPS" ) {
posthoc = posthoc %>%
- . [ , Treatment := map_chr ( strsplit ( Start , ":" ) , 2 ) ] %>%
+ . [ , Treatment := map_chr ( strsplit ( Start , ":" ) , 2 ) ] %>%
. [ , Treatment := factor ( Treatment , levels = c ( "PBS" , "LPS" ) ) ] %>%
- . [ , Start := map_chr ( strsplit ( Start , ":" ) , 1 ) ] %>%
- . [ , End := map_chr ( strsplit ( End , ":" ) , 1 ) ]
+ . [ , Start := map_chr ( strsplit ( Start , ":" ) , 1 ) ] %>%
+ . [ , End := map_chr ( strsplit ( End , ":" ) , 1 ) ]
}
# create output table
comps = if ( nm == "LPS" ) posthoc [ , paste0 ( Treatment , " " , Start , "/" , End ) ] else posthoc [ , paste ( Start , End , sep = "/" ) ]
- output = data.table ( Gene = gene )
+ output = data.table ( Gene = gene )
# function to add columns by reference
create_anova_col = function ( cn ) { output [ , paste ( "ANOVA" , cn , c ( "p" , "p adj." ) ) := . ( stat_aov [ cn , "Pr(>F)" ] , stat_aov [ cn , "Pr(>F)" ] ) ] }
@@ -2461,12 +2461,12 @@ Statistical Analysis
# reorder columns to group by comparison
unlist ( map ( comps , ~ grep ( .x , colnames ( output ) ) ) ) %>%
c ( 1 : ( ncol ( output ) - length ( . ) ) , . ) %>%
- setcolorder ( output , . )
+ setcolorder ( output , . )
# calculate mean and SD of RQ values, then ensure error bars are not negative
desc = describeBy ( RQ [ , ..gene ] , RQ [ , ..fac ] , mat = TRUE , digits = 4 ) %>%
- as.data.table ( ) %>%
- setnames ( c ( "group1" , "group2" ) , c ( "Genotype" , "Treatment" ) , skip_absent = TRUE ) %>%
+ as.data.table ( ) %>%
+ setnames ( c ( "group1" , "group2" ) , c ( "Genotype" , "Treatment" ) , skip_absent = TRUE ) %>%
. [ , .SD , .SDcols = c ( fac , "mean" , "sd" , "se" ) ] %>%
. [ , c ( "maxY" , "minY" ) := . ( mean + se , mean - se ) ] %>%
. [ minY < 0 , minY := 0 ]
@@ -2486,14 +2486,14 @@ Statistical Analysis
# adjust bar height for PBS/LPS
if ( nrow ( posthoc ) > 0 & nm == "LPS" ) {
posthoc = posthoc %>%
- merge ( desc [ , max ( mean + 1.3 * sd ) , by = "Treatment" ] , by = "Treatment" , all = TRUE ) %>%
- setnames ( "V1" , "GroupHeight" )
+ merge ( desc [ , max ( mean + 1.3 * sd ) , by = "Treatment" ] , by = "Treatment" , all = TRUE ) %>%
+ setnames ( "V1" , "GroupHeight" )
}
# create caption labels
caption_labels = stat_aov %>%
. [ ! ( rownames ( . ) == "Residuals" ) , ] %>%
- { data.table ( Comparison = rownames ( . ) , pVal = . [ , "Pr(>F)" ] ) } %>%
+ { data.table ( Comparison = rownames ( . ) , pVal = . [ , "Pr(>F)" ] ) } %>%
. [ Comparison == "Genotype:Treatment" , Comparison := "Interaction" ] %>%
. [ , Label := map_chr ( pVal , generate_label ) ]
@@ -2562,7 +2562,7 @@ Plot Data
output [ , ( adj ) := map ( .SD , ~ p.adjust ( .x , method = "BH" ) ) , .SDcols = adj ]
# save output
-fwrite ( output , file.path ( rdir , paste ( nm , "Statistical Results.csv" ) ) )
+fwrite ( output , file.path ( rdir , paste ( nm , "Statistical Results.csv" ) ) )
@@ -2688,7 +2688,7 @@ Save Results
hm $ tree_row %>%
{ . $ labels [ . $ order ] } %>%
cor_dat [ . , . ] %>%
- as.data.table ( keep.rownames = "" ) %>%
+ as.data.table ( keep.rownames = "" ) %>%
writeData ( wb , s2 , x = . , headerStyle = hs )
# set column widths and row heights for statistical results
diff --git a/docs/search.json b/docs/search.json
index e975dea..5c04d2b 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -5,15 +5,15 @@
"title": "404 Error",
"author": [],
"contents": "\r\nYou may have reached this page in error. Please return to the home page, or open an issue on our GitHub repository.\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:45:28-07:00"
+ "last_modified": "2021-05-19T22:38:32-07:00"
},
{
"path": "index.html",
"title": "About",
"description": "We investigated the differential effects of *APOE* alleles on the microglia and astrocyte transcriptome in the absence and presence of neuritic plaques (NPs) using bulk brain RNA-seq data from two large brain banks and performed follow-on experiments in *APOE* knock-in mice.\n",
"author": [],
- "contents": "\r\n\r\nContents\r\nSetup\r\nCode Availability\r\nFunctions\r\nROSMAP\r\nMSBB\r\nqPCR Analysis\r\n\r\n\r\nSetup\r\nInstall R to run our code.\r\nCode Availability\r\nOur codebase is available on GitHub at mindds.github.io/apoe-glia and below.\r\nFunctions\r\nModels\r\nSpectral Clustering\r\nROSMAP\r\nDefine Variables\r\nPre-Process Data\r\nMicroglia Analysis\r\nAstrocyte Analysis\r\nMSBB\r\nDefine Variables\r\nMerge Data\r\nMicroglia Analysis\r\nqPCR Analysis\r\nqPCR Analysis\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:45:31-07:00"
+ "contents": "\r\n\r\nContents\r\nSetup\r\nDocumentation\r\nFunctions\r\nROSMAP\r\nMSBB\r\nqPCR Analysis\r\n\r\nCode Availability\r\n\r\nSetup\r\nInstall R to run our code.\r\nDocumentation\r\nTo read our documented code, please visit mindds.github.io/apoe-glia or see below.\r\nFunctions\r\nModels\r\nSpectral Clustering\r\nROSMAP\r\nDefine Variables\r\nPre-Process Data\r\nMicroglia Analysis\r\nAstrocyte Analysis\r\nMSBB\r\nDefine Variables\r\nMerge Data\r\nMicroglia Analysis\r\nqPCR Analysis\r\nqPCR Analysis\r\nCode Availability\r\nOur full codebase is available for download on GitHub.\r\n\r\n\r\n\r\n",
+ "last_modified": "2021-05-19T22:38:34-07:00"
},
{
"path": "models.html",
@@ -21,7 +21,7 @@
"description": "This script defines the models which are used in subsequent scripts.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nModel 1\r\nModel 1.5\r\nModel 2\r\nModel 2.5\r\nModel 3\r\nModel 4\r\nModel 5\r\n\r\nModel 1\r\nFunction to run Model 1, which is exp ~ APOE + CERAD/Braak.\r\n\r\n\r\n#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]\r\n#' @param SF factor; Braak or CERAD stage\r\n#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)\r\n#' @param annot data.table; annotation for genes; \r\n#' @param useVoom T/F; use limma-voom or not\r\nrun_model1 = function(mData, SF, E, annot, useVoom = F) {\r\n # remove samples with NA meta data\r\n sel = (!is.na(SF) & !is.na(E))\r\n SF = SF[sel]\r\n E = E[sel]\r\n mData = mData[,sel]\r\n cat(sum(!sel), \"out of\", length(sel), \"samples were removed because of lack of clinical records\\n\")\r\n cat(\"Final sample size: \", sum(sel), \"\\n\")\r\n \r\n # design matrix\r\n design.sfE = model.matrix(~ E + SF + 0 )\r\n colnames(design.sfE) = gsub(\"EE\",\"E\", colnames(design.sfE))\r\n print(head(design.sfE))\r\n \r\n # limma model\r\n fit.sfE = lmFit(mData, design.sfE)\r\n fit.c.sfE = contrasts.fit(fit.sfE, makeContrasts(C1=\"E4-E3\", C2=\"E2-E3\", levels=design.sfE))\r\n if(useVoom){ \r\n fit.eb.sfE = eBayes(fit.c.sfE, robust=TRUE)\r\n }else{\r\n fit.eb.sfE = eBayes(fit.c.sfE, trend = T)\r\n }\r\n \r\n # annotation\r\n fit.eb.sfE$genes = annot\r\n \r\n # call topTable\r\n tT1.sfE = topTable(fit.eb.sfE, sort.by=\"none\", number=Inf, coef=\"C1\", confint=TRUE)\r\n tT2.sfE = topTable(fit.eb.sfE, sort.by=\"none\", number=Inf, coef=\"C2\", confint=TRUE)\r\n \r\n colnames(tT1.sfE)[4:11] = paste0(\"E4vsE3_\", colnames(tT1.sfE)[4:11])\r\n colnames(tT2.sfE)[4:11] = paste0(\"E2vsE3_\", colnames(tT2.sfE)[4:11])\r\n setDT(tT1.sfE)\r\n setDT(tT2.sfE)\r\n tT.sfE = tT1.sfE[tT2.sfE, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n \r\n return(tT.sfE)\r\n}\r\n\r\n\r\n\r\nModel 1.5\r\nFunction to run Model 1.5, which is exp ~ E4Num + E2Num + CERAD.\r\n\r\n\r\n#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]\r\n#' @param SF factor; Braak or CERAD stage\r\n#' @param E4Num numeric; the number of copy of e4 \r\n#' @param E2Num numeric; the number of copy of e2\r\n#' @param annot data.table; annotation for genes; \r\n#' @param useVoom T/F; use limma-voom or not\r\nrun_model1.5 = function(mData, SF, E4Num, E2Num, annot, useVoom = F) {\r\n # remove samples with NA meta data\r\n sel = (!is.na(SF) & !is.na(E4Num) & !is.na(E2Num) )\r\n SF = SF[sel]\r\n E4Num = E4Num[sel]\r\n E2Num = E2Num[sel]\r\n mData = mData[,sel]\r\n cat(sum(!sel), \"out of\", length(sel), \"samples were removed because of lack of clinical records\\n\")\r\n cat(\"Final sample size: \", sum(sel), \"\\n\")\r\n \r\n # design matrix\r\n design.sfE = model.matrix(~ E4Num + E2Num + SF )\r\n print(head(design.sfE))\r\n \r\n # limma model\r\n fit.sfE = lmFit(mData, design.sfE)\r\n if(useVoom){ \r\n fit.eb.sfE = eBayes(fit.sfE, robust=TRUE)\r\n }else{\r\n fit.eb.sfE = eBayes(fit.sfE, trend = T)\r\n }\r\n \r\n # annotation\r\n fit.eb.sfE$genes = annot\r\n \r\n # call toptable\r\n tT_CERAD_E4Num = topTable(fit.eb.sfE, sort.by=\"none\", number=Inf, coef=2, confint=TRUE)\r\n tT_CERAD_E2Num = topTable(fit.eb.sfE, sort.by=\"none\", number=Inf, coef=3, confint=TRUE)\r\n \r\n setDT(tT_CERAD_E4Num)\r\n setDT(tT_CERAD_E2Num)\r\n \r\n tT = copy(annot)\r\n tT = tT[tT_CERAD_E4Num, on = .(EnsemblID, GeneSymbol, Description), \r\n c(\"e4_logFC\", \"e4_CI.L\", \"e4_CI.R\",\r\n \"e4_P.Value\", \"e4_adj.P.Val\") := .(\r\n i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]\r\n tT = tT[tT_CERAD_E2Num, on = .(EnsemblID, GeneSymbol, Description), \r\n c(\"e2_logFC\", \"e2_CI.L\", \"e2_CI.R\",\r\n \"e2_P.Value\", \"e2_adj.P.Val\") := .(\r\n i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]\r\n \r\n return(tT)\r\n}\r\n\r\n\r\n\r\nModel 2\r\nFunction to run Model 2 baseline, which is C0E4-C0E3, C0E2-C0E3 | B1E4-B1E3, B1E2-B1E3.\r\n\r\n\r\n#' @param sf string; analysis for Braak or CERAD\r\n#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]\r\n#' @param SF factor; Braak or CERAD stage\r\n#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)\r\n#' @param annot data.table; annotation for genes; \r\n#' @param useVoom T/F; use limma-voom or not\r\nrun_model2 = function(sf, mData, SF, E, annot, useVoom = F) {\r\n \r\n # remove samples with NA meta data\r\n sel = (!is.na(SF) & !is.na(E))\r\n SF = SF[sel]\r\n E = E[sel]\r\n mData = mData[,sel]\r\n cat(sum(!sel), \"out of\", length(sel), \"samples were removed because of lack of clinical records\\n\")\r\n cat(\"Final sample size: \", sum(sel), \"\\n\")\r\n \r\n if(sf==\"CERAD\") {\r\n f3=factor(paste0(\"C\",SF,E))\r\n } else {\r\n f3=factor(paste0(\"B\",SF,E))\r\n }\r\n \r\n # design matrix\r\n design.EBase = model.matrix(~f3 + 0)\r\n colnames(design.EBase) = gsub(\"f3\",\"\", colnames(design.EBase))\r\n print(head(design.EBase))\r\n \r\n # limma model\r\n if(sf==\"CERAD\") {\r\n fit.EBase = lmFit(mData, design.EBase)\r\n fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1=\"C0E4-C0E3\", C2=\"C0E2-C0E3\", levels=design.EBase))\r\n } else {\r\n fit.EBase = lmFit(mData, design.EBase)\r\n fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1=\"B1E4-B1E3\", C2=\"B1E2-B1E3\", levels=design.EBase))\r\n }\r\n \r\n if(useVoom){ \r\n fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)\r\n }else{\r\n fit.eb.EBase = eBayes(fit.c.EBase, trend = T)\r\n }\r\n \r\n # annotation\r\n fit.eb.EBase$genes = annot\r\n \r\n # call topTable\r\n tT1.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C1\", confint=TRUE)\r\n tT2.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C2\", confint=TRUE)\r\n \r\n colnames(tT1.EBase)[4:11] = paste0(\"E4vsE3_\", colnames(tT1.EBase)[4:11])\r\n colnames(tT2.EBase)[4:11] = paste0(\"E2vsE3_\", colnames(tT2.EBase)[4:11])\r\n \r\n setDT(tT1.EBase)\r\n setDT(tT2.EBase)\r\n tT.EBase = tT1.EBase[tT2.EBase, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n\r\n return(tT.EBase)\r\n}\r\n\r\n\r\n\r\nModel 2.5\r\nFunction to run Model 2.5, which is exp ~ E4Num + E2Num for C0 subjects only.\r\n\r\n\r\n#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]\r\n#' @param SF factor; Braak or CERAD stage\r\n#' @param E4Num numeric; the number of copy of e4 \r\n#' @param E2Num numeric; the number of copy of e2\r\n#' @param annot data.table; annotation for genes; \r\n#' @param useVoom T/F; use limma-voom or not\r\nrun_model2.5 = function(mData, E4Num, E2Num, annot, useVoom = F) {\r\n # remove samples with NA meta data\r\n sel = (!is.na(E4Num) & !is.na(E2Num) )\r\n E4Num = E4Num[sel]\r\n E2Num = E2Num[sel]\r\n mData = mData[,sel]\r\n cat(sum(!sel), \"out of\", length(sel), \"samples were removed because of lack of clinical records\\n\")\r\n cat(\"Final sample size: \", sum(sel), \"\\n\")\r\n \r\n # design matrix\r\n design.sfE = model.matrix(~ E4Num + E2Num)\r\n print(head(design.sfE))\r\n \r\n # limma model\r\n fit.sfE = lmFit(mData, design.sfE)\r\n if(useVoom){ \r\n fit.eb.sfE = eBayes(fit.sfE, robust=TRUE)\r\n }else{\r\n fit.eb.sfE = eBayes(fit.sfE, trend = T)\r\n }\r\n \r\n # annotation\r\n fit.eb.sfE$genes = annot\r\n \r\n # call toptable\r\n tT_CERAD_E4Num = topTable(fit.eb.sfE, sort.by=\"none\", number=Inf, coef=2, confint=TRUE)\r\n tT_CERAD_E2Num = topTable(fit.eb.sfE, sort.by=\"none\", number=Inf, coef=3, confint=TRUE)\r\n \r\n setDT(tT_CERAD_E4Num)\r\n setDT(tT_CERAD_E2Num)\r\n \r\n tT = copy(annot)\r\n tT = tT[tT_CERAD_E4Num, on = .(EnsemblID, GeneSymbol, Description), \r\n c(\"e4_logFC\", \"e4_CI.L\", \"e4_CI.R\",\r\n \"e4_P.Value\", \"e4_adj.P.Val\") := .(\r\n i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]\r\n tT = tT[tT_CERAD_E2Num, on = .(EnsemblID, GeneSymbol, Description), \r\n c(\"e2_logFC\", \"e2_CI.L\", \"e2_CI.R\",\r\n \"e2_P.Value\", \"e2_adj.P.Val\") := .(\r\n i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]\r\n \r\n return(tT)\r\n}\r\n\r\n\r\n\r\nModel 3\r\nFunction to run Model 3, which is C3E4-C3E3, C3E2-C3E3 | B3E4-B3E3, B3E2-B3E3.\r\n\r\n\r\n#' @param sf string; analysis for Braak or CERAD\r\n#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]\r\n#' @param SF factor; Braak or CERAD stage\r\n#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)\r\n#' @param annot data.table; annotation for genes; \r\n#' @param useVoom T/F; use limma-voom or not\r\nrun_model3 = function(sf, mData, SF, E, annot, useVoom = F) {\r\n \r\n # remove samples with NA meta data\r\n sel = (!is.na(SF) & !is.na(E))\r\n SF = SF[sel]\r\n E = E[sel]\r\n mData = mData[,sel]\r\n cat(sum(!sel), \"out of\", length(sel), \"samples were removed because of lack of clinical records\\n\")\r\n cat(\"Final sample size: \", sum(sel), \"\\n\")\r\n \r\n # factors for creating design matrix: SF.E\r\n # SF: CERAD or Braak stages\r\n # E: APOE2 (including 22, 23), APOE3 (including 33), APOE4 (including 44, 34, 24)\r\n if(sf==\"CERAD\") {\r\n f3=factor(paste0(\"C\",SF,E))\r\n } else {\r\n f3=factor(paste0(\"B\",SF,E))\r\n }\r\n \r\n design.EBase = model.matrix(~f3 + 0)\r\n colnames(design.EBase) = gsub(\"f3\",\"\", colnames(design.EBase))\r\n print(head(design.EBase))\r\n \r\n # limma model\r\n if(sf==\"CERAD\") {\r\n fit.EBase = lmFit(mData, design.EBase)\r\n fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1=\"C3E4-C3E3\", C2=\"C3E2-C3E3\", levels=design.EBase))\r\n } else {\r\n fit.EBase = lmFit(mData, design.EBase)\r\n fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1=\"B3E4-B3E3\", C2=\"B3E2-B3E3\", levels=design.EBase))\r\n }\r\n \r\n if(useVoom){ \r\n fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)\r\n }else{\r\n fit.eb.EBase = eBayes(fit.c.EBase, trend = T)\r\n }\r\n \r\n # annotation\r\n fit.eb.EBase$genes = annot\r\n \r\n # call topTable\r\n tT1.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C1\", confint=TRUE)\r\n tT2.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C2\", confint=TRUE)\r\n colnames(tT1.EBase)[4:11] = paste0(\"E4vsE3_\", colnames(tT1.EBase)[4:11])\r\n colnames(tT2.EBase)[4:11] = paste0(\"E2vsE3_\", colnames(tT2.EBase)[4:11])\r\n \r\n setDT(tT1.EBase)\r\n setDT(tT2.EBase)\r\n \r\n tT.EBase = tT1.EBase[tT2.EBase, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n \r\n return(tT.EBase)\r\n}\r\n\r\n\r\n\r\nModel 4\r\nFunction to run Model 4, which is C23E4-C23E3, C23E2-C23E3.\r\n\r\n\r\n#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]\r\n#' @param SF factor; Braak or CERAD stage\r\n#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)\r\n#' @param annot data.table; annotation for genes; \r\n#' @param useVoom T/F; use limma-voom or not\r\nrun_model4 = function(mData, SF, E, annot, useVoom = F) {\r\n # remove samples with NA meta data\r\n sel = (!is.na(SF) & !is.na(E))\r\n SF = SF[sel]\r\n E = E[sel]\r\n mData = mData[,sel]\r\n cat(sum(!sel), \"out of\", length(sel), \"samples were removed because of lack of clinical records\\n\")\r\n cat(\"Final sample size: \", sum(sel), \"\\n\")\r\n \r\n SF = gsub(\"2|3\", \"23\", SF)\r\n f3 = factor(paste0(\"C\",SF,E))\r\n \r\n design.EBase = model.matrix(~f3 + 0)\r\n colnames(design.EBase) = gsub(\"f3\",\"\", colnames(design.EBase))\r\n print(head(design.EBase))\r\n \r\n # limma model\r\n fit.EBase = lmFit(mData, design.EBase)\r\n fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1=\"C23E4-C23E3\", C2=\"C23E2-C23E3\", levels=design.EBase))\r\n \r\n if(useVoom){ \r\n fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)\r\n }else{\r\n fit.eb.EBase = eBayes(fit.c.EBase, trend = T)\r\n }\r\n \r\n # annotation\r\n fit.eb.EBase$genes = annot\r\n \r\n # call topTable\r\n tT1.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C1\", confint=TRUE)\r\n tT2.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C2\", confint=TRUE)\r\n colnames(tT1.EBase)[4:11] = paste0(\"E4vsE3_\", colnames(tT1.EBase)[4:11])\r\n colnames(tT2.EBase)[4:11] = paste0(\"E2vsE3_\", colnames(tT2.EBase)[4:11])\r\n \r\n setDT(tT1.EBase)\r\n setDT(tT2.EBase)\r\n\r\n tT.EBase = tT1.EBase[tT2.EBase, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n \r\n return(tT.EBase)\r\n}\r\n\r\n\r\n\r\nModel 5\r\nFunction to run Model 5, which is CXE4-CXE3, CXE2-CXE3, where X could be either 1 or 2.\r\n\r\n\r\n#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]\r\n#' @param SF factor; Braak or CERAD stage\r\n#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)\r\n#' @param annot data.table; annotation for genes; \r\n#' @param useVoom T/F; use limma-voom or not\r\nrun_model5 = function(mData, SF, E, annot, useVoom = F) {\r\n \r\n # remove samples with NA meta data\r\n sel = (!is.na(SF) & !is.na(E))\r\n SF = SF[sel]\r\n E = E[sel]\r\n mData = mData[,sel]\r\n cat(sum(!sel), \"out of\", length(sel), \"samples were removed because of lack of clinical records\\n\")\r\n cat(\"Final sample size: \", sum(sel), \"\\n\")\r\n \r\n f3 = factor(paste0(\"C\",SF,E))\r\n \r\n design.EBase = model.matrix(~f3 + 0)\r\n colnames(design.EBase) = gsub(\"f3\",\"\", colnames(design.EBase))\r\n print(head(design.EBase))\r\n \r\n # limma model\r\n fit.EBase = lmFit(mData, design.EBase)\r\n fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1=\"C1E4-C1E3\", C2=\"C1E2-C1E3\", \r\n C3=\"C2E4-C2E3\", C4=\"C2E2-C2E3\",levels=design.EBase))\r\n \r\n if(useVoom){ \r\n fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)\r\n }else{\r\n fit.eb.EBase = eBayes(fit.c.EBase, trend = T)\r\n }\r\n \r\n # annotation\r\n fit.eb.EBase$genes = annot\r\n \r\n # call topTable\r\n tT1.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C1\", confint=TRUE)\r\n tT2.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C2\", confint=TRUE)\r\n tT3.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C3\", confint=TRUE)\r\n tT4.EBase = topTable(fit.eb.EBase, sort.by=\"none\", number=Inf, coef=\"C4\", confint=TRUE)\r\n colnames(tT1.EBase)[4:11] = paste0(\"C1E4vsC1E3_\", colnames(tT1.EBase)[4:11])\r\n colnames(tT2.EBase)[4:11] = paste0(\"C1E2vsC1E3_\", colnames(tT2.EBase)[4:11])\r\n colnames(tT3.EBase)[4:11] = paste0(\"C2E4vsC2E3_\", colnames(tT3.EBase)[4:11])\r\n colnames(tT4.EBase)[4:11] = paste0(\"C2E2vsC2E3_\", colnames(tT4.EBase)[4:11])\r\n \r\n setDT(tT1.EBase)\r\n setDT(tT2.EBase)\r\n setDT(tT3.EBase)\r\n setDT(tT4.EBase)\r\n \r\n tT.EBase = tT1.EBase[tT2.EBase, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n tT.EBase = tT.EBase[tT3.EBase, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n tT.EBase = tT.EBase[tT4.EBase, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n \r\n return(tT.EBase)\r\n}\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:45:44-07:00"
+ "last_modified": "2021-05-19T22:38:48-07:00"
},
{
"path": "MSBB-merge.html",
@@ -29,7 +29,7 @@
"description": "This script uses ComBat to merge MSBB data.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nRead Data\r\nRemove Batch Effects\r\nSave Results\r\n\r\nDependencies\r\nLoad requisite packages.\r\n\r\n\r\nlibrary(data.table)\r\nlibrary(sva)\r\n\r\n\r\n\r\nRead Data\r\nDefine brain regions.\r\n\r\n\r\nbrain_regions = c(\"IFG\", \"STG\", \"PHG\", \"FP\")\r\n\r\n\r\n\r\nRead expression data.\r\n\r\n\r\n# read mData\r\nMSBB_mData = lapply(brain_regions, function(brain_region){\r\n # process expression file\r\n cat(\"processing\", brain_region, \"...\\n\")\r\n load(paste0(\"../Data/MSBB-\", brain_region,\"-24.Rdata\"))\r\n return(expSet$mData)\r\n})\r\nall(unlist(lapply(MSBB_mData, function(x) all(rownames(x) == rownames(MSBB_mData[[1]])))))\r\nMSBB_mData = do.call(cbind, MSBB_mData)\r\n\r\n\r\n\r\nRead covariate data.\r\n\r\n\r\n# load cov data\r\nMSBB_cov = lapply(brain_regions, function(brain_region){\r\n # process expression file\r\n cat(\"processing\", brain_region, \"...\\n\")\r\n load(paste0(\"../Data/MSBB-\", brain_region,\"-24.Rdata\"))\r\n expSet$cov$brain_region = brain_region\r\n return(expSet$cov)\r\n})\r\nMSBB_cov = do.call(rbind, MSBB_cov)\r\n\r\n\r\n\r\nConfirm sample order.\r\n\r\n\r\nall(colnames(MSBB_mData) == MSBB_cov$individualIdentifier)\r\n\r\n\r\n\r\nRemove Batch Effects\r\nFirst, create the model matrix for the adjustment variables, including the variable of interest. Note that you do not include batch when creating this model matrix; rather, batch will be included in the ComBat function call in the following chunk. In this case, there are no other adjustment variables so we simply fit an intercept term.\r\n\r\n\r\nmodcombat = model.matrix(~1, data=MSBB_cov)\r\n\r\n\r\n\r\nNow, remove batch effects using the ComBat approach from the sva package. The input data is assumed to be cleaned and normalized before batch effect removal.\r\n\r\n\r\nMSBB_combat = ComBat(dat=MSBB_mData, batch=MSBB_cov$brain_region, mod=modcombat)\r\n\r\n\r\n\r\nSave Results\r\n\r\n\r\nexpSet = list(mData = MSBB_combat,\r\n cov = MSBB_cov)\r\nsave(expSet, file = \"../Data/MSBB-combated.Rdata\")\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:45:48-07:00"
+ "last_modified": "2021-05-19T22:38:51-07:00"
},
{
"path": "MSBB-microglia.html",
@@ -37,7 +37,7 @@
"description": "This script performs analysis of MSBB microglia data.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nSelect Microglia Genes\r\nPre-Processing\r\nRun Models\r\nModel 1\r\nModel 1.5\r\n\r\nZ-Score Analysis\r\nSpectral Clustering\r\nC0\r\nC3\r\nB1\r\nB3\r\nOverlap\r\n\r\nStatistical Testing\r\nROSMAP/MSBB Plot\r\n\r\nDependencies\r\nLoad requisite packages.\r\n\r\n\r\nrm(list = ls())\r\nrequire(limma)\r\nrequire(ComplexHeatmap)\r\nrequire(data.table)\r\nrequire(circlize)\r\nrequire(openxlsx)\r\nrequire(tidyverse)\r\nrequire(SNFtool)\r\nrequire(pheatmap)\r\nrequire(ggprism)\r\n\r\nds = \"MSBB-combat\"\r\ndtype = \"TMM\"\r\nbrainRegion = \"combated\"\r\nload(paste0(\"../Data/MSBB-\", brainRegion,\".Rdata\"))\r\ncat(\"\\nBrain Region: \", brainRegion, \"\\n\")\r\n\r\nsource(\"models.R\")\r\nsource(\"spectral-clustering.R\")\r\n\r\n# load expression data \r\nmData = expSet$mData\r\n\r\n\r\n\r\nSelect Microglia Genes\r\nFirst, select only microglia genes. Of the microglial genes, select only those that exist in MSBB.\r\n\r\n\r\ncelltype = \"microglia\"\r\n\r\n# select only microglial genes\r\ngenes = readLines(\"../Data/Microglia Genes.txt\")\r\n\r\n# select microglia genes that exist in MSBB\r\nannot = fread(\"../Data/ENSEMBL GRCh38.p7.csv\")\r\nannot = unique(annot[GeneSymbol %in% genes,])\r\nensembl_ids = intersect(rownames(mData), annot$EnsemblID)\r\nannot = unique(annot[EnsemblID %in% ensembl_ids,])\r\n\r\n# remove duplicated genes\r\nannot = annot[!duplicated(GeneSymbol), ]\r\nensembl_ids = annot$EnsemblID\r\n\r\n# check results\r\nany(duplicated(ensembl_ids)) # no duplicated ids\r\nany(is.na(ensembl_ids)) # no NA\r\nlength(ensembl_ids)\r\n\r\n\r\n\r\nPre-Processing\r\n\r\n\r\n# select interesting genes\r\nmData = mData[annot$EnsemblID, ]\r\nall(rownames(mData) == annot$EnsemblID)\r\nrownames(mData) = annot$GeneSymbol\r\n\r\n# set metadata\r\ncov = expSet$cov\r\n\r\n\r\n\r\nRun Models\r\nModel 1: exp ~ APOE + CERAD | exp ~ APOE + Braak\r\nModel 1.5: exp ~ APOE + CERAD e2, e4 dosage model\r\nModel 1\r\nRun Model 1, which is exp ~ APOE + CERAD | exp ~ APOE + Braak. Then, generate Table S2, which is comprised of differentially expressed microglia genes across APOE groups for MSBB data.\r\n\r\n\r\n# run Model 1\r\ntT_CERAD = run_model1(mData, cov$C, cov$E, annot)\r\ntT_Braak = run_model1(mData, cov$B,cov$E, annot)\r\n\r\nlength(tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nlength(tT_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nlength(tT_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol) \r\n\r\n\r\n# Table S2\r\nTableS2 = fread(\"../results/TableS2.csv\")\r\nTableS2[tT_CERAD, on=\"EnsemblID\", \r\n c(\"MSBB_E4vsE3_logFC\", \"MSBB_E4vsE3_CI.L\", \"MSBB_E4vsE3_CI.R\", \r\n \"MSBB_E4vsE3_P.Value\", \"MSBB_E4vsE3_adj.P.Val\", \r\n \"MSBB_E2vsE3_logFC\", \"MSBB_E2vsE3_CI.L\", \"MSBB_E2vsE3_CI.R\", \r\n \"MSBB_E2vsE3_P.Value\", \"MSBB_E2vsE3_adj.P.Val\") := .(\r\n i.E4vsE3_logFC, i.E4vsE3_CI.L, i.E4vsE3_CI.R,\r\n i.E4vsE3_P.Value, i.E4vsE3_adj.P.Val, \r\n i.E2vsE3_logFC, i.E2vsE3_CI.L, i.E2vsE3_CI.R, \r\n i.E2vsE3_P.Value, i.E2vsE3_adj.P.Val)]\r\nTableS2 = TableS2[order(GeneSymbol), ]\r\nfwrite(TableS2, \"../results/TableS2.csv\")\r\n\r\n\r\n\r\nModel 1.5\r\nRun Model 1.5, which is exp ~ APOE + CERAD e2, e4 dosage model. Then, generate Table S3, which is comprised of differentially expressed microglia genes across APOE groups for ROSMAP DLPFC and each of MSBB brain regions using a dosage model.\r\n\r\n\r\n# run Model 1.5 with E4 and E2 copies as numeric\r\ntT_CERAD_ENum = run_model1.5(mData, cov$C, cov$E4num, cov$E2num, annot)\r\n\r\ne4_num_genes = intersect(tT_CERAD_ENum[e4_P.Value < 0.05 & e4_logFC > 0, GeneSymbol], tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,GeneSymbol])\r\nlength(e4_num_genes)\r\n\r\ne2_num_genes = intersect(tT_CERAD_ENum[e2_P.Value < 0.05 & e2_logFC < 0, GeneSymbol], tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0, GeneSymbol])\r\nlength(e2_num_genes)\r\n\r\nintersect(tT_CERAD_ENum[e4_P.Value < 0.05 & e4_logFC > 0, GeneSymbol],\r\n tT_CERAD_ENum[e2_P.Value < 0.05 & e2_logFC < 0, GeneSymbol])\r\n\r\n# Table S3\r\nTableS3 = fread(\"../results/TableS3.csv\")\r\nTableS3 = TableS3[tT_CERAD_ENum, on = .(EnsemblID, GeneSymbol, Description),\r\n c(\"MSBB_e4_logFC\", \"MSBB_e4_CI.L\", \"MSBB_e4_CI.R\", \r\n \"MSBB_e4_P.Value\", \"MSBB_e4_adj.P.Val\") := .(\r\n i.e4_logFC, i.e4_CI.L, i.e4_CI.R, i.e4_P.Value, i.e4_adj.P.Val)]\r\nTableS3 = TableS3[tT_CERAD_ENum, on = .(EnsemblID, GeneSymbol, Description), \r\n c(\"MSBB_e2_logFC\", \"MSBB_e2_CI.L\", \"MSBB_e2_CI.R\",\r\n \"MSBB_e2_P.Value\", \"MSBB_e2_adj.P.Val\") := .(\r\n i.e2_logFC, i.e2_CI.L, i.e2_CI.R, i.e2_P.Value, i.e2_adj.P.Val)]\r\nfwrite(TableS3, \"../results/TableS3.csv\")\r\n\r\n\r\n\r\nZ-Score Analysis\r\nCompute z-scores.\r\n\r\n\r\nz.C0 = to_z_score(\"C\", 0, mData, cov)\r\nz.C3 = to_z_score(\"C\", 3, mData, cov)\r\nz.B1 = to_z_score(\"B\", 1, mData, cov)\r\nz.B3 = to_z_score(\"B\", 3, mData, cov)\r\n\r\n\r\n\r\nCalculate average z-scores.\r\n\r\n\r\navez.C0 = to_ave_z(z.C0)\r\navez.C3 = to_ave_z(z.C3)\r\navez.B1 = to_ave_z(z.B1)\r\navez.B3 = to_ave_z(z.B3)\r\n\r\n\r\n\r\nSpectral Clustering\r\nC0\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 3\r\n\r\n\r\n# spectral clustering\r\nc = 3; k = 3\r\nclust.C0 = spectral_clustering(avez.C0, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.C0$clustA))\r\nrownames(annot_row) = rownames(clust.C0$zMtx)\r\npheatmap(clust.C0$zMtx[order(clust.C0$clustA),],\r\n annotation_row = annot_row[order(clust.C0$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nC0.genes = sort(rownames(clust.C0$zMtx)[clust.C0$clustA == c])\r\nlength(C0.genes)\r\n\r\n\r\n\r\nC3\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 2\r\n\r\n\r\n# spectral clustering\r\nc = 2; k = 3\r\nclust.C3 = spectral_clustering(avez.C3, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.C3$clustA))\r\nrownames(annot_row) = rownames(clust.C3$zMtx)\r\npheatmap(clust.C3$zMtx[order(clust.C3$clustA),],\r\n annotation_row = annot_row[order(clust.C3$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nC3.genes = sort(rownames(clust.C3$zMtx)[clust.C3$clustA == c])\r\nlength(C3.genes)\r\n\r\n\r\n\r\nB1\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 3\r\n\r\n\r\n# spectral clustering\r\nk = 3; c = 3\r\nclust.B1 = spectral_clustering(avez.B1, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.B1$clustA))\r\nrownames(annot_row) = rownames(clust.B1$zMtx)\r\npheatmap(clust.B1$zMtx[order(clust.B1$clustA),],\r\n annotation_row = annot_row[order(clust.B1$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nB1.genes = sort(rownames(clust.B1$zMtx)[clust.B1$clustA == c])\r\nlength(B1.genes)\r\n\r\n\r\n\r\nB3\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 2\r\n\r\n\r\n# spectral clustering\r\nk = 3; c = 2\r\nclust.B3 = spectral_clustering(avez.B3, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.B3$clustA))\r\nrownames(annot_row) = rownames(clust.B3$zMtx)\r\npheatmap(clust.B3$zMtx[order(clust.B3$clustA),],\r\n annotation_row = annot_row[order(clust.B3$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nB3.genes = sort(rownames(clust.B3$zMtx)[clust.B3$clustA == c])\r\nlength(B3.genes)\r\n\r\n\r\n\r\nOverlap\r\n\r\n\r\n# hypergeometric test \r\nphyper_test(C0.genes, B1.genes)\r\nphyper_test(C3.genes, B3.genes)\r\nphyper_test(C0.genes, C3.genes)\r\nphyper_test(B1.genes, B3.genes)\r\n\r\n\r\n\r\nStatistical Testing\r\nStatistical testing of ROSMAP microglia-APOE genes across APOE genotypes. Use the average of z-scores for genes of interest for each individual as the outcome, with APOE genotype as a covariate and APOE E3/E3 as the baseline.\r\n\r\n\r\n# ROSMAP C0 genes\r\nTableS1 = fread(\"../results/TableS1.csv\")\r\nrosmap.C0.genes = TableS1$GeneSymbol\r\n\r\n# C0 test\r\ncov_C0 = cov[C == 0,]\r\nall(cov_C0$individualIdentifier == colnames(z.C0$z))\r\ntest_avez(z.C0$z, C0.genes, cov_C0)\r\n\r\n# C0 test based on ROSMAP genes\r\ntest_avez(z.C0$z, intersect(rownames(z.C0$z), rosmap.C0.genes), cov_C0)\r\n\r\n# adjusted by donor\r\ntest_avez(z.C0$z, intersect(rownames(z.C0$z), rosmap.C0.genes), cov_C0, adj.donor = T, donor = colnames(z.C0$z))\r\n\r\n# add to Table S1\r\nMSBB_aveZ = copy(avez.C0)\r\nsetnames(MSBB_aveZ, c(\"Genes\", \"E2\", \"E3\", \"E4\"), \r\n c(\"GeneSymbol\", paste(\"MSBB\", c(\"E2\", \"E3\", \"E4\"), sep = \"_\")))\r\nTableS1 = MSBB_aveZ[GeneSymbol %in% rosmap.C0.genes, ][TableS1, on = .(GeneSymbol)]\r\nTableS1 = TableS1[, .(GeneSymbol, EnsemblID, Description, ROSMAP_E2, ROSMAP_E3, ROSMAP_E4,\r\n MSBB_E2, MSBB_E3, MSBB_E4)]\r\nfwrite(TableS1, \"../results/TableS1.csv\")\r\n\r\n# C3 test\r\ncov_C3 = cov[C == 3,]\r\nall(cov_C3$individualIdentifier == colnames(z.C3$z))\r\ntest_avez(z.C3$z, C3.genes, cov_C3)\r\n\r\n# B1 test\r\ncov_B1 = cov[B == 1,]\r\nall(cov_B1$individualIdentifier == colnames(z.B1$z))\r\ntest_avez(z.B1$z, B1.genes, cov_B1)\r\n\r\n# B3 test\r\ncov_B3 = cov[B == 3,]\r\nall(cov_B1$individualIdentifier == colnames(z.B1$z))\r\ntest_avez(z.B3$z, B3.genes, cov_B3)\r\n\r\n\r\n\r\nSelect ROSMAP C0 genes.\r\n\r\n\r\n# read ROSMAP C0 genes\r\nTableS3 = fread(\"../results/TableS3.csv\")\r\nrosmap_C0.genes = TableS3$GeneSymbol\r\nmDataZ.rosmap_C0.genes = z.C0$z[rownames(z.C0$z) %in% rosmap_C0.genes,]\r\nmDataZ.rosmap_C0.genes.e2 = mDataZ.rosmap_C0.genes[,z.C0$E == \"E2\"]\r\nmDataZ.rosmap_C0.genes.e3 = mDataZ.rosmap_C0.genes[,z.C0$E == \"E3\"]\r\nmDataZ.rosmap_C0.genes.e4 = mDataZ.rosmap_C0.genes[,z.C0$E == \"E4\"]\r\n\r\n# check dimension\r\ntable(z.C0$E) == data.frame(\r\n E3 = ncol(mDataZ.rosmap_C0.genes.e3),\r\n E2 = ncol(mDataZ.rosmap_C0.genes.e2), \r\n E4 = ncol(mDataZ.rosmap_C0.genes.e4))\r\n\r\n# compute intersection\r\nintersect(rosmap_C0.genes, C0.genes)\r\n\r\n\r\n\r\nStatistical testing of ROSMAP microglia-APOE C0 genes across APOE genotypes. Use the average of z-scores for genes of interest for each individual as the outcome, with APOE genotype as a covariate and APOE E3/E3 as the baseline.\r\n\r\n\r\n# test on ROSMAP C0 genes\r\ncov_C0 = cov[C == 0,]\r\nall(cov_C0$individualIdentifier == colnames(z.C0$z))\r\ntest_avez(z.C0$z, intersect(rosmap_C0.genes, rownames(mData)), cov_C0)\r\n\r\n# read ROSMAP C3 genes\r\nrosmap_C3.genes = read.xlsx(\"../results/ROSMAP_miroglia_cluster_genes_remove.low.expressed.genes.xlsx\", sheet = \"C3(k=3)\", colNames = F)\r\nrosmap_C3.genes = rosmap_C3.genes$X1\r\n\r\n# test on ROSMAP C3 genes\r\ncov_C3 = cov[C == 3,]\r\nall(cov_C3$individualIdentifier == colnames(z.C3$z))\r\ntest_avez(z.C3$z, intersect(rosmap_C3.genes, rownames(mData)), cov_C3)\r\n\r\n\r\n\r\nROSMAP/MSBB Plot\r\nPlot of ROSMAP C0 genes in MSBB C0 subjects where each dot represents a person.\r\n\r\n\r\n# reshape data\r\nmy.dat = list(`2` = colMeans(mDataZ.rosmap_C0.genes.e2), \r\n `3` = colMeans(mDataZ.rosmap_C0.genes.e3), \r\n `4` = colMeans(mDataZ.rosmap_C0.genes.e4))\r\nmy.df = reshape2::melt(my.dat)\r\nmy.df$L1 = as.numeric(my.df$L1)\r\ncolnames(my.df) = c(\"z-score\", \"APOE\")\r\ncolnames(my.df) = c(\"z-score\", \"APOE.num\")\r\nmy.df$APOE = as.factor(my.df$APOE.num)\r\n \r\n# colors\r\nggthemr('greyscale')\r\nto_swap = c(\"#62bba5\", # E4\r\n \"#785d37\", # E3\r\n \"#ffb84d\") # E2\r\n\r\nggplot(my.df, aes(x = APOE.num, y = `z-score`, color = APOE)) +\r\n geom_violin(fill = \"white\", trim = FALSE) +\r\n geom_quasirandom(alpha = 0.3, width = 0.1, dodge.width = 0.9, varwidth = TRUE) +\r\n scale_color_manual(values = c(\"#62bba5\", \"#785d37\", \"#ffb84d\")) +\r\n labs(y = \"Average Z-Score\", x = \"APOE Genotype\") +\r\n theme(text = element_text(size = 14),\r\n legend.position=\"bottom\", legend.direction = \"horizontal\",\r\n axis.title.x = element_text(face = \"bold\", size = 11),\r\n axis.title.y = element_text(face = \"bold\", size = 11),\r\n axis.text.x = element_blank(),\r\n axis.ticks.x = element_blank(),\r\n legend.title = element_text(face = \"bold\")) +\r\n stat_summary(fun.data=mean_cl_normal, geom=\"errorbar\", width=0.2, position = position_dodge(0.9), color = \"black\")\r\nggsave(\"../results/Figure2-combat.pdf\", width = 5, height = 4, dpi = 300)\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:46:01-07:00"
+ "last_modified": "2021-05-19T22:39:02-07:00"
},
{
"path": "MSBB-variables.html",
@@ -45,7 +45,7 @@
"description": "This script first matches MSBB expression data to the clinical data, then defines variables in the covariate data.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nDefine Paths\r\nAnalyze Brain Regions\r\nProcess Expression Data\r\nProcess Covariate Data\r\nDefine Clinical Variables\r\nBraak\r\nCERAD\r\nRaw APOE\r\nAPOE\r\nE4 Count\r\nE2 Count\r\nAge at Death\r\n\r\nSave Results\r\n\r\nDependencies\r\nLoad requisite packages.\r\n\r\n\r\nlibrary(data.table)\r\n\r\n\r\n\r\nDefine Paths\r\nDefine brain regions for analysis and paths to normalized data files (i.e., Trimmed Mean of M-Values or TMM matrices).\r\n\r\n\r\nbrain_regions = c(\"IFG\", \"STG\", \"PHG\", \"FP\")\r\ndata_files = list(\r\n IFG = \"../Data/AMP-AD_MSBB_MSSM_BM_44.normalized.sex_race_age_RIN_PMI_exonicRate_rRnaRate_batch_adj.tsv\",\r\n STG = \"../Data/AMP-AD_MSBB_MSSM_BM_22.normalized.sex_race_age_RIN_PMI_exonicRate_rRnaRate_batch_adj.tsv\",\r\n PHG = \"../Data/AMP-AD_MSBB_MSSM_BM_36.normalized.sex_race_age_RIN_PMI_exonicRate_rRnaRate_batch_adj.tsv\",\r\n FP = \"../Data/AMP-AD_MSBB_MSSM_BM_10.normalized.sex_race_age_RIN_PMI_exonicRate_rRnaRate_batch_adj.tsv\"\r\n)\r\n\r\n\r\n\r\nAnalyze Brain Regions\r\nIterate over previously specified brain regions to process each region. All code after this chunk should be encapsulated in a for loop. To appropriately document the code, pseudocode of the for loop is shown below, while the contents of the for loop are described in subsequent chunks.\r\n\r\n\r\nfor(brain_region in brain_regions) {\r\n \r\n # insert following chunks here\r\n \r\n}\r\n\r\n\r\n\r\nFor the purposes of illustration, we define the brain_region of interest as the inferior frontal fygrus (IFG).\r\n\r\n\r\nbrain_region = \"IFG\"\r\n\r\n\r\n\r\nProcess Expression Data\r\nProcess expression data and remove unnecessary headers from sample IDs (e.g., S109B355.BM_10_798 becomes BM_10_798).\r\n\r\n\r\n# process expression data\r\ncat(\"processing\", brain_region, \"...\\n\")\r\ndata_file = unlist(data_files[brain_region])\r\nmsbb = suppressWarnings({fread(data_file, sep = \"\\t\")})\r\ngene_ids = msbb$V1\r\nmsbb = as.data.frame(msbb[, -1, with = F])\r\nrownames(msbb) = gene_ids\r\n\r\n# remove unnecessary headers\r\ncolnames(msbb) = gsub(\"^.*\\\\.\", \"\", colnames(msbb))\r\n\r\n\r\n\r\nExtract subject IDs in the same order as the sample IDs, then check for samples without an ID match.\r\n\r\n\r\n# extract subject IDs\r\nkey_file = \"../Data/MSBB_RNAseq_covariates_November2018Update.csv\"\r\nkey_map = fread(key_file)\r\nkey_map = unique(key_map[, .(sampleIdentifier, individualIdentifier)])\r\n\r\n# check for samples without an ID match\r\ncat(\"any samples don't have id match? \")\r\ncat(any(!colnames(msbb) %in% key_map$sampleIdentifier), \"\\n\")\r\nsub_key_map = key_map[data.table(sampleIdentifier = colnames(msbb)), on = .(sampleIdentifier)]\r\nall(sub_key_map$sampleIdentifier == colnames(msbb))\r\ncolnames(msbb) = sub_key_map$individualIdentifier\r\n\r\n\r\n\r\nCheck for duplicated samples.\r\n\r\n\r\n# any duplicated samples?\r\nif(any(duplicated(colnames(msbb)))){\r\n cat(\"remove duplicate samples..\\n\")\r\n msbb = msbb[, !duplicated(colnames(msbb))]\r\n}\r\n\r\n\r\n\r\nProcess Covariate Data\r\nNote that subject #1009 has an invalid Braak score of 9.\r\n\r\n\r\ncov_file = \"../Data/MSBB_clinical.csv\"\r\ncov = fread(cov_file)\r\nsetnames(cov, c(\"NP.1\", \"bbscore\"), c(\"CERAD\", \"Braak\"))\r\ncov[Braak > 6, Braak := NA]\r\n\r\n\r\n\r\nExtract clinical data in the same order as in the data.\r\n\r\n\r\ncov = cov[data.table(individualIdentifier = colnames(msbb)), on = .(individualIdentifier)]\r\nall(cov$individualIdentifier == colnames(msbb))\r\n\r\n\r\n\r\nDefine Clinical Variables\r\nBraak\r\n\r\nGroup levels into three categories:\r\n\r\n\r\n1 = Normal/I/II\r\n\r\n\r\n2 = III/IV\r\n\r\n\r\n3 = V/VI\r\n\r\n\r\n\r\ncov[, B := as.factor(cov$Braak)]\r\nlevels(cov$B) = c(1,1,1,2,2,3,3)\r\ncov[, B := as.factor(as.character(B))]\r\ntable(cov$B, cov$Braak)\r\n\r\n# double-check\r\ntable(cov$C, cov$Braak)\r\n\r\n\r\n\r\nCERAD\r\n\r\nOld MSBB levels:\r\n\r\n\r\n1 = Normal\r\n\r\n\r\n2 = Definite\r\n\r\n\r\n3 = Probable\r\n\r\n\r\n4 = Possible\r\n\r\n\r\nDefine new levels:\r\n\r\n\r\n1 = Normal\r\n\r\n\r\n2 = Possible\r\n\r\n\r\n3 = Probable\r\n\r\n\r\n4 = Definite\r\n\r\n\r\n\r\n# refactor\r\ncov[, C := as.factor(CERAD)]\r\nlevels(cov$C) = c(0,3,2,1)\r\ncov[, C := as.factor(as.character(C))]\r\n\r\n# double-check\r\ntable(cov$C, cov$CERAD)\r\n\r\n\r\n\r\nRaw APOE\r\n\r\nDefine three categories:\r\n\r\n\r\n1 = E2/E2 and E3/E3\r\n\r\n\r\n2 = E3/E4, E2/E4, and E4/E4\r\n\r\nNA values will be treated as E3/E3.\r\n\r\n\r\n# define variable\r\ncov$RawAPOE = paste0(cov$Apo1, cov$Apo2)\r\ncov$RawAPOE = gsub(\"NANA\", NA, cov$RawAPOE)\r\n\r\n# treat NA values as E3/E3\r\ncov[is.na(RawAPOE), RawAPOE := \"33\"]\r\n\r\n\r\n\r\nAPOE\r\n\r\nGroup levels into three categories:\r\n\r\n\r\nE2 = E2/E2 and E2/E3\r\n\r\n\r\nE3 = E3/E3\r\n\r\n\r\nE4 = E2/E4, E3/E4, and E4/E4\r\n\r\n\r\n\r\n# refactor\r\nlevels(cov$E) = c(\"E2\", \"E2\", \"E4\", \"E3\", \"E4\", \"E4\")\r\ncov[, E := factor(as.character(E), levels = c(\"E3\", \"E2\", \"E4\"))]\r\n\r\n# double-check\r\ntable(cov$E, cov$RawAPOE)\r\n\r\n\r\n\r\nE4 Count\r\n\r\nGroup levels into three categories:\r\n\r\n\r\n0 = E2/E2, E3/E3, and E2/E3\r\n\r\n\r\n1 = E2/E4 and E3/E4\r\n\r\n\r\n2 = E4/E4\r\n\r\n\r\n\r\n# refactor\r\ncov[, E4num := as.factor(RawAPOE)]\r\nlevels(cov$E4num) = c(0, 0, 1, 0, 1, 2)\r\ncov[, E4num := as.numeric(as.character(E4num))]\r\n\r\n# double-check\r\ntable(cov$E4num, cov$RawAPOE)\r\n\r\n\r\n\r\nE2 Count\r\n\r\nGroup levels into three categories:\r\n\r\n\r\n0 = E4/E4, E3/E3, and E3/E4\r\n\r\n\r\n1 = E2/E4 and E2/E3\r\n\r\n\r\n2 = E2/E2\r\n\r\n\r\n\r\n# refactor\r\ncov[, E2num := as.factor(RawAPOE)]\r\nlevels(cov$E2num) = c(2, 1, 1, 0, 0, 0)\r\ncov[, E2num := as.numeric(as.character(E2num))]\r\n\r\n# double-check\r\ntable(cov$E2num, cov$RawAPOE)\r\n\r\n\r\n\r\nAge at Death\r\nClean age at death variable.\r\n\r\n\r\n# clean age at death\r\ncov[, age_at_death := gsub(\"90\\\\+\", \"91\", AOD)]\r\ncov[, age_at_death := as.numeric(age_at_death)]\r\n\r\n\r\n\r\nSave Results\r\nChoose levels and contrasts, then save results.\r\n\r\n\r\n# save results\r\nexpSet = list(mData = msbb, cov = cov)\r\nsave(expSet, file = paste0(\"../Data/MSBB-\", brain_region,\"-24.Rdata\"))\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:46:09-07:00"
+ "last_modified": "2021-05-19T22:39:10-07:00"
},
{
"path": "qPCR-analysis.html",
@@ -53,7 +53,7 @@
"description": "This script performs analysis of RT-qPCR data.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nRead Data\r\nProcess Data\r\nStatistical Analysis\r\nPlot Data\r\nCorrelation Heatmaps\r\nSave Results\r\n\r\nDependencies\r\nLoad requisite packages. Note that the package wilkelab/ggtext hosted on GitHub is used to annotate the plots. This package can be downloaded via devtools::install_github(\"wilkelab/ggtext\").\r\n\r\n\r\n# data manipulation\r\nlibrary(data.table)\r\nlibrary(purrr)\r\nlibrary(magrittr)\r\nlibrary(stringr)\r\n\r\n# file manipulation\r\nlibrary(fs)\r\n\r\n# read and write to Excel files\r\nlibrary(openxlsx)\r\n\r\n# statistical analysis\r\nlibrary(psych)\r\nlibrary(moments)\r\n\r\n# plot data\r\nlibrary(ggplot2)\r\nlibrary(RColorBrewer)\r\nlibrary(ggpubr)\r\n\r\n# caption text and display significance\r\nlibrary(ggtext)\r\nlibrary(ggsignif)\r\n\r\n# heatmap\r\nlibrary(pheatmap)\r\n\r\n\r\n\r\nDefine reference directories.\r\n\r\n\r\n# define analysis name as APOE OR APPPS1 OR LPS\r\nopt = c(\"APOE\", \"APPPS1\", \"LPS\") %>% purrr::set_names(.)\r\nnm = opt[menu(opt, title = \"Please select the desired analysis.\")]\r\n\r\n# define directories\r\nbdir = getwd()\r\nddir = file.path(bdir, \"Data\")\r\nrdir = file.path(bdir, \"Results\", nm)\r\nlogd = file.path(rdir, paste(nm, \"Log.txt\"))\r\n\r\n# source utilities script\r\nsource(file.path(bdir, \"Code\", \"Utilities\", \"Utilities.R\"))\r\n\r\n# overwrite log file\r\nif(file_exists(logd)) file_delete(logd)\r\n\r\n# clear existing analyses\r\nif(dir_exists(rdir)) { \r\n dir_delete(rdir)\r\n dir_create(rdir)\r\n dir_create(file.path(rdir, \"Gene Plots\"))\r\n}\r\n\r\n\r\n\r\nRead Data\r\nRead qPCR results from .xlsx data file. Read \\(\\Delta Ct\\) values.\r\n\r\n\r\n# define function to read data\r\nread_data = function(sheet_number) {\r\n read.xlsx(file.path(ddir, \"qPCR Data.xlsx\"), sheet = sheet_number) %>%\r\n as.data.table() %>%\r\n return()\r\n}\r\n\r\n# read data\r\ndCT = c(1, 4, 7) %>% purrr::set_names(opt) %>%\r\n .[[nm]] %>% read_data()\r\n\r\n# reorder APOE factor\r\nif(nm == \"LPS\") dCT[, Treatment := factor(Treatment, levels = c(\"PBS\", \"LPS\"))]\r\n\r\n\r\n\r\nProcess Data\r\nGenerate both \\(\\Delta \\Delta Ct\\), and relative quantification (i.e., RQ where \\(RQ = 2^{-\\Delta \\Delta Ct}\\)) values.\r\n\r\n\r\n# create factor variable\r\nfac = list(\"Genotype\", \"Genotype\", c(\"Genotype\", \"Treatment\")) %>% purrr::set_names(opt) %>% .[[nm]]\r\n \r\n# extract gene names\r\nlab = dCT[, !c(\"Mouse\", \"Sex\", ..fac)] %>% colnames() %>% .[order(.)]\r\n\r\n# calculate average of reference group (i.e., APOE3 OR APOE3/PBS)\r\nctrl = dCT[, map(.SD, ~mean(.x, na.rm = TRUE)), .SDcols = lab, by = fac]\r\n\r\n# select appropriate reference group\r\nctrl = if(nm == \"LPS\") ctrl[Genotype == \"APOE3\" & Treatment == \"PBS\", ..lab] else ctrl[Genotype == \"APOE3\", ..lab]\r\n\r\n# calculate ddCT\r\nddCT = copy(dCT)[, (lab) := map2(.SD, ctrl, ~.x-.y), .SDcols = lab]\r\n\r\n# calculate RQ\r\nRQ = copy(ddCT)[, (lab) := 2^-.SD, .SDcols = lab]\r\n\r\n\r\n\r\nStatistical Analysis\r\nFunctions to generate caption labels.\r\n\r\n\r\ngenerate_label = function(p) {\r\n \r\n p = if(p <= 0.0001) signif(p, digits = 2) else round(p, 4)\r\n \r\n if(p <= 0.0001) {\r\n return(paste0(\"*p* = \", p, \"<\/span>\"))\r\n } else if (p < 0.05) {\r\n return(paste0(\"*p* = \", p, \"<\/span>\"))\r\n } else if (p < 0.1) {\r\n return(paste0(\"*p* = \", p, \"<\/span>\"))\r\n } else if(p >= 0.1) {\r\n return(\"N.S.<\/span>\")\r\n }\r\n \r\n}\r\n\r\ngenerate_caption = function(comparison, label) {\r\n \r\n paste0(\"**\", comparison, \":** <\/span>\", label, \" \")\r\n \r\n}\r\n\r\n# set colors\r\ncolors = if(nm == \"APOE\") c(\"#62BBA5\", \"#785D37\", \"#FFB84D\") else c(\"#785D37\", \"#FFB84D\")\r\n\r\n\r\n\r\nFunction to perform statistical analysis for each gene and create the respective plots. Note that we compute the mean and standard deviation on ddCT then transform by \\(2^{-x}\\) for mean and \\(2^{x}\\) standard deviation (i.e., SD is independent of mean).\r\n\r\n\r\ngene_analysis = function(gene) {\r\n \r\n # write output message\r\n cat(paste0(\"- \", gene, \"\\n\"))\r\n \r\n # perform one-way ANOVA on dCT values\r\n res_aov = if(nm == \"LPS\") aov(get(gene) ~ Genotype * Treatment, data = dCT) else aov(get(gene) ~ Genotype, data = dCT)\r\n stat_aov = summary(res_aov)[[1]]\r\n rownames(stat_aov) = trimws(rownames(stat_aov), \"both\")\r\n \r\n # perform posthoc Tukey test on dCT values\r\n res_tukey = TukeyHSD(res_aov)\r\n stat_tukey = if(nm == \"LPS\") res_tukey %>%\r\n .[[\"Genotype:Treatment\"]] %>%\r\n .[c(\"APOE4:LPS-APOE3:LPS\", \"APOE4:PBS-APOE3:PBS\"), ] else res_tukey[[\"Genotype\"]]\r\n \r\n # print to log file\r\n sink(file = logd, append = TRUE)\r\n cat(paste0(gene, \" ANOVA:\\n\")); print(stat_aov)\r\n cat(paste0(\"\\n\", gene, \" TUKEY:\\n\")); print(stat_tukey)\r\n cat(\"\\n\\n\")\r\n sink()\r\n \r\n # create posthoc table\r\n posthoc = stat_tukey %>%\r\n as.data.table(keep.rownames = TRUE) %>%\r\n setnames(c(\"Comparison\", \"Difference\", \"Lower CI\", \"Upper CI\", \"p\")) %>%\r\n .[, c(\"Start\", \"End\") := strsplit(Comparison, \"-\") %>% { .(map_chr(., 2), map_chr(., 1)) }]\r\n \r\n # reorder for APOE\r\n if(nm == \"APOE\") { posthoc = posthoc[c(1, 3, 2), ] } # E2/E3, E3/E4, E2/E4 \r\n \r\n # rename for LPS\r\n if(nm == \"LPS\") {\r\n posthoc = posthoc %>% \r\n .[, Treatment := map_chr(strsplit(Start, \":\"), 2)] %>%\r\n .[, Treatment := factor(Treatment, levels = c(\"PBS\", \"LPS\"))] %>%\r\n .[, Start := map_chr(strsplit(Start, \":\"), 1)] %>%\r\n .[, End := map_chr(strsplit(End, \":\"), 1)]\r\n }\r\n \r\n # create output table\r\n comps = if(nm == \"LPS\") posthoc[, paste0( Treatment, \" \", Start, \"/\", End)] else posthoc[, paste(Start, End, sep = \"/\")]\r\n output = data.table(Gene = gene)\r\n \r\n # function to add columns by reference\r\n create_anova_col = function(cn) { output[, paste(\"ANOVA\", cn, c(\"p\", \"p adj.\")) := .(stat_aov[cn, \"Pr(>F)\"], stat_aov[cn, \"Pr(>F)\"])] }\r\n create_tukey_col = function(cn) { output[, paste(comps, cn) := as.list(posthoc[[cn]])] }\r\n \r\n # create ANOVA columns\r\n { if(nm == \"LPS\") c(\"Genotype\", \"Treatment\", \"Genotype:Treatment\") else c(\"Genotype\") } %>%\r\n walk(create_anova_col)\r\n \r\n # create Tukey columns\r\n if(nm == \"APOE\") walk(c(\"Difference\", \"Lower CI\", \"Upper CI\", \"p\"), create_tukey_col)\r\n \r\n # reorder columns to group by comparison\r\n unlist(map(comps, ~grep(.x, colnames(output)))) %>%\r\n c(1:(ncol(output)-length(.)), .) %>%\r\n setcolorder(output, .)\r\n \r\n # calculate mean and SD of RQ values, then ensure error bars are not negative\r\n desc = describeBy(RQ[, ..gene], RQ[, ..fac], mat = TRUE, digits = 4) %>%\r\n as.data.table() %>%\r\n setnames(c(\"group1\", \"group2\"), c(\"Genotype\", \"Treatment\"), skip_absent = TRUE) %>%\r\n .[, .SD, .SDcols = c(fac, \"mean\", \"sd\", \"se\")] %>%\r\n .[, c(\"maxY\", \"minY\") := .(mean + se, mean - se)] %>%\r\n .[minY < 0, minY := 0]\r\n \r\n # refactor order for LPS/PBS only\r\n if(nm == \"LPS\") desc[, Treatment := factor(Treatment, levels = c(\"PBS\", \"LPS\"))]\r\n \r\n # calculate range of RQ values\r\n range_RQ = RQ[, max(.SD, na.rm = T) - min(.SD, na.rm = T), .SDcols = gene]\r\n \r\n # set significance bar height\r\n posthoc = posthoc %>%\r\n .[, Height := desc[, max(mean + 1.3*sd)] + range_RQ*seq(0, 0.2, length.out = nrow(.))] %>%\r\n .[, Label := paste(\"p =\", round(p, 4))] %>%\r\n .[!(p > 0.1), ]\r\n \r\n # adjust bar height for PBS/LPS\r\n if(nrow(posthoc) > 0 & nm == \"LPS\") {\r\n posthoc = posthoc %>% \r\n merge(desc[, max(mean + 1.3*sd), by = \"Treatment\"], by = \"Treatment\", all = TRUE) %>%\r\n setnames(\"V1\", \"GroupHeight\")\r\n }\r\n \r\n # create caption labels\r\n caption_labels = stat_aov %>%\r\n .[!(rownames(.) == \"Residuals\"), ] %>%\r\n { data.table(Comparison = rownames(.), pVal = .[, \"Pr(>F)\"]) } %>%\r\n .[Comparison == \"Genotype:Treatment\", Comparison := \"Interaction\"] %>%\r\n .[, Label := map_chr(pVal, generate_label)]\r\n \r\n # create caption\r\n caption = paste0(pmap_chr(caption_labels[, .(Comparison, Label)], ~generate_caption(.x, .y)), collapse = \"\")\r\n \r\n # add dummy facet variable\r\n if(nm == \"APOE\") { desc[, Treatment := \"\"] }\r\n if(nm == \"APPPS1\") { desc[, Treatment := \"\"] }\r\n \r\n # create barplot\r\n bp = ggplot(desc, aes(x = Genotype, y = mean, color = Genotype, group = Genotype)) +\r\n geom_bar(stat = \"identity\", fill = \"white\", size = 1, width = 0.5) +\r\n scale_color_manual(values = colors) +\r\n geom_errorbar(aes(ymax = maxY, ymin = minY), color = \"#41414E\", width = 0.25) +\r\n geom_jitter(data = RQ, aes(x = Genotype, y = .data[[gene]], color = Genotype, shape = Sex),\r\n size = 2, width = 0.15) +\r\n ggtitle(gene) + xlab(\"\") + labs(caption = caption) +\r\n ylab(bquote(bold('Expression Fold-Change ('*2^{-Delta*Delta*bolditalic(Ct)}*')'))) +\r\n scale_y_continuous(expand = expansion(mult = c(-0.01, .1))) +\r\n theme_linedraw() +\r\n guides(fill = FALSE, color = FALSE, shape = FALSE) +\r\n theme(plot.title = element_text(size = 20, hjust = 0.5, color = \"#41414E\", face = \"bold.italic\"),\r\n plot.caption = element_markdown(hjust = 0.5, size = 16),\r\n axis.text.x.bottom = element_text(size = 12, face = \"bold.italic\", color = \"#41414E\"),\r\n axis.title.y = element_text(size = 14, face = \"bold\", color = \"#41414E\"),\r\n strip.background = element_rect(fill = \"#5D5D6F\"),\r\n strip.text = element_text(size=14, color = \"white\", face = \"bold\"),\r\n panel.background = element_rect(fill = \"#FDF5ED\"),\r\n axis.text.x = element_text(size=12, color = \"#41414E\", face = \"italic\"),\r\n axis.ticks.x = element_blank(),\r\n panel.grid = element_blank()\r\n )\r\n \r\n # add facet\r\n bp = bp + facet_wrap(~ Treatment)\r\n \r\n # save plot\r\n ggsave(file.path(rdir, \"Gene Plots\", paste(gene, nm, \"Plot.pdf\")), bp, width = 5, height = 6)\r\n \r\n # return plot\r\n return(list(bp + theme(plot.margin = margin(0.5, 0.5, 0.5, 0.5, \"cm\")), output))\r\n \r\n}\r\n\r\n\r\n\r\nPlot Data\r\nMap gene_analysis function over list of genes to create individual plots.\r\n\r\n\r\n# run analysis\r\ncat(paste(nm, \"ANALYSIS:\\n\"))\r\nplots = map(lab, gene_analysis) %>%\r\n purrr::set_names(lab)\r\n\r\n# separate output\r\noutput = map_dfr(plots, 2)\r\nplots = map(plots, 1)\r\n\r\n# fix column names\r\nif(nm == \"LPS\") colnames(output) = gsub(\"Genotype:Treatment\", \"Interaction\", colnames(output))\r\n\r\n# adjust p-values for multiple comparisons\r\nadj = colnames(output) %>% .[grep(\"adj.\", ., fixed = TRUE)]\r\noutput[, (adj) := map(.SD, ~p.adjust(.x, method = \"BH\")), .SDcols = adj]\r\n\r\n# save output\r\nfwrite(output, file.path(rdir, paste(nm, \"Statistical Results.csv\")))\r\n\r\n\r\n\r\nAggregate individual plots to create composite figure. First create the shared legend and define a function to create the shared y-axes.\r\n\r\n\r\n# create legend\r\nleg = { ggplot(RQ, aes(x = Genotype, y = Gfap, color = Genotype, shape = Sex)) +\r\n scale_color_manual(values = colors) +\r\n geom_jitter(size = 2, width = 0.1) + \r\n theme(legend.title = element_text(size = 18, face = \"bold\", color = \"#41414E\"),\r\n legend.text = element_text(size = 14, color = \"#41414E\"),\r\n legend.position = \"top\") } %>%\r\n get_legend()\r\n\r\n# create shared y-axis per row\r\nshared_y = function(p, idx, nwidth) {if((idx - 1) %% nwidth == 0) return(p) else return(p + rremove(\"ylab\"))}\r\n\r\n\r\n\r\nFunction to aggregate plots.\r\n\r\n\r\n# function to aggregate plots\r\naggregate_plots = function(plist, pcol, plab) {\r\n \r\n # subset markers and create shared axis\r\n plist = plots[plist] %>%\r\n map2(seq_along(.), ~shared_y(.x, .y, pcol))\r\n \r\n # get number of rows\r\n prow = ceiling(length(plist)/pcol)\r\n \r\n # join plots together\r\n composite = ggarrange(plotlist = plist, ncol = pcol, nrow = prow,\r\n legend.grob = leg, legend = \"bottom\",\r\n widths = c(1.117, rep(1, pcol - 1)))\r\n \r\n # save figure\r\n ggsave(file.path(rdir, paste(nm, plab, \"qPCR Plots.pdf\")), composite,\r\n width = 3.6*pcol, height = 6*prow)\r\n \r\n}\r\n\r\n\r\n\r\nCreate the composite figures.\r\n\r\n\r\n# define main and supplemental markers\r\nmks = list(Main = c('Trem2', 'Tyrobp', 'C1qa', 'Cd68', 'P2ry12', 'C3', 'Cd74', 'Spp1', 'Msr1', 'Tgfbr1'), Supplemental = c('Cx3cr1', 'Gfap', 'Clu', 'huAPOE'))\r\n\r\n# adjust for extra LPS genes\r\nif(nm == \"LPS\") { mks$Supplemental = c('Tnfa', 'Il1b', mks$Supplemental); rc = c(5, 4) } else rc = c(5, 4)\r\n\r\n# create composite figures\r\npwalk(list(mks, rc, names(mks)), ~aggregate_plots(...))\r\n\r\n\r\n\r\nCorrelation Heatmaps\r\nCompute Spearman correlations and visualize the correlation matrix in a heatmap.\r\n\r\n\r\n# subset dCT data\r\ncor_dat = dCT[, ..lab]\r\nprint(sapply(cor_dat, function(x) agostino.test(as.numeric(x), alternative = \"two.sided\")$p.value))\r\n\r\n# calculate correlation, define new names\r\ncor_dat = corr.test(cor_dat, method = \"spearman\", use = \"pairwise\")$r\r\nnewnames = map(rownames(cor_dat), ~bquote(italic(.(.x))))\r\n\r\n# plot heatmap\r\nhm = pheatmap(cor_dat,\r\n color = colorRampPalette(c(\"#313695\", \"#8FC3DC\", \"#E3E0DD\", \"#F88D52\", \"#A50026\"))(100),\r\n breaks = seq(from = -1, to = 1, length.out = 101),\r\n cellwidth = 25, cellheight = 25,\r\n cluster_cols = TRUE, cluster_rows = TRUE,\r\n border_color = NA,\r\n show_colnames = TRUE,\r\n show_rownames = TRUE,\r\n silent = TRUE,\r\n labels_row = as.expression(newnames),\r\n labels_col = as.expression(newnames))\r\n\r\n# save figure\r\nif(nm == \"LPS\") ggsave(file.path(rdir, paste(nm, \"Correlation Heatmap.pdf\")), hm, width = 8, height = 7.77) else ggsave(file.path(rdir, paste(nm, \"Correlation Heatmap.pdf\")), hm, width = 7, height = 6.8)\r\n\r\n\r\n\r\nSave Results\r\nSave statistical results and correlation matrix as worksheets in an Excel workbook.\r\n\r\n\r\n# create workbook object\r\nwb = createWorkbook()\r\n\r\n# add statistical sheet\r\ns1 = paste(nm, \"Statistical Results\")\r\naddWorksheet(wb, sheetName = s1)\r\nidx1 = 2:(nrow(output) + 1)\r\n\r\n# add correlation sheet\r\ns2 = paste(nm, \"Correlation Matrix\")\r\naddWorksheet(wb, sheetName = s2)\r\nidx2 = 2:(nrow(cor_dat) + 1)\r\n\r\n# define header style\r\nhs = createStyle(fontColour = \"#FFFFFF\", fgFill = \"#1A1B41\", fontName = \"Arial Black\",\r\n halign = \"center\", valign = \"center\", textDecoration = \"bold\",\r\n border = \"bottom\", borderStyle = \"thick\", fontSize = 10)\r\n\r\n# define row styles\r\nr1 = createStyle(fontColour = \"#363635\", fgFill = \"#FFFFFF\", fontName = \"Arial\",\r\n fontSize = 10, halign = \"center\", valign = \"center\", border = \"TopBottomLeftRight\")\r\nr2 = createStyle(fontColour = \"#363635\", fgFill = \"#F6F4F4\", fontName = \"Arial\",\r\n fontSize = 10, halign = \"center\", valign = \"center\", border = \"TopBottomLeftRight\")\r\n\r\n# write output\r\nwriteData(wb, s1, x = output, headerStyle = hs)\r\n\r\n# write correlation matrix\r\nhm$tree_row %>%\r\n {.$labels[.$order]} %>%\r\n cor_dat[., .] %>%\r\n as.data.table(keep.rownames = \"\") %>%\r\n writeData(wb, s2, x = ., headerStyle = hs)\r\n\r\n# set column widths and row heights for statistical results\r\nsetColWidths(wb, s1, idx1, 30)\r\nsetRowHeights(wb, s1, idx2, 20)\r\n\r\n# set column widths and row heights for correlation matrix\r\nsetColWidths(wb, s2, idx2, 10)\r\nsetColWidths(wb, s2, 1, 10/6)\r\nsetRowHeights(wb, s2, idx2, 60)\r\n\r\n# add row styling for statistical results\r\naddStyle(wb, s1, r1, rows = idx1, cols = 1:ncol(output), gridExpand = T)\r\nfreezePane(wb, s1, firstRow = TRUE, firstCol = TRUE)\r\n\r\n# add striped header styling for statistical results\r\nif(nm == \"APOE\") {\r\n addStyle(wb, s1, createStyle(fgFill = \"#CEEAE3\"),\r\n cols = grep(\"APOE2/APOE3\", colnames(output)), rows = idx1, stack = TRUE, gridExpand = T)\r\n addStyle(wb, s1, createStyle(fgFill = \"#DECEB8\"),\r\n cols = grep(\"APOE3/APOE4\", colnames(output)), rows = idx1, stack = TRUE, gridExpand = T)\r\n addStyle(wb, s1, createStyle(fgFill = \"#FFE9C8\"),\r\n cols = grep(\"APOE2/APOE4\", colnames(output)), rows = idx1, stack = TRUE, gridExpand = T)\r\n# } else if(nm == \"LPS\") {\r\n# addStyle(wb, s1, createStyle(fgFill = \"#DECEB8\"),\r\n# cols = grep(\"PBS\", colnames(output)), rows = idx1, stack = TRUE, gridExpand = T)\r\n# addStyle(wb, s1, createStyle(fgFill = \"#FFE9C8\"),\r\n# cols = grep(\"LPS\", colnames(output)), rows = idx1, stack = TRUE, gridExpand = T)\r\n} else {\r\n idx1[which(idx1 %% 2 == 0)] %>% addStyle(wb, s1, r2, rows = ., cols = 1:ncol(output), gridExpand = T)\r\n}\r\n\r\n# add header styling for statistical results\r\naddStyle(wb, s1, createStyle(textDecoration = \"italic\"), rows = idx1, cols = 1, stack = TRUE)\r\naddStyle(wb, s1, createStyle(textDecoration = \"bold\", fgFill = \"#ECD6D5\"),\r\n rows = idx1, cols = 1, stack = TRUE)\r\n\r\n# add row styling for correlation matrix\r\naddStyle(wb, s2, r2, rows = idx2, cols = idx2, gridExpand = T)\r\nidx2[which(idx2 %% 2 == 0)] %>% { \r\n addStyle(wb, s2, r2, rows = ., cols = ., gridExpand = T)\r\n addStyle(wb, s2, r2, rows = ., cols = ., gridExpand = T)\r\n}\r\n\r\n# add header style for correlation matrix\r\naddStyle(wb, s2, hs, rows = idx2, cols = 1, gridExpand = T)\r\naddStyle(wb, s2, createStyle(textRotation = 90), rows = idx2, cols = 1, stack = TRUE)\r\n\r\n# add conditional formatting\r\nconditionalFormatting(wb, s2, idx2, idx2, type = \"colourScale\",\r\n style = c(\"#5A8AC6\", \"#FCFCFF\", \"#F8696B\"))\r\n\r\n# save workbook\r\nsaveWorkbook(wb, file.path(rdir, paste(nm, \"Supplementary Table.xlsx\")), overwrite = TRUE)\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:46:28-07:00"
+ "last_modified": "2021-05-19T22:39:29-07:00"
},
{
"path": "ROSMAP-astrocyte.html",
@@ -61,7 +61,7 @@
"description": "This script performs analysis of ROSMAP astrocyte data.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nSelect Astrocyte Genes\r\nPre-Processing\r\nRun Models\r\nModel 1\r\nModel 2\r\nModel 3\r\n\r\nZ-Score Analysis\r\nSpectral Clustering\r\nC0\r\nC3\r\nB1\r\nB3\r\nOverlap\r\n\r\nSave Results\r\nVenn Diagram\r\nStatistical Testing\r\nFigure 4A and 4B\r\nFigure 4C and 4D\r\nFigure 4E and 4F\r\n\r\nDependencies\r\nLoad requisite packages.\r\nLoad requisite packages. Note that package cttobin/ggthemr hosted on GitHub is used to provide ggplot2 themes. This package can be downloaded via devtools::install_github(\"cttobin/ggthemr\").\r\n\r\n\r\nrm(list = ls())\r\nrequire(ggVennDiagram)\r\nrequire(limma)\r\nrequire(latex2exp)\r\nrequire(ComplexHeatmap)\r\nrequire(data.table)\r\nrequire(pheatmap)\r\nrequire(ggplot2)\r\nrequire(gridExtra)\r\nrequire(ggthemr)\r\nrequire(SNFtool)\r\nrequire(circlize)\r\nrequire(patchwork)\r\nrequire(cowplot)\r\nrequire(ggpubr)\r\n\r\nds = \"ROSMAP\"\r\ndtype = \"log2FPKM\"\r\nload(\"../Data/ROSMAP-24-adj-low.expr.genes.removed.Rdata\")\r\n\r\nsource(\"models.R\")\r\nsource(\"spectral-clustering.R\")\r\n\r\n# load expression data \r\nmData = expSet$mData\r\n\r\n\r\n\r\nSelect Astrocyte Genes\r\nFirst, select only astrocyte genes. Of the microglial genes, select only those that exist in ROSMAP.\r\n\r\n\r\ncelltype = \"astro\"\r\n\r\n# select only astrocyte genes\r\ngenes = readLines(\"../Data/Astrocyte Genes.txt\")\r\n\r\n# select astrocyte genes that exist in ROSMAP\r\nannot = fread(\"../Data/ENSEMBL GRCh38.p7.csv\")\r\nannot = unique(annot[GeneSymbol %in% genes,])\r\nensembl_ids = intersect(rownames(mData), annot$EnsemblID)\r\n\r\nannot = unique(annot[EnsemblID %in% ensembl_ids,])\r\n# remove duplicated genes\r\nannot = annot[!duplicated(GeneSymbol), ]\r\nensembl_ids = annot$EnsemblID\r\n\r\nany(duplicated(ensembl_ids)) # no duplicated ids\r\nany(is.na(ensembl_ids)) # no NA\r\nlength(ensembl_ids)\r\n\r\n\r\n\r\nPre-Processing\r\n\r\n\r\n# select interesting genes\r\nmData = mData[annot$EnsemblID, ]\r\nall(rownames(mData) == annot$EnsemblID)\r\nrownames(mData) = annot$GeneSymbol\r\n\r\n# set metadata\r\ncov = expSet$cov\r\n\r\n\r\n\r\nRun Models\r\nModel 1: exp ~ APOE + CERAD | exp ~ APOE + Braak\r\nModel 2 baseline: C0E4-C0E3, C0E2-C0E3 | B1E4-B1E3, B1E2-B1E3\r\nModel 3: C3E4-C3E3, C3E2-C3E3 | B3E4-B3E3, B3E2-B3E3\r\nModel 1\r\nRun Model 1, which is exp ~ APOE + CERAD | exp ~ APOE + Braak.\r\n\r\n\r\n# run Model 1\r\ntT_CERAD = run_model1(mData, cov$C, cov$E, annot)\r\ntT_Braak = run_model1(mData, cov$B, cov$E, annot)\r\n\r\nlength(tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nlength(tT_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nlength(tT_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nTableS2.astro = tT_CERAD[, .(EnsemblID, GeneSymbol, E4vsE3_logFC, E4vsE3_P.Value, E2vsE3_logFC, E2vsE3_P.Value)]\r\n\r\n\r\n\r\nModel 2\r\nRun Model 2 baseline, which is C0E4-C0E3, C0E2-C0E3 | B1E4-B1E3, B1E2-B1E3.\r\n\r\n\r\n# run Model 2\r\ntT2_CERAD = run_model2(\"CERAD\", mData, cov$C, cov$E, annot) \r\ntT2_Braak = run_model2(\"Braak\", mData, cov$B, cov$E, annot)\r\n\r\nlength(tT2_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT2_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\nlength(tT2_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT2_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\n\r\n\r\nModel 3\r\nRun Model 3, which is C3E4-C3E3, C3E2-C3E3 | B3E4-B3E3, B3E2-B3E3.\r\n\r\n\r\n# run Model 3\r\ntT3_CERAD = run_model3(\"CERAD\", mData, cov$C, cov$E, annot) \r\ntT3_Braak = run_model3(\"Braak\", mData, cov$B, cov$E, annot)\r\n\r\nlength(tT3_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT3_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\nlength(tT3_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT3_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\n\r\n\r\nZ-Score Analysis\r\nCompute z-scores.\r\n\r\n\r\nz.C0 = to_z_score(\"C\", 0, mData, cov)\r\nz.C3 = to_z_score(\"C\", 3, mData, cov)\r\nz.B1 = to_z_score(\"B\", 1, mData, cov)\r\nz.B3 = to_z_score(\"B\", 3, mData, cov)\r\n\r\n\r\n\r\nCalculate average z-scores.\r\n\r\n\r\navez.C0 = to_ave_z(z.C0)\r\navez.C3 = to_ave_z(z.C3)\r\navez.B1 = to_ave_z(z.B1)\r\navez.B3 = to_ave_z(z.B3)\r\n\r\n\r\n\r\nSpectral Clustering\r\nC0\r\nNumber of Clusters: 5\r\nIndex of Cluster of Interest: 4 and 1\r\nNote that Cluster #4 above was originally Cluster #3; they are switched for convenience.\r\n\r\n\r\n# spectral clustering\r\nc = 4; k = 5\r\nclust.C0 = spectral_clustering(avez.C0, k)\r\nclust.C0$clustA = as.factor(clust.C0$clustA)\r\n\r\n# switch clusters 3 and 4 for convenience\r\nlevels(clust.C0$clustA) = c(\"1\", \"2\", \"4\", \"3\", \"5\")\r\nclust.C0$clustA = as.numeric(as.character(clust.C0$clustA))\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.C0$clustA))\r\nrownames(annot_row) = rownames(clust.C0$zMtx)\r\npheatmap(clust.C0$zMtx[order(clust.C0$clustA),],\r\n annotation_row = annot_row[order(clust.C0$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nC0_c4.genes = sort(rownames(clust.C0$zMtx)[clust.C0$clustA == c])\r\nlength(C0_c4.genes)\r\nC0_c1.genes = sort(rownames(clust.C0$zMtx)[clust.C0$clustA == 1])\r\nlength(C0_c1.genes)\r\n\r\n\r\n\r\nC3\r\nNumber of Clusters: 5\r\nIndex of Cluster of Interest: 5\r\n\r\n\r\n# spectral clustering\r\nc = 5; k = 5\r\nclust.C3 = spectral_clustering(avez.C3, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.C3$clustA))\r\nrownames(annot_row) = rownames(clust.C3$zMtx)\r\npheatmap(clust.C3$zMtx[order(clust.C3$clustA),],\r\n annotation_row = annot_row[order(clust.C3$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nC3.genes = sort(rownames(clust.C3$zMtx)[clust.C3$clustA == c])\r\nlength(C3.genes)\r\n\r\n\r\n\r\nB1\r\nNumber of Clusters: 5\r\nIndex of Cluster of Interest: 6\r\n\r\n\r\n# spectral clustering\r\nk = 5; c = 5\r\nclust.B1 = spectral_clustering(avez.B1, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.B1$clustA))\r\nrownames(annot_row) = rownames(clust.B1$zMtx)\r\npheatmap(clust.B1$zMtx[order(clust.B1$clustA),],\r\n annotation_row = annot_row[order(clust.B1$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nB1.genes = sort(rownames(clust.B1$zMtx)[clust.B1$clustA == c])\r\nlength(B1.genes)\r\n\r\n\r\n\r\nB3\r\nNumber of Clusters: 5\r\nIndex of Cluster of Interest: 1\r\n\r\n\r\n# spectral clustering\r\nk = 5; c = 1\r\nclust.B3 = spectral_clustering(avez.B3, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.B3$clustA))\r\nrownames(annot_row) = rownames(clust.B3$zMtx)\r\npheatmap(clust.B3$zMtx[order(clust.B3$clustA),],\r\n annotation_row = annot_row[order(clust.B3$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nB3.genes = sort(rownames(clust.B3$zMtx)[clust.B3$clustA == c])\r\nlength(B3.genes)\r\n\r\n\r\n\r\nOverlap\r\n\r\n\r\n# hypergeometric tests\r\nphyper_test(C0_c4.genes, B1.genes)\r\nphyper_test(C3.genes, B3.genes)\r\nphyper_test(C0_c4.genes, C3.genes)\r\nphyper_test(B1.genes, B3.genes)\r\n\r\n\r\n\r\nSave Results\r\nSave results to Excel files.\r\n\r\n\r\n# CERAD genes\r\nxlsx::write.xlsx(C0_c4.genes, \r\n \"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx\", \r\n sheetName = \"C0_clust4(k=5)\", append = TRUE, row.names = F, \r\n col.names = F)\r\nxlsx::write.xlsx(C0_c1.genes, \r\n \"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx\", \r\n sheetName = \"C0_clust1(k=5)\", append = TRUE, row.names = F, \r\n col.names = F)\r\n\r\nxlsx::write.xlsx(C3.genes, \r\n \"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx\", \r\n sheetName = \"C3_clust1(k=5)\", append = TRUE, row.names = F, \r\n col.names = F)\r\n\r\nlength(intersect(C0_c4.genes, C3.genes)) \r\nxlsx::write.xlsx(intersect(C0_c4.genes, C3.genes), \r\n \"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx\", \r\n sheetName = \"intersect C0_cluster3&C4\", append = TRUE, row.names = F, col.names = F)\r\n\r\n# Braak genes\r\nxlsx::write.xlsx(B1.genes, \r\n \"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx\", \r\n sheetName = \"B1_clust5(k=5)\", append = TRUE, row.names = F, \r\n col.names = F)\r\n\r\nxlsx::write.xlsx(B3.genes, \r\n \"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx\", \r\n sheetName = \"B3_clust1(k=5)\", append = TRUE, row.names = F, \r\n col.names = F)\r\n\r\nlength(intersect(B1.genes, B3.genes)) \r\nxlsx::write.xlsx(intersect(B1.genes, B3.genes), \r\n \"../results/ROSMAP_astrocyte_new_cluster_remove.low.expressed.genes.xlsx\", \r\n sheetName = \"intersect B1&B3\", append = TRUE, row.names = F, col.names = F)\r\n\r\n\r\n\r\nVenn Diagram\r\nPrepare Venn diagram.\r\n\r\n\r\nrosmap_genes = list(\r\n `C0 clust4` = C0_c4.genes,\r\n C3 = C3.genes,\r\n B1 = B1.genes,\r\n B3 = B3.genes\r\n)\r\nggVennDiagram(rosmap_genes, label_alpha=0)\r\nggsave(\"../results/venn_rosmap_astrocyte.pdf\")\r\n\r\nggVennDiagram(rosmap_genes[c(\"C0 clust4\", \"C3\")], label_alpha=0)\r\nggsave(\"../results/venn_rosmap_astrocyte_C0_c4&C3.pdf\")\r\nggVennDiagram(rosmap_genes[c(\"B1\", \"B3\")], label_alpha=0)\r\nggsave(\"../results/venn_rosmap_astrocyte_B1&B3.pdf\")\r\n\r\n\r\n\r\nStatistical Testing\r\nStatistical testing of astrocyte-APOE genes across APOE genotypes. Use the average of z-scores for genes of interest for each individual as the outcome, with APOE genotype as a covariate and APOE E3/E3 as the baseline.\r\n\r\n\r\n# C0 test\r\ncov_C0 = cov[C == 0,]\r\nall(cov_C0$individualIdentifier == colnames(z.C0$z))\r\ntest_avez(z.C0$z, C0_c4.genes, cov_C0)\r\n\r\n# C3 test\r\ncov_C3 = cov[C == 3,]\r\nall(cov_C3$individualIdentifier == colnames(z.C3$z))\r\ntest_avez(z.C3$z, C3.genes, cov_C3)\r\n\r\n# B1 test\r\ncov_B1 = cov[B == 1,]\r\nall(cov_B1$individualIdentifier == colnames(z.B1$z))\r\ntest_avez(z.B1$z, B1.genes, cov_B1)\r\n\r\n# B3 test\r\ncov_B3 = cov[B == 3,]\r\nall(cov_B1$individualIdentifier == colnames(z.B1$z))\r\ntest_avez(z.B3$z, B3.genes, cov_B3)\r\n\r\n\r\n\r\nFigure 4A and 4B\r\nFigure 4A: Individual z-score heatmap for ROSMAP C0 subjects for Cluster #1.\r\nFigure 4B: Individual z-score heatmap for ROSMAP C0 subjects for Cluster #4.\r\n\r\n\r\n# prepare data\r\nzC0 = z.C0$z\r\nzC0 = zC0[order(clust.C0$clustA),]\r\nzC0.genes = zC0[rownames(zC0) %in% C0_c4.genes,]\r\n\r\n# color function\r\ncolFun1 = colorRamp2(c(-1, -0.5, 0, 0.5, 1), \r\n c(\"#4575b4\", \"#74add1\", \"white\", \"#f46d43\", \"#d73027\"))\r\ncolFun2 = colorRamp2(c(-4, -2, 0, 2, 4), \r\n c(\"#0000FFFF\", \"#7C50FDFF\", \"#EEEEEEFF\", \"#FF6545FF\", \"#FF0000FF\"))\r\nclusters = c(1,4)\r\n\r\n# Figure 4\r\npanel_fun = function(index, nm) {\r\n if(index %in% which(sort(clust.C0$clustA) %in% clusters)){\r\n pushViewport(viewport())\r\n grid.rect(gp = gpar(fill = \"white\", col = \"white\"))\r\n grid.lines(c(0, 1, 1, 0), c(0, 0, 1, 1), gp = gpar(col = \"#AAAAAA\"), \r\n default.units = \"npc\")\r\n hD = zC0\r\n topAnnot = HeatmapAnnotation(z = anno_barplot(colMeans(hD[index,]), \r\n smooth = TRUE, axis_param = list(gp = gpar(fontsize = 5))),\r\n annotation_height = unit(0.7, \"cm\"))\r\n h = Heatmap(hD[index,], \r\n name = \"z-score\",\r\n col = colFun2,\r\n show_row_names = FALSE,\r\n show_column_names = FALSE,\r\n cluster_rows = TRUE,\r\n show_heatmap_legend = FALSE,\r\n column_split = factor(z.C0$E, levels = c(\"E2\", \"E3\", \"E4\")),\r\n cluster_column_slices = FALSE,\r\n column_title_gp = gpar(col = \"black\", fontsize = 12),\r\n show_column_dend = TRUE,\r\n show_row_dend = TRUE,\r\n border = \"#AAAAAA\",\r\n row_dend_side = \"right\",\r\n height = unit(4.2, \"cm\"),\r\n column_dend_height = unit(0.7, \"cm\"),\r\n row_dend_width = unit(0.7, \"cm\"),\r\n width = unit(18, \"cm\"),\r\n top_annotation = topAnnot\r\n )\r\n draw(h, newpage = FALSE)\r\n popViewport() \r\n }\r\n}\r\n\r\n# box for Figure 4\r\nzoom_idx1 = which(rownames(zC0) %in% C0_c1.genes)\r\nzoom_idx2 = which(rownames(zC0) %in% C0_c4.genes)\r\nlayer_fun = function(j, i, x, y, width, height, fill) {\r\n v = pindex(zC0, i, j)\r\n if(i %in% zoom_idx2) {\r\n grid.rect(gp = gpar(lwd = 2, col = \"black\"))\r\n }\r\n if(i %in% zoom_idx1) {\r\n grid.rect(gp = gpar(lwd = 2, col = \"black\"))\r\n }\r\n}\r\n\r\nanno = function(c, clust){\r\n return(anno_zoom(align_to = which(clust %in% c), \r\n which = \"row\", \r\n panel_fun = panel_fun, \r\n width = unit(21.5, \"cm\"), \r\n gap = unit(5, \"cm\"),\r\n size = unit(7, \"cm\"),\r\n link_width = unit(2, \"cm\"),\r\n link_height = unit(5, \"cm\"),\r\n link_gp = gpar(fill = \"white\", col = \"#AAAAAA\"), internal_line = FALSE)) \r\n}\r\n\r\n\r\n# legend\r\nlgd.h = Legend(col_fun = colFun1, title = \"A: Average z-score\", border = \"black\", \r\n legend_width = unit(9.2, \"cm\"), \r\n direction = \"horizontal\")\r\n\r\nlgd.have = Legend(col_fun = colFun2, title = \"B: z-score\", border = \"black\", \r\n legend_width = unit(9.2, \"cm\"), \r\n direction = \"horizontal\")\r\n\r\npd = packLegend(lgd.h, lgd.have, \r\n column_gap = unit(1, \"cm\"),\r\n direction = \"horizontal\")\r\n\r\n\r\n# Figure 4A\r\npdf(\"../results/Figure4AB.cluster1.pdf\", width = 11)\r\nHeatmap(as.matrix(avez.C0[order(clust.C0$clustA), c(\"E2\", \"E3\", \"E4\")]), \r\n col = colFun1,\r\n cluster_columns = FALSE,\r\n cluster_rows = FALSE,\r\n show_row_names = FALSE,\r\n show_column_names = TRUE,\r\n right_annotation = rowAnnotation(foo = anno(1, sort(clust.C0$clustA))),\r\n row_split = sort(clust.C0$clustA), \r\n column_names_side = \"bottom\",\r\n layer_fun = layer_fun,\r\n show_heatmap_legend = FALSE,\r\n width = unit(5, \"cm\"),\r\n column_names_rot = 0,\r\n column_names_centered = T,\r\n row_title_gp = gpar(fontsize = 11)\r\n)\r\ndraw(pd, x = unit(18, \"cm\"), y = unit(0.95, \"cm\"))\r\ngrid.text(\"A\", x = unit(0.5, \"cm\"), y = unit(17.2, \"cm\"), gp = gpar(fontsize=16))\r\ngrid.text(\"B\", x = unit(8.5, \"cm\"), y = unit(17.2, \"cm\"), gp = gpar(fontsize=16))\r\ndev.off()\r\n\r\n# Figure 4B\r\npdf(\"../results/Figure4AB_cluster4.pdf\", width = 11)\r\nHeatmap(as.matrix(avez.C0[order(clust.C0$clustA), c(\"E2\", \"E3\", \"E4\")]), \r\n col = colFun1,\r\n cluster_columns = FALSE,\r\n cluster_rows = FALSE,\r\n show_row_names = FALSE,\r\n show_column_names = TRUE,\r\n right_annotation = rowAnnotation(foo = anno(4, sort(clust.C0$clustA))),\r\n row_split = sort(clust.C0$clustA), \r\n column_names_side = \"bottom\",\r\n layer_fun = layer_fun,\r\n show_heatmap_legend = FALSE,\r\n width = unit(5, \"cm\"),\r\n column_names_rot = 0,\r\n column_names_centered = T,\r\n row_title_gp = gpar(fontsize = 11)\r\n)\r\ndraw(pd, x = unit(18, \"cm\"), y = unit(0.95, \"cm\"))\r\ngrid.text(\"A\", x = unit(0.5, \"cm\"), y = unit(17.2, \"cm\"), gp = gpar(fontsize=16))\r\ngrid.text(\"B\", x = unit(8.5, \"cm\"), y = unit(17.2, \"cm\"), gp = gpar(fontsize=16))\r\ndev.off()\r\n\r\n\r\n\r\nFigure 4C and 4D\r\nViolin plots where each dot represents a gene.\r\n\r\n\r\nmy_violin = function(genes){\r\n mDataZ.c = z.C0$z[genes, ]\r\n mDataZ.c.e2 = mDataZ.c[,z.C0$E == \"E2\"]\r\n mDataZ.c.e3 = mDataZ.c[,z.C0$E == \"E3\"]\r\n mDataZ.c.e4 = mDataZ.c[,z.C0$E == \"E4\"]\r\n\r\n my.dat = list(`2` = colMeans(mDataZ.c.e2), \r\n `3` = colMeans(mDataZ.c.e3), \r\n `4` = colMeans(mDataZ.c.e4))\r\n my.df = reshape2::melt(my.dat)\r\n my.df$L1 = as.numeric(my.df$L1)\r\n colnames(my.df) = c(\"z-score\", \"APOE\")\r\n my.df$Genes = unlist(lapply(my.dat, function(x) names(x)))\r\n\r\n ggthemr('greyscale')\r\n colnames(my.df) = c(\"z-score\", \"APOE.num\", \"Genes\")\r\n my.df$APOE = as.factor(my.df$APOE.num)\r\n \r\n to_swap = c(\"#62bba5\", # E4\r\n \"#785d37\", # E3\r\n \"#ffb84d\") # E2\r\n \r\n # figure label\r\n APOE_label = c(\"\\u03B52\", \"\\u03B53\", \"\\u03B54\")\r\n names(APOE_label) = c(\"E2\", \"E3\", \"E4\")\r\n \r\n to_swap = c(\"#62bba5\", # E4\r\n \"#785d37\", # E3\r\n \"#ffb84d\") # E2\r\n \r\n# violin plot\r\nfig4c = ggplot(my.df, aes(x = APOE.num, y = `z-score`, color = APOE)) +\r\n geom_violin(fill = \"white\", trim = FALSE) +\r\n geom_quasirandom(alpha = 0.3, width = 0.1, dodge.width = 0.9, varwidth = TRUE) +\r\n scale_color_manual(values = c(\"#62bba5\", \"#785d37\", \"#ffb84d\")) +\r\n labs(y = \"Average Z-Score\", x = \"APOE Genotype\") +\r\n theme(text = element_text(size = 14),\r\n legend.position=\"bottom\", legend.direction = \"horizontal\",\r\n axis.title.x = element_text(face = \"bold\", size = 11),\r\n axis.title.y = element_text(face = \"bold\", size = 11),\r\n axis.text.x = element_blank(),\r\n axis.ticks.x = element_blank(),\r\n legend.title = element_text(face = \"bold\")) +\r\n stat_summary(fun.data=mean_cl_normal, geom=\"errorbar\", width=0.2, position = position_dodge(0.9), color = \"black\")\r\n \r\n return(fig4c)\r\n}\r\n\r\nfig4c.c1 = my_violin(C0_c1.genes)\r\nfig4c.c4 = my_violin(C0_c4.genes)\r\n\r\nfig4c.c1.p = annotate_figure(fig4c.c1, fig.lab = \"C\",\r\n fig.lab.size = 16)\r\nfig4c.c4.p = annotate_figure(fig4c.c4, fig.lab = \"D\", \r\n fig.lab.size = 16)\r\n\r\nggsave(\"../results/Figure4C_0505.pdf\", fig4c.c1.p, width = 5, height = 4, device=cairo_pdf)\r\nggsave(\"../results/Figure4D_0505.pdf\", fig4c.c4.p, width = 5, height = 4, device=cairo_pdf)\r\n\r\n\r\n\r\nFigure 4E and 4F\r\nE4 vs. E3 average Z-score line plot from C0 to C3. Note that the warnings are due to the Greek letters in the plot.\r\n\r\n\r\n# remove subjects which have no APOE or CERAD record\r\nsel = (!is.na(cov$APOE)) & (!is.na(cov$C))\r\nmData.2 = mData[, sel]\r\n\r\n# z-score\r\nmeans = rowMeans(mData.2)\r\nsds = apply(mData.2, 1, sd)\r\nzscore = function(x) return((x - means)/sds)\r\nmDataZ = apply(mData.2, 2, zscore)\r\ncov[, EC := paste(paste0(\"C\", C), E, sep = \":\")]\r\nEC = cov$EC[sel]\r\n\r\n# average z-score\r\naveZ = data.table(Genes = rownames(mDataZ),\r\n C0E2 = rowMeans(mDataZ[, EC == \"C0:E2\"]),\r\n C0E3 = rowMeans(mDataZ[, EC == \"C0:E3\"]),\r\n C0E4 = rowMeans(mDataZ[, EC == \"C0:E4\"]),\r\n \r\n C1E2 = rowMeans(mDataZ[, EC == \"C1:E2\"]),\r\n C1E3 = rowMeans(mDataZ[, EC == \"C1:E3\"]),\r\n C1E4 = rowMeans(mDataZ[, EC == \"C1:E4\"]),\r\n \r\n C2E2 = rowMeans(mDataZ[, EC == \"C2:E2\"]),\r\n C2E3 = rowMeans(mDataZ[, EC == \"C2:E3\"]),\r\n C2E4 = rowMeans(mDataZ[, EC == \"C2:E4\"]),\r\n \r\n C3E2 = rowMeans(mDataZ[, EC == \"C3:E2\"]),\r\n C3E3 = rowMeans(mDataZ[, EC == \"C3:E3\"]),\r\n C3E4 = rowMeans(mDataZ[, EC == \"C3:E4\"]))\r\n \r\naveZ2 = melt(aveZ, id.vars = c(\"Genes\"))\r\ncolnames(aveZ2) = c(\"Genes\", \"EC\", \"aveZ\") \r\naveZ2[, CERAD := gsub(\"E[234]$\", \"\", aveZ2$EC)]\r\naveZ2[, APOE := gsub(\"C[0123]\", \"\", aveZ2$EC)]\r\n\r\n# colors\r\nggthemr(\"fresh\")\r\nto_swap = c(\"#62bba5\", # E4\r\n \"#785d37\", # E3\r\n \"#ffb84d\") # E2\r\n\r\n# figure label\r\nAPOE_label = c(\"\\u03B52\", \"\\u03B53\", \"\\u03B54\")\r\nnames(APOE_label) = c(\"E2\", \"E3\", \"E4\")\r\n\r\n# warnings because greek letters\r\nfig4.f = ggplot(aveZ2[Genes %in% C0_c4.genes], \r\n aes(x = CERAD, y = aveZ, color = APOE)) + \r\n geom_point() +\r\n geom_line(aes(group = factor(Genes)),\r\n color = \"black\",\r\n alpha = 0.1) +\r\n ylab(\"Average z-score\") +\r\n xlab(\"CERAD NP score\") +\r\n facet_grid(cols = vars(APOE), labeller = as_labeller(APOE_label)) +\r\n scale_color_manual(values = rev(to_swap), labels = APOE_label)\r\nfig4.f = annotate_figure(fig4.f, fig.lab = \"F\", fig.lab.size = 16)\r\nggsave(\"../results/Figure4F-astro-C4.pdf\", fig4.f, width = 10, height = 5, device=cairo_pdf)\r\n\r\n# warnings because greek letters\r\nfig4.e = ggplot(aveZ2[Genes %in% C0_c1.genes], \r\n aes(x = CERAD, y = aveZ, color = APOE)) + \r\n geom_point() +\r\n geom_line(aes(group = factor(Genes)),\r\n color = \"black\",\r\n alpha = 0.1) +\r\n ylab(\"Average z-score\") +\r\n xlab(\"CERAD NP score\") +\r\n facet_grid(cols = vars(APOE), labeller = as_labeller(APOE_label)) +\r\n scale_color_manual(values = rev(to_swap), labels = APOE_label)\r\nfig4.e = annotate_figure(fig4.e, fig.lab = \"E\", fig.lab.size = 16)\r\nggsave(\"../results/Figure4E-astro-C1.pdf\", fig4.e, width = 10, height = 5, device=cairo_pdf)\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:46:47-07:00"
+ "last_modified": "2021-05-19T22:39:48-07:00"
},
{
"path": "ROSMAP-microglia.html",
@@ -69,7 +69,7 @@
"description": "This script performs analysis of ROSMAP microglia data.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nSelect Microglia Genes\r\nPre-Processing\r\nRun Models\r\nModel 1\r\nModel 1.5\r\nModel 2\r\nModel 2.5\r\nModel 3\r\nModel 4\r\nModel 5\r\n\r\nZ-Score Analysis\r\nSpectral Clustering\r\nC0\r\nC3\r\nB1\r\nB3\r\nOverlap\r\n\r\nStatistical Testing\r\nFigure 2A and 2B\r\nFigure 2C\r\nFigure 3B\r\nFigure 3C\r\n\r\nDependencies\r\nLoad requisite packages. Note that package cttobin/ggthemr hosted on GitHub is used to provide ggplot2 themes. This package can be downloaded via devtools::install_github(\"cttobin/ggthemr\").\r\n\r\n\r\nrm(list = ls())\r\nrequire(limma)\r\nrequire(latex2exp)\r\nrequire(ComplexHeatmap)\r\nrequire(data.table)\r\nrequire(pheatmap)\r\nrequire(ggplot2)\r\nrequire(gridExtra)\r\nrequire(ggthemr)\r\nrequire(SNFtool)\r\nrequire(circlize)\r\nrequire(patchwork)\r\nrequire(cowplot)\r\nrequire(ggpubr)\r\nrequire(ggprism)\r\nrequire(ggbeeswarm)\r\n\r\nds = \"ROSMAP\"\r\ndtype = \"log2FPKM\"\r\nload(\"../Data/ROSMAP-24-adj-low.expr.genes.removed.Rdata\")\r\n\r\nsource(\"models.R\")\r\nsource(\"spectral-clustering.R\")\r\n\r\n# load expression data \r\nmData = expSet$mData\r\n\r\n\r\n\r\nSelect Microglia Genes\r\nFirst, select only microglia genes. Of the microglial genes, select only those that exist in ROSMAP.\r\n\r\n\r\ncelltype = \"microglia\"\r\n\r\n# select only microglia genes\r\ngenes = readLines(\"../Data/Microglia Genes.txt\")\r\n\r\n# select microglia genes that exist in ROSMAP\r\nannot = fread(\"../Data/ENSEMBL GRCh38.p7.csv\")\r\nannot = unique(annot[GeneSymbol %in% genes,])\r\nensembl_ids = intersect(rownames(mData), annot$EnsemblID)\r\nannot = unique(annot[EnsemblID %in% ensembl_ids,])\r\n\r\n# remove duplicated genes\r\nannot = annot[!duplicated(GeneSymbol), ]\r\nensembl_ids = annot$EnsemblID\r\n\r\n# check results\r\nany(duplicated(ensembl_ids)) # no duplicated ids\r\nany(is.na(ensembl_ids)) # no NA\r\nlength(ensembl_ids)\r\n\r\n\r\n\r\nPre-Processing\r\n\r\n\r\n# select interesting genes\r\nmData = mData[annot$EnsemblID, ]\r\nall(rownames(mData) == annot$EnsemblID)\r\nrownames(mData) = annot$GeneSymbol\r\n\r\n# set metadata\r\ncov = expSet$cov\r\n\r\n\r\n\r\nRun Models\r\nModel 1: exp ~ APOE + CERAD | exp ~ APOE + Braak\r\nModel 1.5: exp ~ APOE + CERAD e2, e4 dosage model\r\nModel 2 baseline: C0E4-C0E3, C0E2-C0E3 | B1E4-B1E3, B1E2-B1E3\r\nModel 2.5: exp ~ APOE + CERAD e2, e4 dosage model for C0 subjects only\r\nModel 3: C3E4-C3E3, C3E2-C3E3 | B3E4-B3E3, B3E2-B3E3\r\nModel 4: C23E4-C23E3, C23E2-C23E3\r\nModel 5: C1E4-C1E3, C1E2-C1E3\r\nModel 1\r\nRun Model 1, which is exp ~ APOE + CERAD | exp ~ APOE + Braak. Then, generate Table S2, which is comprised of differentially expressed microglia genes across APOE groups for ROSMAP DLPFC and each of MSBB brain regions.\r\n\r\n\r\n# run Model 1\r\ntT_CERAD = run_model1(mData, cov$C, cov$E, annot)\r\ntT_Braak = run_model1(mData, cov$B,cov$E, annot)\r\n\r\nlength(tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nlength(tT_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nlength(tT_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol) \r\n\r\n# Table S2\r\nTableS2 = copy(annot[order(GeneSymbol), ])\r\nTableS2[tT_CERAD, on=\"EnsemblID\", \r\n c(\"ROSMAP_E4vsE3_logFC\", \"ROSMAP_E4vsE3_CI.L\", \"ROSMAP_E4vsE3_CI.R\", \r\n \"ROSMAP_E4vsE3_P.Value\", \"ROSMAP_E4vsE3_adj.P.Val\", \r\n \"ROSMAP_E2vsE3_logFC\", \"ROSMAP_E2vsE3_CI.L\", \"ROSMAP_E2vsE3_CI.R\", \r\n \"ROSMAP_E2vsE3_P.Value\", \"ROSMAP_E2vsE3_adj.P.Val\") := .(\r\n i.E4vsE3_logFC, i.E4vsE3_CI.L, i.E4vsE3_CI.R,\r\n i.E4vsE3_P.Value, i.E4vsE3_adj.P.Val, \r\n i.E2vsE3_logFC, i.E2vsE3_CI.L, i.E2vsE3_CI.R, \r\n i.E2vsE3_P.Value, i.E2vsE3_adj.P.Val)]\r\nfwrite(TableS2, \"../results/TableS2.csv\")\r\n\r\n\r\n\r\nModel 1.5\r\nRun Model 1.5, which is exp ~ APOE + CERAD e2, e4 dosage model. Then, generate Table S3, which is comprised of differentially expressed microglia genes across APOE groups for ROSMAP DLPFC and each of MSBB brain regions using a dosage model.\r\n\r\n\r\n# run Model 1.5 with E4 and E2 copies as numeric\r\ntT_CERAD_ENum = run_model1.5(mData, cov$C, cov$E4num, cov$E2num, annot)\r\n\r\ne4_num_genes = intersect(tT_CERAD_ENum[e4_P.Value < 0.05 & e4_logFC > 0, GeneSymbol], tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,GeneSymbol])\r\nlength(e4_num_genes)\r\n\r\ne2_num_genes = intersect(tT_CERAD_ENum[e2_P.Value < 0.05 & e2_logFC < 0, GeneSymbol], tT_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0, GeneSymbol])\r\nlength(e2_num_genes)\r\n\r\nintersect(tT_CERAD_ENum[e4_P.Value < 0.05 & e4_logFC > 0, GeneSymbol],\r\n tT_CERAD_ENum[e2_P.Value < 0.05 & e2_logFC < 0, GeneSymbol])\r\n\r\n# Table S3\r\nTableS3 = annot[order(GeneSymbol), ]\r\nTableS3 = TableS3[tT_CERAD_ENum, on = .(EnsemblID, GeneSymbol, Description),\r\n c(\"ROSMAP_e4_logFC\", \"ROSMAP_e4_CI.L\", \"ROSMAP_e4_CI.R\", \r\n \"ROSMAP_e4_P.Value\", \"ROSMAP_e4_adj.P.Val\") := .(\r\n i.e4_logFC, i.e4_CI.L, i.e4_CI.R, i.e4_P.Value, i.e4_adj.P.Val)]\r\nTableS3 = TableS3[tT_CERAD_ENum, on = .(EnsemblID, GeneSymbol, Description), \r\n c(\"ROSMAP_e2_logFC\", \"ROSMAP_e2_CI.L\", \"ROSMAP_e2_CI.R\",\r\n \"ROSMAP_e2_P.Value\", \"ROSMAP_e2_adj.P.Val\") := .(\r\n i.e2_logFC, i.e2_CI.L, i.e2_CI.R, i.e2_P.Value, i.e2_adj.P.Val)]\r\nfwrite(TableS3, \"../results/TableS3.csv\")\r\n\r\n\r\n\r\nModel 2\r\nRun Model 2 baseline, which is C0E4-C0E3, C0E2-C0E3 | B1E4-B1E3, B1E2-B1E3.\r\n\r\n\r\n# run Model 2\r\ntT2_CERAD = run_model2(\"CERAD\", mData, cov$C, cov$E, annot) \r\ntT2_Braak = run_model2(\"Braak\", mData, cov$B, cov$E, annot)\r\n\r\nlength(tT2_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT2_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol) \r\n\r\nlength(tT2_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT2_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT2_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT2_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT2_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol) \r\nlength(tT2_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol)\r\n\r\n\r\n\r\nModel 2.5\r\nRun Model 2.5, which is exp ~ APOE + CERAD e2, e4 dosage model for C0 subjects only.\r\n\r\n\r\n# run Model 2.5 with with E4 and E2 copies as numeric, C0 subjects only\r\n\r\n# select only the C0 subjects\r\ncSubject = 0\r\nmData_C0 = mData[, which(cov$C == cSubject)]\r\n\r\ntT_CERAD_ENum_C0 = run_model2.5(mData_C0, cov[C == cSubject, E4num], \r\n cov[C == cSubject, E2num], annot)\r\n\r\nnrow(tT_CERAD_ENum_C0[e4_P.Value < 0.05, ]) \r\nnrow(tT_CERAD_ENum_C0[e2_P.Value < 0.05, ])\r\n\r\ne4Num_genes_C0 = intersect(\r\n tT_CERAD_ENum_C0[e4_P.Value < 0.05 & e4_logFC > 0, GeneSymbol], \r\n tT2_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0, GeneSymbol])\r\ne2Num_genes_C0 = intersect(\r\n tT_CERAD_ENum_C0[e2_P.Value < 0.05 & e2_logFC < 0,GeneSymbol], \r\n tT2_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0, GeneSymbol])\r\n\r\nintersect(tT_CERAD_ENum_C0[e4_P.Value < 0.05 & e4_logFC > 0, GeneSymbol],\r\n tT_CERAD_ENum_C0[e2_P.Value < 0.05 & e2_logFC > 0, GeneSymbol])\r\n\r\n\r\n\r\nModel 3\r\nRun Model 3, which is C3E4-C3E3, C3E2-C3E3 | B3E4-B3E3, B3E2-B3E3.\r\n\r\n\r\n# run Model 3\r\ntT3_CERAD = run_model3(\"CERAD\", mData, cov$C, cov$E, annot) \r\ntT3_Braak = run_model3(\"Braak\", mData, cov$B, cov$E, annot)\r\n\r\nlength(tT3_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT3_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT3_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT3_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT3_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT3_Braak[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT3_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT3_Braak[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol)\r\n\r\n\r\n\r\nModel 4\r\nRun Model 4, which is C23E4-C23E3, C23E2-C23E3.\r\n\r\n\r\n# run Model 4\r\ntT4_CERAD = run_model4(mData, cov$C, cov$APOE, annot) \r\n\r\nnrow(tT4_CERAD[E4vsE3_P.Value < 0.05,])\r\n\r\nlength(tT4_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT4_CERAD[E4vsE3_P.Value < 0.05 & E4vsE3_logFC < 0,]$GeneSymbol)\r\n\r\nlength(tT4_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC > 0,]$GeneSymbol)\r\nlength(tT4_CERAD[E2vsE3_P.Value < 0.05 & E2vsE3_logFC < 0,]$GeneSymbol)\r\n\r\n\r\n\r\nModel 5\r\nRun Model 5, which is C1E4-C1E3, C1E2-C1E3.\r\n\r\n\r\n# run Model 5\r\ntT5_CERAD = run_model5(mData, cov$C, cov$APOE, annot) \r\n\r\nnrow(tT5_CERAD[C1E4vsC1E3_P.Value < 0.05,])\r\nnrow(tT5_CERAD[C2E4vsC2E3_P.Value < 0.05,])\r\n\r\n\r\n\r\nZ-Score Analysis\r\nCompute z-scores.\r\n\r\n\r\nz.C0 = to_z_score(\"C\", 0, mData, cov)\r\nz.C3 = to_z_score(\"C\", 3, mData, cov)\r\nz.B1 = to_z_score(\"B\", 1, mData, cov)\r\nz.B3 = to_z_score(\"B\", 3, mData, cov)\r\n\r\n\r\n\r\nCalculate average z-scores.\r\n\r\n\r\navez.C0 = to_ave_z(z.C0)\r\navez.C3 = to_ave_z(z.C3)\r\navez.B1 = to_ave_z(z.B1)\r\navez.B3 = to_ave_z(z.B3)\r\n\r\n\r\n\r\nSpectral Clustering\r\nC0\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 3\r\nAfter running spectral clustering, generate Table S1, which is a detailed list of microglia-APOE cluster genes in ROSMAP DLPFC and MSBB STG CERAD 0 subjects.\r\n\r\n\r\n# spectral clustering\r\nc = 3; k = 3\r\nclust.C0 = spectral_clustering(avez.C0, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.C0$clustA))\r\nrownames(annot_row) = rownames(clust.C0$zMtx)\r\npheatmap(clust.C0$zMtx[order(clust.C0$clustA),],\r\n annotation_row = annot_row[order(clust.C0$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nC0.genes = sort(rownames(clust.C0$zMtx)[clust.C0$clustA == c])\r\nlength(C0.genes)\r\n\r\n# Table S1\r\nTableS1 = as.data.frame(cbind(clust.C0$clustA, clust.C0$zMtx))\r\nsetDT(TableS1)\r\nTableS1[, GeneSymbol := rownames(clust.C0$zMtx)]\r\nTableS1 = annot[GeneSymbol %in% TableS1$GeneSymbol][TableS1, on = .(GeneSymbol)]\r\n\r\ncolnames(TableS1) = c(\"EnsemblID\", \"GeneSymbol\", \"Description\", \"Cluster\", \"ROSMAP_E2\", \"ROSMAP_E3\", \"ROSMAP_E4\")\r\nTableS1 = TableS1[Cluster == c,]\r\nTableS1 = TableS1[,Cluster := NULL]\r\nTableS1 = TableS1[!duplicated(GeneSymbol), ]\r\nTableS1 = TableS1[order(GeneSymbol), ]\r\nfwrite(TableS1, \"../results/TableS1.csv\", row.names = F)\r\n\r\n\r\n\r\nC3\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 3\r\n\r\n\r\n# spectral clustering\r\nc = 3; k = 3\r\nclust.C3 = spectral_clustering(avez.C3, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.C3$clustA))\r\nrownames(annot_row) = rownames(clust.C3$zMtx)\r\npheatmap(clust.C3$zMtx[order(clust.C3$clustA),],\r\n annotation_row = annot_row[order(clust.C3$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nC3.genes = sort(rownames(clust.C3$zMtx)[clust.C3$clustA == c])\r\nlength(C3.genes)\r\n\r\n\r\n\r\nB1\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 3\r\n\r\n\r\n# spectral clustering\r\nk = 3; c = 3\r\nclust.B1 = spectral_clustering(avez.B1, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.B1$clustA))\r\nrownames(annot_row) = rownames(clust.B1$zMtx)\r\npheatmap(clust.B1$zMtx[order(clust.B1$clustA),],\r\n annotation_row = annot_row[order(clust.B1$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\nB1.genes = sort(rownames(clust.B1$zMtx)[clust.B1$clustA == c])\r\nsum(clust.B1$clustA == c)\r\nlength(B1.genes)\r\n\r\n\r\n\r\nB3\r\nNumber of Clusters: 3\r\nIndex of Cluster of Interest: 2\r\n\r\n\r\n# spectral clustering\r\nk = 3; c = 2\r\n# k = 4; c = 3\r\nclust.B3 = spectral_clustering(avez.B3, k)\r\n\r\n# visualization\r\nannot_row = data.frame(cluster = as.character(clust.B3$clustA))\r\nrownames(annot_row) = rownames(clust.B3$zMtx)\r\npheatmap(clust.B3$zMtx[order(clust.B3$clustA),],\r\n annotation_row = annot_row[order(clust.B3$clustA), , drop = F],\r\n cluster_rows = F, cluster_cols = F,\r\n show_rownames = F)\r\n\r\n# interested genes\r\n# interested in cluster 2\r\nB3.genes = sort(rownames(clust.B3$zMtx)[clust.B3$clustA == c])\r\nlength(B3.genes)\r\n\r\n\r\n\r\nOverlap\r\n\r\n\r\n# hypergeometric tests\r\nphyper_test(C0.genes, B1.genes)\r\nphyper_test(C3.genes, B3.genes)\r\nphyper_test(C0.genes, C3.genes)\r\nphyper_test(B1.genes, B3.genes)\r\n\r\n\r\n\r\nStatistical Testing\r\nStatistical testing of ROSMAP microglia-APOE genes across APOE genotypes. Use the average of z-scores for genes of interest for each individual as the outcome, with APOE genotype as a covariate and APOE E3/E3 as the baseline.\r\n\r\n\r\n# C0 test\r\ncov_C0 = cov[C == 0,]\r\nall(cov_C0$projid == colnames(z.C0$z))\r\ntest_avez(z.C0$z, C0.genes, cov_C0)\r\n\r\n# C3 test\r\ncov_C3 = cov[C == 3,]\r\nall(cov_C3$projid == colnames(z.C3$z))\r\ntest_avez(z.C3$z, C3.genes, cov_C3)\r\n\r\n# B1 test\r\ncov_B1 = cov[B == 1,]\r\nall(cov_B1$projid == colnames(z.B1$z))\r\ntest_avez(z.B1$z, B1.genes, cov_B1)\r\n\r\n# B3 test\r\ncov_B3 = cov[B == 3,]\r\nall(cov_B1$projid == colnames(z.B1$z))\r\ntest_avez(z.B3$z, B3.genes, cov_B3)\r\n\r\n\r\n\r\nFigure 2A and 2B\r\nFigure 2A: the heatmap for spectral clustering.\r\nFigure 2B: the individual z-score heatmap (C0 subjects).\r\nColumn cluster (subjects) based on E2, E3, E4 group separately. Row cluster (genes) based on dendrogram.\r\n\r\n\r\n# prepare data\r\nzC0 = z.C0$z\r\nzC0 = zC0[order(clust.C0$clustA),]\r\nzC0.genes = zC0[rownames(zC0) %in% C0.genes,]\r\n\r\n# color function\r\ncolFun1 = colorRamp2(c(-1, -0.5, 0, 0.5, 1), \r\n c(\"#4575b4\", \"#74add1\", \"white\", \"#f46d43\", \"#d73027\"))\r\ncolFun2 = colorRamp2(c(-4, -2, 0, 2, 4), \r\n c(\"#0000FFFF\", \"#7C50FDFF\", \"#EEEEEEFF\", \"#FF6545FF\", \"#FF0000FF\"))\r\n\r\n# Figure 2B\r\n# individual z-score heatmap for ROSMAP C0 subjects\r\ntopAnnot = HeatmapAnnotation(z = anno_barplot(\r\n colMeans(zC0.genes), smooth = TRUE, \r\n axis_param = list(gp = gpar(fontsize = 5))), \r\n annotation_height = unit(0.7, \"cm\"))\r\n\r\nh = Heatmap(zC0.genes, \r\n name = \"z-score\",\r\n col = colFun2,\r\n show_row_names = FALSE,\r\n show_column_names = FALSE,\r\n cluster_rows = TRUE,\r\n show_heatmap_legend = FALSE,\r\n column_split = factor(z.C0$E, levels = c(\"E2\", \"E3\", \"E4\")),\r\n cluster_column_slices = FALSE,\r\n column_title_gp = gpar(col = \"black\", fontsize = 12),\r\n show_column_dend = TRUE,\r\n show_row_dend = TRUE,\r\n border = \"#AAAAAA\",\r\n row_dend_side = \"right\",\r\n height = unit(12.7, \"cm\"),\r\n column_dend_height = unit(0.7, \"cm\"),\r\n row_dend_width = unit(0.7, \"cm\"),\r\n width = unit(17, \"cm\"),\r\n top_annotation = topAnnot\r\n)\r\n\r\npanel_fun = function(index, nm) {\r\n pushViewport(viewport())\r\n grid.rect()\r\n grid.rect(gp = gpar(fill = \"white\", col = \"white\"))\r\n grid.lines(c(0, 1, 1, 0), c(0, 0, 1, 1), gp = gpar(col = \"#AAAAAA\"), \r\n default.units = \"npc\")\r\n draw(h, newpage = FALSE)\r\n popViewport()\r\n}\r\n\r\n\r\n# box for Figure 2B\r\nzoom_idx = which(rownames(zC0) %in% C0.genes)\r\nlayer_fun = function(j, i, x, y, width, height, fill) {\r\n v = pindex(zC0, i, j)\r\n if(i %in% zoom_idx) {\r\n grid.rect(gp = gpar(lwd = 2, col = \"black\"))\r\n }\r\n}\r\n\r\nanno = anno_zoom(align_to = zoom_idx, which = \"row\", \r\n panel_fun = panel_fun, \r\n width = unit(21, \"cm\"), \r\n gap = unit(5, \"cm\"),\r\n size = unit(15.5, \"cm\"),\r\n link_width = unit(2, \"cm\"),\r\n link_height = unit(5, \"cm\"),\r\n link_gp = gpar(fill = \"white\", col = \"#AAAAAA\"),\r\n internal_line = FALSE)\r\n\r\n\r\n# legend\r\nlgd.h = Legend(col_fun = colFun1, title = \"A: Average z-score\", border = \"black\", \r\n legend_width = unit(8.9, \"cm\"), \r\n direction = \"horizontal\")\r\n\r\nlgd.have = Legend(col_fun = colFun2, title = \"B: z-score\", border = \"black\", \r\n legend_width = unit(8.9, \"cm\"), \r\n direction = \"horizontal\")\r\n\r\npd = packLegend(lgd.h, lgd.have, \r\n column_gap = unit(0.5, \"cm\"),\r\n direction = \"horizontal\")\r\n\r\n\r\n# Figure 2A\r\n# heatmap for average z-scores\r\npdf(\"../results/Figure2AB.pdf\", width = 11)\r\nHeatmap(as.matrix(avez.C0[order(clust.C0$clustA), c(\"E2\", \"E3\", \"E4\")]), \r\n col = colFun1,\r\n cluster_columns = FALSE,\r\n cluster_rows = FALSE,\r\n show_row_names = FALSE,\r\n show_column_names = TRUE,\r\n right_annotation = rowAnnotation(foo = anno),\r\n row_split = sort(clust.C0$clustA), \r\n column_names_side = \"bottom\",\r\n layer_fun = layer_fun,\r\n show_heatmap_legend = FALSE,\r\n width = unit(5, \"cm\"),\r\n column_names_rot = 0,\r\n column_names_centered = T,\r\n row_title_gp = gpar(fontsize = 11)\r\n)\r\ndraw(pd, x = unit(18.3, \"cm\"), y = unit(17, \"cm\"))\r\ngrid.text(\"A\", x = unit(0.5, \"cm\"), y = unit(17.2, \"cm\"), gp = gpar(fontsize=16))\r\ngrid.text(\"B\", x = unit(8.5, \"cm\"), y = unit(17.2, \"cm\"), gp = gpar(fontsize=16))\r\ndev.off()\r\n\r\n\r\n\r\nFigure 2C\r\nViolin plots where each dot represents a person.\r\n\r\n\r\n# zC0: z-score of C0 subjects only\r\nzC0 = z.C0$z[C0.genes, ]\r\nzC0.e2 = zC0[,z.C0$E == \"E2\"]\r\nzC0.e3 = zC0[,z.C0$E == \"E3\"]\r\nzC0.e4 = zC0[,z.C0$E == \"E4\"]\r\n\r\nmy.dat = list(`2` = colMeans(zC0.e2), `3` = colMeans(zC0.e3), `4` = colMeans(zC0.e4))\r\nmy.df = reshape2::melt(my.dat)\r\nmy.df$L1 = as.numeric(my.df$L1)\r\ncolnames(my.df) = c(\"z-score\", \"APOE.num\")\r\nggthemr('greyscale')\r\nmy.df$APOE = as.factor(my.df$APOE.num)\r\n \r\nto_swap = c(\"#62bba5\", # E4\r\n \"#785d37\", # E3\r\n \"#ffb84d\") # E2\r\n \r\n# violin plot\r\nggplot(my.df, aes(x = APOE.num, y = `z-score`, color = APOE)) +\r\n geom_violin(fill = \"white\", trim = FALSE) +\r\n geom_quasirandom(alpha = 0.3, width = 0.1, dodge.width = 0.9, varwidth = TRUE) +\r\n scale_color_manual(values = c(\"#62bba5\", \"#785d37\", \"#ffb84d\")) +\r\n labs(y = \"Average Z-Score\", x = \"APOE Genotype\") +\r\n theme(text = element_text(size = 14),\r\n legend.position=\"bottom\", legend.direction = \"horizontal\",\r\n axis.title.x = element_text(face = \"bold\", size = 11),\r\n axis.title.y = element_text(face = \"bold\", size = 11),\r\n axis.text.x = element_blank(),\r\n axis.ticks.x = element_blank(),\r\n legend.title = element_text(face = \"bold\")) +\r\n stat_summary(fun.data=mean_cl_normal, geom=\"errorbar\", width=0.2, position = position_dodge(0.9), color = \"black\")\r\nggsave(\"../results/Figure2C-ROSMAP.pdf\", width = 5, height = 4, dpi = 300)\r\n\r\n\r\n\r\nFigure 3B\r\nE4 vs. E3 average Z-score line plot from C0 to C3.\r\n\r\n\r\nall(colnames(mData) == cov$projid)\r\n\r\n# remove subjects which have no APOE or CERAD record\r\nsel = (!is.na(cov$APOE)) & (!is.na(cov$C))\r\nmData.2 = mData[, sel]\r\n\r\n# z-score\r\nmeans = rowMeans(mData.2)\r\nsds = apply(mData.2, 1, sd)\r\nzscore = function(x) return((x - means)/sds)\r\nmDataZ = apply(mData.2, 2, zscore)\r\ncov[, EC := paste(paste0(\"C\", C), E, sep = \":\")]\r\nEC = cov$EC[sel]\r\n\r\n# average z-score\r\naveZ = data.table(Genes = rownames(mDataZ),\r\n C0E2 = rowMeans(mDataZ[, EC == \"C0:E2\"]),\r\n C0E3 = rowMeans(mDataZ[, EC == \"C0:E3\"]),\r\n C0E4 = rowMeans(mDataZ[, EC == \"C0:E4\"]),\r\n \r\n C1E2 = rowMeans(mDataZ[, EC == \"C1:E2\"]),\r\n C1E3 = rowMeans(mDataZ[, EC == \"C1:E3\"]),\r\n C1E4 = rowMeans(mDataZ[, EC == \"C1:E4\"]),\r\n \r\n C2E2 = rowMeans(mDataZ[, EC == \"C2:E2\"]),\r\n C2E3 = rowMeans(mDataZ[, EC == \"C2:E3\"]),\r\n C2E4 = rowMeans(mDataZ[, EC == \"C2:E4\"]),\r\n \r\n C3E2 = rowMeans(mDataZ[, EC == \"C3:E2\"]),\r\n C3E3 = rowMeans(mDataZ[, EC == \"C3:E3\"]),\r\n C3E4 = rowMeans(mDataZ[, EC == \"C3:E4\"]))\r\n\r\naveZ2 = melt(aveZ, id.vars = c(\"Genes\"))\r\ncolnames(aveZ2) = c(\"Genes\", \"EC\", \"aveZ\") \r\naveZ2[, CERAD := gsub(\"E[234]$\", \"\", aveZ2$EC)]\r\naveZ2[, APOE := gsub(\"C[0123]\", \"\", aveZ2$EC)]\r\n\r\n# colors\r\nggthemr(\"fresh\")\r\nto_swap = c(\"#62bba5\", # E4\r\n \"#785d37\", # E3\r\n \"#ffb84d\") # E2\r\n\r\n# figure label\r\nAPOE_label = c(\"\\u03B52\", \"\\u03B53\", \"\\u03B54\")\r\nnames(APOE_label) = c(\"E2\", \"E3\", \"E4\")\r\n\r\n# Figure 3B\r\nggplot(aveZ2[Genes %in% C13.genes], aes(x = CERAD, y = aveZ, color = APOE)) + \r\n geom_point() +\r\n geom_line(aes(group = factor(Genes)),\r\n color = \"black\",\r\n alpha = 0.1) +\r\n ylab(\"Average z-score\") +\r\n xlab(\"CERAD NP score\") +\r\n facet_grid(cols = vars(APOE), labeller = as_labeller(APOE_label)) +\r\n scale_color_manual(values = rev(to_swap), labels = APOE_label)\r\nggsave(\"../results/Figure3B_intersect_C0&C3.genes.pdf\", width = 10, height = 5, device=cairo_pdf)\r\n\r\n\r\n\r\nFigure 3C\r\nCorrelation matrix comparing the APOE4 versus APOE3 change.\r\n\r\n\r\n# prepare data for downstream analysis\r\nfig3_tT2_CERAD = copy(tT2_CERAD)\r\ncolnames(fig3_tT2_CERAD) = gsub(\"E4vsE3\", \"C0E4vsC0E3\",colnames(fig3_tT2_CERAD))\r\ncolnames(fig3_tT2_CERAD) = gsub(\"E2vsE3\", \"C0E2vsC0E3\",colnames(fig3_tT2_CERAD))\r\nfig3_tT3_CERAD = copy(tT3_CERAD)\r\ncolnames(fig3_tT3_CERAD) = gsub(\"E4vsE3\", \"C3E4vsC3E3\",colnames(fig3_tT3_CERAD))\r\ncolnames(fig3_tT3_CERAD) = gsub(\"E2vsE3\", \"C3E2vsC3E3\",colnames(fig3_tT3_CERAD))\r\n\r\nfig3 = fig3_tT2_CERAD[tT5_CERAD, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\nfig3 = fig3[fig3_tT3_CERAD, on=c(\"GeneSymbol\", \"EnsemblID\", \"Description\")]\r\n\r\nfig3.1 = fig3[GeneSymbol %in% C13.genes, \r\n .(C0E4vsC0E3_logFC, C1E4vsC1E3_logFC, C2E4vsC2E3_logFC, C3E4vsC3E3_logFC)]\r\nfig3.2 = tidyr::gather(fig3.1, \"CERAD\", \"logFC\", 1:4) \r\n\r\ncolnames(fig3.1) = c(\"C0\", \"C1\", \"C2\", \"C3\")\r\nggthemr(\"fresh\")\r\nfig3.c = tidyr::gather(fig3.1, \"CERAD\", \"logFC\")\r\nsetDT(fig3.c)\r\n\r\n# axis labels\r\naxisLow = list(\"C0\" = c(\"C1\", \"C2\", \"C3\"),\r\n \"C1\" = c(\"C2\", \"C3\"),\r\n \"C2\" = c(\"C3\"))\r\n\r\naxisUp = list(\"C1\" = c(\"C0\"),\r\n \"C2\" = c(\"C0\", \"C1\"),\r\n \"C3\" = c(\"C0\", \"C1\", \"C2\"))\r\n\r\n# color\r\ncolor = list(\"C0\" = \"#f1c40f\",\r\n \"C1\" = \"#3498db\",\r\n \"C2\" = \"#2ecc71\",\r\n \"C3\" = \"#e74c3c\")\r\n\r\n# Figure 3C\r\np = list()\r\nfor(pCol in names(axisLow)){\r\n x = pCol\r\n for (y in axisLow[[pCol]]){\r\n label = paste0(x, y)\r\n p[[\"low\"]][[label]] = ggplot(data = fig3.1, mapping = aes_string(x, y)) +\r\n geom_point(size = 0.5) +\r\n geom_abline(intercept = 0, slope = 1, color = \"black\", linetype = 2, alpha = 1.4) +\r\n xlim(-0.4, 1.2) + ylim(-0.4, 1.2) +\r\n theme(axis.line=element_line(color = \"#786049\"),\r\n panel.border = element_rect(color = \"#786049\", fill = NA, size=1)) +\r\n ylab(y) + xlab(x) \r\n }\r\n}\r\n\r\nfor(pCol in names(axisUp)){\r\n x = pCol\r\n for (y in axisUp[[pCol]]){\r\n label = paste0(x, y)\r\n p[[\"up\"]][[label]] = ggplot(fig3.c[CERAD %in% c(x, y), ], \r\n aes(x = logFC, group = CERAD, fill = CERAD, color = CERAD)) +\r\n geom_density(alpha = 0.5) +\r\n ylab(y) +\r\n xlab(x) +\r\n xlim(-0.4, 1.2) +\r\n ylim(0, 4.5) +\r\n theme(panel.border = element_rect(color = \"#786049\", fill = NA, size=1)) + \r\n scale_fill_manual(breaks = c(\"C0\", \"C1\", \"C2\", \"C3\"), \r\n values = c(\"#f1c40f\", \"#3498db\", \"#2ecc71\", \"#e74c3c\"))+ \r\n scale_color_manual(breaks = c(\"C0\", \"C1\", \"C2\", \"C3\"), \r\n values = c(\"#f1c40f\", \"#3498db\", \"#2ecc71\", \"#e74c3c\")) + \r\n theme(legend.position = \"none\") \r\n }\r\n}\r\n\r\nfor(pCol in c(\"C0\", \"C1\", \"C2\", \"C3\")){\r\n x = pCol\r\n p[[x]] = ggplot() + \r\n annotate(\"text\", x = 4, y = 25, size=8, label = x, color = \"#555555\") + \r\n theme(panel.border = element_rect(color = \"#786049\", fill = NA, size=1),\r\n panel.grid.major = element_blank(),\r\n axis.ticks = element_line(color = \"white\")\r\n ) + \r\n scale_x_discrete(labels= \" \") +\r\n scale_y_discrete(labels= \" \") +\r\n xlab(x) +\r\n ylab(x) \r\n}\r\n\r\n# legend \r\npAllD = ggplot(fig3.c, \r\n aes(x = logFC, group = CERAD, fill = CERAD, color = CERAD)) +\r\n geom_density(alpha = 0.5) +\r\n scale_fill_manual(breaks = c(\"C0\", \"C1\", \"C2\", \"C3\"), \r\n values = c(\"#f1c40f\", \"#3498db\", \"#2ecc71\", \"#e74c3c\"))+ \r\n scale_color_manual(breaks = c(\"C0\", \"C1\", \"C2\", \"C3\"), \r\n values = c(\"#f1c40f\", \"#3498db\", \"#2ecc71\", \"#e74c3c\")) \r\nlegend = cowplot::get_legend(pAllD)\r\n\r\nfig3c = ggarrange(p[[\"C0\"]], p[[\"up\"]][[\"C1C0\"]], p[[\"up\"]][[\"C2C0\"]], \r\n p[[\"up\"]][[\"C3C0\"]], p[[\"low\"]][[\"C0C1\"]], p[[\"C1\"]],\r\n p[[\"up\"]][[\"C2C1\"]], p[[\"up\"]][[\"C3C1\"]],\r\n p[[\"low\"]][[\"C0C2\"]], p[[\"low\"]][[\"C1C2\"]], \r\n p[[\"C2\"]], p[[\"up\"]][[\"C3C2\"]],\r\n p[[\"low\"]][[\"C0C3\"]], p[[\"low\"]][[\"C1C3\"]], \r\n p[[\"low\"]][[\"C2C3\"]], p[[\"C3\"]],\r\n nrow = 4, ncol = 4, heights = c(3, 3, 3, 3), align = \"hv\")\r\nfig3c2 = annotate_figure(fig3c,\r\n bottom = text_grob(\"log Fold Change E4 vs E3 in CERAD NP score\", \r\n size = 12, color = \"#555555\"),\r\n left = text_grob(\"log Fold Change E4 vs E3 in CERAD NP score\", \r\n size = 12, rot = 90, color = \"#555555\")\r\n)\r\nfig3c3 = cowplot::plot_grid(fig3c2, NULL, legend, \r\n rel_widths = c(4, 0.02, .3), nrow = 1)\r\nggsave(\"../results/Figure3C_intersect_C0&C3.genes.pdf\", fig3c3, width = 9, height = 8)\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:47:07-07:00"
+ "last_modified": "2021-05-19T22:40:07-07:00"
},
{
"path": "ROSMAP-preprocess.html",
@@ -77,7 +77,7 @@
"description": "This script removes low-expressed ROSMAP genes and adjusts by age, sex, and post-mortem interval (PMI).\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nLoad Data\r\nFilter Genes\r\nAdjust and Transform\r\nSave Data\r\n\r\nDependencies\r\nLoad requisite packages.\r\n\r\n\r\nlibrary(data.table)\r\n\r\n\r\n\r\nLoad Data\r\nLoad .Rdata object saved by prior script.\r\n\r\n\r\n# load data\r\nload(\"../Data/ROSMAP-24.Rdata\")\r\nrosmap_fpkm = rosmap$fpkm\r\ncov = rosmap$cov\r\n\r\n\r\n\r\nFilter Genes\r\nRemove low-expressed genes.\r\n\r\n\r\n# remove low expressed genes\r\ngene_sel = rowMeans(rosmap_fpkm) > 0.1\r\nrosmap_fpkm = rosmap_fpkm[gene_sel, ]\r\n\r\n\r\n\r\nAdjust and Transform\r\nAdjust by age, sex, and post-mortem interval (PMI), then log-transform data.\r\n\r\n\r\n# create empty matrix to populate\r\nadj_exp = matrix(NA, ncol = ncol(rosmap_fpkm), nrow = nrow(rosmap_fpkm))\r\ncolnames(adj_exp) = cov$projid\r\nrownames(adj_exp) = rownames(rosmap_fpkm)\r\n\r\n# adjust by age, sex, and PMI\r\nmsex = cov$msex\r\nage_at_death = cov$age_at_death\r\npmi = cov$pmi\r\n\r\n# impute missing values with mean\r\npmi[is.na(pmi)] = mean(pmi, na.rm = T)\r\n\r\n\r\n\r\nLog-transform data.\r\n\r\n\r\n# log2(fpkm + 0.0001) adjusted by age, sex, and PMI\r\nfor(gene in rownames(rosmap_fpkm)){\r\n exp = log2(as.numeric(rosmap_fpkm[gene,]) + 0.1^4)\r\n lm_fit = lm(exp ~ msex + age_at_death + pmi)\r\n adj_exp[gene, ] = lm_fit$residuals\r\n}\r\n\r\n\r\n\r\nSave Data\r\n\r\n\r\n# save data\r\nexpSet = list(mData = adj_exp,\r\n cov = cov)\r\nsave(expSet, file = \"../Data/ROSMAP-24-adj-low.expr.genes.removed.Rdata\")\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:47:11-07:00"
+ "last_modified": "2021-05-19T22:40:10-07:00"
},
{
"path": "ROSMAP-variables.html",
@@ -85,7 +85,7 @@
"description": "This script first performs preprocessing of ROSMAP FPKM and covariate data so that they have same order, then defines variables in the covariate data.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nDependencies\r\nRead FPKM Data\r\nRead Covariate Data\r\nReorder Data\r\nDefine Clinical Variables\r\nBraak\r\nCERAD\r\nAPOE\r\nE4 Count\r\nE2 Count\r\nAge at Death\r\n\r\nSave Results\r\n\r\nDependencies\r\nLoad requisite packages.\r\n\r\n\r\nlibrary(data.table)\r\n\r\n\r\n\r\nRead FPKM Data\r\nRead FPKM data, remove the version from Ensembl ID, and remove duplicated samples.\r\n\r\n\r\n# read FPKM data\r\nfpkm_file = \"../Data/ROSMAP_RNAseq_FPKM_gene.tsv\"\r\nrosmap_fpkm = fread(fpkm_file, stringsAsFactors = FALSE, header = T, sep = \"\\t\")\r\nall(rosmap_fpkm$tracking_id == rosmap_fpkm$gene_id)\r\ngene_id = rosmap_fpkm$gene_id\r\nrosmap_fpkm = rosmap_fpkm[, c(-1,-2), with = F]\r\nrosmap_fpkm = as.data.frame(rosmap_fpkm)\r\n\r\n# remove version from Ensembl ID\r\nrownames(rosmap_fpkm) = gsub(\"\\\\.\\\\d*$\", \"\", gene_id)\r\n\r\n# remove duplicated samples\r\nduplicate_sample_ids = c(\"492_120515_0\",\"492_120515_6\")\r\nrosmap_fpkm = rosmap_fpkm[, -which(colnames(rosmap_fpkm) %in% duplicate_sample_ids)]\r\n\r\n\r\n\r\nRead Covariate Data\r\nRead ROSMAP ID mapping file.\r\n\r\n\r\n# read ROSMAP ID map\r\nkey_map_file = \"../Data/ROSMAP_IDkey.csv\"\r\nkey_map = fread(key_map_file)\r\nkey_map = unique(key_map[, .(projid, mrna_id)])\r\n\r\n\r\n\r\nRead and pre-process covariate data.\r\n\r\n\r\n# read and process covariate data\r\ncov_file = \"../Data/ROSMAP_clinical.csv\"\r\ncov = fread(cov_file)\r\n\r\n# rename covariate data and merge with ROSMAP IDs\r\nsetnames(cov, c(\"projid\", \"cts_mmse30_lv\", \"braaksc\", \"ceradsc\", \"cogdx\", \"apoe_genotype\"), \r\n c(\"projid\", \"MMSE\", \"Braak\", \"CERAD\", \"ClinicalDiagnosis\", \"RawAPOE\"))\r\ncov[key_map, on = .(projid), mrna_id := mrna_id]\r\n\r\n# no version number\r\ncov[, mrna_id_nov := gsub(\"_\\\\d$\", \"\", mrna_id)]\r\n\r\n\r\n\r\nReorder Data\r\n\r\n\r\n# extract clinical data in the same order as in the data\r\ncolnames(rosmap_fpkm) = gsub(\"_\\\\d$\", \"\", colnames(rosmap_fpkm)) \r\n\r\n# remove samples without clinical data\r\ncolnames(rosmap_fpkm)[!colnames(rosmap_fpkm) %in% c(cov$mrna_id_nov)]\r\nrosmap_fpkm = rosmap_fpkm[, colnames(rosmap_fpkm) %in% cov$mrna_id_nov]\r\nall(colnames(rosmap_fpkm) %in% cov$mrna_id_nov)\r\ncov = cov[data.table(mrna_id_nov = colnames(rosmap_fpkm)), on = .(mrna_id_nov)]\r\n\r\n# check that cov and rosmap_fpkm have same order\r\nall(cov$mrna_id_nov == colnames(rosmap_fpkm))\r\ncolnames(rosmap_fpkm) = cov$projid\r\n\r\n\r\n\r\nDefine Clinical Variables\r\nBraak\r\n\r\nGroup levels into three categories:\r\n\r\n\r\n1 = Normal/I/II\r\n\r\n\r\n2 = III/IV\r\n\r\n\r\n3 = V/VI\r\n\r\n\r\n\r\n# refactor\r\ncov[, B := as.factor(cov$Braak)]\r\nlevels(cov$B) = c(1,1,1,2,2,3,3)\r\ncov[, B := as.character(B)]\r\ncov[, B := factor(B)]\r\n\r\n# double-check\r\ntable(cov$B, cov$Braak)\r\n\r\n\r\n\r\nCERAD\r\n\r\nOld ROSMAP levels:\r\n\r\n\r\n1 = Definite\r\n\r\n\r\n2 = Probable\r\n\r\n\r\n3 = Possible\r\n\r\n\r\n4 = Normal\r\n\r\n\r\nDefine new levels:\r\n\r\n\r\n1 = Normal\r\n\r\n\r\n2 = Possible\r\n\r\n\r\n3 = Probable\r\n\r\n\r\n4 = Definite\r\n\r\n\r\n\r\n# refactor\r\ncov[, C := as.factor(CERAD)]\r\nlevels(cov$C) = c(3,2,1,0)\r\ncov[, C := as.character(C)]\r\ncov[, C := factor(C)]\r\n\r\n# double-check\r\ntable(cov$C, cov$CERAD)\r\n\r\n\r\n\r\nAPOE\r\n\r\nGroup levels into three categories:\r\n\r\n\r\nE2 = E2/E2 and E2/E3\r\n\r\n\r\nE3 = E3/E3\r\n\r\n\r\nE4 = E2/E4, E3/E4, and E4/E4\r\n\r\n\r\n\r\n# refactor\r\ncov[, APOE := as.factor(RawAPOE)]\r\nlevels(cov$APOE) = c(\"E2\", \"E2\", \"E4\", \"E3\", \"E4\", \"E4\")\r\ncov[, E := as.character(APOE)]\r\ncov[, E := factor(E, levels = c(\"E3\", \"E2\", \"E4\"))]\r\n\r\n# double-check\r\ntable(cov$E, cov$RawAPOE)\r\n\r\n\r\n\r\nE4 Count\r\n\r\nGroup levels into three categories:\r\n\r\n\r\n0 = E2/E2, E3/E3, and E2/E3\r\n\r\n\r\n1 = E2/E4 and E3/E4\r\n\r\n\r\n2 = E4/E4\r\n\r\n\r\n\r\n# refactor\r\ncov[, E4num := as.factor(RawAPOE)]\r\nlevels(cov$E4num) = c(0, 0, 1, 0, 1, 2)\r\ncov[, E4num := as.numeric(as.character(E4num))]\r\n\r\n# double-check\r\ntable(cov$E4num, cov$RawAPOE)\r\n\r\n\r\n\r\nE2 Count\r\n\r\nGroup levels into three categories:\r\n\r\n\r\n0 = E4/E4, E3/E3, and E3/E4\r\n\r\n\r\n1 = E2/E4 and E2/E3\r\n\r\n\r\n2 = E2/E2\r\n\r\n\r\n\r\n# refactor\r\ncov[, E2num := as.factor(RawAPOE)]\r\nlevels(cov$E2num) = c(2, 1, 1, 0, 0, 0)\r\ncov[, E2num := as.numeric(as.character(E2num))]\r\n\r\n# double-check\r\ntable(cov$E2num, cov$RawAPOE)\r\n\r\n\r\n\r\nAge at Death\r\nClean age at death variable.\r\n\r\n\r\n# clean age at death\r\ncov[, age_at_death := gsub(\"90\\\\+\", \"91\", age_death)]\r\ncov[, age_at_death := as.numeric(age_at_death)]\r\n\r\n\r\n\r\nSave Results\r\n\r\n\r\n# save results\r\nrosmap = list(fpkm = rosmap_fpkm,\r\n cov = cov)\r\nsave(rosmap, file = \"../Data/ROSMAP-24.Rdata\")\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:47:18-07:00"
+ "last_modified": "2021-05-19T22:40:15-07:00"
},
{
"path": "spectral-clustering.html",
@@ -93,7 +93,7 @@
"description": "This script defines the functions which are used to perform spectral clustering.\n",
"author": [],
"contents": "\r\n\r\nContents\r\nCalculate Affinity Matrix\r\nZ-Scores\r\nCalculate Z-Score\r\nCalculate Average Z-Score\r\n\r\nSpectral Clustering\r\nStatistical Testing\r\nHypergeometric Tests\r\n\r\nCalculate Affinity Matrix\r\nCalculate affinity matrix for spectral clustering. Code is derived from the SNFtool package, see associated publication in Nature Methods here.\r\n\r\n\r\n#' @param diff \r\naffinityCustom = function (diff, sigma = 0.5) \r\n{\r\n N <- nrow(diff)\r\n diff <- (diff + t(diff))/2\r\n diag(diff) <- 0\r\n sortedColumns <- as.matrix(t(apply(diff, 2, sort)))\r\n finiteMean <- function(x) {\r\n return(mean(x[is.finite(x)]))\r\n }\r\n # this line has been modified to remove [, 1:K + 1]\r\n means <- apply(sortedColumns, 1, finiteMean) + \r\n .Machine$double.eps\r\n avg <- function(x, y) {\r\n return((x + y)/2)\r\n }\r\n Sig <- outer(means, means, avg)/3 * 2 + diff/3 + .Machine$double.eps\r\n Sig[Sig <= .Machine$double.eps] <- .Machine$double.eps\r\n densities <- dnorm(diff, 0, sigma * Sig, log = FALSE)\r\n W <- (densities + t(densities))/2\r\n return(W)\r\n}\r\n\r\n\r\n\r\nZ-Scores\r\nCalculate Z-Score\r\n\r\n\r\n#' @param sf string; C(CERAD)/B(Braak);\r\n#' @param level string; level of CERAD or Braak stage;\r\n#' @param mData matrix; expression matrix\r\n#' @param cov data.table; clinical data\r\nto_z_score = function(sf, level, mDataG, cov){\r\n if(!all(colnames(mDataG) == cov$projid)){\r\n stop()\r\n }\r\n if(sf == \"C\"){\r\n cat(\"select CERAD\", level, \"samples..\\n\")\r\n remove_proj = cov[,is.na(C) | is.na(E)]\r\n if(any(remove_proj)){\r\n cat(\"remove\", sum(remove_proj), \"samples with missing values..\\n\")\r\n mDataG = mDataG[, !remove_proj]\r\n cov = cov[!is.na(C) & !is.na(E),]\r\n }\r\n E = cov[C == level, E]\r\n sel = cov$C == level\r\n }else if(sf == \"B\"){\r\n cat(\"select Braak\", level, \"samples..\\n\")\r\n remove_proj = cov[,is.na(B) | is.na(E)]\r\n if(any(remove_proj)){\r\n cat(\"remove\", sum(remove_proj),\"samples with missing values..\\n\")\r\n mDataG = mDataG[, !remove_proj]\r\n cov = cov[!is.na(B) & !is.na(E),]\r\n }\r\n E = cov[B == level, E]\r\n sel = cov$B == level\r\n }else{\r\n stop()\r\n }\r\n \r\n means = rowMeans(mDataG[, sel])\r\n sds = apply(mDataG[, sel], 1, sd)\r\n zscore = function(x) return((x - means)/sds)\r\n mData_z = apply(mDataG[, sel], 2, zscore)\r\n \r\n # check should be TRUE\r\n cat(\"random check:\",\r\n all(mData_z[9,] == (mDataG[9,sel] - mean(as.numeric(mDataG[9,sel])))/sd(as.numeric(mDataG[9,sel]))),\r\n \"\\n\")\r\n return(list(z = mData_z, E = E, mData = mDataG[, sel],\r\n means = means, sds = sds))\r\n}\r\n\r\n\r\n\r\nCalculate Average Z-Score\r\n\r\n\r\n#' @param z_list list; list generated by to_z_score()\r\nto_ave_z = function(z_list){\r\n z = z_list$z\r\n E = z_list$E\r\n return(data.table(Genes = rownames(z),\r\n E2 = rowMeans(z[, E == \"E2\"]),\r\n E3 = rowMeans(z[, E == \"E3\"]),\r\n E4 = rowMeans(z[, E == \"E4\"])))\r\n}\r\n\r\n\r\n\r\nSpectral Clustering\r\n\r\n\r\n#' @param avez data.table/data.frame/matrix containing the average z-scores of E2, E3, and E4\r\n#' @param k number of cluster\r\nspectral_clustering = function(avez, k){\r\n set.seed(9)\r\n \r\n zMtx = as.matrix(avez[, c(\"E2\", \"E3\", \"E4\")])\r\n rownames(zMtx) = avez$Genes\r\n zMtx = zMtx[!duplicated(rownames(zMtx)), ]\r\n \r\n # calculate distance matrix\r\n distM = zMtx %>% dist2(., .) %>% .^(1/2)\r\n # calculate similarity matrix\r\n simM = affinityCustom(distM)\r\n # perform spectral clustering\r\n clustA = spectralClustering(simM, K = k)\r\n \r\n return(list(clustA = clustA,\r\n zMtx = zMtx))\r\n}\r\n\r\n\r\n\r\nStatistical Testing\r\nStatistical testing of interested microglia-APOE genes across APOE genotypes.\r\n\r\n\r\n#' @param z matrix; z \r\n#' @param genes vector; vector of interested genes\r\n#' @param cov data.table; clinical data\r\n#' @param adj.donor T/F; adjusted by donor or not\r\n#' @param donor donor id; need provide if adj.donor = T\r\ntest_avez = function(zmtx, clust_genes, cov, donor = NULL, adj.donor = FALSE){\r\n data = data.frame(\r\n ave_z = colMeans(zmtx[clust_genes, ]),\r\n APO = cov$E)\r\n if(levels(cov$E)[1] != \"E3\"){\r\n stop(\"must use E3 as reference\\n\")\r\n }\r\n if(adj.donor){\r\n require(lme4)\r\n library(lmerTest)\r\n data$donor = donor\r\n fit = lmer(ave_z ~ (1|donor)+ APO, data = data)\r\n }else{\r\n fit = lm(ave_z ~ APO, data = data)\r\n }\r\n return(summary(fit, confint = TRUE, digits = 3))\r\n}\r\n\r\n\r\n\r\nHypergeometric Tests\r\nHypergeometric tests of interested microglia-APOE genes across APOE genotypes.\r\n\r\n\r\n#' @param genes1 vector;\r\n#' @param genes2 vector; \r\nphyper_test = function(genes1, genes2){\r\n q = length(intersect(genes1, genes2))\r\n s = length(genes1)\r\n n = nrow(mData)\r\n m = length(genes2)\r\n p = phyper(q,m,n,s, lower.tail = F)\r\n return(p)\r\n}\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
- "last_modified": "2021-05-19T14:47:26-07:00"
+ "last_modified": "2021-05-19T22:40:21-07:00"
}
],
"collections": []
diff --git a/docs/site_libs/header-attrs-2.6/header-attrs.js b/docs/site_libs/header-attrs-2.8/header-attrs.js
similarity index 100%
rename from docs/site_libs/header-attrs-2.6/header-attrs.js
rename to docs/site_libs/header-attrs-2.8/header-attrs.js
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 5388a70..578e7b6 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -6,7 +6,7 @@
https://mindds.github.io/apoe-glia/
- 2021-05-19T14:08:17-07:00
+ 2021-05-19T22:17:56-07:00
https://mindds.github.io/apoe-glia/models.html
diff --git a/docs/spectral-clustering.html b/docs/spectral-clustering.html
index ce7ffc6..d1300af 100644
--- a/docs/spectral-clustering.html
+++ b/docs/spectral-clustering.html
@@ -2156,7 +2156,7 @@
color: var(--hover-color, white);
}
-
+
@@ -2195,8 +2195,8 @@
▾
@@ -2206,12 +2206,12 @@
▾
@@ -2221,13 +2221,13 @@
▾
-qPCR
+qPCR
@@ -2274,7 +2274,7 @@ Calculate Affinity Matrix
N <- nrow ( diff )
diff <- ( diff + t ( diff ) ) / 2
diag ( diff ) <- 0
- sortedColumns <- as.matrix ( t ( apply ( diff , 2 , sort ) ) )
+ sortedColumns <- as.matrix ( t ( apply ( diff , 2 , sort ) ) )
finiteMean <- function ( x ) {
return ( mean ( x [ is.finite ( x ) ] ) )
}
@@ -2351,7 +2351,7 @@ Calculate Average Z-Score
to_ave_z = function ( z_list ) {
z = z_list $ z
E = z_list $ E
- return ( data.table ( Genes = rownames ( z ) ,
+ return ( data.table ( Genes = rownames ( z ) ,
E2 = rowMeans ( z [ , E == "E2" ] ) ,
E3 = rowMeans ( z [ , E == "E3" ] ) ,
E4 = rowMeans ( z [ , E == "E4" ] ) ) )
@@ -2367,9 +2367,9 @@ Spectral Clustering
spectral_clustering = function ( avez , k ) {
set.seed ( 9 )
- zMtx = as.matrix ( avez [ , c ( "E2" , "E3" , "E4" ) ] )
+ zMtx = as.matrix ( avez [ , c ( "E2" , "E3" , "E4" ) ] )
rownames ( zMtx ) = avez $ Genes
- zMtx = zMtx [ ! duplicated ( rownames ( zMtx ) ) , ]
+ zMtx = zMtx [ ! duplicated ( rownames ( zMtx ) ) , ]
# calculate distance matrix
distM = zMtx %>% dist2 ( . , . ) %>% . ^ ( 1 / 2 )
@@ -2420,7 +2420,7 @@ Hypergeometric Tests
#' @param genes1 vector;
#' @param genes2 vector;
phyper_test = function ( genes1 , genes2 ) {
- q = length ( intersect ( genes1 , genes2 ) )
+ q = length ( intersect ( genes1 , genes2 ) )
s = length ( genes1 )
n = nrow ( mData )
m = length ( genes2 )
diff --git a/index.Rmd b/index.Rmd
index 44e983e..fd24329 100644
--- a/index.Rmd
+++ b/index.Rmd
@@ -23,23 +23,26 @@ clean_site()
# Setup
Install [R](https://www.r-project.org/) to run our code.
-# Code Availability
-Our codebase is available on GitHub at [mindds.github.io/apoe-glia](https://mindds.github.io/apoe-glia/) and below.
+# Documentation
+To read our documented code, please visit [mindds.github.io/apoe-glia](https://mindds.github.io/apoe-glia) or see below.
### Functions
-* [Models](https://mindds.github.io/apoe-glia/models.html)
-* [Spectral Clustering](https://mindds.github.io/apoe-glia/spectral-clustering.html)
+* [Models](https://mindds.github.io/apoe-glia/models)
+* [Spectral Clustering](https://mindds.github.io/apoe-glia/spectral-clustering)
### ROSMAP
-* [Define Variables](https://mindds.github.io/apoe-glia/ROSMAP-variables.html)
-* [Pre-Process Data](https://mindds.github.io/apoe-glia/ROSMAP-preprocess.html)
-* [Microglia Analysis](https://mindds.github.io/apoe-glia/ROSMAP-microglia.html)
-* [Astrocyte Analysis](https://mindds.github.io/apoe-glia/ROSMAP-astrocyte.html)
+* [Define Variables](https://mindds.github.io/apoe-glia/ROSMAP-variables)
+* [Pre-Process Data](https://mindds.github.io/apoe-glia/ROSMAP-preprocess)
+* [Microglia Analysis](https://mindds.github.io/apoe-glia/ROSMAP-microglia)
+* [Astrocyte Analysis](https://mindds.github.io/apoe-glia/ROSMAP-astrocyte)
### MSBB
-* [Define Variables](https://mindds.github.io/apoe-glia/MSBB-variables.html)
-* [Merge Data](https://mindds.github.io/apoe-glia/MSBB-merge.html)
-* [Microglia Analysis](https://mindds.github.io/apoe-glia/MSBB-microglia.html)
+* [Define Variables](https://mindds.github.io/apoe-glia/MSBB-variables)
+* [Merge Data](https://mindds.github.io/apoe-glia/MSBB-merge)
+* [Microglia Analysis](https://mindds.github.io/apoe-glia/MSBB-microglia)
### qPCR Analysis
-* [qPCR Analysis](https://mindds.github.io/apoe-glia/qPCR-analysis.html)
\ No newline at end of file
+* [qPCR Analysis](https://mindds.github.io/apoe-glia/qPCR-analysis)
+
+# Code Availability
+Our full codebase is available for download on [GitHub](https://github.com/mindds/apoe-glia).
\ No newline at end of file