diff --git a/publication_rmd/tutorial_vignette/tutorial.html b/publication_rmd/tutorial_vignette/tutorial.html index c784436..dcbd080 100755 --- a/publication_rmd/tutorial_vignette/tutorial.html +++ b/publication_rmd/tutorial_vignette/tutorial.html @@ -19,12 +19,8 @@ - - - - - + @@ -70,9 +66,6 @@ .table th:not([align]) { text-align: left; } -#rmd-source-code { - display: none; -} @@ -108,30 +101,6 @@ } -
This is a vignette for applying PC based residualization on gene expression data for building co-expression networks.
- - - -
-## Libraries and functions
-library("sva")
-library("recount", quietly = T)
-library("WGCNA", quietly = T)
-
-q_normalize <- function(dat){
+## Libraries and functions
+library("sva")
+## Warning: package 'mgcv' was built under R version 3.5.2
+library("recount", quietly = T)
+## Warning: package 'matrixStats' was built under R version 3.5.2
+library("WGCNA", quietly = T)
+## Warning: package 'WGCNA' was built under R version 3.5.2
+## Warning: package 'dynamicTreeCut' was built under R version 3.5.2
+## Warning: package 'fastcluster' was built under R version 3.5.2
+## ==========================================================================
+## *
+## * Package WGCNA 1.66 loaded.
+## *
+## * Important note: It appears that your system supports multi-threading,
+## * but it is not enabled within WGCNA in R.
+## * To allow multi-threading within WGCNA with all available cores, use
+## *
+## * allowWGCNAThreads()
+## *
+## * within R. Use disableWGCNAThreads() to disable threading if necessary.
+## * Alternatively, set the following environment variable on your system:
+## *
+## * ALLOW_WGCNA_THREADS=<number_of_processors>
+## *
+## * for example
+## *
+## * ALLOW_WGCNA_THREADS=8
+## *
+## * To set the environment variable in linux bash shell, type
+## *
+## * export ALLOW_WGCNA_THREADS=8
+## *
+## * before running R. Other operating systems or shells will
+## * have a similar command to achieve the same aim.
+## *
+## ==========================================================================
+q_normalize <- function(dat){
n = nrow(dat)
p = ncol(dat)
rank.dat = dat # matrix for ranking
@@ -259,77 +235,79 @@ PC correction tutorial with example dataset
U = rank.dat/(n+1)
qnorm(U)
}
-
-
-
We will begin with loading a subsetted version of GTEx liver data. This subsetting was done for ease of tutorial
-
-
-
load("liver_subset_gtex.Rdata")
rse_raw <- t(rse_raw) # transpose data so that rows are samples and columns are gene expression measurements
-
-
-
Apply PC based correction to confounded gene expression measurements
Using the permutation approach, we identify the number of top PCs that contribute to noise and confounding in the gene expression measurements. This noise if not removed, can contribute to spurious correlations between genes thereby introducing false positive connections in the network.
To correct for this, using a linear model we residualize gene expression measurements using the number of PCs esimated.
-
-
-
mod=matrix(1,nrow=dim(rse_raw)[1],ncol=1)
colnames(mod)="Intercept"
nsv=num.sv(t(rse_raw),mod, method = "be") ## num.sv requires data matrix with features(genes) in the rows and samples in the column
-print(paste("Number of PCs estimated to be removed:", nsv))
-
-## PC residualization of gene expression data using sva_network. Rows correspond to samples, Columns correspond to features
+print(paste("Number of PCs estimated to be removed:", nsv))
+## [1] "Number of PCs estimated to be removed: 14"
+## PC residualization of gene expression data using sva_network. Rows correspond to samples, Columns correspond to features
exprs_corrected = sva_network(rse_raw, nsv)
-
-
-
-
-
-
## Quantile normalize the corrected gene expression measurements such that each expression of every gene follows a gaussian distribution
exprs_corrected_norm <- q_normalize(exprs_corrected)
-
-
-
Build co-expression networks with WGCNA
Using PC adjusted expression measurements, we build co-expression networks with WGCNA.
-
-
-
## select soft-thresholding power transform for WGCNA
-wgcna_soft_thr <- WGCNA::pickSoftThreshold(exprs_corrected_norm, networkType = "signed")
-wgcna_power <- wgcna_soft_thr$powerEstimate
+wgcna_soft_thr <- WGCNA::pickSoftThreshold(exprs_corrected_norm, networkType = "signed")
+## Warning: executing %dopar% sequentially: no parallel backend registered
+## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
+## 1 1 0.000524 1.030 0.950 509.000 509.0000 532.00
+## 2 2 0.014900 2.550 0.966 264.000 264.0000 290.00
+## 3 3 0.000195 -0.183 0.931 140.000 139.0000 165.00
+## 4 4 0.111000 -2.840 0.777 75.800 75.0000 102.00
+## 5 5 0.383000 -3.730 0.697 42.200 41.1000 67.80
+## 6 6 0.652000 -3.760 0.743 24.200 22.9000 48.40
+## 7 7 0.811000 -3.580 0.830 14.400 13.1000 36.60
+## 8 8 0.923000 -3.110 0.933 8.880 7.6600 28.90
+## 9 9 0.931000 -2.770 0.939 5.760 4.5900 23.60
+## 10 10 0.905000 -2.500 0.900 3.920 2.8500 19.80
+## 11 12 0.984000 -1.850 0.989 2.110 1.1700 14.50
+## 12 14 0.956000 -1.540 0.945 1.340 0.5390 11.10
+## 13 16 0.894000 -1.470 0.865 0.970 0.2680 10.00
+## 14 18 0.950000 -1.420 0.945 0.758 0.1430 9.32
+## 15 20 0.933000 -1.410 0.928 0.623 0.0784 8.69
+wgcna_power <- wgcna_soft_thr$powerEstimate
if(is.na(wgcna_power)){
print(paste("no power reached r-suared cut-off, now choosing max r-squared based power"))
wgcna_power <- wgcna_soft_thr$fitIndices$Power[which(wgcna_soft_thr$fitIndices$SFT.R.sq == max(wgcna_soft_thr$fitIndices$SFT.R.sq))]
- }
-
-
-
-
-
-
-
+ }
wgcna_net <- blockwiseModules(exprs_corrected_norm, power = wgcna_power,
- verbose = 3, numericLabels = T)
-table(wgcna_net$colors)
-
-
-
+ verbose = 3, numericLabels = T)
+## Calculating module eigengenes block-wise from all genes
+## Flagging genes and samples with too many missing values...
+## ..step 1
+## ..Working on block 1 .
+## TOM calculation: adjacency..
+## ..will not use multithreading.
+## Fraction of slow calculations: 0.000000
+## ..connectivity..
+## ..matrix multiplication (system BLAS)..
+## ..normalization..
+## ..done.
+## ....clustering..
+## ....detecting modules..
+## ....calculating module eigengenes..
+## ....checking kME in modules..
+## ..merging modules that are too close..
+## mergeCloseModules: Merging modules whose distance is less than 0.15
+## Calculating new MEs...
+table(wgcna_net$colors)
+##
+## 0 1 2 3 4 5 6
+## 799 91 25 23 22 20 20
WGCNA identifies 6 co-expression modules. It also contains 799 genes unassigned to any modules.
-
-LS0tDQp0aXRsZTogIlBDIGNvcnJlY3Rpb24gdHV0b3JpYWwgd2l0aCBleGFtcGxlIGRhdGFzZXQiDQpvdXRwdXQ6DQogICAgaHRtbF9kb2N1bWVudDoNCiAgICBjb2RlX2ZvbGRpbmc6ICJoaWRlIg0KLS0tDQpUaGlzIGlzIGEgdmlnbmV0dGUgZm9yIGFwcGx5aW5nIFBDIGJhc2VkIHJlc2lkdWFsaXphdGlvbiBvbiBnZW5lIGV4cHJlc3Npb24gZGF0YSBmb3IgYnVpbGRpbmcgY28tZXhwcmVzc2lvbiBuZXR3b3Jrcy4NCg0KYGBge3IsIG1lc3NhZ2UgPSBGfQ0KDQojIyBMaWJyYXJpZXMgYW5kIGZ1bmN0aW9ucw0KbGlicmFyeSgic3ZhIikNCmxpYnJhcnkoInJlY291bnQiLCBxdWlldGx5ID0gVCkNCmxpYnJhcnkoIldHQ05BIiwgcXVpZXRseSA9IFQpDQoNCnFfbm9ybWFsaXplIDwtIGZ1bmN0aW9uKGRhdCl7DQogIG4gPSBucm93KGRhdCkNCiAgcCA9IG5jb2woZGF0KQ0KICByYW5rLmRhdCA9ICBkYXQgIyBtYXRyaXggZm9yIHJhbmtpbmcNCiAgZm9yIChpIGluIDE6cCl7DQogICAgcmFuay5kYXRbLGldID0gcmFuayhkYXRbLGldKQ0KICB9DQogIFUgPSByYW5rLmRhdC8obisxKQ0KICBxbm9ybShVKQ0KfQ0KYGBgDQoNCldlIHdpbGwgYmVnaW4gd2l0aCBsb2FkaW5nIGEgc3Vic2V0dGVkIHZlcnNpb24gb2YgR1RFeCBsaXZlciBkYXRhLiBUaGlzIHN1YnNldHRpbmcgd2FzIGRvbmUgZm9yIGVhc2Ugb2YgdHV0b3JpYWwNCmBgYHtyfQ0KbG9hZCgibGl2ZXJfc3Vic2V0X2d0ZXguUmRhdGEiKQ0KcnNlX3JhdyA8LSB0KHJzZV9yYXcpICMgdHJhbnNwb3NlIGRhdGEgc28gdGhhdCByb3dzIGFyZSBzYW1wbGVzIGFuZCBjb2x1bW5zIGFyZSBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzDQpgYGANCg0KIyMjIEFwcGx5IFBDIGJhc2VkIGNvcnJlY3Rpb24gdG8gY29uZm91bmRlZCBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzDQpVc2luZyB0aGUgcGVybXV0YXRpb24gYXBwcm9hY2gsIHdlIGlkZW50aWZ5IHRoZSBudW1iZXIgb2YgdG9wIFBDcyB0aGF0IGNvbnRyaWJ1dGUgdG8gbm9pc2UgYW5kIGNvbmZvdW5kaW5nIGluIHRoZSBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzLiBUaGlzIG5vaXNlIGlmIG5vdCByZW1vdmVkLCBjYW4gY29udHJpYnV0ZSB0byBzcHVyaW91cyBjb3JyZWxhdGlvbnMgYmV0d2VlbiBnZW5lcyB0aGVyZWJ5IGludHJvZHVjaW5nIGZhbHNlIHBvc2l0aXZlIGNvbm5lY3Rpb25zIGluIHRoZSBuZXR3b3JrLg0KDQpUbyBjb3JyZWN0IGZvciB0aGlzLCB1c2luZyBhIGxpbmVhciBtb2RlbCB3ZSByZXNpZHVhbGl6ZSBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzIHVzaW5nIHRoZSBudW1iZXIgb2YgUENzIGVzaW1hdGVkLg0KDQpgYGB7cn0NCm1vZD1tYXRyaXgoMSxucm93PWRpbShyc2VfcmF3KVsxXSxuY29sPTEpDQpjb2xuYW1lcyhtb2QpPSJJbnRlcmNlcHQiDQpuc3Y9bnVtLnN2KHQocnNlX3JhdyksbW9kLCBtZXRob2QgPSAiYmUiKSAjIyBudW0uc3YgcmVxdWlyZXMgZGF0YSBtYXRyaXggd2l0aCBmZWF0dXJlcyhnZW5lcykgaW4gdGhlIHJvd3MgYW5kIHNhbXBsZXMgaW4gdGhlIGNvbHVtbg0KDQpwcmludChwYXN0ZSgiTnVtYmVyIG9mIFBDcyBlc3RpbWF0ZWQgdG8gYmUgcmVtb3ZlZDoiLCBuc3YpKQ0KDQojIyBQQyByZXNpZHVhbGl6YXRpb24gb2YgZ2VuZSBleHByZXNzaW9uIGRhdGEgdXNpbmcgc3ZhX25ldHdvcmsuIFJvd3MgY29ycmVzcG9uZCB0byBzYW1wbGVzLCBDb2x1bW5zIGNvcnJlc3BvbmQgdG8gZmVhdHVyZXMNCmV4cHJzX2NvcnJlY3RlZCA9IHN2YV9uZXR3b3JrKHJzZV9yYXcsIG5zdikNCmBgYA0KDQpgYGB7cn0NCiMjIFF1YW50aWxlIG5vcm1hbGl6ZSB0aGUgY29ycmVjdGVkIGdlbmUgZXhwcmVzc2lvbiBtZWFzdXJlbWVudHMgc3VjaCB0aGF0IGVhY2ggZXhwcmVzc2lvbiBvZiBldmVyeSBnZW5lIGZvbGxvd3MgYSBnYXVzc2lhbiBkaXN0cmlidXRpb24NCg0KZXhwcnNfY29ycmVjdGVkX25vcm0gPC0gcV9ub3JtYWxpemUoZXhwcnNfY29ycmVjdGVkKQ0KYGBgDQojIyMgQnVpbGQgY28tZXhwcmVzc2lvbiBuZXR3b3JrcyB3aXRoIFdHQ05BDQoNClVzaW5nIFBDIGFkanVzdGVkIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzLCB3ZSBidWlsZCBjby1leHByZXNzaW9uIG5ldHdvcmtzIHdpdGggV0dDTkEuIA0KDQpgYGB7cn0NCiMjIHNlbGVjdCBzb2Z0LXRocmVzaG9sZGluZyBwb3dlciB0cmFuc2Zvcm0gZm9yIFdHQ05BDQp3Z2NuYV9zb2Z0X3RociA8LSBXR0NOQTo6cGlja1NvZnRUaHJlc2hvbGQoZXhwcnNfY29ycmVjdGVkX25vcm0sIG5ldHdvcmtUeXBlID0gInNpZ25lZCIpDQp3Z2NuYV9wb3dlciA8LSB3Z2NuYV9zb2Z0X3RociRwb3dlckVzdGltYXRlDQogaWYoaXMubmEod2djbmFfcG93ZXIpKXsNCglwcmludChwYXN0ZSgibm8gcG93ZXIgcmVhY2hlZCByLXN1YXJlZCBjdXQtb2ZmLCBub3cgY2hvb3NpbmcgbWF4IHItc3F1YXJlZCBiYXNlZCBwb3dlciIpKQ0KCXdnY25hX3Bvd2VyIDwtIHdnY25hX3NvZnRfdGhyJGZpdEluZGljZXMkUG93ZXJbd2hpY2god2djbmFfc29mdF90aHIkZml0SW5kaWNlcyRTRlQuUi5zcSA9PSBtYXgod2djbmFfc29mdF90aHIkZml0SW5kaWNlcyRTRlQuUi5zcSkpXQ0KIH0NCg0KYGBgDQoNCmBgYHtyfQ0Kd2djbmFfbmV0IDwtIGJsb2Nrd2lzZU1vZHVsZXMoZXhwcnNfY29ycmVjdGVkX25vcm0sIHBvd2VyID0gd2djbmFfcG93ZXIsDQogICAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2UgPSAzLCBudW1lcmljTGFiZWxzID0gVCkNCnRhYmxlKHdnY25hX25ldCRjb2xvcnMpDQpgYGANCg0KV0dDTkEgaWRlbnRpZmllcyA2IGNvLWV4cHJlc3Npb24gbW9kdWxlcy4gSXQgYWxzbyBjb250YWlucyA3OTkgZ2VuZXMgdW5hc3NpZ25lZCB0byBhbnkgbW9kdWxlcy4=
@@ -345,17 +323,6 @@ Build co-expression networks with WGCNA
bootstrapStylePandocTables();
});
-$(document).ready(function () {
- $('.knitsql-table').addClass('kable-table');
- var container = $('.kable-table');
- container.each(function() {
-
- // move the caption out of the table
- var table = $(this).children('table');
- var caption = table.children('caption').detach();
- caption.insertBefore($(this)).css('display', 'inherit');
- });
-});