From 1bb8d3dad163d231ab5b0088e29c4d7da46bf09c Mon Sep 17 00:00:00 2001 From: Princy Parsana Date: Tue, 12 Feb 2019 17:15:29 -0500 Subject: [PATCH] tutorial reknit --- .../tutorial_vignette/tutorial.html | 201 ++++++++---------- 1 file changed, 84 insertions(+), 117 deletions(-) diff --git a/publication_rmd/tutorial_vignette/tutorial.html b/publication_rmd/tutorial_vignette/tutorial.html index c784436..dcbd080 100755 --- a/publication_rmd/tutorial_vignette/tutorial.html +++ b/publication_rmd/tutorial_vignette/tutorial.html @@ -19,12 +19,8 @@ - - - - - + @@ -70,9 +66,6 @@ .table th:not([align]) { text-align: left; } -#rmd-source-code { - display: none; -} @@ -108,30 +101,6 @@ } -
@@ -205,15 +174,6 @@ - - @@ -221,16 +181,6 @@ -

This is a vignette for applying PC based residualization on gene expression data for building co-expression networks.

- - - -

-## Libraries and functions
-library("sva")
-library("recount", quietly = T)
-library("WGCNA", quietly = T)
-
-q_normalize <- function(dat){
+
## Libraries and functions
+library("sva")
+
## Warning: package 'mgcv' was built under R version 3.5.2
+
library("recount", quietly = T)
+
## Warning: package 'matrixStats' was built under R version 3.5.2
+
library("WGCNA", quietly = T)
+
## Warning: package 'WGCNA' was built under R version 3.5.2
+
## Warning: package 'dynamicTreeCut' was built under R version 3.5.2
+
## Warning: package 'fastcluster' was built under R version 3.5.2
+
## ==========================================================================
+## *
+## *  Package WGCNA 1.66 loaded.
+## *
+## *    Important note: It appears that your system supports multi-threading,
+## *    but it is not enabled within WGCNA in R. 
+## *    To allow multi-threading within WGCNA with all available cores, use 
+## *
+## *          allowWGCNAThreads()
+## *
+## *    within R. Use disableWGCNAThreads() to disable threading if necessary.
+## *    Alternatively, set the following environment variable on your system:
+## *
+## *          ALLOW_WGCNA_THREADS=<number_of_processors>
+## *
+## *    for example 
+## *
+## *          ALLOW_WGCNA_THREADS=8
+## *
+## *    To set the environment variable in linux bash shell, type 
+## *
+## *           export ALLOW_WGCNA_THREADS=8
+## *
+## *     before running R. Other operating systems or shells will
+## *     have a similar command to achieve the same aim.
+## *
+## ==========================================================================
+
q_normalize <- function(dat){
   n = nrow(dat)
   p = ncol(dat)
   rank.dat =  dat # matrix for ranking
@@ -259,77 +235,79 @@ 

PC correction tutorial with example dataset

U = rank.dat/(n+1) qnorm(U) }
- - -

We will begin with loading a subsetted version of GTEx liver data. This subsetting was done for ease of tutorial

- - -
load("liver_subset_gtex.Rdata")
 rse_raw <- t(rse_raw) # transpose data so that rows are samples and columns are gene expression measurements
- - -

Apply PC based correction to confounded gene expression measurements

Using the permutation approach, we identify the number of top PCs that contribute to noise and confounding in the gene expression measurements. This noise if not removed, can contribute to spurious correlations between genes thereby introducing false positive connections in the network.

To correct for this, using a linear model we residualize gene expression measurements using the number of PCs esimated.

- - -
mod=matrix(1,nrow=dim(rse_raw)[1],ncol=1)
 colnames(mod)="Intercept"
 nsv=num.sv(t(rse_raw),mod, method = "be") ## num.sv requires data matrix with features(genes) in the rows and samples in the column
 
-print(paste("Number of PCs estimated to be removed:", nsv))
-
-## PC residualization of gene expression data using sva_network. Rows correspond to samples, Columns correspond to features
+print(paste("Number of PCs estimated to be removed:", nsv))
+
## [1] "Number of PCs estimated to be removed: 14"
+
## PC residualization of gene expression data using sva_network. Rows correspond to samples, Columns correspond to features
 exprs_corrected = sva_network(rse_raw, nsv)
- - - - - -
## Quantile normalize the corrected gene expression measurements such that each expression of every gene follows a gaussian distribution
 
 exprs_corrected_norm <- q_normalize(exprs_corrected)
- - -

Build co-expression networks with WGCNA

Using PC adjusted expression measurements, we build co-expression networks with WGCNA.

- - -
## select soft-thresholding power transform for WGCNA
-wgcna_soft_thr <- WGCNA::pickSoftThreshold(exprs_corrected_norm, networkType = "signed")
-wgcna_power <- wgcna_soft_thr$powerEstimate
+wgcna_soft_thr <- WGCNA::pickSoftThreshold(exprs_corrected_norm, networkType = "signed")
+
## Warning: executing %dopar% sequentially: no parallel backend registered
+
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
+## 1      1 0.000524  1.030          0.950 509.000  509.0000 532.00
+## 2      2 0.014900  2.550          0.966 264.000  264.0000 290.00
+## 3      3 0.000195 -0.183          0.931 140.000  139.0000 165.00
+## 4      4 0.111000 -2.840          0.777  75.800   75.0000 102.00
+## 5      5 0.383000 -3.730          0.697  42.200   41.1000  67.80
+## 6      6 0.652000 -3.760          0.743  24.200   22.9000  48.40
+## 7      7 0.811000 -3.580          0.830  14.400   13.1000  36.60
+## 8      8 0.923000 -3.110          0.933   8.880    7.6600  28.90
+## 9      9 0.931000 -2.770          0.939   5.760    4.5900  23.60
+## 10    10 0.905000 -2.500          0.900   3.920    2.8500  19.80
+## 11    12 0.984000 -1.850          0.989   2.110    1.1700  14.50
+## 12    14 0.956000 -1.540          0.945   1.340    0.5390  11.10
+## 13    16 0.894000 -1.470          0.865   0.970    0.2680  10.00
+## 14    18 0.950000 -1.420          0.945   0.758    0.1430   9.32
+## 15    20 0.933000 -1.410          0.928   0.623    0.0784   8.69
+
wgcna_power <- wgcna_soft_thr$powerEstimate
  if(is.na(wgcna_power)){
     print(paste("no power reached r-suared cut-off, now choosing max r-squared based power"))
     wgcna_power <- wgcna_soft_thr$fitIndices$Power[which(wgcna_soft_thr$fitIndices$SFT.R.sq == max(wgcna_soft_thr$fitIndices$SFT.R.sq))]
- }
-
- - - - - - + }
wgcna_net <- blockwiseModules(exprs_corrected_norm, power = wgcna_power,
-                       verbose = 3, numericLabels = T)
-table(wgcna_net$colors)
- - - + verbose = 3, numericLabels = T) +
##  Calculating module eigengenes block-wise from all genes
+##    Flagging genes and samples with too many missing values...
+##     ..step 1
+##  ..Working on block 1 .
+##     TOM calculation: adjacency..
+##     ..will not use multithreading.
+##      Fraction of slow calculations: 0.000000
+##     ..connectivity..
+##     ..matrix multiplication (system BLAS)..
+##     ..normalization..
+##     ..done.
+##  ....clustering..
+##  ....detecting modules..
+##  ....calculating module eigengenes..
+##  ....checking kME in modules..
+##  ..merging modules that are too close..
+##      mergeCloseModules: Merging modules whose distance is less than 0.15
+##        Calculating new MEs...
+
table(wgcna_net$colors)
+
## 
+##   0   1   2   3   4   5   6 
+## 799  91  25  23  22  20  20

WGCNA identifies 6 co-expression modules. It also contains 799 genes unassigned to any modules.

-
-
LS0tDQp0aXRsZTogIlBDIGNvcnJlY3Rpb24gdHV0b3JpYWwgd2l0aCBleGFtcGxlIGRhdGFzZXQiDQpvdXRwdXQ6DQogICAgaHRtbF9kb2N1bWVudDoNCiAgICBjb2RlX2ZvbGRpbmc6ICJoaWRlIg0KLS0tDQpUaGlzIGlzIGEgdmlnbmV0dGUgZm9yIGFwcGx5aW5nIFBDIGJhc2VkIHJlc2lkdWFsaXphdGlvbiBvbiBnZW5lIGV4cHJlc3Npb24gZGF0YSBmb3IgYnVpbGRpbmcgY28tZXhwcmVzc2lvbiBuZXR3b3Jrcy4NCg0KYGBge3IsIG1lc3NhZ2UgPSBGfQ0KDQojIyBMaWJyYXJpZXMgYW5kIGZ1bmN0aW9ucw0KbGlicmFyeSgic3ZhIikNCmxpYnJhcnkoInJlY291bnQiLCBxdWlldGx5ID0gVCkNCmxpYnJhcnkoIldHQ05BIiwgcXVpZXRseSA9IFQpDQoNCnFfbm9ybWFsaXplIDwtIGZ1bmN0aW9uKGRhdCl7DQogIG4gPSBucm93KGRhdCkNCiAgcCA9IG5jb2woZGF0KQ0KICByYW5rLmRhdCA9ICBkYXQgIyBtYXRyaXggZm9yIHJhbmtpbmcNCiAgZm9yIChpIGluIDE6cCl7DQogICAgcmFuay5kYXRbLGldID0gcmFuayhkYXRbLGldKQ0KICB9DQogIFUgPSByYW5rLmRhdC8obisxKQ0KICBxbm9ybShVKQ0KfQ0KYGBgDQoNCldlIHdpbGwgYmVnaW4gd2l0aCBsb2FkaW5nIGEgc3Vic2V0dGVkIHZlcnNpb24gb2YgR1RFeCBsaXZlciBkYXRhLiBUaGlzIHN1YnNldHRpbmcgd2FzIGRvbmUgZm9yIGVhc2Ugb2YgdHV0b3JpYWwNCmBgYHtyfQ0KbG9hZCgibGl2ZXJfc3Vic2V0X2d0ZXguUmRhdGEiKQ0KcnNlX3JhdyA8LSB0KHJzZV9yYXcpICMgdHJhbnNwb3NlIGRhdGEgc28gdGhhdCByb3dzIGFyZSBzYW1wbGVzIGFuZCBjb2x1bW5zIGFyZSBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzDQpgYGANCg0KIyMjIEFwcGx5IFBDIGJhc2VkIGNvcnJlY3Rpb24gdG8gY29uZm91bmRlZCBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzDQpVc2luZyB0aGUgcGVybXV0YXRpb24gYXBwcm9hY2gsIHdlIGlkZW50aWZ5IHRoZSBudW1iZXIgb2YgdG9wIFBDcyB0aGF0IGNvbnRyaWJ1dGUgdG8gbm9pc2UgYW5kIGNvbmZvdW5kaW5nIGluIHRoZSBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzLiBUaGlzIG5vaXNlIGlmIG5vdCByZW1vdmVkLCBjYW4gY29udHJpYnV0ZSB0byBzcHVyaW91cyBjb3JyZWxhdGlvbnMgYmV0d2VlbiBnZW5lcyB0aGVyZWJ5IGludHJvZHVjaW5nIGZhbHNlIHBvc2l0aXZlIGNvbm5lY3Rpb25zIGluIHRoZSBuZXR3b3JrLg0KDQpUbyBjb3JyZWN0IGZvciB0aGlzLCB1c2luZyBhIGxpbmVhciBtb2RlbCB3ZSByZXNpZHVhbGl6ZSBnZW5lIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzIHVzaW5nIHRoZSBudW1iZXIgb2YgUENzIGVzaW1hdGVkLg0KDQpgYGB7cn0NCm1vZD1tYXRyaXgoMSxucm93PWRpbShyc2VfcmF3KVsxXSxuY29sPTEpDQpjb2xuYW1lcyhtb2QpPSJJbnRlcmNlcHQiDQpuc3Y9bnVtLnN2KHQocnNlX3JhdyksbW9kLCBtZXRob2QgPSAiYmUiKSAjIyBudW0uc3YgcmVxdWlyZXMgZGF0YSBtYXRyaXggd2l0aCBmZWF0dXJlcyhnZW5lcykgaW4gdGhlIHJvd3MgYW5kIHNhbXBsZXMgaW4gdGhlIGNvbHVtbg0KDQpwcmludChwYXN0ZSgiTnVtYmVyIG9mIFBDcyBlc3RpbWF0ZWQgdG8gYmUgcmVtb3ZlZDoiLCBuc3YpKQ0KDQojIyBQQyByZXNpZHVhbGl6YXRpb24gb2YgZ2VuZSBleHByZXNzaW9uIGRhdGEgdXNpbmcgc3ZhX25ldHdvcmsuIFJvd3MgY29ycmVzcG9uZCB0byBzYW1wbGVzLCBDb2x1bW5zIGNvcnJlc3BvbmQgdG8gZmVhdHVyZXMNCmV4cHJzX2NvcnJlY3RlZCA9IHN2YV9uZXR3b3JrKHJzZV9yYXcsIG5zdikNCmBgYA0KDQpgYGB7cn0NCiMjIFF1YW50aWxlIG5vcm1hbGl6ZSB0aGUgY29ycmVjdGVkIGdlbmUgZXhwcmVzc2lvbiBtZWFzdXJlbWVudHMgc3VjaCB0aGF0IGVhY2ggZXhwcmVzc2lvbiBvZiBldmVyeSBnZW5lIGZvbGxvd3MgYSBnYXVzc2lhbiBkaXN0cmlidXRpb24NCg0KZXhwcnNfY29ycmVjdGVkX25vcm0gPC0gcV9ub3JtYWxpemUoZXhwcnNfY29ycmVjdGVkKQ0KYGBgDQojIyMgQnVpbGQgY28tZXhwcmVzc2lvbiBuZXR3b3JrcyB3aXRoIFdHQ05BDQoNClVzaW5nIFBDIGFkanVzdGVkIGV4cHJlc3Npb24gbWVhc3VyZW1lbnRzLCB3ZSBidWlsZCBjby1leHByZXNzaW9uIG5ldHdvcmtzIHdpdGggV0dDTkEuIA0KDQpgYGB7cn0NCiMjIHNlbGVjdCBzb2Z0LXRocmVzaG9sZGluZyBwb3dlciB0cmFuc2Zvcm0gZm9yIFdHQ05BDQp3Z2NuYV9zb2Z0X3RociA8LSBXR0NOQTo6cGlja1NvZnRUaHJlc2hvbGQoZXhwcnNfY29ycmVjdGVkX25vcm0sIG5ldHdvcmtUeXBlID0gInNpZ25lZCIpDQp3Z2NuYV9wb3dlciA8LSB3Z2NuYV9zb2Z0X3RociRwb3dlckVzdGltYXRlDQogaWYoaXMubmEod2djbmFfcG93ZXIpKXsNCglwcmludChwYXN0ZSgibm8gcG93ZXIgcmVhY2hlZCByLXN1YXJlZCBjdXQtb2ZmLCBub3cgY2hvb3NpbmcgbWF4IHItc3F1YXJlZCBiYXNlZCBwb3dlciIpKQ0KCXdnY25hX3Bvd2VyIDwtIHdnY25hX3NvZnRfdGhyJGZpdEluZGljZXMkUG93ZXJbd2hpY2god2djbmFfc29mdF90aHIkZml0SW5kaWNlcyRTRlQuUi5zcSA9PSBtYXgod2djbmFfc29mdF90aHIkZml0SW5kaWNlcyRTRlQuUi5zcSkpXQ0KIH0NCg0KYGBgDQoNCmBgYHtyfQ0Kd2djbmFfbmV0IDwtIGJsb2Nrd2lzZU1vZHVsZXMoZXhwcnNfY29ycmVjdGVkX25vcm0sIHBvd2VyID0gd2djbmFfcG93ZXIsDQogICAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2UgPSAzLCBudW1lcmljTGFiZWxzID0gVCkNCnRhYmxlKHdnY25hX25ldCRjb2xvcnMpDQpgYGANCg0KV0dDTkEgaWRlbnRpZmllcyA2IGNvLWV4cHJlc3Npb24gbW9kdWxlcy4gSXQgYWxzbyBjb250YWlucyA3OTkgZ2VuZXMgdW5hc3NpZ25lZCB0byBhbnkgbW9kdWxlcy4=
@@ -345,17 +323,6 @@

Build co-expression networks with WGCNA

bootstrapStylePandocTables(); }); -$(document).ready(function () { - $('.knitsql-table').addClass('kable-table'); - var container = $('.kable-table'); - container.each(function() { - - // move the caption out of the table - var table = $(this).children('table'); - var caption = table.children('caption').detach(); - caption.insertBefore($(this)).css('display', 'inherit'); - }); -});