diff --git a/publication_rmd/simulation_scale_free/confounded_data.Rds b/publication_rmd/simulation_scale_free/confounded_data.Rds new file mode 100755 index 0000000..8aefc57 Binary files /dev/null and b/publication_rmd/simulation_scale_free/confounded_data.Rds differ diff --git a/publication_rmd/simulation_scale_free/scale_free_sim.Rmd b/publication_rmd/simulation_scale_free/scale_free_sim.Rmd new file mode 100755 index 0000000..cf07d00 --- /dev/null +++ b/publication_rmd/simulation_scale_free/scale_free_sim.Rmd @@ -0,0 +1,90 @@ +--- +title: "R Notebook" +output: + html_document: + df_print: paged + html_notebook: default + pdf_document: default +--- + +This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. + +Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. + +```{r, message=FALSE} +library(huge, quietly = T) +library(sva, quietly = T) +``` + +```{r} +lambda=seq(0,1,length.out=200) + +set.seed(101) +## generate simulated scale free network +dat <- huge.generator(n = 10000, d = 100, graph = "scale-free", v = NULL, u = NULL, + g = NULL, prob = NULL, vis = F, verbose = TRUE) + + +sim.dat <- dat$data +n <- nrow(sim.dat) +p <- ncol(sim.dat) + +## infer networks using simulated data +sim.net <- huge(sim.dat, lambda = lambda, method = "glasso", verbose = F) + +## Count edges from inferred networks, and common edges +true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta)) +print(paste("The number of edges in the true network:", true_ecount)) +sim_ecount <- sum(sim.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta)) +print(paste("The number of edges in the inferred network", sim_ecount)) +sim_true_ecount <- sum(dat$theta + sim.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta)) +print(paste("The number common edges in the inferred and true network", sim_true_ecount)) + +``` + +```{r} +## confounded data + +sim.confounded=sim.dat +set.seed(101) +grp=rnorm(n) +for(i in 10:30){ + sim.confounded[,i] = sim.confounded[,i] + 5*grp +} + + +saveRDS(sim.confounded, file = "~/research/networks_correction/simulation_scale_free/confounded_data.Rds") + +## infer networks +sim.confounded.net <- huge(sim.confounded, lambda = lambda, method = "glasso", verbose = F) + +## Count edges from inferred networks, and common edges +true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta)) +print(paste("The number of edges in the true network:", true_ecount)) +confounded_ecount <- sum(sim.confounded.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta)) +print(paste("The number of edges in the inferred network (confounded data): ", confounded_ecount)) +sim_confounded_ecount <- sum(dat$theta + sim.confounded.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta)) +print(paste("The number common edges in the inferred (confounded) and true network:", sim_confounded_ecount)) +``` + +```{r} +## PC correction +mod=matrix(1,nrow=dim(sim.confounded)[1],ncol=1) +colnames(mod)="Intercept" +nsv=num.sv(t(sim.confounded),mod, method = "be") +print(paste("the number of PCs estimated to be removed:", nsv)) +ss=svd(scale(sim.confounded)) +grp.est=ss$u[,1:nsv] +sim.corrected=lm(sim.confounded~grp.est)$residuals + +#infer network +sim.corrected.net <- huge(sim.corrected, lambda = lambda, method = "glasso", verbose = F) + +## Count edges from inferred networks, and common edges +true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta)) +print(paste("The number of edges in the true network:", true_ecount)) +corrected_ecount <- sum(sim.corrected.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta)) +print(paste("The number of edges in the inferred network (PC corrected data): ", corrected_ecount)) +sim_corrected_ecount <- sum(dat$theta + sim.corrected.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta)) +print(paste("The number common edges in the inferred (PC corrected) and true network:", sim_corrected_ecount)) +``` diff --git a/publication_rmd/simulation_scale_free/scale_free_sim.html b/publication_rmd/simulation_scale_free/scale_free_sim.html new file mode 100755 index 0000000..be698b2 --- /dev/null +++ b/publication_rmd/simulation_scale_free/scale_free_sim.html @@ -0,0 +1,302 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + +

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

+

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

+
library(huge, quietly = T)
+
## Warning: package 'huge' was built under R version 3.5.2
+
## Warning: package 'igraph' was built under R version 3.5.2
+
library(sva, quietly = T)
+
## Warning: package 'mgcv' was built under R version 3.5.2
+
lambda=seq(0,1,length.out=200)
+
+set.seed(101)
+## generate simulated scale free network
+dat <- huge.generator(n = 10000, d = 100, graph = "scale-free", v = NULL, u = NULL,
+               g = NULL, prob = NULL, vis = F, verbose = TRUE)
+
## Generating data from the multivariate normal distribution with the scale-free graph structure....done.
+
sim.dat <- dat$data
+n <- nrow(sim.dat)
+p <- ncol(sim.dat)
+
+## infer networks using simulated data
+sim.net <- huge(sim.dat, lambda = lambda, method = "glasso", verbose = F)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the true network:", true_ecount))
+
## [1] "The number of edges in the true network: 99"
+
sim_ecount <- sum(sim.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the inferred network", sim_ecount))
+
## [1] "The number of edges in the inferred network 99"
+
sim_true_ecount <- sum(dat$theta + sim.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta))
+print(paste("The number common edges in the inferred and true network", sim_true_ecount))
+
## [1] "The number common edges in the inferred and true network 99"
+
## confounded data
+
+sim.confounded=sim.dat
+set.seed(101)
+grp=rnorm(n)
+for(i in 10:30){
+  sim.confounded[,i] = sim.confounded[,i] + 5*grp
+}
+
+
+saveRDS(sim.confounded, file = "~/research/networks_correction/simulation_scale_free/confounded_data.Rds")
+
+## infer networks
+sim.confounded.net <- huge(sim.confounded, lambda = lambda, method = "glasso", verbose = F)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the true network:", true_ecount))
+
## [1] "The number of edges in the true network: 99"
+
confounded_ecount <- sum(sim.confounded.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the inferred network (confounded data): ", confounded_ecount))
+
## [1] "The number of edges in the inferred network (confounded data):  272"
+
sim_confounded_ecount <- sum(dat$theta + sim.confounded.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta))
+print(paste("The number common edges in the inferred (confounded) and true network:", sim_confounded_ecount))
+
## [1] "The number common edges in the inferred (confounded) and true network: 70"
+
## PC correction
+mod=matrix(1,nrow=dim(sim.confounded)[1],ncol=1)
+colnames(mod)="Intercept"
+nsv=num.sv(t(sim.confounded),mod, method = "be")
+print(paste("the number of PCs estimated to be removed:", nsv))
+
## [1] "the number of PCs estimated to be removed: 1"
+
ss=svd(scale(sim.confounded))
+grp.est=ss$u[,1:nsv]
+sim.corrected=lm(sim.confounded~grp.est)$residuals
+
+#infer network
+sim.corrected.net <- huge(sim.corrected, lambda = lambda, method = "glasso", verbose = F)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the true network:", true_ecount))
+
## [1] "The number of edges in the true network: 99"
+
corrected_ecount <- sum(sim.corrected.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the inferred network (PC corrected data): ", corrected_ecount))
+
## [1] "The number of edges in the inferred network (PC corrected data):  99"
+
sim_corrected_ecount <- sum(dat$theta + sim.corrected.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta))
+print(paste("The number common edges in the inferred (PC corrected) and true network:", sim_corrected_ecount))
+
## [1] "The number common edges in the inferred (PC corrected) and true network: 99"
+ + + + +
+ + + + + + + + diff --git a/publication_rmd/simulation_scale_free/scale_free_sim.nb.html b/publication_rmd/simulation_scale_free/scale_free_sim.nb.html new file mode 100755 index 0000000..709b01a --- /dev/null +++ b/publication_rmd/simulation_scale_free/scale_free_sim.nb.html @@ -0,0 +1,366 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

+

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

+ + + +
library(huge, quietly = T)
+
+ + +
package <U+393C><U+3E31>huge<U+393C><U+3E32> was built under R version 3.5.2
+Attaching package: <U+393C><U+3E31>Matrix<U+393C><U+3E32>
+
+The following object is masked from <U+393C><U+3E31>package:S4Vectors<U+393C><U+3E32>:
+
+    expand
+
+package <U+393C><U+3E31>igraph<U+393C><U+3E32> was built under R version 3.5.2
+Attaching package: <U+393C><U+3E31>igraph<U+393C><U+3E32>
+
+The following object is masked from <U+393C><U+3E31>package:S4Vectors<U+393C><U+3E32>:
+
+    union
+
+The following objects are masked from <U+393C><U+3E31>package:BiocGenerics<U+393C><U+3E32>:
+
+    normalize, path, union
+
+The following objects are masked from <U+393C><U+3E31>package:stats<U+393C><U+3E32>:
+
+    decompose, spectrum
+
+The following object is masked from <U+393C><U+3E31>package:base<U+393C><U+3E32>:
+
+    union
+ + +
library(huge, quietly = T)
+library(sva, quietly = T)
+ + +
package <U+393C><U+3E31>mgcv<U+393C><U+3E32> was built under R version 3.5.2This is mgcv 1.8-26. For overview type 'help("mgcv-package")'.
+
+Attaching package: <U+393C><U+3E31>genefilter<U+393C><U+3E32>
+
+The following object is masked from <U+393C><U+3E31>package:MASS<U+393C><U+3E32>:
+
+    area
+ + + + + + +
print(paste("The number of edges in the inferred network", sim_true_ecount))
+
+ + +
[1] "The number of edges in the inferred network 99"
+ + + + + + +
print(paste("The number common edges in the inferred (confounded) and true network:", sim_confounded_ecount))
+
+ + +
[1] "The number common edges in the inferred (confounded) and true network: 70"
+ + + + + + +
print(paste("The number common edges in the inferred (PC corrected) and true network:", sim_corrected_ecount))
+
+ + +
[1] "The number common edges in the inferred (PC corrected) and true network: 99"
+ + + +
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCg0KVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gDQoNClRyeSBleGVjdXRpbmcgdGhpcyBjaHVuayBieSBjbGlja2luZyB0aGUgKlJ1biogYnV0dG9uIHdpdGhpbiB0aGUgY2h1bmsgb3IgYnkgcGxhY2luZyB5b3VyIGN1cnNvciBpbnNpZGUgaXQgYW5kIHByZXNzaW5nICpDdHJsK1NoaWZ0K0VudGVyKi4gDQoNCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeShodWdlLCBxdWlldGx5ID0gVCkNCmxpYnJhcnkoc3ZhLCBxdWlldGx5ID0gVCkNCmBgYA0KDQpgYGB7cn0NCmxhbWJkYT1zZXEoMCwxLGxlbmd0aC5vdXQ9MjAwKQ0KDQpzZXQuc2VlZCgxMDEpDQojIyBnZW5lcmF0ZSBzaW11bGF0ZWQgc2NhbGUgZnJlZSBuZXR3b3JrDQpkYXQgPC0gaHVnZS5nZW5lcmF0b3IobiA9IDEwMDAwLCBkID0gMTAwLCBncmFwaCA9ICJzY2FsZS1mcmVlIiwgdiA9IE5VTEwsIHUgPSBOVUxMLA0KICAgICAgICAgICAgICAgZyA9IE5VTEwsIHByb2IgPSBOVUxMLCB2aXMgPSBGLCB2ZXJib3NlID0gVFJVRSkNCg0KDQpzaW0uZGF0IDwtIGRhdCRkYXRhDQpuIDwtIG5yb3coc2ltLmRhdCkNCnAgPC0gbmNvbChzaW0uZGF0KQ0KDQojIyBpbmZlciBuZXR3b3JrcyB1c2luZyBzaW11bGF0ZWQgZGF0YQ0Kc2ltLm5ldCA8LSBodWdlKHNpbS5kYXQsIGxhbWJkYSA9IGxhbWJkYSwgbWV0aG9kID0gImdsYXNzbyIsIHZlcmJvc2UgPSBGKQ0KDQojIyBDb3VudCBlZGdlcyBmcm9tIGluZmVycmVkIG5ldHdvcmtzLCBhbmQgY29tbW9uIGVkZ2VzDQp0cnVlX2Vjb3VudCA8LSBzdW0oZGF0JHRoZXRhID09IDEgJiBjb2woZGF0JHRoZXRhKSA8IHJvdyhkYXQkdGhldGEpKQ0KcHJpbnQocGFzdGUoIlRoZSBudW1iZXIgb2YgZWRnZXMgaW4gdGhlIHRydWUgbmV0d29yazoiLCB0cnVlX2Vjb3VudCkpDQpzaW1fZWNvdW50IDwtIHN1bShzaW0ubmV0JHBhdGhbWzM5XV0gPT0gMSAmIGNvbChkYXQkdGhldGEpIDwgcm93KGRhdCR0aGV0YSkpDQpwcmludChwYXN0ZSgiVGhlIG51bWJlciBvZiBlZGdlcyBpbiB0aGUgaW5mZXJyZWQgbmV0d29yayIsIHNpbV9lY291bnQpKQ0Kc2ltX3RydWVfZWNvdW50IDwtIHN1bShkYXQkdGhldGEgKyBzaW0ubmV0JHBhdGhbWzM5XV0gPT0gMiAmIGNvbChkYXQkdGhldGEpIDwgcm93KGRhdCR0aGV0YSkpDQpwcmludChwYXN0ZSgiVGhlIG51bWJlciBjb21tb24gZWRnZXMgaW4gdGhlIGluZmVycmVkIGFuZCB0cnVlIG5ldHdvcmsiLCBzaW1fdHJ1ZV9lY291bnQpKQ0KDQpgYGANCg0KYGBge3J9DQojIyBjb25mb3VuZGVkIGRhdGENCg0Kc2ltLmNvbmZvdW5kZWQ9c2ltLmRhdA0Kc2V0LnNlZWQoMTAxKQ0KZ3JwPXJub3JtKG4pDQpmb3IoaSBpbiAxMDozMCl7DQogIHNpbS5jb25mb3VuZGVkWyxpXSA9IHNpbS5jb25mb3VuZGVkWyxpXSArIDUqZ3JwDQp9DQoNCg0Kc2F2ZVJEUyhzaW0uY29uZm91bmRlZCwgZmlsZSA9ICJ+L3Jlc2VhcmNoL25ldHdvcmtzX2NvcnJlY3Rpb24vc2ltdWxhdGlvbl9zY2FsZV9mcmVlL2NvbmZvdW5kZWRfZGF0YS5SZHMiKQ0KDQojIyBpbmZlciBuZXR3b3Jrcw0Kc2ltLmNvbmZvdW5kZWQubmV0IDwtIGh1Z2Uoc2ltLmNvbmZvdW5kZWQsIGxhbWJkYSA9IGxhbWJkYSwgbWV0aG9kID0gImdsYXNzbyIsIHZlcmJvc2UgPSBGKQ0KDQojIyBDb3VudCBlZGdlcyBmcm9tIGluZmVycmVkIG5ldHdvcmtzLCBhbmQgY29tbW9uIGVkZ2VzDQp0cnVlX2Vjb3VudCA8LSBzdW0oZGF0JHRoZXRhID09IDEgJiBjb2woZGF0JHRoZXRhKSA8IHJvdyhkYXQkdGhldGEpKQ0KcHJpbnQocGFzdGUoIlRoZSBudW1iZXIgb2YgZWRnZXMgaW4gdGhlIHRydWUgbmV0d29yazoiLCB0cnVlX2Vjb3VudCkpDQpjb25mb3VuZGVkX2Vjb3VudCA8LSBzdW0oc2ltLmNvbmZvdW5kZWQubmV0JHBhdGhbWzM5XV0gPT0gMSAmIGNvbChkYXQkdGhldGEpIDwgcm93KGRhdCR0aGV0YSkpDQpwcmludChwYXN0ZSgiVGhlIG51bWJlciBvZiBlZGdlcyBpbiB0aGUgaW5mZXJyZWQgbmV0d29yayAoY29uZm91bmRlZCBkYXRhKTogIiwgY29uZm91bmRlZF9lY291bnQpKQ0Kc2ltX2NvbmZvdW5kZWRfZWNvdW50IDwtIHN1bShkYXQkdGhldGEgKyBzaW0uY29uZm91bmRlZC5uZXQkcGF0aFtbMzldXSA9PSAyICYgY29sKGRhdCR0aGV0YSkgPCByb3coZGF0JHRoZXRhKSkNCnByaW50KHBhc3RlKCJUaGUgbnVtYmVyIGNvbW1vbiBlZGdlcyBpbiB0aGUgaW5mZXJyZWQgKGNvbmZvdW5kZWQpIGFuZCB0cnVlIG5ldHdvcms6Iiwgc2ltX2NvbmZvdW5kZWRfZWNvdW50KSkNCmBgYA0KDQpgYGB7cn0NCiMjIFBDIGNvcnJlY3Rpb24NCm1vZD1tYXRyaXgoMSxucm93PWRpbShzaW0uY29uZm91bmRlZClbMV0sbmNvbD0xKQ0KY29sbmFtZXMobW9kKT0iSW50ZXJjZXB0Ig0KbnN2PW51bS5zdih0KHNpbS5jb25mb3VuZGVkKSxtb2QsIG1ldGhvZCA9ICJiZSIpDQpwcmludChwYXN0ZSgidGhlIG51bWJlciBvZiBQQ3MgZXN0aW1hdGVkIHRvIGJlIHJlbW92ZWQ6IiwgbnN2KSkNCnNzPXN2ZChzY2FsZShzaW0uY29uZm91bmRlZCkpDQpncnAuZXN0PXNzJHVbLDE6bnN2XQ0Kc2ltLmNvcnJlY3RlZD1sbShzaW0uY29uZm91bmRlZH5ncnAuZXN0KSRyZXNpZHVhbHMNCg0KI2luZmVyIG5ldHdvcmsNCnNpbS5jb3JyZWN0ZWQubmV0IDwtIGh1Z2Uoc2ltLmNvcnJlY3RlZCwgbGFtYmRhID0gbGFtYmRhLCBtZXRob2QgPSAiZ2xhc3NvIiwgdmVyYm9zZSA9IEYpDQoNCiMjIENvdW50IGVkZ2VzIGZyb20gaW5mZXJyZWQgbmV0d29ya3MsIGFuZCBjb21tb24gZWRnZXMNCnRydWVfZWNvdW50IDwtIHN1bShkYXQkdGhldGEgPT0gMSAmIGNvbChkYXQkdGhldGEpIDwgcm93KGRhdCR0aGV0YSkpDQpwcmludChwYXN0ZSgiVGhlIG51bWJlciBvZiBlZGdlcyBpbiB0aGUgdHJ1ZSBuZXR3b3JrOiIsIHRydWVfZWNvdW50KSkNCmNvcnJlY3RlZF9lY291bnQgPC0gc3VtKHNpbS5jb3JyZWN0ZWQubmV0JHBhdGhbWzM5XV0gPT0gMSAmIGNvbChkYXQkdGhldGEpIDwgcm93KGRhdCR0aGV0YSkpDQpwcmludChwYXN0ZSgiVGhlIG51bWJlciBvZiBlZGdlcyBpbiB0aGUgaW5mZXJyZWQgbmV0d29yayAoUEMgY29ycmVjdGVkIGRhdGEpOiAiLCBjb3JyZWN0ZWRfZWNvdW50KSkNCnNpbV9jb3JyZWN0ZWRfZWNvdW50IDwtIHN1bShkYXQkdGhldGEgKyBzaW0uY29ycmVjdGVkLm5ldCRwYXRoW1szOV1dID09IDIgJiBjb2woZGF0JHRoZXRhKSA8IHJvdyhkYXQkdGhldGEpKQ0KcHJpbnQocGFzdGUoIlRoZSBudW1iZXIgY29tbW9uIGVkZ2VzIGluIHRoZSBpbmZlcnJlZCAoUEMgY29ycmVjdGVkKSBhbmQgdHJ1ZSBuZXR3b3JrOiIsIHNpbV9jb3JyZWN0ZWRfZWNvdW50KSkNCmBgYA0K
+ + + +
+ + + + + + + +