diff --git a/publication_rmd/simulation_scale_free/confounded_data.Rds b/publication_rmd/simulation_scale_free/confounded_data.Rds
new file mode 100755
index 0000000..8aefc57
Binary files /dev/null and b/publication_rmd/simulation_scale_free/confounded_data.Rds differ
diff --git a/publication_rmd/simulation_scale_free/scale_free_sim.Rmd b/publication_rmd/simulation_scale_free/scale_free_sim.Rmd
new file mode 100755
index 0000000..cf07d00
--- /dev/null
+++ b/publication_rmd/simulation_scale_free/scale_free_sim.Rmd
@@ -0,0 +1,106 @@
+---
+title: "R Notebook"
+output:
+  html_document:
+    df_print: paged
+  html_notebook: default
+  pdf_document: default
+---
+
+This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code.
+
+Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*.
+
+```{r, message=FALSE}
+library(huge, quietly = TRUE)
+library(sva, quietly = TRUE)
+```
+
+```{r}
+# Regularisation grid passed to every huge() call in this notebook.
+lambda <- seq(0, 1, length.out = 200)
+# Position along the huge() path at which the networks are compared.
+# NOTE(review): 39 is kept from the original analysis; confirm how it was chosen.
+lambda_idx <- 39
+
+set.seed(101)
+## generate simulated scale free network
+dat <- huge.generator(
+  n = 10000, d = 100, graph = "scale-free", v = NULL, u = NULL,
+  g = NULL, prob = NULL, vis = FALSE, verbose = TRUE
+)
+
+sim.dat <- dat$data
+n <- nrow(sim.dat)
+p <- ncol(sim.dat)
+
+# Strict lower-triangle mask so each undirected edge is counted once.
+lower_tri <- col(dat$theta) < row(dat$theta)
+
+## infer networks using simulated data
+sim.net <- huge(sim.dat, lambda = lambda, method = "glasso", verbose = FALSE)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & lower_tri)
+print(paste("The number of edges in the true network:", true_ecount))
+sim_ecount <- sum(sim.net$path[[lambda_idx]] == 1 & lower_tri)
+print(paste("The number of edges in the inferred network", sim_ecount))
+sim_true_ecount <- sum(dat$theta + sim.net$path[[lambda_idx]] == 2 & lower_tri)
+print(paste("The number common edges in the inferred and true network", sim_true_ecount))
+```
+
+```{r}
+## confounded data
+
+sim.confounded <- sim.dat
+set.seed(101)
+grp <- rnorm(n)
+# Add the same confounder (scaled by 5) to columns 10 to 30; grp has one
+# value per row, so it recycles down each selected column.
+confounded_cols <- 10:30
+sim.confounded[, confounded_cols] <- sim.confounded[, confounded_cols] + 5 * grp
+
+# Save beside this notebook (knitr evaluates chunks in the document's
+# directory by default) rather than under a machine-specific home path.
+saveRDS(sim.confounded, file = "confounded_data.Rds")
+
+## infer networks
+sim.confounded.net <- huge(sim.confounded, lambda = lambda, method = "glasso", verbose = FALSE)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & lower_tri)
+print(paste("The number of edges in the true network:", true_ecount))
+confounded_ecount <- sum(sim.confounded.net$path[[lambda_idx]] == 1 & lower_tri)
+print(paste("The number of edges in the inferred network (confounded data): ", confounded_ecount))
+sim_confounded_ecount <- sum(dat$theta + sim.confounded.net$path[[lambda_idx]] == 2 & lower_tri)
+print(paste("The number common edges in the inferred (confounded) and true network:", sim_confounded_ecount))
+```
+
+```{r}
+## PC correction
+mod <- matrix(1, nrow = nrow(sim.confounded), ncol = 1)
+colnames(mod) <- "Intercept"
+nsv <- num.sv(t(sim.confounded), mod, method = "be")
+print(paste("the number of PCs estimated to be removed:", nsv))
+if (nsv > 0) {
+  ss <- svd(scale(sim.confounded))
+  # seq_len() with drop = FALSE selects exactly nsv columns as a matrix;
+  # the previous 1:nsv would have picked column 1 when nsv is 0.
+  grp.est <- ss$u[, seq_len(nsv), drop = FALSE]
+  sim.corrected <- lm(sim.confounded ~ grp.est)$residuals
+} else {
+  # No components estimated, so there is nothing to regress out.
+  sim.corrected <- sim.confounded
+}
+
+#infer network
+sim.corrected.net <- huge(sim.corrected, lambda = lambda, method = "glasso", verbose = FALSE)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & lower_tri)
+print(paste("The number of edges in the true network:", true_ecount))
+corrected_ecount <- sum(sim.corrected.net$path[[lambda_idx]] == 1 & lower_tri)
+print(paste("The number of edges in the inferred network (PC corrected data): ", corrected_ecount))
+sim_corrected_ecount <- sum(dat$theta + sim.corrected.net$path[[lambda_idx]] == 2 & lower_tri)
+print(paste("The number common edges in the inferred (PC corrected) and true network:", sim_corrected_ecount))
+```
diff --git a/publication_rmd/simulation_scale_free/scale_free_sim.html b/publication_rmd/simulation_scale_free/scale_free_sim.html
new file mode 100755
index 0000000..be698b2
--- /dev/null
+++ b/publication_rmd/simulation_scale_free/scale_free_sim.html
@@ -0,0 +1,302 @@
+
+
+
+
+
+ + + + + + + + +This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
+Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
+library(huge, quietly = T)
+## Warning: package 'huge' was built under R version 3.5.2
+## Warning: package 'igraph' was built under R version 3.5.2
+library(sva, quietly = T)
+## Warning: package 'mgcv' was built under R version 3.5.2
+lambda=seq(0,1,length.out=200)
+
+set.seed(101)
+## generate simulated scale free network
+dat <- huge.generator(n = 10000, d = 100, graph = "scale-free", v = NULL, u = NULL,
+ g = NULL, prob = NULL, vis = F, verbose = TRUE)
+## Generating data from the multivariate normal distribution with the scale-free graph structure....done.
+sim.dat <- dat$data
+n <- nrow(sim.dat)
+p <- ncol(sim.dat)
+
+## infer networks using simulated data
+sim.net <- huge(sim.dat, lambda = lambda, method = "glasso", verbose = F)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the true network:", true_ecount))
+## [1] "The number of edges in the true network: 99"
+sim_ecount <- sum(sim.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the inferred network", sim_ecount))
+## [1] "The number of edges in the inferred network 99"
+sim_true_ecount <- sum(dat$theta + sim.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta))
+print(paste("The number common edges in the inferred and true network", sim_true_ecount))
+## [1] "The number common edges in the inferred and true network 99"
+## confounded data
+
+sim.confounded=sim.dat
+set.seed(101)
+grp=rnorm(n)
+for(i in 10:30){
+ sim.confounded[,i] = sim.confounded[,i] + 5*grp
+}
+
+
+saveRDS(sim.confounded, file = "~/research/networks_correction/simulation_scale_free/confounded_data.Rds")
+
+## infer networks
+sim.confounded.net <- huge(sim.confounded, lambda = lambda, method = "glasso", verbose = F)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the true network:", true_ecount))
+## [1] "The number of edges in the true network: 99"
+confounded_ecount <- sum(sim.confounded.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the inferred network (confounded data): ", confounded_ecount))
+## [1] "The number of edges in the inferred network (confounded data): 272"
+sim_confounded_ecount <- sum(dat$theta + sim.confounded.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta))
+print(paste("The number common edges in the inferred (confounded) and true network:", sim_confounded_ecount))
+## [1] "The number common edges in the inferred (confounded) and true network: 70"
+## PC correction
+mod=matrix(1,nrow=dim(sim.confounded)[1],ncol=1)
+colnames(mod)="Intercept"
+nsv=num.sv(t(sim.confounded),mod, method = "be")
+print(paste("the number of PCs estimated to be removed:", nsv))
+## [1] "the number of PCs estimated to be removed: 1"
+ss=svd(scale(sim.confounded))
+grp.est=ss$u[,1:nsv]
+sim.corrected=lm(sim.confounded~grp.est)$residuals
+
+#infer network
+sim.corrected.net <- huge(sim.corrected, lambda = lambda, method = "glasso", verbose = F)
+
+## Count edges from inferred networks, and common edges
+true_ecount <- sum(dat$theta == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the true network:", true_ecount))
+## [1] "The number of edges in the true network: 99"
+corrected_ecount <- sum(sim.corrected.net$path[[39]] == 1 & col(dat$theta) < row(dat$theta))
+print(paste("The number of edges in the inferred network (PC corrected data): ", corrected_ecount))
+## [1] "The number of edges in the inferred network (PC corrected data): 99"
+sim_corrected_ecount <- sum(dat$theta + sim.corrected.net$path[[39]] == 2 & col(dat$theta) < row(dat$theta))
+print(paste("The number common edges in the inferred (PC corrected) and true network:", sim_corrected_ecount))
+## [1] "The number common edges in the inferred (PC corrected) and true network: 99"
+
+
+
+
+This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
+Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
+ + + +library(huge, quietly = T)
+
+
+
+package 'huge' was built under R version 3.5.2
+Attaching package: 'Matrix'
+
+The following object is masked from 'package:S4Vectors':
+
+ expand
+
+package 'igraph' was built under R version 3.5.2
+Attaching package: 'igraph'
+
+The following object is masked from 'package:S4Vectors':
+
+ union
+
+The following objects are masked from 'package:BiocGenerics':
+
+ normalize, path, union
+
+The following objects are masked from 'package:stats':
+
+ decompose, spectrum
+
+The following object is masked from 'package:base':
+
+ union
+
+
+library(huge, quietly = T)
+library(sva, quietly = T)
+
+
+package 'mgcv' was built under R version 3.5.2 This is mgcv 1.8-26. For overview type 'help("mgcv-package")'.
+
+Attaching package: 'genefilter'
+
+The following object is masked from 'package:MASS':
+
+ area
+
+
+
+
+
+
+print(paste("The number of edges in the inferred network", sim_true_ecount))
+
+
+
+[1] "The number of edges in the inferred network 99"
+
+
+
+
+
+
+print(paste("The number common edges in the inferred (confounded) and true network:", sim_confounded_ecount))
+
+
+
+[1] "The number common edges in the inferred (confounded) and true network: 70"
+
+
+
+
+
+
+print(paste("The number common edges in the inferred (PC corrected) and true network:", sim_corrected_ecount))
+
+
+
+[1] "The number common edges in the inferred (PC corrected) and true network: 99"
+
+
+
+