Merge pull request #1 from abelew/master

initial commit
elsayed-lab · Sep 2, 2020 · 5cc5716 · 5cc5716
2 parents 8b79249 + 87c1173
commit 5cc5716
Show file tree

Hide file tree

Showing 52 changed files with 28,107 additions and 0 deletions.
diff --git a/20200326_MvGM-R-Markdown.html b/20200326_MvGM-R-Markdown.html
diff --git a/20200402_MvGM_atb.html b/20200402_MvGM_atb.html
diff --git a/20200403_MvGM_atb.html b/20200403_MvGM_atb.html
diff --git a/20200408_MvGM_atb.html b/20200408_MvGM_atb.html
diff --git a/20200413_MvGM_atb.html b/20200413_MvGM_atb.html
diff --git a/20200415_scrnaseq_analyses.html b/20200415_scrnaseq_analyses.html
diff --git a/20200417_MvGM_atb.html b/20200417_MvGM_atb.html
diff --git a/20200417_scrnaseq_analyses.html b/20200417_scrnaseq_analyses.html
diff --git a/MvGM R Markdown.Rmd b/MvGM R Markdown.Rmd
diff --git a/MvGM_atb.Rmd b/MvGM_atb.Rmd
diff --git a/kallisto abundance files/HPGL0912/abundance.tsv.xz b/kallisto abundance files/HPGL0912/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0913/abundance.tsv.xz b/kallisto abundance files/HPGL0913/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0914/abundance.tsv.xz b/kallisto abundance files/HPGL0914/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0915/abundance.tsv.xz b/kallisto abundance files/HPGL0915/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0916/abundance.tsv.xz b/kallisto abundance files/HPGL0916/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0917/abundance.tsv.xz b/kallisto abundance files/HPGL0917/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0918/abundance.tsv.xz b/kallisto abundance files/HPGL0918/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0919/abundance.tsv.xz b/kallisto abundance files/HPGL0919/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0920/abundance.tsv.xz b/kallisto abundance files/HPGL0920/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0921/abundance.tsv.xz b/kallisto abundance files/HPGL0921/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0922/abundance.tsv.xz b/kallisto abundance files/HPGL0922/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0923/abundance.tsv.xz b/kallisto abundance files/HPGL0923/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0924/abundance.tsv.xz b/kallisto abundance files/HPGL0924/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0925/abundance.tsv.xz b/kallisto abundance files/HPGL0925/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0926/abundance.tsv.xz b/kallisto abundance files/HPGL0926/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0927/abundance.tsv.xz b/kallisto abundance files/HPGL0927/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0928/abundance.tsv.xz b/kallisto abundance files/HPGL0928/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0929/abundance.tsv.xz b/kallisto abundance files/HPGL0929/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0930/abundance.tsv.xz b/kallisto abundance files/HPGL0930/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0931/abundance.tsv.xz b/kallisto abundance files/HPGL0931/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0942/abundance.tsv.xz b/kallisto abundance files/HPGL0942/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0943/abundance.tsv.xz b/kallisto abundance files/HPGL0943/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0944/abundance.tsv.xz b/kallisto abundance files/HPGL0944/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0945/abundance.tsv.xz b/kallisto abundance files/HPGL0945/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0946/abundance.tsv.xz b/kallisto abundance files/HPGL0946/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0947/abundance.tsv.xz b/kallisto abundance files/HPGL0947/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0948/abundance.tsv.xz b/kallisto abundance files/HPGL0948/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0949/abundance.tsv.xz b/kallisto abundance files/HPGL0949/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0950/abundance.tsv.xz b/kallisto abundance files/HPGL0950/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0951/abundance.tsv.xz b/kallisto abundance files/HPGL0951/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0952/abundance.tsv.xz b/kallisto abundance files/HPGL0952/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0953/abundance.tsv.xz b/kallisto abundance files/HPGL0953/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0954/abundance.tsv.xz b/kallisto abundance files/HPGL0954/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0955/abundance.tsv.xz b/kallisto abundance files/HPGL0955/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0956/abundance.tsv.xz b/kallisto abundance files/HPGL0956/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0957/abundance.tsv.xz b/kallisto abundance files/HPGL0957/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0958/abundance.tsv.xz b/kallisto abundance files/HPGL0958/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0959/abundance.tsv.xz b/kallisto abundance files/HPGL0959/abundance.tsv.xz
diff --git a/kallisto abundance files/HPGL0960/abundance.tsv.xz b/kallisto abundance files/HPGL0960/abundance.tsv.xz
diff --git a/sample_sheets/MetaData only 4 hour.txt b/sample_sheets/MetaData only 4 hour.txt
@@ -0,0 +1,40 @@
+HPGL Identifier	Stimulation	Patient	Batch	Growth
+HPGL0912	NS	H1	A	M
+HPGL0913	NS	H2	B	M
+HPGL0914	NS	H3	C	M
+HPGL0915	NS	H4	D	M
+HPGL0916	NS	H5	E	M
+HPGL0917	LPS	H1	A	M
+HPGL0918	LPS	H2	B	M
+HPGL0919	LPS	H3	C	M
+HPGL0920	LPS	H4	D	M
+HPGL0921	LPS	H5	E	M
+HPGL0922	LA	H1	A	M
+HPGL0923	LA	H2	B	M
+HPGL0924	LA	H3	C	M
+HPGL0925	LA	H4	D	M
+HPGL0926	LA	H5	E	M
+HPGL0927	LP	H1	A	M
+HPGL0928	LP	H2	B	M
+HPGL0929	LP	H3	F	M
+HPGL0930	LP	H4	D	M
+HPGL0931	LP	H5	E	M
+HPGL0942	NS	H1	A	GM
+HPGL0943	NS	H3	F	GM
+HPGL0944	NS	H4	D	GM
+HPGL0945	NS	H5	E	GM
+HPGL0946	LPS	H1	A	GM
+HPGL0947	LPS	H2	C	GM
+HPGL0948	LPS	H3	F	GM
+HPGL0949	LPS	H4	D	GM
+HPGL0950	LPS	H5	E	GM
+HPGL0951	LA	H1	B	GM
+HPGL0952	LA	H2	C	GM
+HPGL0953	LA	H3	F	GM
+HPGL0954	LA	H4	D	GM
+HPGL0955	LA	H5	E	GM
+HPGL0956	LP	H1	B	GM
+HPGL0957	LP	H2	C	GM
+HPGL0958	LP	H3	F	GM
+HPGL0959	LP	H4	E	GM
+HPGL0960	LP	H5	G	GM
diff --git a/scrnaseq_analyses.Rmd b/scrnaseq_analyses.Rmd
@@ -0,0 +1,340 @@
+---
+title: "Experimenting with some scRNASeq data."
+author: "atb [email protected]"
+date: "`r Sys.Date()`"
+output:
+  html_document:
+    code_download: true
+    code_folding: show
+    fig_caption: true
+    fig_height: 7
+    fig_width: 7
+    highlight: tango
+    keep_md: false
+    mode: selfcontained
+    number_sections: true
+    self_contained: true
+    theme: readable
+    toc: true
+    toc_float:
+      collapsed: false
+      smooth_scroll: false
+  rmdformats::readthedown:
+    code_download: true
+    code_folding: show
+    df_print: paged
+    fig_caption: true
+    fig_height: 7
+    fig_width: 7
+    highlight: tango
+    width: 300
+    keep_md: false
+    mode: selfcontained
+    toc_float: true
+  BiocStyle::html_document:
+    code_download: true
+    code_folding: show
+    fig_caption: true
+    fig_height: 7
+    fig_width: 7
+    highlight: tango
+    keep_md: false
+    mode: selfcontained
+    toc_float: true
+---
+
+<style type="text/css">
+body, td {
+  font-size: 16px;
+}
+code.r{
+  font-size: 16px;
+}
+pre {
+ font-size: 16px
+}
+</style>
+
+```{r options, include=FALSE}
+library("hpgltools")
+tt <- devtools::load_all("/data/hpgltools")
+knitr::opts_knit$set(width=120,
+                     progress=TRUE,
+                     verbose=TRUE,
+                     echo=TRUE)
+knitr::opts_chunk$set(error=TRUE,
+                      dpi=96)
+old_options <- options(digits=4,
+                       stringsAsFactors=FALSE,
+                       knitr.duplicate.label="allow")
+ggplot2::theme_set(ggplot2::theme_bw(base_size=10))
+rundate <- format(Sys.Date(), format="%Y%m%d")
+previous_file <- "index.Rmd"
+ver <- "202004"
+
+##tmp <- sm(loadme(filename=paste0(gsub(pattern="\\.Rmd", replace="", x=previous_file), "-v", ver, ".rda.xz")))
+rmd_file <- "scrnaseq_analyses.Rmd"
+```
+
+# Testing seurat
+
+```{r seurat_loading}
+library(Seurat)
+library(ggplot2)
+## library(SeuratData)
+## The Seurat vignettes show how to load hdf5 files and the metadata required.
+## They are using a pacreas dataset, I presume it is the 'panc8' data in SeuratData.
+## Thus I will poke at that to see if I can learn about how they handle the metadata.
+
+la_data <- Seurat::Read10X("khamidza_158974/KH_LA/outs/filtered_feature_bc_matrix")
+lp_data <- Seurat::Read10X("khamidza_158974/KH_LP/outs/filtered_feature_bc_matrix")
+lps_data <- Seurat::Read10X("khamidza_158974/KH_LPS/outs/filtered_feature_bc_matrix")
+ns_data <- Seurat::Read10X("khamidza_158974/KH_NS/outs/filtered_feature_bc_matrix")
+
+la <- Seurat::CreateSeuratObject(la_data, project="la")
+lp <- Seurat::CreateSeuratObject(lp_data, project="lp")
+lps <- Seurat::CreateSeuratObject(lps_data, project="lps")
+ns <- Seurat::CreateSeuratObject(ns_data, project="ns")
+
+all <- merge(la, lp)
+all <- merge(all, lps)
+all <- merge(all, ns)
+
+## I think this is redundant, but interesting for me to understand sc metadata
+cluster_letters <- as.factor(LETTERS[Idents(object=all)])
+names(cluster_letters) <- colnames(x=all)
+
+stimulated <- as.character(cluster_letters)
+ns_idx <- stimulated == "D"
+stimulated[ns_idx] <- "unstimulated"
+stimulated[!ns_idx] <- "stimulated"
+stimulated <- as.factor(stimulated)
+
+stimulation <- as.character(cluster_letters)
+ns_idx <- stimulation == "D"
+stimulation[ns_idx] <- "Unstimulated"
+la_idx <- stimulation == "A"
+stimulation[la_idx] <- "L+Ado"
+lp_idx <- stimulation == "B"
+stimulation[lp_idx] <- "L+PGE2"
+lps_idx <- stimulation == "C"
+stimulation[lps_idx] <- "LPS"
+stimulation <- as.factor(stimulation)
+
+all <- AddMetaData(
+  object=all,
+  metadata=cluster_letters,
+  col.name="cluster_letters")
+all <- AddMetaData(
+    object=all,
+    metadata=stimulated,
+    col.name="stimulatedp")
+all <- AddMetaData(
+    object=all,
+    metadata=stimulation,
+    col.name="stimulation")
+
+all[["percent_mt"]] <- PercentageFeatureSet(all, pattern="^MT-")
+VlnPlot(all, features="nFeature_RNA", pt.size=0.1)
+VlnPlot(all, features="percent_mt", pt.size=0.1)
+VlnPlot(all, features="nCount_RNA", pt.size=0.1)
+
+all <- NormalizeData(object=all)
+all <- FindVariableFeatures(object=all)
+all <- ScaleData(object=all)
+all <- RunPCA(object=all)
+all <- FindNeighbors(object=all)
+all <- FindClusters(object=all)
+all <- RunTSNE(object=all)
+all <- RunUMAP(all, reduction="pca", dims=1:20)
+DimPlot(object=all, reduction="tsne")
+plotted <- DimPlot(all, reduction="umap", group.by="cluster_letters", label=TRUE)
+plotted
+
+## Try to set some metrics to drop crappy stuff.
+FeatureScatter(all, feature1="nCount_RNA", feature2="percent_mt")
+## Suggests that we want ~  < 15% mt
+## Suggests we want > 1000 nCounts
+FeatureScatter(all, feature1="nCount_RNA", feature2="nFeature_RNA")
+## Suggests we want > 200 nFeature
+FeatureScatter(all, feature1="percent_mt", feature2="nFeature_RNA")
+all_sub <- subset(all,
+                  subset=nFeature_RNA > 200 & percent_mt < 15)
+all_sub <- NormalizeData(object=all_sub)
+all_sub <- FindVariableFeatures(object=all_sub)
+most_var <- head(VariableFeatures(all_sub), 30)
+variable_plot <- VariableFeaturePlot(all_sub)
+variable_plot <- LabelPoints(plot=variable_plot, points=most_var, repel=TRUE)
+variable_plot
+all_sub <- ScaleData(object=all_sub)
+all_sub <- RunPCA(object=all_sub)
+VizDimLoadings(all_sub, dims=1:2, reduction="pca")
+all_sub <- JackStraw(all_sub, num.replicate=100)
+all_sub <- ScoreJackStraw(all_sub, dims=1:20)
+JackStrawPlot(all_sub, dims=1:15)
+ElbowPlot(all_sub)
+all_sub <- FindNeighbors(object=all_sub)
+all_sub <- FindClusters(object=all_sub)
+all_sub <- RunTSNE(object=all_sub)
+all_sub <- RunUMAP(all_sub, reduction="pca", dims=1:20)
+DimPlot(object=all_sub, reduction="tsne")
+plotted <- DimPlot(all_sub, reduction="umap", group.by="stimulation", label=TRUE)
+plotted
+plotted <- DimPlot(all_sub, reduction="pca", label=TRUE)
+plotted
+```
+
+```{r markers}
+DefaultAssay(all) <- "RNA"
+markers <- FindConservedMarkers(all_sub, ident.1=4, grouping.var="stimulation", verbose=TRUE)
+
+## Note that I renamed them according to Dave's suggestion.
+lps_vs_unstim <- FindMarkers(all_sub,
+                             ident.1="LPS",
+                             ident.2="Unstimulated",
+                             group.by="stimulation",
+                             min.pct=0.25)
+head(lps_vs_unstim, n=20)
+ade_vs_unstim <- FindMarkers(all_sub,
+                             ident.1="L+Ado",
+                             ident.2="Unstimulated",
+                             group.by="stimulation",
+                             min.pct=0.25)
+head(ade_vs_unstim, n=20)
+pge_vs_unstim <- FindMarkers(all_sub,
+                             ident.1="L+PGE2",
+                             ident.2="Unstimulated",
+                             group.by="stimulation",
+                             min.pct=0.25)
+head(pge_vs_unstim, n=20)
+
+ado_lps <- FindMarkers(all_sub,
+                       ident.1="L+Ado",
+                       ident.2="LPS",
+                       group.by="stimulation", min.pct=0.25)
+head(ado_lps, n=20)
+
+pge2_lps <- FindMarkers(all_sub,
+                        ident.1="L+PGE2",
+                        ident.2="LPS",
+                        group.by="stimulation", min.pct=0.25)
+head(pge2_lps, n=20)
+```
+
+```{r features}
+FeaturePlot(all_sub, features=c("THBS1"),
+            split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
+FeaturePlot(all_sub, features=c("PLEK"),
+            split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
+FeaturePlot(all_sub, features=c("FNIP2"),
+            split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
+FeaturePlot(all_sub, features=c("G0S2"),
+            split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
+```
+
+## Figure 2A
+
+This looks to me like a bar plot of the top 10 up/down genes.  But I have no
+clue where these numbers are coming from, a range of -5 < x < 5 logFC just does
+not seem to me to exist in this data.  The range of logFCs on my sheet goes from
+-1.5 < x < 1.7.  In addition, I do not see where in the Seurat data structures
+the error bars are coming from.
+
+```{r fig2a}
+ado_lps <- FindMarkers(all_sub, test.use="DESeq2",
+                       ident.1="L+Ado",
+                       ident.2="LPS",
+                       group.by="stimulation", min.pct=0.25)
+
+```
+
+## Figure 2B
+
+Reading from the text, this appears to me to be from the total cell RNASeq?  No,
+that cannot be true, the bulk data did not compare these things.  It must be
+this data.
+
+I did generate a couple of Venns using the scRNA data and got similar numbers in
+the opposite orientation.
+
+```{r venn}
+ado <- rownames(ado_lps)
+ado_up <- ado[ado_lps[["avg_logFC"]] > 0]
+ado_down <- ado[ado_lps[["avg_logFC"]] < 0]
+pge2 <- rownames(pge2_lps)
+pge_up <- pge2[pge2_lps[["avg_logFC"]] > 0]
+pge_down <- pge2[pge2_lps[["avg_logFC"]] < 0]
+
+library(Vennerable)
+ups <- list(ado_up, pge_up)
+up_data <- Venn(ups, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2)
+plot(up_data, doWeights=FALSE)
+
+downs <- list(ado_down, pge_down)
+v_data <- Venn(downs, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2)
+plot(v_data, doWeights=FALSE)
+```
+
+## Figure 2C
+
+```{r fig2c}
+plotted <- DimPlot(all_sub, reduction="umap", group.by="stimulation", label=TRUE)
+plotted
+```
+
+## Figure 5A
+
+```{r fig5a}
+thbs1 <- VlnPlot(all, features="THBS1", group.by="stimulation", pt.size=0.1)
+thbs1
+box <- ggplot(data=thbs1$data, aes(x=ident, y=THBS1)) +
+  geom_boxplot(notch=TRUE)
+box
+input <- thbs1$data
+thbs1_ggstats <- ggstatsplot::ggbetweenstats(
+                                  data=input, x=ident, y=THBS1,
+                                  notch=TRUE, mean.ci=TRUE, k=3,
+                                  pairwise.comparisons=TRUE)
+thbs1_ggstats
+##  pairwise.comparisons=TRUE, # display significant pairwise comparisons
+##  p.adjust.method="bonferroni", # method for adjusting p-values for multiple comparisons
+##  ## adding new components to `ggstatsplot` default
+##  ##ggplot.component = list(ggplot2::scale_y_continuous(sec.axis = ggplot2::dup_axis())),
+##  k=3, title.prefix="THBS1", palette="default_jama",
+##  package="ggsci", messages=TRUE, plotgrid.args=list(nrow=2))
+
+vegfa <- VlnPlot(all, features="VEGFA", group.by="stimulation", pt.size=0.1)
+vegfa
+vegfa_ggstats <- ggstatsplot::ggbetweenstats(
+                                  data=vegfa$data, x=ident, y=VEGFA,
+                                  notch=TRUE, mean.ci=TRUE, k=3,
+                                  pairwise.comparisons=TRUE)
+vegfa_ggstats
+
+cd300e <- VlnPlot(all, features="CD300E", group.by="stimulation", pt.size=0.1)
+cd300e_ggstats <- ggstatsplot::ggbetweenstats(
+                                   data=cd300e$data, x=ident, y=CD300E,
+                                   notch=TRUE, mean.ci=TRUE, k=3,
+                                   pairwise.comparisons=TRUE)
+cd300e_ggstats
+
+plaur <- VlnPlot(all, features="PLAUR", group.by="stimulation", pt.size=0.1)
+plaur_ggstats <- ggstatsplot::ggbetweenstats(
+                                   data=plaur$data, x=ident, y=PLAUR,
+                                   notch=TRUE, mean.ci=TRUE, k=3,
+                                   pairwise.comparisons=TRUE)
+plaur_ggstats
+```
+
+```{r saveme}
+pander::pander(sessionInfo())
+message(paste0("This is hpgltools commit: ", get_git_commit()))
+this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
+message(paste0("Saving to ", this_save))
+tmp <- sm(saveme(filename=this_save))
+```
+
+```{r loadme, eval=FALSE}
+this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
+loaded <- loadme(filename=this_save)
+```