Skip to content

Commit

Permalink
Merge pull request #1 from abelew/master
Browse files Browse the repository at this point in the history
initial commit
  • Loading branch information
abelew authored Sep 2, 2020
2 parents 8b79249 + 87c1173 commit 5cc5716
Show file tree
Hide file tree
Showing 52 changed files with 28,107 additions and 0 deletions.
1,544 changes: 1,544 additions & 0 deletions 20200326_MvGM-R-Markdown.html

Large diffs are not rendered by default.

3,303 changes: 3,303 additions & 0 deletions 20200402_MvGM_atb.html

Large diffs are not rendered by default.

3,258 changes: 3,258 additions & 0 deletions 20200403_MvGM_atb.html

Large diffs are not rendered by default.

3,546 changes: 3,546 additions & 0 deletions 20200408_MvGM_atb.html

Large diffs are not rendered by default.

3,737 changes: 3,737 additions & 0 deletions 20200413_MvGM_atb.html

Large diffs are not rendered by default.

2,264 changes: 2,264 additions & 0 deletions 20200415_scrnaseq_analyses.html

Large diffs are not rendered by default.

3,892 changes: 3,892 additions & 0 deletions 20200417_MvGM_atb.html

Large diffs are not rendered by default.

2,338 changes: 2,338 additions & 0 deletions 20200417_scrnaseq_analyses.html

Large diffs are not rendered by default.

1,007 changes: 1,007 additions & 0 deletions MvGM R Markdown.Rmd

Large diffs are not rendered by default.

2,017 changes: 2,017 additions & 0 deletions MvGM_atb.Rmd

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
40 changes: 40 additions & 0 deletions sample_sheets/MetaData only 4 hour.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
HPGL Identifier Stimulation Patient Batch Growth
HPGL0912 NS H1 A M
HPGL0913 NS H2 B M
HPGL0914 NS H3 C M
HPGL0915 NS H4 D M
HPGL0916 NS H5 E M
HPGL0917 LPS H1 A M
HPGL0918 LPS H2 B M
HPGL0919 LPS H3 C M
HPGL0920 LPS H4 D M
HPGL0921 LPS H5 E M
HPGL0922 LA H1 A M
HPGL0923 LA H2 B M
HPGL0924 LA H3 C M
HPGL0925 LA H4 D M
HPGL0926 LA H5 E M
HPGL0927 LP H1 A M
HPGL0928 LP H2 B M
HPGL0929 LP H3 F M
HPGL0930 LP H4 D M
HPGL0931 LP H5 E M
HPGL0942 NS H1 A GM
HPGL0943 NS H3 F GM
HPGL0944 NS H4 D GM
HPGL0945 NS H5 E GM
HPGL0946 LPS H1 A GM
HPGL0947 LPS H2 C GM
HPGL0948 LPS H3 F GM
HPGL0949 LPS H4 D GM
HPGL0950 LPS H5 E GM
HPGL0951 LA H1 B GM
HPGL0952 LA H2 C GM
HPGL0953 LA H3 F GM
HPGL0954 LA H4 D GM
HPGL0955 LA H5 E GM
HPGL0956 LP H1 B GM
HPGL0957 LP H2 C GM
HPGL0958 LP H3 F GM
HPGL0959 LP H4 E GM
HPGL0960 LP H5 G GM
340 changes: 340 additions & 0 deletions scrnaseq_analyses.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
---
title: "Experimenting with some scRNASeq data."
author: "atb [email protected]"
date: "`r Sys.Date()`"
output:
html_document:
code_download: true
code_folding: show
fig_caption: true
fig_height: 7
fig_width: 7
highlight: tango
keep_md: false
mode: selfcontained
number_sections: true
self_contained: true
theme: readable
toc: true
toc_float:
collapsed: false
smooth_scroll: false
rmdformats::readthedown:
code_download: true
code_folding: show
df_print: paged
fig_caption: true
fig_height: 7
fig_width: 7
highlight: tango
width: 300
keep_md: false
mode: selfcontained
toc_float: true
BiocStyle::html_document:
code_download: true
code_folding: show
fig_caption: true
fig_height: 7
fig_width: 7
highlight: tango
keep_md: false
mode: selfcontained
toc_float: true
---

<style type="text/css">
body, td {
font-size: 16px;
}
code.r{
font-size: 16px;
}
pre {
font-size: 16px
}
</style>

```{r options, include=FALSE}
library("hpgltools")
tt <- devtools::load_all("/data/hpgltools")
knitr::opts_knit$set(width=120,
progress=TRUE,
verbose=TRUE,
echo=TRUE)
knitr::opts_chunk$set(error=TRUE,
dpi=96)
old_options <- options(digits=4,
stringsAsFactors=FALSE,
knitr.duplicate.label="allow")
ggplot2::theme_set(ggplot2::theme_bw(base_size=10))
rundate <- format(Sys.Date(), format="%Y%m%d")
previous_file <- "index.Rmd"
ver <- "202004"
##tmp <- sm(loadme(filename=paste0(gsub(pattern="\\.Rmd", replace="", x=previous_file), "-v", ver, ".rda.xz")))
rmd_file <- "scrnaseq_analyses.Rmd"
```

# Testing seurat

```{r seurat_loading}
library(Seurat)
library(ggplot2)
## library(SeuratData)
## The Seurat vignettes show how to load hdf5 files and the metadata required.
## They are using a pacreas dataset, I presume it is the 'panc8' data in SeuratData.
## Thus I will poke at that to see if I can learn about how they handle the metadata.
la_data <- Seurat::Read10X("khamidza_158974/KH_LA/outs/filtered_feature_bc_matrix")
lp_data <- Seurat::Read10X("khamidza_158974/KH_LP/outs/filtered_feature_bc_matrix")
lps_data <- Seurat::Read10X("khamidza_158974/KH_LPS/outs/filtered_feature_bc_matrix")
ns_data <- Seurat::Read10X("khamidza_158974/KH_NS/outs/filtered_feature_bc_matrix")
la <- Seurat::CreateSeuratObject(la_data, project="la")
lp <- Seurat::CreateSeuratObject(lp_data, project="lp")
lps <- Seurat::CreateSeuratObject(lps_data, project="lps")
ns <- Seurat::CreateSeuratObject(ns_data, project="ns")
all <- merge(la, lp)
all <- merge(all, lps)
all <- merge(all, ns)
## I think this is redundant, but interesting for me to understand sc metadata
cluster_letters <- as.factor(LETTERS[Idents(object=all)])
names(cluster_letters) <- colnames(x=all)
stimulated <- as.character(cluster_letters)
ns_idx <- stimulated == "D"
stimulated[ns_idx] <- "unstimulated"
stimulated[!ns_idx] <- "stimulated"
stimulated <- as.factor(stimulated)
stimulation <- as.character(cluster_letters)
ns_idx <- stimulation == "D"
stimulation[ns_idx] <- "Unstimulated"
la_idx <- stimulation == "A"
stimulation[la_idx] <- "L+Ado"
lp_idx <- stimulation == "B"
stimulation[lp_idx] <- "L+PGE2"
lps_idx <- stimulation == "C"
stimulation[lps_idx] <- "LPS"
stimulation <- as.factor(stimulation)
all <- AddMetaData(
object=all,
metadata=cluster_letters,
col.name="cluster_letters")
all <- AddMetaData(
object=all,
metadata=stimulated,
col.name="stimulatedp")
all <- AddMetaData(
object=all,
metadata=stimulation,
col.name="stimulation")
all[["percent_mt"]] <- PercentageFeatureSet(all, pattern="^MT-")
VlnPlot(all, features="nFeature_RNA", pt.size=0.1)
VlnPlot(all, features="percent_mt", pt.size=0.1)
VlnPlot(all, features="nCount_RNA", pt.size=0.1)
all <- NormalizeData(object=all)
all <- FindVariableFeatures(object=all)
all <- ScaleData(object=all)
all <- RunPCA(object=all)
all <- FindNeighbors(object=all)
all <- FindClusters(object=all)
all <- RunTSNE(object=all)
all <- RunUMAP(all, reduction="pca", dims=1:20)
DimPlot(object=all, reduction="tsne")
plotted <- DimPlot(all, reduction="umap", group.by="cluster_letters", label=TRUE)
plotted
## Try to set some metrics to drop crappy stuff.
FeatureScatter(all, feature1="nCount_RNA", feature2="percent_mt")
## Suggests that we want ~ < 15% mt
## Suggests we want > 1000 nCounts
FeatureScatter(all, feature1="nCount_RNA", feature2="nFeature_RNA")
## Suggests we want > 200 nFeature
FeatureScatter(all, feature1="percent_mt", feature2="nFeature_RNA")
all_sub <- subset(all,
subset=nFeature_RNA > 200 & percent_mt < 15)
all_sub <- NormalizeData(object=all_sub)
all_sub <- FindVariableFeatures(object=all_sub)
most_var <- head(VariableFeatures(all_sub), 30)
variable_plot <- VariableFeaturePlot(all_sub)
variable_plot <- LabelPoints(plot=variable_plot, points=most_var, repel=TRUE)
variable_plot
all_sub <- ScaleData(object=all_sub)
all_sub <- RunPCA(object=all_sub)
VizDimLoadings(all_sub, dims=1:2, reduction="pca")
all_sub <- JackStraw(all_sub, num.replicate=100)
all_sub <- ScoreJackStraw(all_sub, dims=1:20)
JackStrawPlot(all_sub, dims=1:15)
ElbowPlot(all_sub)
all_sub <- FindNeighbors(object=all_sub)
all_sub <- FindClusters(object=all_sub)
all_sub <- RunTSNE(object=all_sub)
all_sub <- RunUMAP(all_sub, reduction="pca", dims=1:20)
DimPlot(object=all_sub, reduction="tsne")
plotted <- DimPlot(all_sub, reduction="umap", group.by="stimulation", label=TRUE)
plotted
plotted <- DimPlot(all_sub, reduction="pca", label=TRUE)
plotted
```

```{r markers}
DefaultAssay(all) <- "RNA"
markers <- FindConservedMarkers(all_sub, ident.1=4, grouping.var="stimulation", verbose=TRUE)
## Note that I renamed them according to Dave's suggestion.
lps_vs_unstim <- FindMarkers(all_sub,
ident.1="LPS",
ident.2="Unstimulated",
group.by="stimulation",
min.pct=0.25)
head(lps_vs_unstim, n=20)
ade_vs_unstim <- FindMarkers(all_sub,
ident.1="L+Ado",
ident.2="Unstimulated",
group.by="stimulation",
min.pct=0.25)
head(ade_vs_unstim, n=20)
pge_vs_unstim <- FindMarkers(all_sub,
ident.1="L+PGE2",
ident.2="Unstimulated",
group.by="stimulation",
min.pct=0.25)
head(pge_vs_unstim, n=20)
ado_lps <- FindMarkers(all_sub,
ident.1="L+Ado",
ident.2="LPS",
group.by="stimulation", min.pct=0.25)
head(ado_lps, n=20)
pge2_lps <- FindMarkers(all_sub,
ident.1="L+PGE2",
ident.2="LPS",
group.by="stimulation", min.pct=0.25)
head(pge2_lps, n=20)
```

```{r features}
FeaturePlot(all_sub, features=c("THBS1"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
FeaturePlot(all_sub, features=c("PLEK"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
FeaturePlot(all_sub, features=c("FNIP2"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
FeaturePlot(all_sub, features=c("G0S2"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
```

## Figure 2A

This looks to me like a bar plot of the top 10 up/down genes. But I have no
clue where these numbers are coming from, a range of -5 < x < 5 logFC just does
not seem to me to exist in this data. The range of logFCs on my sheet goes from
-1.5 < x < 1.7. In addition, I do not see where in the Seurat data structures
the error bars are coming from.

```{r fig2a}
ado_lps <- FindMarkers(all_sub, test.use="DESeq2",
ident.1="L+Ado",
ident.2="LPS",
group.by="stimulation", min.pct=0.25)
```

## Figure 2B

Reading from the text, this appears to me to be from the total cell RNASeq? No,
that cannot be true, the bulk data did not compare these things. It must be
this data.

I did generate a couple of Venns using the scRNA data and got similar numbers in
the opposite orientation.

```{r venn}
ado <- rownames(ado_lps)
ado_up <- ado[ado_lps[["avg_logFC"]] > 0]
ado_down <- ado[ado_lps[["avg_logFC"]] < 0]
pge2 <- rownames(pge2_lps)
pge_up <- pge2[pge2_lps[["avg_logFC"]] > 0]
pge_down <- pge2[pge2_lps[["avg_logFC"]] < 0]
library(Vennerable)
ups <- list(ado_up, pge_up)
up_data <- Venn(ups, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2)
plot(up_data, doWeights=FALSE)
downs <- list(ado_down, pge_down)
v_data <- Venn(downs, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2)
plot(v_data, doWeights=FALSE)
```

## Figure 2C

```{r fig2c}
plotted <- DimPlot(all_sub, reduction="umap", group.by="stimulation", label=TRUE)
plotted
```

## Figure 5A

```{r fig5a}
thbs1 <- VlnPlot(all, features="THBS1", group.by="stimulation", pt.size=0.1)
thbs1
box <- ggplot(data=thbs1$data, aes(x=ident, y=THBS1)) +
geom_boxplot(notch=TRUE)
box
input <- thbs1$data
thbs1_ggstats <- ggstatsplot::ggbetweenstats(
data=input, x=ident, y=THBS1,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
thbs1_ggstats
## pairwise.comparisons=TRUE, # display significant pairwise comparisons
## p.adjust.method="bonferroni", # method for adjusting p-values for multiple comparisons
## ## adding new components to `ggstatsplot` default
## ##ggplot.component = list(ggplot2::scale_y_continuous(sec.axis = ggplot2::dup_axis())),
## k=3, title.prefix="THBS1", palette="default_jama",
## package="ggsci", messages=TRUE, plotgrid.args=list(nrow=2))
vegfa <- VlnPlot(all, features="VEGFA", group.by="stimulation", pt.size=0.1)
vegfa
vegfa_ggstats <- ggstatsplot::ggbetweenstats(
data=vegfa$data, x=ident, y=VEGFA,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
vegfa_ggstats
cd300e <- VlnPlot(all, features="CD300E", group.by="stimulation", pt.size=0.1)
cd300e_ggstats <- ggstatsplot::ggbetweenstats(
data=cd300e$data, x=ident, y=CD300E,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
cd300e_ggstats
plaur <- VlnPlot(all, features="PLAUR", group.by="stimulation", pt.size=0.1)
plaur_ggstats <- ggstatsplot::ggbetweenstats(
data=plaur$data, x=ident, y=PLAUR,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
plaur_ggstats
```

```{r saveme}
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
```

```{r loadme, eval=FALSE}
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
loaded <- loadme(filename=this_save)
```
Loading

0 comments on commit 5cc5716

Please sign in to comment.