-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from abelew/master
initial commit
- Loading branch information
Showing
52 changed files
with
28,107 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
HPGL Identifier Stimulation Patient Batch Growth | ||
HPGL0912 NS H1 A M | ||
HPGL0913 NS H2 B M | ||
HPGL0914 NS H3 C M | ||
HPGL0915 NS H4 D M | ||
HPGL0916 NS H5 E M | ||
HPGL0917 LPS H1 A M | ||
HPGL0918 LPS H2 B M | ||
HPGL0919 LPS H3 C M | ||
HPGL0920 LPS H4 D M | ||
HPGL0921 LPS H5 E M | ||
HPGL0922 LA H1 A M | ||
HPGL0923 LA H2 B M | ||
HPGL0924 LA H3 C M | ||
HPGL0925 LA H4 D M | ||
HPGL0926 LA H5 E M | ||
HPGL0927 LP H1 A M | ||
HPGL0928 LP H2 B M | ||
HPGL0929 LP H3 F M | ||
HPGL0930 LP H4 D M | ||
HPGL0931 LP H5 E M | ||
HPGL0942 NS H1 A GM | ||
HPGL0943 NS H3 F GM | ||
HPGL0944 NS H4 D GM | ||
HPGL0945 NS H5 E GM | ||
HPGL0946 LPS H1 A GM | ||
HPGL0947 LPS H2 C GM | ||
HPGL0948 LPS H3 F GM | ||
HPGL0949 LPS H4 D GM | ||
HPGL0950 LPS H5 E GM | ||
HPGL0951 LA H1 B GM | ||
HPGL0952 LA H2 C GM | ||
HPGL0953 LA H3 F GM | ||
HPGL0954 LA H4 D GM | ||
HPGL0955 LA H5 E GM | ||
HPGL0956 LP H1 B GM | ||
HPGL0957 LP H2 C GM | ||
HPGL0958 LP H3 F GM | ||
HPGL0959 LP H4 E GM | ||
HPGL0960 LP H5 G GM |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,340 @@ | ||
--- | ||
title: "Experimenting with some scRNASeq data." | ||
author: "atb [email protected]" | ||
date: "`r Sys.Date()`" | ||
output: | ||
html_document: | ||
code_download: true | ||
code_folding: show | ||
fig_caption: true | ||
fig_height: 7 | ||
fig_width: 7 | ||
highlight: tango | ||
keep_md: false | ||
mode: selfcontained | ||
number_sections: true | ||
self_contained: true | ||
theme: readable | ||
toc: true | ||
toc_float: | ||
collapsed: false | ||
smooth_scroll: false | ||
rmdformats::readthedown: | ||
code_download: true | ||
code_folding: show | ||
df_print: paged | ||
fig_caption: true | ||
fig_height: 7 | ||
fig_width: 7 | ||
highlight: tango | ||
width: 300 | ||
keep_md: false | ||
mode: selfcontained | ||
toc_float: true | ||
BiocStyle::html_document: | ||
code_download: true | ||
code_folding: show | ||
fig_caption: true | ||
fig_height: 7 | ||
fig_width: 7 | ||
highlight: tango | ||
keep_md: false | ||
mode: selfcontained | ||
toc_float: true | ||
--- | ||
|
||
<style type="text/css"> | ||
body, td { | ||
font-size: 16px; | ||
} | ||
code.r{ | ||
font-size: 16px; | ||
} | ||
pre { | ||
font-size: 16px | ||
} | ||
</style> | ||
|
||
```{r options, include=FALSE} | ||
library("hpgltools") | ||
tt <- devtools::load_all("/data/hpgltools") | ||
knitr::opts_knit$set(width=120, | ||
progress=TRUE, | ||
verbose=TRUE, | ||
echo=TRUE) | ||
knitr::opts_chunk$set(error=TRUE, | ||
dpi=96) | ||
old_options <- options(digits=4, | ||
stringsAsFactors=FALSE, | ||
knitr.duplicate.label="allow") | ||
ggplot2::theme_set(ggplot2::theme_bw(base_size=10)) | ||
rundate <- format(Sys.Date(), format="%Y%m%d") | ||
previous_file <- "index.Rmd" | ||
ver <- "202004" | ||
##tmp <- sm(loadme(filename=paste0(gsub(pattern="\\.Rmd", replace="", x=previous_file), "-v", ver, ".rda.xz"))) | ||
rmd_file <- "scrnaseq_analyses.Rmd" | ||
``` | ||
|
||
# Testing seurat | ||
|
||
```{r seurat_loading} | ||
library(Seurat) | ||
library(ggplot2) | ||
## library(SeuratData) | ||
## The Seurat vignettes show how to load hdf5 files and the metadata required. | ||
## They are using a pacreas dataset, I presume it is the 'panc8' data in SeuratData. | ||
## Thus I will poke at that to see if I can learn about how they handle the metadata. | ||
la_data <- Seurat::Read10X("khamidza_158974/KH_LA/outs/filtered_feature_bc_matrix") | ||
lp_data <- Seurat::Read10X("khamidza_158974/KH_LP/outs/filtered_feature_bc_matrix") | ||
lps_data <- Seurat::Read10X("khamidza_158974/KH_LPS/outs/filtered_feature_bc_matrix") | ||
ns_data <- Seurat::Read10X("khamidza_158974/KH_NS/outs/filtered_feature_bc_matrix") | ||
la <- Seurat::CreateSeuratObject(la_data, project="la") | ||
lp <- Seurat::CreateSeuratObject(lp_data, project="lp") | ||
lps <- Seurat::CreateSeuratObject(lps_data, project="lps") | ||
ns <- Seurat::CreateSeuratObject(ns_data, project="ns") | ||
all <- merge(la, lp) | ||
all <- merge(all, lps) | ||
all <- merge(all, ns) | ||
## I think this is redundant, but interesting for me to understand sc metadata | ||
cluster_letters <- as.factor(LETTERS[Idents(object=all)]) | ||
names(cluster_letters) <- colnames(x=all) | ||
stimulated <- as.character(cluster_letters) | ||
ns_idx <- stimulated == "D" | ||
stimulated[ns_idx] <- "unstimulated" | ||
stimulated[!ns_idx] <- "stimulated" | ||
stimulated <- as.factor(stimulated) | ||
stimulation <- as.character(cluster_letters) | ||
ns_idx <- stimulation == "D" | ||
stimulation[ns_idx] <- "Unstimulated" | ||
la_idx <- stimulation == "A" | ||
stimulation[la_idx] <- "L+Ado" | ||
lp_idx <- stimulation == "B" | ||
stimulation[lp_idx] <- "L+PGE2" | ||
lps_idx <- stimulation == "C" | ||
stimulation[lps_idx] <- "LPS" | ||
stimulation <- as.factor(stimulation) | ||
all <- AddMetaData( | ||
object=all, | ||
metadata=cluster_letters, | ||
col.name="cluster_letters") | ||
all <- AddMetaData( | ||
object=all, | ||
metadata=stimulated, | ||
col.name="stimulatedp") | ||
all <- AddMetaData( | ||
object=all, | ||
metadata=stimulation, | ||
col.name="stimulation") | ||
all[["percent_mt"]] <- PercentageFeatureSet(all, pattern="^MT-") | ||
VlnPlot(all, features="nFeature_RNA", pt.size=0.1) | ||
VlnPlot(all, features="percent_mt", pt.size=0.1) | ||
VlnPlot(all, features="nCount_RNA", pt.size=0.1) | ||
all <- NormalizeData(object=all) | ||
all <- FindVariableFeatures(object=all) | ||
all <- ScaleData(object=all) | ||
all <- RunPCA(object=all) | ||
all <- FindNeighbors(object=all) | ||
all <- FindClusters(object=all) | ||
all <- RunTSNE(object=all) | ||
all <- RunUMAP(all, reduction="pca", dims=1:20) | ||
DimPlot(object=all, reduction="tsne") | ||
plotted <- DimPlot(all, reduction="umap", group.by="cluster_letters", label=TRUE) | ||
plotted | ||
## Try to set some metrics to drop crappy stuff. | ||
FeatureScatter(all, feature1="nCount_RNA", feature2="percent_mt") | ||
## Suggests that we want ~ < 15% mt | ||
## Suggests we want > 1000 nCounts | ||
FeatureScatter(all, feature1="nCount_RNA", feature2="nFeature_RNA") | ||
## Suggests we want > 200 nFeature | ||
FeatureScatter(all, feature1="percent_mt", feature2="nFeature_RNA") | ||
all_sub <- subset(all, | ||
subset=nFeature_RNA > 200 & percent_mt < 15) | ||
all_sub <- NormalizeData(object=all_sub) | ||
all_sub <- FindVariableFeatures(object=all_sub) | ||
most_var <- head(VariableFeatures(all_sub), 30) | ||
variable_plot <- VariableFeaturePlot(all_sub) | ||
variable_plot <- LabelPoints(plot=variable_plot, points=most_var, repel=TRUE) | ||
variable_plot | ||
all_sub <- ScaleData(object=all_sub) | ||
all_sub <- RunPCA(object=all_sub) | ||
VizDimLoadings(all_sub, dims=1:2, reduction="pca") | ||
all_sub <- JackStraw(all_sub, num.replicate=100) | ||
all_sub <- ScoreJackStraw(all_sub, dims=1:20) | ||
JackStrawPlot(all_sub, dims=1:15) | ||
ElbowPlot(all_sub) | ||
all_sub <- FindNeighbors(object=all_sub) | ||
all_sub <- FindClusters(object=all_sub) | ||
all_sub <- RunTSNE(object=all_sub) | ||
all_sub <- RunUMAP(all_sub, reduction="pca", dims=1:20) | ||
DimPlot(object=all_sub, reduction="tsne") | ||
plotted <- DimPlot(all_sub, reduction="umap", group.by="stimulation", label=TRUE) | ||
plotted | ||
plotted <- DimPlot(all_sub, reduction="pca", label=TRUE) | ||
plotted | ||
``` | ||
|
||
```{r markers} | ||
DefaultAssay(all) <- "RNA" | ||
markers <- FindConservedMarkers(all_sub, ident.1=4, grouping.var="stimulation", verbose=TRUE) | ||
## Note that I renamed them according to Dave's suggestion. | ||
lps_vs_unstim <- FindMarkers(all_sub, | ||
ident.1="LPS", | ||
ident.2="Unstimulated", | ||
group.by="stimulation", | ||
min.pct=0.25) | ||
head(lps_vs_unstim, n=20) | ||
ade_vs_unstim <- FindMarkers(all_sub, | ||
ident.1="L+Ado", | ||
ident.2="Unstimulated", | ||
group.by="stimulation", | ||
min.pct=0.25) | ||
head(ade_vs_unstim, n=20) | ||
pge_vs_unstim <- FindMarkers(all_sub, | ||
ident.1="L+PGE2", | ||
ident.2="Unstimulated", | ||
group.by="stimulation", | ||
min.pct=0.25) | ||
head(pge_vs_unstim, n=20) | ||
ado_lps <- FindMarkers(all_sub, | ||
ident.1="L+Ado", | ||
ident.2="LPS", | ||
group.by="stimulation", min.pct=0.25) | ||
head(ado_lps, n=20) | ||
pge2_lps <- FindMarkers(all_sub, | ||
ident.1="L+PGE2", | ||
ident.2="LPS", | ||
group.by="stimulation", min.pct=0.25) | ||
head(pge2_lps, n=20) | ||
``` | ||
|
||
```{r features} | ||
FeaturePlot(all_sub, features=c("THBS1"), | ||
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred")) | ||
FeaturePlot(all_sub, features=c("PLEK"), | ||
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred")) | ||
FeaturePlot(all_sub, features=c("FNIP2"), | ||
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred")) | ||
FeaturePlot(all_sub, features=c("G0S2"), | ||
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred")) | ||
``` | ||
|
||
## Figure 2A | ||
|
||
This looks to me like a bar plot of the top 10 up/down genes. But I have no | ||
clue where these numbers are coming from, a range of -5 < x < 5 logFC just does | ||
not seem to me to exist in this data. The range of logFCs on my sheet goes from | ||
-1.5 < x < 1.7. In addition, I do not see where in the Seurat data structures | ||
the error bars are coming from. | ||
|
||
```{r fig2a} | ||
ado_lps <- FindMarkers(all_sub, test.use="DESeq2", | ||
ident.1="L+Ado", | ||
ident.2="LPS", | ||
group.by="stimulation", min.pct=0.25) | ||
``` | ||
|
||
## Figure 2B | ||
|
||
Reading from the text, this appears to me to be from the total cell RNASeq? No, | ||
that cannot be true, the bulk data did not compare these things. It must be | ||
this data. | ||
|
||
I did generate a couple of Venns using the scRNA data and got similar numbers in | ||
the opposite orientation. | ||
|
||
```{r venn} | ||
ado <- rownames(ado_lps) | ||
ado_up <- ado[ado_lps[["avg_logFC"]] > 0] | ||
ado_down <- ado[ado_lps[["avg_logFC"]] < 0] | ||
pge2 <- rownames(pge2_lps) | ||
pge_up <- pge2[pge2_lps[["avg_logFC"]] > 0] | ||
pge_down <- pge2[pge2_lps[["avg_logFC"]] < 0] | ||
library(Vennerable) | ||
ups <- list(ado_up, pge_up) | ||
up_data <- Venn(ups, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2) | ||
plot(up_data, doWeights=FALSE) | ||
downs <- list(ado_down, pge_down) | ||
v_data <- Venn(downs, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2) | ||
plot(v_data, doWeights=FALSE) | ||
``` | ||
|
||
## Figure 2C | ||
|
||
```{r fig2c} | ||
plotted <- DimPlot(all_sub, reduction="umap", group.by="stimulation", label=TRUE) | ||
plotted | ||
``` | ||
|
||
## Figure 5A | ||
|
||
```{r fig5a} | ||
thbs1 <- VlnPlot(all, features="THBS1", group.by="stimulation", pt.size=0.1) | ||
thbs1 | ||
box <- ggplot(data=thbs1$data, aes(x=ident, y=THBS1)) + | ||
geom_boxplot(notch=TRUE) | ||
box | ||
input <- thbs1$data | ||
thbs1_ggstats <- ggstatsplot::ggbetweenstats( | ||
data=input, x=ident, y=THBS1, | ||
notch=TRUE, mean.ci=TRUE, k=3, | ||
pairwise.comparisons=TRUE) | ||
thbs1_ggstats | ||
## pairwise.comparisons=TRUE, # display significant pairwise comparisons | ||
## p.adjust.method="bonferroni", # method for adjusting p-values for multiple comparisons | ||
## ## adding new components to `ggstatsplot` default | ||
## ##ggplot.component = list(ggplot2::scale_y_continuous(sec.axis = ggplot2::dup_axis())), | ||
## k=3, title.prefix="THBS1", palette="default_jama", | ||
## package="ggsci", messages=TRUE, plotgrid.args=list(nrow=2)) | ||
vegfa <- VlnPlot(all, features="VEGFA", group.by="stimulation", pt.size=0.1) | ||
vegfa | ||
vegfa_ggstats <- ggstatsplot::ggbetweenstats( | ||
data=vegfa$data, x=ident, y=VEGFA, | ||
notch=TRUE, mean.ci=TRUE, k=3, | ||
pairwise.comparisons=TRUE) | ||
vegfa_ggstats | ||
cd300e <- VlnPlot(all, features="CD300E", group.by="stimulation", pt.size=0.1) | ||
cd300e_ggstats <- ggstatsplot::ggbetweenstats( | ||
data=cd300e$data, x=ident, y=CD300E, | ||
notch=TRUE, mean.ci=TRUE, k=3, | ||
pairwise.comparisons=TRUE) | ||
cd300e_ggstats | ||
plaur <- VlnPlot(all, features="PLAUR", group.by="stimulation", pt.size=0.1) | ||
plaur_ggstats <- ggstatsplot::ggbetweenstats( | ||
data=plaur$data, x=ident, y=PLAUR, | ||
notch=TRUE, mean.ci=TRUE, k=3, | ||
pairwise.comparisons=TRUE) | ||
plaur_ggstats | ||
``` | ||
|
||
```{r saveme} | ||
pander::pander(sessionInfo()) | ||
message(paste0("This is hpgltools commit: ", get_git_commit())) | ||
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz") | ||
message(paste0("Saving to ", this_save)) | ||
tmp <- sm(saveme(filename=this_save)) | ||
``` | ||
|
||
```{r loadme, eval=FALSE} | ||
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz") | ||
loaded <- loadme(filename=this_save) | ||
``` |
Oops, something went wrong.