Skip to content

Commit

Permalink
updating scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
drneavin committed Aug 31, 2022
1 parent 11c445f commit b6b0ec7
Show file tree
Hide file tree
Showing 47 changed files with 6,874 additions and 103 deletions.
9 changes: 7 additions & 2 deletions Clustering/All_Integrated_QC.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,15 @@ save_figs <- function(plot, basename, width = 17, height = 17, units = "cm"){
seurat <- readRDS(paste0(datadir,"seurat_integrated_all_times_clustered.rds"))
seurat@meta.data$Location <- gsub("_.+", "", seurat@meta.data$Location_Time)
seurat@meta.data$phases <- factor(seurat@meta.data$phases, levels = c("G1", "S", "G2M"))

seurat@meta.data$Village <- gsub("Baseline", "Uni-Culture", seurat@meta.data$Time) %>% gsub("Village Day 4", "Village", .) %>% gsub("Thawed Village Day 0", "Uni-Culture", .) %>% gsub("Thawed Village Day 7", "Village", .)

##### Set up Colors #####
cell_line_colors <- c("FSA0006" = "#F79E29", "MBE1006" = "#9B2C99", "TOB0421"= "#35369C")
replicate_colors <- c("1" = "#ACD39E", "2" = "#5AAE61", "3" = "#1B7837")
time_colors <- c("Baseline" = "#b9cee4", "Village Day 4" = "#8a92bb", "Thawed Village Day 0" = "#7d57a0", "Thawed Village Day 7" = "#853786")
village_colors <- c("Uni-Culture" = "#613246", "Village" = "#A286AA")
cycle_colors <- c("G1" = "#4393C3", "S" = "#92C5DE", "G2M" = "#D1E5F0")
site_colors <- c("Brisbane" = "#5D59AB", "Sydney" = "#A7C9A9", "Melbourne" = "#179085")
site_colors <- c("Brisbane" = "#536CB4", "Sydney" = "#62BD67", "Melbourne" = "#D24F72")
cluster_colors <- c("0" = "#C9D8EA", "1" = "#928CC5", "2" = "#A7C9A9", "3" = "#179085", "4" = "#F79F9F", "5" = "#C35B76", "6" = "#F4C893", "7" = "#F6AA4B")


Expand All @@ -54,6 +55,10 @@ cluster_colors <- c("0" = "#C9D8EA", "1" = "#928CC5", "2" = "#A7C9A9", "3" = "#1
UMAP_cell_line <- DimPlot(seurat, reduction = "umap", group.by = c("integrated_snn_res.0.28"), cols = cluster_colors) + labs(color="Cluster") + ggtitle(NULL)
save_figs(UMAP_cell_line, basename = paste0(outdir,"Cluster_umap"),width = 15, height = 15)

## With Village ##
UMAP_village <- DimPlot(seurat, reduction = "umap", group.by = c("Village"), cols = village_colors) + labs(color="Village") + ggtitle(NULL)
save_figs(UMAP_village, basename = paste0(outdir,"Village_umap"),width = 15, height = 15)

## By Cell Line ##
UMAP_cell_line <- DimPlot(seurat, reduction = "umap", group.by = c("Final_Assignment"), cols = cell_line_colors) + labs(color="Cell Line") + ggtitle(NULL)
save_figs(UMAP_cell_line, basename = paste0(outdir,"Cell_Line_umap"),width = 15, height = 15)
Expand Down
66 changes: 55 additions & 11 deletions Expression_Boxplots/Expression_Boxplots.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ save_figs <- function(plot, basename, width = 17, height = 17, units = "cm"){
##### Set up colors #####
variable_colors <- c(Village = "#A2B0D0", Replicate = "#64A66B", Line = "#68319B")
line_colors <- c(FSA0006 = "#F79E29", MBE1006 = "#9B2C99", TOB0421 = "#35369C")
village_colors <- c(Baseline = "#b9cee4", Village = "#8a92bb" )
village_colors <- c("Uni-Culture" = "#613246", "Village" = "#A286AA")

site_updates <- c("Brisbane" = "Site 1" ,"Melbourne" = "Site 2", "Sydney" = "Site 3")

Expand All @@ -57,7 +57,7 @@ names(seurat_list) <- gsub("_seurat.rds", "", files)

seurat_list <- lapply(seurat_list, function(x){
x$Location <- ifelse(x$Time %in% c("Thawed Village Day 0", "Thawed Village Day 7"), "Sydney_Cryopreserved", x$Location)
x$Time <- ifelse(x$Time %in% c("Village Day 4", "Thawed Village Day 7"), "Village", "Baseline")
x$Time <- ifelse(x$Time %in% c("Village Day 4", "Thawed Village Day 7"), "Village", "Uni-Culture")
for (location in names(site_updates)){
x$Location <- gsub(location, site_updates[location], x$Location)
}
Expand Down Expand Up @@ -455,29 +455,73 @@ for (gene in pluri_genes$Gene){
df_list[[gene]] <- expression_df(seurat_list, pluri_genes[which(pluri_genes$Gene == gene),"ENSG"], c("Location", "Time", "Final_Assignment"))
df_list[[gene]]$Gene <- gene
}
df <- do.call(rbind, df_list)
df <- data.table(do.call(rbind, df_list))

dir.create(paste0(outdir,"pluri_genes/"))


p_counts <- ggplot(df[which(df$Location != "Site 3_Cryopreserved"),], aes(x = Final_Assignment, y = Counts, color = Time)) +
geom_boxplot(outlier.size = 0.5) +


##### Read in significance #####
pluri_deg <- readRDS("/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/Expression_Boxplots/pluri_degs/LR_DEGs_4pluri_genes.rds")


pluri_deg <- lapply(names(pluri_deg), function(x){
strings <- unlist(str_split(x, "_"))
pluri_deg[[x]]$Location <- gsub("Brisbane", "Site 1", strings[1]) %>% gsub("Melbourne", "Site 2", .) %>% gsub("Sydney", "Site 3", .)
pluri_deg[[x]]$Cryopreservation <- strings[2]
pluri_deg[[x]]$Final_Assignment <- strings[3]
return(pluri_deg[[x]])
})

pluri_deg_dt <- do.call(rbind,pluri_deg)
colnames(pluri_deg_dt) <- gsub("GeneID", "Gene", colnames(pluri_deg_dt))
pluri_deg_dt$Symbol <- "*"
pluri_deg_dt$counts_position <- ifelse(pluri_deg_dt$Gene == "MYC", 26,
ifelse(pluri_deg_dt$Gene == "NANOG", 22,
ifelse(pluri_deg_dt$Gene == "POU5F1", 115, 60)))

df$Cryopreservation <- ifelse(df$Location != "3_Cryopreserved", "Fresh", "Cryopreserved")
df$Location <- gsub("_Cryopreserved", "", df$Location)


p_counts <- ggplot(df[Cryopreservation != "Cryopreserved"], aes(x = Final_Assignment, y = Counts, color = Time)) +
geom_boxplot(aes(fill = Time), outlier.size = 0.5) +
theme_classic() +
facet_grid(Gene ~ Location, scales = "free_y") +
scale_color_manual(values = village_colors) +
scale_fill_manual(values = alpha(village_colors, 0.3)) +
theme(legend.position = "none",
axis.title.x = element_blank())
save_figs(p_counts, paste0(outdir,"pluri_genes/pluri_counts"), width = 16, height = 14)
axis.title.x = element_blank()) +
geom_text(
data = pluri_deg_dt[Cryopreservation != "Cryopreserved"],
aes(x = Final_Assignment, y = counts_position,label = Symbol),
color = "black",
size = 4) +
facet_grid(Gene ~ Location, scales = "free_y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

save_figs(p_counts, paste0(outdir,"pluri_genes/pluri_counts"), width = 16, height = 16)


p_scaled <- ggplot(df[which(df$Location != "Site 3_Cryopreserved"),], aes(x = Final_Assignment, y = Normalized, color = Time)) +
geom_boxplot(outlier.size = 0.5) +

pluri_deg_dt$counts_position <- ifelse(pluri_deg_dt$Gene == "MYC", 15.5,
ifelse(pluri_deg_dt$Gene == "NANOG", 10,
ifelse(pluri_deg_dt$Gene == "POU5F1", 9, 12)))

p_scaled <- ggplot(df[Location != "Cryopreserved"], aes(x = Final_Assignment, y = Normalized, color = Time)) +
geom_boxplot(aes(fill = Time),outlier.size = 0.15, lwd=0.3) +
theme_classic() +
facet_grid(Gene ~ Location, scales = "free_y") +
scale_color_manual(values = village_colors) +
scale_fill_manual(values = alpha(village_colors, 0.4)) +
theme(axis.title.x = element_blank()) +
ylab("Normalized Expression") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_text(
data = pluri_deg_dt[Cryopreservation != "Cryopreserved"],
aes(x = Final_Assignment, y = counts_position,label = Symbol),
color = "black",
size = 3)
save_figs(p_scaled, paste0(outdir,"pluri_genes/pluri_normalized"), width = 15, height = 10)


Expand Down
128 changes: 128 additions & 0 deletions Expression_Boxplots/pluripotency_deg_boxplots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
library(data.table)
library(tidyverse)
library(Seurat)




outdir <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/Expression_Boxplots/pluri_degs/"
dir.create(outdir, recursive = TRUE)


pluri_genes <- fread("/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/data/pluripotency_genes.tsv")

seurat <- readRDS("/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/All_data_integrated_remove_bad/seurat_integrated_all_times_clustered.rds")
seurat@meta.data$Location <- gsub("_.+", "", seurat@meta.data$Location)
seurat@meta.data$Cryopreserved <- ifelse(grepl("Thawed", seurat@meta.data$Location_Time), "Cryopreserved", "Fresh")
seurat@meta.data$Location_Cryopreserved_Line <- paste0(seurat@meta.data$Location, "_", seurat@meta.data$Cryopreserved, "_", seurat@meta.data$Final_Assignment)
seurat@meta.data$Village <- gsub("Baseline", "Uni-Culture", seurat@meta.data$Time) %>%
gsub("Thawed Village Day 0", "Uni-Culture", .) %>%
gsub("Thawed Village Day 7", "Village", .) %>%
gsub("Village Day 4", "Village", .)


##### Combine replicates from same groups together #####
seurat_list <- lapply(unique(seurat@meta.data$Location_Cryopreserved_Line), function(x){
subset(seurat, subset = Location_Cryopreserved_Line == x)
})
names(seurat_list) <- unique(seurat@meta.data$Location_Cryopreserved_Line)



seurat_list <- lapply(seurat_list, function(x){
Idents(x) <- "Village"
return(x)
})



seurat_list <- lapply(seurat_list, function(x){
SCTransform(x, vars.to.regress = c("scores.G1", "scores.S", "scores.G2M", "percent.mt", "percent.rb"), return.only.var.genes = FALSE)
})

seurat_list <- lapply(seurat_list, function(x){
PrepSCTFindMarkers(x)
})

saveRDS(seurat_list, paste0(outdir, "sct_location_cryo_line.rds"))
seurat_list <- readRDS(paste0(outdir, "sct_location_cryo_line.rds"))



##### Combine Village and Baselines together #####
### Try with logistic MAST ###
MAST_DEGs <- lapply(seurat_list, function(x){
FindMarkers(x, ident.1 = "Uni-Culture", ident.2 = "Village", latent.vars = "MULTI_classification", test.use = "MAST", logfc.threshold = 0, assay = "RNA")
})

saveRDS(MAST_DEGs, paste0(outdir, "MAST_DEGs.rds"))
MAST_DEGs <- readRDS(paste0(outdir, "MAST_DEGs.rds"))


### Update multiple testing for all sites ###
MAST_DEGs_nrow_list <- lapply(MAST_DEGs, function(x) data.table(nrow(x)))
MAST_DEGs_nrow <- sum(do.call(rbind, MAST_DEGs_nrow_list)$V1)

MAST_DEGs <- lapply(MAST_DEGs, function(x){
x$p_val_adj_updated <- p.adjust(x$p_val, method = "bonferroni", n = MAST_DEGs_nrow)
return(x)
})


MAST_DEGs_pluri <- lapply(MAST_DEGs, function(x){
x$ENSG <- rownames(x)
x <- data.table(x)
x <- x[pluri_genes, on = c("ENSG")]
x <- x[!is.na(p_val) & p_val_adj_updated < 0.05]
return(x)
})

MAST_DEGs_pluri <- lapply(MAST_DEGs, function(x){
x$ENSG <- rownames(x)
x <- data.table(x)
x <- x[pluri_genes, on = c("ENSG")]
x <- x[!is.na(p_val) & p_val_adj < 0.05]
return(x)
})

MAST_DEGs_pluri_4 <- lapply(MAST_DEGs_pluri, function(x){
x[GeneID %in% c("MYC", "NANOG", "POU5F1", "SOX2")]
})


### Try with logistic regression ###
LR_DEGs <- lapply(seurat_list, function(x){
FindMarkers(x, ident.1 = "Uni-Culture", ident.2 = "Village", latent.vars = "MULTI_classification", test.use = "LR", logfc.threshold = 0)
})

saveRDS(LR_DEGs, paste0(outdir, "LR_DEGs.rds"))
##### Will use LR instead of MAST - MAST finds more DEG and lower p-values which I'm not sure is legitimate #####


### Update multiple testing for all sites ###
LR_DEGs_nrow_list <- lapply(LR_DEGs, function(x) data.table(nrow(x)))
LR_DEGs_nrow <- sum(do.call(rbind, LR_DEGs_nrow_list)$V1)

LR_DEGs <- lapply(LR_DEGs, function(x){
x$p_val_adj_updated <- p.adjust(x$p_val, method = "bonferroni", n = LR_DEGs_nrow)
return(x)
})


LR_DEGs_pluri <- lapply(LR_DEGs, function(x){
x$ENSG <- rownames(x)
x <- data.table(x)
x <- x[pluri_genes, on = c("ENSG")]
x <- x[!is.na(p_val) & p_val_adj_updated < 0.05]
return(x)
})


LR_DEGs_pluri_4 <- lapply(LR_DEGs_pluri, function(x){
x[GeneID %in% c("MYC", "NANOG", "POU5F1", "SOX2")]
})



saveRDS(LR_DEGs_pluri_4, paste0(outdir, "LR_DEGs_4pluri_genes.rds"))

Loading

0 comments on commit b6b0ec7

Please sign in to comment.