Skip to content

Commit

Permalink
general updates
Browse files Browse the repository at this point in the history
  • Loading branch information
drneavin committed Jun 9, 2022
1 parent d19889f commit 11c445f
Show file tree
Hide file tree
Showing 46 changed files with 44,797 additions and 12 deletions.
35 changes: 23 additions & 12 deletions Nona_multiome/cell_line_proportions.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,39 @@ library("ggpomological")

##### Set up directories #####
datadir <- "/directflow/SCCGGroupShare/projects/himaro/imputing_snp/demultiplexing/demultiplex_Nona/processed_data_demultiplex2/log_dir/"
non_dir <- "/directflow/SCCGGroupShare/projects/nonfar/analysis/cardiac_multiome_directflow/demux_obj/"
outdir <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/Nona_multiome/"

dir.create(outdir, recursive = TRUE)


##### Get a list of the village pools #####
villages <- list.files(datadir, pattern = "Village")
villages <- list.files(non_dir, pattern = "Village")
villages <- grep("DemuxALL", villages, value = TRUE)



##### Get the singlets from the file #####
# Build one metadata table per village pool file found in `non_dir`.
# For each file name in `villages`: load the saved object, copy its
# `meta.data` slot into a data.table, and tag every row with the pool ID
# (file name without the "_DemuxALL.rds" suffix) and the numeric day parsed
# from that pool ID. Returns a list of data.tables, one per pool.
village_id_list <- lapply(villages, function(x) {
  print(x)
  # Previous input (per-pool demultiplexing text file), kept for reference:
  # tmp <- fread(paste0(datadir,x,"/CombinedResults/Final_Assignments_demultiplexing_doublets_new_edit.txt"), sep = "\t")
  tmp <- readRDS(paste0(non_dir, x))
  dt <- data.table(tmp@meta.data)
  # fixed = TRUE strips the literal suffix (the "." is not a regex wildcard).
  dt$Pool_ID <- gsub("_DemuxALL.rds", "", x, fixed = TRUE)
  dt$Day <- as.numeric(as.character(gsub("Village_Day", "", dt$Pool_ID)))
  return(dt)
})

village_id <- do.call(rbind, village_id_list)

village_id$Pool_ID_updated <- gsub("Day7$", "Day5b", village_id$Pool_ID) %>%
gsub("Day5$", "Day7b", .) %>%
gsub("Day15$", "Day4b", .) %>%
gsub("Day4$", "Day15b", .) %>%
gsub("b", "", .)

village_id$Day_updated <- as.numeric(as.character(gsub("Village_Day", "", village_id$Pool_ID_updated)))

### Update columns ###
village_id$Day <- factor(village_id$Day, levels = c(0,1,2,3,4,5,7,15))
village_id$Assignment <- gsub("^0_", "", village_id$Assignment) %>%
Expand All @@ -47,17 +58,17 @@ village_id$Assignment <- gsub("^0_", "", village_id$Assignment) %>%

village_id$DropletType <- ifelse(village_id$DropletType != "singlet", "doublet", village_id$DropletType)

data.table(prop.table(table(village_id[,c("DropletType", "Day")]), margin = 2))
data.table(prop.table(table(village_id[,c("DropletType", "Day_updated")]), margin = 2))

village_summary <- data.table(prop.table(table(village_id[,c("Assignment", "Day")]), margin = 2))
village_summary <- data.table(prop.table(table(village_id[,c("Assignment", "Day_updated")]), margin = 2))

village_summary_singlets <- data.table(prop.table(table(village_id[Assignment != "unassigned" & Assignment != "doublet",c("Assignment", "Day")]), margin = 2))
village_summary_singlets <- data.table(prop.table(table(village_id[Assignment != "unassigned" & Assignment != "doublet",c("Assignment", "Day_updated")]), margin = 2))

village_summary_singlets$Assignment <- factor(village_summary_singlets$Assignment, levels = rev(village_summary_singlets[Day == 15]$Assignment[order(village_summary_singlets[Day == 15]$N)]))
village_summary_singlets$Assignment <- factor(village_summary_singlets$Assignment, levels = rev(village_summary_singlets[Day_updated == 15]$Assignment[order(village_summary_singlets[Day_updated == 15]$N)]))


##### Make proportion plots (area plot) #####
p_stacked_area <- ggplot(village_summary_singlets, aes(x = as.numeric(as.character(Day)), y = N, fill = factor(Assignment), group = Assignment)) +
p_stacked_area <- ggplot(village_summary_singlets, aes(x = as.numeric(as.character(Day_updated)), y = N, fill = factor(Assignment), group = Assignment)) +
geom_area(alpha=0.6 , size=0.5, colour="black") +
theme_classic() +
scale_fill_manual(values = c("#f44336", "#e81f63", "#9c27b0", "#673ab7", "#3f51b5", "#2096f3","#2096f3", "#009688", "#4caf50", "#8bc34a", "#cddc39", "#ffeb3b", "#ffc108", "#ff9801", "#ff5723" ,"#795548", "#9e9e9e", "#607d8b")) +
Expand All @@ -70,7 +81,7 @@ ggsave(p_stacked_area, filename = paste0(outdir,"stacked_area.pdf"), width = 7,


##### Make line plot of proportion over time #####
p_line <- ggplot(village_summary_singlets, aes(x = as.numeric(as.character(Day)), y = N, color = Assignment)) +
p_line <- ggplot(village_summary_singlets, aes(x = as.numeric(as.character(Day_updated)), y = N, color = Assignment)) +
geom_point() +
theme_classic() +
geom_line() +
Expand Down
44 changes: 44 additions & 0 deletions Variance/RNAvelocity/post_review/prepare_pseudotime.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
library(tidyverse)
library(haven)
library(ggplot2)
library(lme4)
library(glmmTMB)
library(Seurat)
library(data.table)




# Project root; the trailing slash is relied on by the paste0() calls below.
dir <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/"

# Output directory for the pseudotime-annotated object (created with any
# missing parent directories).
outdir <- paste0(dir, "output/Variance/RNAvelocity/post_review/data/")
dir.create(path = outdir, recursive = TRUE)



##### Read in Data #####
### seurat object ###
seurat_rds_path <- paste0(dir, "output/Distribution_tests/seurat_integrated_all_times_clustered_1pct_expressing.rds")
seurat <- readRDS(file = seurat_rds_path)

### velocity metadata ###
# fread() returns a data.table and names the unnamed first CSV column "V1".
# NOTE(review): V1 presumably holds the cell barcodes that match the Seurat
# cell names — confirm against the scVelo export.
velo_meta <- fread("/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/scVelo/velocyto/scvelo_combat_corrected_all2/metadata.csv", sep = ",")
velo_cell_ids <- velo_meta$V1
velo_meta$V1 <- NULL

# data.table does not support row names (subsetting resets them), so the two
# columns of interest are converted to a base data.frame before the IDs are
# attached as row names. AddMetaData() is given this row-named data.frame.
velo_meta_sub <- as.data.frame(velo_meta[, c("n_unspliced_counts", "latent_time"), with = FALSE])
rownames(velo_meta_sub) <- velo_cell_ids

seurat <- AddMetaData(seurat, velo_meta_sub)


# Collapse Location labels on the full object: drop the "_Baseline" suffix,
# drop everything from "_Village" onward, and relabel "Thawed" as
# "Cryopreserved".
seurat@meta.data$Location <- gsub(
  "Thawed", "Cryopreserved",
  gsub("_Village.+", "", gsub("_Baseline", "", seurat@meta.data$Location))
)

# Keep only cells with latent_time >= 0.
# NOTE(review): presumably this also drops cells whose latent_time is NA
# (hence the "_noNA" name) — confirm.
seurat_noNA <- subset(seurat, subset = latent_time >= 0)


# Same Location clean-up applied to the subset object.
seurat_noNA@meta.data$Location <- gsub(
  "Thawed", "Cryopreserved",
  gsub("_Village.+", "", gsub("_Baseline", "", seurat_noNA@meta.data$Location))
)

# Harmonise Time labels: thawed day 0 -> "Baseline", thawed day 7 -> "Village",
# then strip any remaining " Day 4" suffix.
seurat_noNA@meta.data$Time <- gsub(
  " Day 4", "",
  gsub(
    "Thawed Village Day 7", "Village",
    gsub("Thawed Village Day 0", "Baseline", seurat_noNA@meta.data$Time)
  )
)

# Flag cryopreserved vs fresh cells from the Location label, then remove the
# "_Cryopreserved" suffix from Location.
seurat_noNA@meta.data$Cryopreserved <- ifelse(
  seurat_noNA@meta.data$Location == "Sydney_Cryopreserved",
  "Cryopreserved",
  "Fresh"
)
seurat_noNA@meta.data$Location <- gsub("_Cryopreserved", "", seurat_noNA@meta.data$Location)


saveRDS(seurat_noNA, file = paste0(outdir, "seurat_integrated_all_times_clustered_1pct_expressing_pseudotime.rds"))
103 changes: 103 additions & 0 deletions Variance/RNAvelocity/post_review/pseudotime_effect.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
library(haven)
library(ggplot2)
library(glmmTMB)
library(Seurat)
library(tidyverse)
library(specr)
library(data.table)
library(dsLib)
library(pkgcond)
library(texreg)


# NOTE(review): inicio() is not defined in this file; presumably it is provided
# by dsLib (loaded above) and logs/starts timing for the run — confirm.
inicio("Starting Analysis")



##### Define functions #####
# Partition the variance of a fitted model into its random-effect groups and
# the residual.
#
# model:   fitted model for which VarCorr(model)$cond and sigma(model) are
#          available (used here with glmmTMB fits).
# percent: if TRUE (default), add a `percent` column equal to icc * 100.
#
# Returns a table with one row per random-effect standard deviation plus a
# "Residual" row, with columns `grp`, `vcov` (variance), `icc` (share of the
# summed variance) and, optionally, `percent`.
icc_glmmtmb <- function(model, percent = TRUE) {
  cond_varcorr <- VarCorr(model)$cond

  # Variance of each random-effect group = squared "stddev" attribute.
  group_rows <- lapply(names(cond_varcorr), function(grp_name) {
    data.table("grp" = grp_name, "vcov" = attr(cond_varcorr[[grp_name]], "stddev")^2)
  })

  variance_tbl <- rbind(
    do.call(rbind, group_rows),
    data.table("grp" = "Residual", "vcov" = sigma(model)^2)
  )

  total_variance <- sum(variance_tbl$vcov)
  variance_tbl <- dplyr::mutate(variance_tbl, icc = vcov / total_variance)

  if (isTRUE(percent)) {
    variance_tbl <- dplyr::mutate(variance_tbl, percent = .data$icc * 100)
  }
  return(variance_tbl)
}



##### Bring in variables #####
### Bring in arguments
# Positional command-line arguments, in order:
#   1 icc_interaction_outdir, 2 icc_outdir, 3 model_interaction_outdir,
#   4 model_outdir, 5 resid_outdir, 6 gene
args <- commandArgs(trailingOnly = TRUE)

# Fail early: a missing argument would otherwise become the string "NA".
if (length(args) < 6) {
  stop(
    "Expected 6 arguments (icc_interaction_outdir, icc_outdir, ",
    "model_interaction_outdir, model_outdir, resid_outdir, gene) but got ",
    length(args),
    call. = FALSE
  )
}

icc_interaction_outdir <- paste0(args[1])
icc_outdir <- paste0(args[2])
model_interaction_outdir <- paste0(args[3])
model_outdir <- paste0(args[4])
resid_outdir <- paste0(args[5])
gene <- as.character(args[6])

# Echo every argument to the job log (previously icc_outdir was printed twice
# and the two interaction directories were never shown).
print(icc_interaction_outdir)
print(icc_outdir)
print(model_interaction_outdir)
print(model_outdir)
print(resid_outdir)
print(gene)



##### Read in data #####
### Seurat object with normalized data and covariates needed ###
seurat_rds_path <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/Variance/RNAvelocity/post_review/data/seurat_integrated_all_times_clustered_1pct_expressing_pseudotime.rds"
seurat <- readRDS(file = seurat_rds_path)

### Data frame of ICC summaries, used to look up which variables need to be fit for each gene ###
icc_summary_path <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/variance_partitioning_all_cells/combined/sig_results.tsv.gz"
icc_summary <- fread(icc_summary_path, sep = "\t")
# Rename any column containing "gene" so it reads "ensg" instead.
colnames(icc_summary) <- gsub("gene", "ensg", colnames(icc_summary))


### Make DF for modeling ###
# One row per cell: scaled SCT expression of `gene` plus the covariates.
df_hier_unscale <- data.frame(
  "Expression" = seurat[["SCT"]]@scale.data[gene, ],
  # 0 = Baseline, 1 = any other Time value
  "Village" = as.factor(ifelse(seurat@meta.data$Time == "Baseline", 0, 1)),
  "Line" = seurat@meta.data$Final_Assignment,
  # Strip capitalised words from MULTI_ID, leaving the remainder as the replicate label
  "Replicate" = as.factor(gsub("[A-Z][a-z]+", "", seurat@meta.data$MULTI_ID)),
  "Cryopreserved" = seurat$Cryopreserved,
  "Site" = seurat$Location,
  "Pseudotime" = round(seurat$latent_time, 2)
)
colnames(df_hier_unscale)[1] <- "Expression"



##### Get list of variables to fit before testing pseudotime effect #####
# Random-effect terms recorded for this gene in the ICC summary, excluding the
# residual row.
variables <- icc_summary[ensg == gene & grp != "Residual"]$grp

# With no variables the formula below would be "Expression ~ (1|)", which
# cannot be parsed; stop with an explicit message instead.
if (length(variables) == 0) {
  stop(
    "No non-residual variables found in icc_summary for gene ", gene,
    "; cannot build the random-effects model",
    call. = FALSE
  )
}



##### Test pseudotime impact #####
### Fit the known variables to get residuals ###
# Builds e.g. "Expression ~ (1|A) + (1|B)": a random intercept per variable.
model_all <- as.formula(paste0("Expression ~ (1|", paste0(variables, collapse = ") + (1|"), ")"))
# Warnings matching "giveCsparse" are suppressed around the fit.
model_glmmtmb <- suppress_warnings(glmmTMB(formula = noquote(model_all), data = df_hier_unscale, REML = TRUE), "giveCsparse")


### Test pseudotime on residuals ###
# Residuals of the known-variables model become the response for the
# pseudotime test.
df_hier_unscale$Residuals <- resid(model_glmmtmb)

# Alternative model (pseudotime slope) and null model (intercept only).
model_pseudotime <- suppress_warnings(glmmTMB(Residuals ~ Pseudotime, data = df_hier_unscale, REML = TRUE), "giveCsparse")
model_pseudotime2 <- suppress_warnings(glmmTMB(Residuals ~ 1, data = df_hier_unscale, REML = TRUE), "giveCsparse")

### Test with Anova ###
# P-value for adding Pseudotime to the intercept-only model.
# A second assignment used to overwrite this with
# anova(model_glmmtmb, model_pseudotime2), which compares models fitted to
# different responses (Expression vs Residuals); it has been removed so that
# P_value holds the pseudotime test result.
# NOTE(review): both models are fitted with REML = TRUE but differ in their
# fixed effects; likelihood-ratio comparisons of fixed effects are normally
# done on ML fits (REML = FALSE) — confirm which is intended.
P_value <- anova(model_pseudotime2, model_pseudotime)$`Pr(>Chisq)`[2]


# Diagnostic scatter of residuals against pseudotime, coloured by site, one
# panel per cell line, with a per-site linear trend (no confidence band).
test_plot <- ggplot(
  data = df_hier_unscale,
  mapping = aes(x = Pseudotime, y = Residuals, color = Site)
) +
  geom_point() +
  facet_grid(~Line) +
  geom_smooth(method = "lm", se = FALSE)

# Written to a fixed path, so each run overwrites the previous image.
ggsave(filename = "/directflow/SCCGGroupShare/projects/DrewNeavin/test.png", plot = test_plot)


# Follow-up analysis, run only when P_value is below 0.05 divided by
# (number of fitted variables + 1).
# NOTE(review): this branch is unfinished — it fits a model but writes no
# output, and the interaction test below is only a placeholder.
if (P_value < 0.05/(length(variables) + 1)){
### Test for amount of variance explained ###
# NOTE(review): this reassigns model_pseudotime2, which earlier in the script
# holds the intercept-only fit; `Residuals ~ 1 + Pseudotime` specifies the same
# model as `Residuals ~ Pseudotime` (model_pseudotime) — confirm the intent.
model_pseudotime2 <- suppress_warnings(glmmTMB(Residuals ~ 1 + Pseudotime, data = df_hier_unscale, REML = TRUE), "giveCsparse")



### Test for interactions

}
Loading

0 comments on commit 11c445f

Please sign in to comment.