-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
780 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
library(data.table) | ||
library(tidyverse) | ||
library(gtools) | ||
library(growthcurver) | ||
library("ggpubr") | ||
|
||
|
||
##### Set up directories and read in the growth measurements #####
dir <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/data/"
data <- fread(paste0(dir, "Growth_rate/growth_rate_measurements.csv"))
outdir <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/growth_rate/"

# Re-running the script should not warn just because the folder already exists.
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)


##### Fit one growth curve per cell line #####
cell_lines <- unique(data$CellLine)

# Subset once per line so the time and count vectors handed to
# SummarizeGrowth() come from the same rows.
models <- lapply(cell_lines, function(line) {
  line_data <- data[CellLine == line]
  SummarizeGrowth(line_data$Time, line_data$Count)
})
names(models) <- cell_lines


##### Collect the fitted values of every line in one long table #####
# Stored as `predictions` rather than `predict`, so that stats::predict() is
# not masked by a list in the global environment.
predictions <- lapply(names(models), function(line) {
  data.table(
    Time = data[CellLine == line]$Time,
    pred.wt = stats::predict(models[[line]]$model),
    CellLine = line
  )
})

predict_df <- rbindlist(predictions)
|
||
|
||
|
||
|
||
### Make Plot ###
# Observed counts as points, with the fitted values of each line drawn on top.
pBasic <- ggplot(data, aes(Time, Count, color = CellLine)) +
  geom_point() +
  theme_classic() +
  geom_line(data = predict_df, aes(y = pred.wt))

# outdir already ends in "/", so no extra separator is needed.
ggsave(pBasic, filename = paste0(outdir, "Basic_growth_rates_plot.png"))


##### Correlate growth rate with confluency #####
# Plain scatter plot. It gets its own file name: it used to be written to
# Count_confluency_corr.png and was then overwritten by the plot below.
pCorrBasic <- ggplot(data, aes(Count, Confluency, color = CellLine)) +
  geom_point() +
  theme_classic()

ggsave(pCorrBasic, filename = paste0(outdir, "Count_confluency_scatter.png"))

# Scatter plot with a regression line and confidence band per line, plus the
# correlation label added by stat_cor().
pCorr <- ggscatter(data, x = "Count", y = "Confluency", color = "CellLine",
                   add = "reg.line", conf.int = TRUE,
                   cor.method = "pearson") +
  stat_cor(aes(color = CellLine), label.x = 90)
ggsave(pCorr, filename = paste0(outdir, "Count_confluency_corr.png"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/bin/bash | ||
|
||
# Location of the ratrack checkout and of its Snakefile.
RATRACK=/directflow/SCCGGroupShare/projects/DrewNeavin/software/ratrack/
SNAKEFILE=/directflow/SCCGGroupShare/projects/DrewNeavin/software/ratrack/Snakefile

# Quoted so the path survives word splitting.
cd "$RATRACK"
# -p: do not fail when logs/ is left over from an earlier run.
mkdir -p logs

# NOTE(review): `conda activate` needs a shell in which conda has been
# initialised - confirm how this script is meant to be launched.
conda activate ratrack
|
||
# ### First test with demo ### | ||
# snakemake results/minimal.pdf results/minimal.fit.csv --dryrun | ||
# snakemake results/kcl22.pdf results/kcl22.fit.csv | ||
|
||
|
||
# snakemake results/TOB0421_ratrack.pdf results/TOB0421_ratrack.fit.csv --dryrun | ||
# nohup snakemake results/TOB0421_ratrack.pdf results/TOB0421_ratrack.fit.csv --cores 5 > logs/nohup_`date +%Y-%m-%d.%H:%M:%S`_TOB0421.log & | ||
|
||
# snakemake results/FSA0006_ratrack.pdf results/FSA0006_ratrack.fit.csv --dryrun | ||
# nohup snakemake results/FSA0006_ratrack.pdf results/FSA0006_ratrack.fit.csv --cores 5 > logs/nohup_`date +%Y-%m-%d.%H:%M:%S`_FSA0006.log & | ||
|
||
# snakemake results/MBE1006_ratrack.pdf results/MBE1006_ratrack.fit.csv --dryrun | ||
# nohup snakemake results/MBE1006_ratrack.pdf results/MBE1006_ratrack.fit.csv --cores 5 > logs/nohup_`date +%Y-%m-%d.%H:%M:%S`_MBE1006.log & | ||
|
||
|
||
### Try with original counts + dilution info ###
# For each line: preview the jobs with a dry run, then start the real run in
# the background with its output written to a time-stamped log file.
for LINE in TOB0421 FSA0006 MBE1006
do
    snakemake results/${LINE}_original_counts.pdf results/${LINE}_original_counts.fit.csv --dryrun
    nohup snakemake results/${LINE}_original_counts.pdf results/${LINE}_original_counts.fit.csv --cores 5 > logs/nohup_$(date +%Y-%m-%d.%H:%M:%S)_${LINE}.log &
done


# Dry run with --unlock for the TOB0421 ratrack targets.
snakemake results/TOB0421_ratrack.pdf results/TOB0421_ratrack.fit.csv --dryrun --unlock
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
##### Author: Drew Neavin | ||
##### Date: 2 December, 2021 | ||
##### Reason: Look at the proportions of each line at each time of Nona's multi-ome experiment | ||
|
||
|
||
|
||
|
||
##### Load in libraries ##### | ||
library(data.table) | ||
library(Seurat) | ||
library(tidyverse) | ||
library("ggpomological") | ||
|
||
|
||
##### Set up directories #####
datadir <- "/directflow/SCCGGroupShare/projects/himaro/imputing_snp/demultiplexing/demultiplex_Nona/processed_data_demultiplex2/log_dir/"
outdir <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/output/Nona_multiome/"

dir.create(outdir, recursive = TRUE)


##### Get a list of the village pools #####
villages <- list.files(datadir, pattern = "Village")


##### Get the singlets from the file #####
# Read the final demultiplexing assignments of one pool, tag every row with the
# pool name and the day parsed from it, and drop the V1 column.
read_pool_assignments <- function(pool) {
  print(pool)
  assignment_file <- paste0(datadir, pool, "/CombinedResults/Final_Assignments_demultiplexing_doublets_new_edit.txt")
  assignments <- fread(assignment_file, sep = "\t")
  assignments$Pool_ID <- pool
  assignments$Day <- as.numeric(gsub("Village_Day", "", assignments$Pool_ID))
  assignments$V1 <- NULL
  assignments
}

village_id_list <- lapply(villages, read_pool_assignments)

# Stack the per-pool tables into one.
village_id <- do.call(rbind, village_id_list)
|
||
### Update columns ###
village_id$Day <- factor(village_id$Day, levels = c(0,1,2,3,4,5,7,15))

# Remove each pattern from the assignment labels, one at a time and in this order.
village_id$Assignment <- Reduce(
  function(labels, pattern) gsub(pattern, "", labels),
  c("^0_", "D-", "\\.\\d\\.", "N-", "-P36", "-"),
  village_id$Assignment
)

# Every droplet type other than "singlet" is relabelled as "doublet".
village_id$DropletType <- ifelse(village_id$DropletType == "singlet", village_id$DropletType, "doublet")

# Proportion of each droplet type within each day (printed, not stored).
droplet_by_day <- table(village_id[, c("DropletType", "Day")])
data.table(prop.table(droplet_by_day, margin = 2))

# Proportion of each assignment within each day, over all droplets.
assignment_by_day <- table(village_id[, c("Assignment", "Day")])
village_summary <- data.table(prop.table(assignment_by_day, margin = 2))

# Same proportions after dropping the "unassigned" and "doublet" assignments.
singlet_by_day <- table(village_id[!(Assignment == "unassigned" | Assignment == "doublet"), c("Assignment", "Day")])
village_summary_singlets <- data.table(prop.table(singlet_by_day, margin = 2))

# Order the assignment levels by their day-15 proportion, largest first.
day15_summary <- village_summary_singlets[Day == 15]
village_summary_singlets$Assignment <- factor(village_summary_singlets$Assignment, levels = rev(day15_summary$Assignment[order(day15_summary$N)]))
|
||
|
||
##### Colours shared by both proportion plots #####
# One colour per line. The seventh entry used to repeat "#2096f3", which gave
# two lines the same colour; it is now a distinct light blue.
line_colours <- c("#f44336", "#e81f63", "#9c27b0", "#673ab7", "#3f51b5", "#2096f3",
                  "#03a9f4", "#009688", "#4caf50", "#8bc34a", "#cddc39", "#ffeb3b",
                  "#ffc108", "#ff9801", "#ff5723", "#795548", "#9e9e9e", "#607d8b")


##### Make proportion plots (area plot) #####
# Day is stored as a factor, so it is converted back to a number for the x axis.
p_stacked_area <- ggplot(village_summary_singlets, aes(x = as.numeric(as.character(Day)), y = N, fill = factor(Assignment), group = Assignment)) +
  geom_area(alpha=0.6 , size=0.5, colour="black") +
  theme_classic() +
  scale_fill_manual(values = line_colours) +
  xlab("Days") +
  ylab("Proportion of Cells")
ggsave(p_stacked_area, filename = paste0(outdir,"stacked_area.png"), width = 7, height = 4)
ggsave(p_stacked_area, filename = paste0(outdir,"stacked_area.pdf"), width = 7, height = 4)


##### Make line plot of proportion over time #####
p_line <- ggplot(village_summary_singlets, aes(x = as.numeric(as.character(Day)), y = N, color = Assignment)) +
  geom_point() +
  theme_classic() +
  geom_line() +
  scale_color_manual(values = line_colours) +
  xlab("Days") +
  ylab("Proportion of Cells")

ggsave(p_line, filename = paste0(outdir,"line_proportions.png"), width = 7, height = 4)
ggsave(p_line, filename = paste0(outdir,"line_proportions.pdf"), width = 7, height = 4)
|
||
|
||
|
||
##### Check QC metrics #####
### Load in Data ###
tenxdir <- "/directflow/SCCGGroupShare/projects/annsen/ATACseq/"

# Read the filtered feature-barcode output of every village pool.
tenx_list <- lapply(villages, function(pool) {
  matrix_dir <- paste0(tenxdir, pool, "/outs/filtered_feature_bc_matrix")
  Read10X(matrix_dir)
})

# One Seurat object per pool.
seurat_list <- lapply(tenx_list, function(pool_counts) {
  CreateSeuratObject(counts = pool_counts)
})

# Merge the pools into a single object; cell names get the pool name as prefix.
first_pool <- seurat_list[[1]]
remaining_pools <- seurat_list[2:length(seurat_list)]
seurat <- merge(first_pool, y = remaining_pools, add.cell.ids = villages)

# Recover the pool from the prefixed cell names, and the day from the pool name.
cell_names <- rownames(seurat@meta.data)
seurat$Pool <- gsub("_[GTAC]+-1", "", cell_names)
seurat$Day <- as.numeric(gsub("Village_Day", "", seurat$Pool))

# Percentage of counts from features matching the MT- and RPS/RPL name patterns.
seurat$mt_percent <- PercentageFeatureSet(seurat, pattern = "^MT-")
seurat$rb_percent <- PercentageFeatureSet(seurat, pattern = "^RP[SL]")
|
||
### Generate some plots by pool to show the QC metrics ###
# Fill colours used by every QC violin plot.
qc_fill_colours <- c("#c03728", "#919c4c", "#f18721", "#f5c049", "#e68c7c", "#828585", "#c3c377", "#4f5157")

# Violin plot of one per-cell metric grouped by day, drawn without points and
# without a legend. `split_column` is the metadata column passed to split.by.
qc_violin <- function(feature, split_column) {
  VlnPlot(seurat, features = feature, group.by = "Day", split.by = split_column, pt.size = 0) +
    scale_fill_manual(values = qc_fill_colours) +
    NoLegend()
}

### N UMI ###
p_UMI_vnl <- qc_violin("nCount_RNA", "Pool")
ggsave(p_UMI_vnl, filename = paste0(outdir,"umi_vln.png"))

### N Genes ###
p_gene_vnl <- qc_violin("nFeature_RNA", "Day")
ggsave(p_gene_vnl, filename = paste0(outdir,"gene_vln.png"))

### Mt % ###
p_mt_vnl <- qc_violin("mt_percent", "Day")
ggsave(p_mt_vnl, filename = paste0(outdir,"mt_vln.png"))

### Rb % ###
p_rb_vnl <- qc_violin("rb_percent", "Day")
ggsave(p_rb_vnl, filename = paste0(outdir,"rb_vln.png"))
|
||
### Mt % vs N UMIs ###
# The empty scale_color_manual() was dropped: without `values` it has no
# colours to give to the groups.
plot1 <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "mt_percent")


### N UMI vs N Genes ###
# Uses the merged `seurat` object; `pbmc` is not defined in this script.
plot2 <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
|
||
### Data was already pretty clean since just used intronic reads, probably don't need to filter further for the high quality cells |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
##### Reason: want to test nb interaction model before running on large number of genes | ||
|
||
library(haven) | ||
library(ggplot2) | ||
library(lme4) | ||
library(glmmTMB) | ||
library(Seurat) | ||
library(tidyverse) | ||
library(lmtest) | ||
|
||
|
||
|
||
dir <- "/directflow/SCCGGroupShare/projects/DrewNeavin/iPSC_Village/"
outdir <- paste0(dir,"output/test_interaction_nb_model")
dir.create(outdir, recursive = TRUE)


##### Read in seurat with genes #####
seurat <- readRDS(paste0(dir,"output/Distribution_tests/seurat_integrated_all_times_clustered_1pct_expressing.rds"))

# Location: remove "_Baseline", then "_Village" and everything after it, then
# relabel "Thawed" as "Cryopreserved".
seurat@meta.data$Location <- gsub(
  "Thawed", "Cryopreserved",
  gsub("_Village.+", "", gsub("_Baseline", "", seurat@meta.data$Location))
)

# Time: relabel the thawed day 0 / day 7 entries as "Baseline" / "Village".
seurat@meta.data$Time <- gsub(
  "Thawed Village Day 7", "Village",
  gsub("Thawed Village Day 0", "Baseline", seurat@meta.data$Time)
)
|
||
|
||
|
||
|
||
### Make dataframes for modeling ###
gene <- "ENSG00000106153"

### The results are not dependent on the order or the covariates included so don't need to include extra covariates
# One row per cell: SCT counts of `gene` plus the covariates used in the models.
# Village is 0 for cells whose Time is "Baseline" and 1 otherwise.
df <- data.frame(
  "Expression" = seurat[["SCT"]]@counts[gene,],
  "Line" = seurat@meta.data$Final_Assignment,
  "Village" = ifelse(seurat@meta.data$Time == "Baseline", 0, 1),
  "Replicate" = gsub("[A-Z][a-z]+", "", seurat@meta.data$MULTI_ID),
  "Location" = seurat@meta.data$Location
)
|
||
|
||
|
||
|
||
|
||
### Calculate R2 for each separately + remaining (1 - model with all in ) & use proportion of these in figure
# Negative binomial fits for the expression of one gene: an intercept-only
# baseline, one model per covariate, and one model with all four covariates.
model <- list()
model[["Base"]] <- glmmTMB(Expression ~ 1, data = df, family = nbinom2)
model[["Line"]] <- glmmTMB(Expression ~ 1 + Line, data = df, family = nbinom2)
model[["Replicate"]] <- glmmTMB(Expression ~ 1 + Replicate, data = df, family = nbinom2)
model[["Location"]] <- glmmTMB(Expression ~ 1 + Location, data = df, family = nbinom2)
model[["Village"]] <- glmmTMB(Expression ~ 1 + Village, data = df, family = nbinom2)
# The combined model now contains every covariate named in its label (it was
# previously fitted with Village only).
model[["Line_Replicate_Location_Village"]] <- glmmTMB(Expression ~ 1 + Line + Replicate + Location + Village, data = df, family = nbinom2)
model[["Line_Replicate_Location_Village"]]


### Calculate model together ###
# Pseudo-R2 of every fit relative to the intercept-only model:
# 1 - logLik(fit) / logLik(base).
# NOTE(review): the earlier draft also referred to two-covariate Line/Village
# fits that are not defined in this script; add them to `model` if they are
# still wanted.
base_loglik <- as.numeric(logLik(model[["Base"]]))
pseudoR2 <- lapply(model[names(model) != "Base"], function(fit) {
  1 - as.numeric(logLik(fit)) / base_loglik
})
pseudoR2
|
||
|
||
### fit model with interaction - lrt | ||
# NOTE(review): neither model passed to anova() is defined in this script -
# they need to be fitted first; confirm which formulas were intended.
anova(model_village_line_rep_site2, model_village_line_rep_site_interaction2) | ||
# NOTE(review): 137.96 is hard-coded - presumably the statistic reported by the
# anova() call above; the upper-tail p-value on 1 df is halved. Confirm both.
pchisq(137.96, df=1, lower.tail=FALSE)/2 |
Oops, something went wrong.