-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
123 additions
and
118 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,128 +1,135 @@ | ||
|
||
#' Quality Control for Spatial Data | ||
#' | ||
#' This function identify spots in a | ||
#' [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] (SPE) | ||
#' with outlier quality control values: low `sum_umi` or `sum_gene`, or high | ||
#' `expr_chrM_ratio`, utilizing `scran::isOutlier()`. Also identifies in-tissue | ||
#' edge spots and distance to the edge for each spot. | ||
#' edge spots and distance to the edge for each spot. | ||
#' | ||
#' @param spe a [SpatialExperiment][SpatialExperiment::SpatialExperiment-class] | ||
#' | ||
#' @return A [SpatialExperiment][SpatialExperiment::SpatialExperiment-class] | ||
#' with added quiality control information added to the colData. | ||
#' | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#' if (enough_ram()) { | ||
#' if (enough_ram()) { | ||
#' ## Obtain the necessary data | ||
#' if (!exists("spe")) spe <- fetch_data("spe") | ||
#' | ||
#' | ||
#' ## fake out tissue spots in example data (TODO add pre-qc data) | ||
#' spe_qc <- spe | ||
#' spe_qc$in_tissue[spe_qc$array_col < 10] <- FALSE | ||
#' | ||
#' | ||
#' ## adds QC metrics to colData of the spe | ||
#' spe_qc <- metrics_qc(spe_qc) | ||
#' colData(spe_qc) | ||
#' | ||
#' | ||
#' ## visualize edge spots | ||
#' vis_clus(spe_qc, sampleid = "151507", clustervar = "edge_spot") | ||
#' vis_gene(spe_qc, sampleid = "151507", geneid = "edge_distance", minCount = -1) | ||
#' | ||
#' | ||
#' ## visualize scran QC flags | ||
#' | ||
#' | ||
#' vis_clus(spe_qc, sample_id = "151507", clustervar = "scran_low_lib_size") | ||
#' | ||
#' scater::plotColData(spe_qc[,spe_qc$in_tissue], x = "sample_id", y = "sum_umi", colour_by = "scran_low_lib_size") | ||
#' | ||
#' | ||
#' scater::plotColData(spe_qc[, spe_qc$in_tissue], x = "sample_id", y = "sum_umi", colour_by = "scran_low_lib_size") | ||
#' | ||
#' vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_discard") | ||
#' vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_low_lib_size_edge") | ||
#' | ||
#' } | ||
#' #' | ||
#' } | ||
#' #' | ||
#' @importFrom dplyr group_by summarize left_join select mutate | ||
#' @importFrom SummarizedExperiment colData | ||
#' @importFrom scater isOutlier | ||
#' @importFrom scater isOutlier | ||
metrics_qc <- function(spe) { | ||
|
||
stopifnot("in_tissue" %in% colnames(colData(spe))) | ||
stopifnot("sum_umi" %in% colnames(colData(spe))) | ||
stopifnot("sum_gene" %in% colnames(colData(spe))) | ||
stopifnot("expr_chrM_ratio" %in% colnames(colData(spe))) | ||
stopifnot("in_tissue" %in% colnames(colData(spe))) | ||
stopifnot("sum_umi" %in% colnames(colData(spe))) | ||
stopifnot("sum_gene" %in% colnames(colData(spe))) | ||
stopifnot("expr_chrM_ratio" %in% colnames(colData(spe))) | ||
|
||
spe_in <- spe[, spe$in_tissue] | ||
|
||
## QC in-tissue spots | ||
qc_df <- data.frame( | ||
log2sum = log2(spe_in$sum_umi), | ||
log2detected = log2(spe_in$sum_gene), | ||
subsets_Mito_percent = spe_in$expr_chrM_ratio * 100, | ||
sample_id = spe_in$sample_id | ||
) | ||
|
||
qcfilter <- data.frame( | ||
low_lib_size = scater::isOutlier(qc_df$log2sum, type = "lower", log = TRUE, batch = qc_df$sample_id), | ||
low_n_features = scater::isOutlier(qc_df$log2detected, type = "lower", log = TRUE, batch = qc_df$sample_id), | ||
high_subsets_Mito_percent = scater::isOutlier(qc_df$subsets_Mito_percent, type = "higher", batch = qc_df$sample_id) | ||
) |> | ||
dplyr::mutate(discard = (low_lib_size | low_n_features) | high_subsets_Mito_percent) | ||
|
||
## Add qcfilter cols to colData(spe) after factoring | ||
## discard | ||
spe$scran_discard <- NA | ||
spe$scran_discard[which(spe$in_tissue)] <- qcfilter$discard | ||
spe$scran_discard <- factor(spe$scran_discard, levels = c("TRUE", "FALSE")) | ||
|
||
## low_lib_size | ||
spe$scran_low_lib_size <- NA | ||
spe$scran_low_lib_size[which(spe$in_tissue)] <- qcfilter$low_lib_size | ||
spe$scran_low_lib_size <- factor(spe$scran_low_lib_size, | ||
levels = c("TRUE", "FALSE") | ||
) | ||
## low_n_features | ||
spe$scran_low_n_features <- NA | ||
spe$scran_low_n_features[which(spe$in_tissue)] <- qcfilter$low_n_features | ||
spe$sscran_low_n_features <- factor(spe$scran_low_n_features, | ||
levels = c("TRUE", "FALSE") | ||
) | ||
|
||
## high mito percent | ||
spe$scran_high_subsets_Mito_percent <- NA | ||
spe$scran_high_subsets_Mito_percent[which(spe$in_tissue)] <- | ||
qcfilter$high_subsets_Mito_percent | ||
spe$scran_high_subsets_Mito_percent <- | ||
factor(spe$scran_high_subsets_Mito_percent, levels = c("TRUE", "FALSE")) | ||
|
||
## Find edge spots | ||
spot_coords <- colData(spe_in) |> | ||
as.data.frame() |> | ||
select(in_tissue, sample_id, array_row, array_col) |> | ||
group_by(sample_id, array_row) |> | ||
mutate( | ||
edge_col = array_col == min(array_col) | array_col == max(array_col), | ||
col_distance = pmin( | ||
abs(array_col - min(array_col)), | ||
abs(array_col - max(array_col)) | ||
) | ||
) |> | ||
group_by(sample_id, array_col) |> | ||
mutate( | ||
edge_row = array_row == min(array_row) | array_row == max(array_row), | ||
row_distance = pmin( | ||
abs(array_row - min(array_row)), | ||
abs(array_row - max(array_row)) | ||
) | ||
) |> | ||
group_by(sample_id) |> | ||
mutate( | ||
edge_spot = edge_row | edge_col, | ||
edge_distance = pmin(row_distance, col_distance) | ||
) | ||
|
||
|
||
## Add Edge info to spe | ||
spe$edge_spot <- NA | ||
spe$edge_spot[which(spe$in_tissue)] <- spot_coords$edge_spot | ||
spe$edge_spot <- factor(spe$edge_spot, levels = c("TRUE", "FALSE")) | ||
|
||
spe$edge_distance <- NA | ||
spe$edge_distance[which(spe$in_tissue)] <- spot_coords$edge_distance | ||
|
||
spe$scran_low_lib_size_edge <- NA | ||
spe$scran_low_lib_size_edge[which(spe$in_tissue)] <- qcfilter$low_lib_size & spot_coords$edge_spot | ||
spe$scran_low_lib_size_edge <- factor(spe$scran_low_lib_size_edge, levels = c("TRUE", "FALSE")) | ||
|
||
spe_in <- spe[, spe$in_tissue] | ||
|
||
## QC in-tissue spots | ||
qc_df <- data.frame( | ||
log2sum = log2(spe_in$sum_umi), | ||
log2detected = log2(spe_in$sum_gene), | ||
subsets_Mito_percent = spe_in$expr_chrM_ratio*100, | ||
sample_id = spe_in$sample_id | ||
) | ||
|
||
qcfilter <- data.frame( | ||
low_lib_size = scater::isOutlier(qc_df$log2sum, type = "lower", log = TRUE, batch = qc_df$sample_id), | ||
low_n_features = scater::isOutlier(qc_df$log2detected, type = "lower", log = TRUE, batch = qc_df$sample_id), | ||
high_subsets_Mito_percent = scater::isOutlier(qc_df$subsets_Mito_percent, type = "higher", batch = qc_df$sample_id) | ||
) |> | ||
dplyr::mutate(discard = (low_lib_size | low_n_features) | high_subsets_Mito_percent) | ||
|
||
## Add qcfilter cols to colData(spe) after factoring | ||
## discard | ||
spe$scran_discard <- NA | ||
spe$scran_discard[which(spe$in_tissue)] <- qcfilter$discard | ||
spe$scran_discard <- factor(spe$scran_discard, levels = c("TRUE", "FALSE")) | ||
|
||
## low_lib_size | ||
spe$scran_low_lib_size <- NA | ||
spe$scran_low_lib_size[which(spe$in_tissue)] <- qcfilter$low_lib_size | ||
spe$scran_low_lib_size <- factor(spe$scran_low_lib_size, | ||
levels = c("TRUE", "FALSE")) | ||
## low_n_features | ||
spe$scran_low_n_features <- NA | ||
spe$scran_low_n_features[which(spe$in_tissue)] <- qcfilter$low_n_features | ||
spe$sscran_low_n_features<- factor(spe$scran_low_n_features, | ||
levels = c("TRUE", "FALSE")) | ||
|
||
## high mito percent | ||
spe$scran_high_subsets_Mito_percent <- NA | ||
spe$scran_high_subsets_Mito_percent[which(spe$in_tissue)] <- | ||
qcfilter$high_subsets_Mito_percent | ||
spe$scran_high_subsets_Mito_percent <- | ||
factor(spe$scran_high_subsets_Mito_percent, levels = c("TRUE", "FALSE")) | ||
|
||
## Find edge spots | ||
spot_coords <- colData(spe_in) |> | ||
as.data.frame() |> | ||
select(in_tissue, sample_id, array_row, array_col) |> | ||
group_by(sample_id, array_row) |> | ||
mutate(edge_col = array_col == min(array_col) | array_col == max(array_col), | ||
col_distance = pmin(abs(array_col - min(array_col)), | ||
abs(array_col - max(array_col))) | ||
) |> | ||
group_by(sample_id, array_col) |> | ||
mutate(edge_row = array_row == min(array_row) | array_row == max(array_row), | ||
row_distance = pmin(abs(array_row - min(array_row)), | ||
abs(array_row - max(array_row))) | ||
) |> | ||
group_by(sample_id) |> | ||
mutate(edge_spot = edge_row | edge_col, | ||
edge_distance = pmin(row_distance, col_distance)) | ||
|
||
|
||
## Add Edge info to spe | ||
spe$edge_spot <- NA | ||
spe$edge_spot[which(spe$in_tissue)] <- spot_coords$edge_spot | ||
spe$edge_spot <- factor(spe$edge_spot, levels = c("TRUE", "FALSE")) | ||
|
||
spe$edge_distance <- NA | ||
spe$edge_distance[which(spe$in_tissue)] <- spot_coords$edge_distance | ||
|
||
spe$scran_low_lib_size_edge <- NA | ||
spe$scran_low_lib_size_edge[which(spe$in_tissue)] <- qcfilter$low_lib_size & spot_coords$edge_spot | ||
spe$scran_low_lib_size_edge <- factor(spe$scran_low_lib_size_edge, levels = c("TRUE", "FALSE")) | ||
|
||
return(spe) | ||
} | ||
return(spe) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,11 @@ | ||
test_that( | ||
"metrics_qc returns modified spe", | ||
{ | ||
if (!exists("spe")) spe <- fetch_data("spe") | ||
|
||
#run metrics spe | ||
spe_qc <- metrics_qc(spe) | ||
expect_equal(ncol(spe), ncol(spe_qc)) ## same number of spots | ||
expect_equal(ncol(colData(spe)) + 8, ncol(colData(spe_qc))) ## add 8 QC cols to colData | ||
|
||
} | ||
"metrics_qc returns modified spe", | ||
{ | ||
if (!exists("spe")) spe <- fetch_data("spe") | ||
|
||
# run metrics spe | ||
spe_qc <- metrics_qc(spe) | ||
expect_equal(ncol(spe), ncol(spe_qc)) ## same number of spots | ||
expect_equal(ncol(colData(spe)) + 8, ncol(colData(spe_qc))) ## add 8 QC cols to colData | ||
} | ||
) |