From 72f119854f6ee8f007176436618c0976db2ed061 Mon Sep 17 00:00:00 2001 From: browaeysrobin Date: Tue, 14 Nov 2023 14:55:07 +0100 Subject: [PATCH] Fix issue: *condition-specific cell types in abundance_expression_info *remove senderLigand-receiverReceptor interactions if sender and receiver are not present in the same samples --- R/expression_processing.R | 2 +- R/pipeline.R | 21 +++++++++++++++---- R/pipeline_wrappers.R | 3 +++ .../condition_specific_celltype_MISC.Rmd | 4 ++-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/R/expression_processing.R b/R/expression_processing.R index a9a77ba..efc82fe 100644 --- a/R/expression_processing.R +++ b/R/expression_processing.R @@ -613,7 +613,7 @@ get_frac_exprs = function(sce, sample_id, celltype_id, group_id, batches = NA, m for(i in seq(length(unique(expressed_df$celltype)))){ celltype_oi = unique(expressed_df$celltype)[i] n_genes = expressed_df %>% filter(celltype == celltype_oi) %>% filter(expressed == TRUE) %>% pull(gene) %>% unique() %>% length() - print(paste0(n_genes, " genes areconsidered as expressed in the cell type: ",celltype_oi)) + print(paste0(n_genes, " genes are considered as expressed in the cell type: ",celltype_oi)) } return(list(frq_df = frq_df, frq_df_group = frq_df_group, expressed_df = expressed_df)) diff --git a/R/pipeline.R b/R/pipeline.R index 33edf1f..fe2ead1 100644 --- a/R/pipeline.R +++ b/R/pipeline.R @@ -368,13 +368,26 @@ multi_nichenet_analysis = function(sce, abundance_df$n[is.na(abundance_df$n)] = 0 abundance_df$keep[is.na(abundance_df$keep)] = FALSE abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep))) - celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() - celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique() + celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() # find truly condition-specific cell types by searching for cell types truely absent in at least one condition + celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() # require presence in at least 2 samples of one group so it is really present in at least one condition condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition) + total_nr_conditions = SummarizedExperiment::colData(sce)[,group_id] %>% unique() %>% length() + absent_celltypes = abundance_df_summarized %>% dplyr::filter(samples_present < 2) %>% dplyr::group_by(celltype_id) %>% dplyr::count() %>% dplyr::filter(n == total_nr_conditions) %>% dplyr::pull(celltype_id) + print("condition-specific celltypes:") print(condition_specific_celltypes) + print("absent celltypes:") + print(absent_celltypes) + + senders_oi = senders_oi %>% setdiff(absent_celltypes) + receivers_oi = receivers_oi %>% setdiff(absent_celltypes) + + retained_celltypes = union(senders_oi, receivers_oi) + + sce = sce[, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes] + ## define expressed genes frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = min_sample_prop) @@ -419,7 +432,7 @@ multi_nichenet_analysis = function(sce, celltype_de = DE_info_emp$de_output_tidy_emp %>% dplyr::select(-p_val, -p_adj) %>% dplyr::rename(p_val = p_emp, p_adj = p_adj_emp) } - print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n)) + # print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n)) senders_oi = celltype_de$cluster_id %>% unique() receivers_oi = celltype_de$cluster_id %>% unique() @@ -446,7 +459,7 @@ multi_nichenet_analysis = function(sce, if(verbose == TRUE){ print("Calculate normalized average and pseudobulk expression") } - abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info) + abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = union(senders_oi, condition_specific_celltypes), receivers_oi = union(receivers_oi, condition_specific_celltypes), lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info) metadata_combined = SummarizedExperiment::colData(sce) %>% tibble::as_tibble() diff --git a/R/pipeline_wrappers.R b/R/pipeline_wrappers.R index d68945a..4583695 100644 --- a/R/pipeline_wrappers.R +++ b/R/pipeline_wrappers.R @@ -317,6 +317,9 @@ process_abundance_expression_info = function(sce, sample_id, group_id, celltype_ receivers_oi = receivers_oi, lr_network = lr_network)) + sender_receiver_info$pb_df = sender_receiver_info$pb_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$pb_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver)) + sender_receiver_info$avg_df = sender_receiver_info$avg_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$avg_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver)) + sender_receiver_info$frq_df = sender_receiver_info$frq_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$frq_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver)) return(list(abundance_data_receiver = abundance_data_receiver, abundance_data_sender = abundance_data_sender, celltype_info = celltype_info, receiver_info_ic = receiver_info_ic, sender_info_ic = sender_info_ic, sender_receiver_info = sender_receiver_info)) diff --git a/vignettes/condition_specific_celltype_MISC.Rmd b/vignettes/condition_specific_celltype_MISC.Rmd index 7b7b16e..b556a90 100644 --- a/vignettes/condition_specific_celltype_MISC.Rmd +++ b/vignettes/condition_specific_celltype_MISC.Rmd @@ -170,8 +170,8 @@ We can automatically check for condition_specific_celltypes with the following c abundance_df$n[is.na(abundance_df$n)] = 0 abundance_df$keep[is.na(abundance_df$keep)] = FALSE abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep))) - celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() - celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique() + celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() # focus only on cell types that are totally absent in at least one condition + celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() # at least 2 samples in at least one condition should have sufficient cells of the cell type of interest condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition) print("condition-specific celltypes:")