Fix issue:

*condition-specific cell types in abundance_expression_info *remove senderLigand-receiverReceptor interactions if sender and receiver are not present in the same samples
saeyslab · Nov 14, 2023 · 72f1198 · 72f1198
1 parent 9180052
commit 72f1198
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 7 deletions.
diff --git a/R/expression_processing.R b/R/expression_processing.R
@@ -613,7 +613,7 @@ get_frac_exprs = function(sce, sample_id, celltype_id, group_id, batches = NA, m
  for(i in seq(length(unique(expressed_df$celltype)))){
  celltype_oi = unique(expressed_df$celltype)[i]
  n_genes = expressed_df %>% filter(celltype == celltype_oi) %>% filter(expressed == TRUE) %>% pull(gene) %>% unique() %>% length()
- print(paste0(n_genes, " genes areconsidered as expressed in the cell type: ",celltype_oi)) 
+ print(paste0(n_genes, " genes are considered as expressed in the cell type: ",celltype_oi)) 
  }
  return(list(frq_df = frq_df, frq_df_group = frq_df_group, expressed_df = expressed_df))
 

diff --git a/R/pipeline.R b/R/pipeline.R
@@ -368,13 +368,26 @@ multi_nichenet_analysis = function(sce,
  abundance_df$n[is.na(abundance_df$n)] = 0
  abundance_df$keep[is.na(abundance_df$keep)] = FALSE
  abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
- celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
- celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique()
+ celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() # find truly condition-specific cell types by searching for cell types truely absent in at least one condition
+ celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() # require presence in at least 2 samples of one group so it is really present in at least one condition
  condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)
 
+ total_nr_conditions = SummarizedExperiment::colData(sce)[,group_id] %>% unique() %>% length() 
+ absent_celltypes = abundance_df_summarized %>% dplyr::filter(samples_present < 2) %>% dplyr::group_by(celltype_id) %>% dplyr::count() %>% dplyr::filter(n == total_nr_conditions) %>% dplyr::pull(celltype_id)
+
  print("condition-specific celltypes:")
  print(condition_specific_celltypes)
 
+ print("absent celltypes:")
+ print(absent_celltypes)
+
+ senders_oi = senders_oi %>% setdiff(absent_celltypes)
+ receivers_oi = receivers_oi %>% setdiff(absent_celltypes)
+
+ retained_celltypes = union(senders_oi, receivers_oi) 
+
+ sce = sce[, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes]
+
  ## define expressed genes
  frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = min_sample_prop)
 
@@ -419,7 +432,7 @@ multi_nichenet_analysis = function(sce,
  celltype_de = DE_info_emp$de_output_tidy_emp %>% dplyr::select(-p_val, -p_adj) %>% dplyr::rename(p_val = p_emp, p_adj = p_adj_emp)
  }
 
- print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n))
+ # print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n))
 
  senders_oi = celltype_de$cluster_id %>% unique()
  receivers_oi = celltype_de$cluster_id %>% unique()
@@ -446,7 +459,7 @@ multi_nichenet_analysis = function(sce,
  if(verbose == TRUE){
  print("Calculate normalized average and pseudobulk expression")
  }
- abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)
+ abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = union(senders_oi, condition_specific_celltypes), receivers_oi = union(receivers_oi, condition_specific_celltypes), lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)
 
  metadata_combined = SummarizedExperiment::colData(sce) %>% tibble::as_tibble()
 

diff --git a/R/pipeline_wrappers.R b/R/pipeline_wrappers.R
@@ -317,6 +317,9 @@ process_abundance_expression_info = function(sce, sample_id, group_id, celltype_
  receivers_oi = receivers_oi,
  lr_network = lr_network))
 
+ sender_receiver_info$pb_df = sender_receiver_info$pb_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$pb_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver))
+ sender_receiver_info$avg_df = sender_receiver_info$avg_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$avg_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver))
+ sender_receiver_info$frq_df = sender_receiver_info$frq_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$frq_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver))
 
  return(list(abundance_data_receiver = abundance_data_receiver, abundance_data_sender = abundance_data_sender, celltype_info = celltype_info, receiver_info_ic = receiver_info_ic, sender_info_ic = sender_info_ic, sender_receiver_info = sender_receiver_info))
 

diff --git a/vignettes/condition_specific_celltype_MISC.Rmd b/vignettes/condition_specific_celltype_MISC.Rmd
@@ -170,8 +170,8 @@ We can automatically check for condition_specific_celltypes with the following c
  abundance_df$n[is.na(abundance_df$n)] = 0
  abundance_df$keep[is.na(abundance_df$keep)] = FALSE
  abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
- celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
- celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique()
+ celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() # focus only on cell types that are totally absent in at least one condition
+ celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() # at least 2 samples in at least one condition should have sufficient cells of the cell type of interest
  condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)
 
  print("condition-specific celltypes:")