Skip to content

Commit

Permalink
Fix issue:
Browse files Browse the repository at this point in the history
*condition-specific cell types in abundance_expression_info
*remove senderLigand-receiverReceptor interactions if sender and receiver are not present in the same samples
  • Loading branch information
browaeysrobin committed Nov 14, 2023
1 parent 9180052 commit 72f1198
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 7 deletions.
2 changes: 1 addition & 1 deletion R/expression_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ get_frac_exprs = function(sce, sample_id, celltype_id, group_id, batches = NA, m
for(i in seq(length(unique(expressed_df$celltype)))){
celltype_oi = unique(expressed_df$celltype)[i]
n_genes = expressed_df %>% filter(celltype == celltype_oi) %>% filter(expressed == TRUE) %>% pull(gene) %>% unique() %>% length()
print(paste0(n_genes, " genes areconsidered as expressed in the cell type: ",celltype_oi))
print(paste0(n_genes, " genes are considered as expressed in the cell type: ",celltype_oi))
}
return(list(frq_df = frq_df, frq_df_group = frq_df_group, expressed_df = expressed_df))

Expand Down
21 changes: 17 additions & 4 deletions R/pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -368,13 +368,26 @@ multi_nichenet_analysis = function(sce,
abundance_df$n[is.na(abundance_df$n)] = 0
abundance_df$keep[is.na(abundance_df$keep)] = FALSE
abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique()
celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() # find truly condition-specific cell types by searching for cell types truly absent in at least one condition
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() # require presence in at least 2 samples of one group so it is really present in at least one condition
condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)

total_nr_conditions = SummarizedExperiment::colData(sce)[,group_id] %>% unique() %>% length()
absent_celltypes = abundance_df_summarized %>% dplyr::filter(samples_present < 2) %>% dplyr::group_by(celltype_id) %>% dplyr::count() %>% dplyr::filter(n == total_nr_conditions) %>% dplyr::pull(celltype_id)

print("condition-specific celltypes:")
print(condition_specific_celltypes)

print("absent celltypes:")
print(absent_celltypes)

senders_oi = senders_oi %>% setdiff(absent_celltypes)
receivers_oi = receivers_oi %>% setdiff(absent_celltypes)

retained_celltypes = union(senders_oi, receivers_oi)

sce = sce[, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes]

## define expressed genes
frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = min_sample_prop)

Expand Down Expand Up @@ -419,7 +432,7 @@ multi_nichenet_analysis = function(sce,
celltype_de = DE_info_emp$de_output_tidy_emp %>% dplyr::select(-p_val, -p_adj) %>% dplyr::rename(p_val = p_emp, p_adj = p_adj_emp)
}

print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n))
# print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n))

senders_oi = celltype_de$cluster_id %>% unique()
receivers_oi = celltype_de$cluster_id %>% unique()
Expand All @@ -446,7 +459,7 @@ multi_nichenet_analysis = function(sce,
if(verbose == TRUE){
print("Calculate normalized average and pseudobulk expression")
}
abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)
abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = union(senders_oi, condition_specific_celltypes), receivers_oi = union(receivers_oi, condition_specific_celltypes), lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)

metadata_combined = SummarizedExperiment::colData(sce) %>% tibble::as_tibble()

Expand Down
3 changes: 3 additions & 0 deletions R/pipeline_wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ process_abundance_expression_info = function(sce, sample_id, group_id, celltype_
receivers_oi = receivers_oi,
lr_network = lr_network))

sender_receiver_info$pb_df = sender_receiver_info$pb_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$pb_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver))
sender_receiver_info$avg_df = sender_receiver_info$avg_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$avg_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver))
sender_receiver_info$frq_df = sender_receiver_info$frq_df %>% dplyr::ungroup() %>% dplyr::inner_join(sender_receiver_info$frq_df_group %>% dplyr::ungroup() %>% dplyr::distinct(ligand, receptor, sender, receiver))

return(list(abundance_data_receiver = abundance_data_receiver, abundance_data_sender = abundance_data_sender, celltype_info = celltype_info, receiver_info_ic = receiver_info_ic, sender_info_ic = sender_info_ic, sender_receiver_info = sender_receiver_info))

Expand Down
4 changes: 2 additions & 2 deletions vignettes/condition_specific_celltype_MISC.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ We can automatically check for condition_specific_celltypes with the following c
abundance_df$n[is.na(abundance_df$n)] = 0
abundance_df$keep[is.na(abundance_df$keep)] = FALSE
abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique()
celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() # focus only on cell types that are totally absent in at least one condition
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() # at least 2 samples in at least one condition should have sufficient cells of the cell type of interest
condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)
print("condition-specific celltypes:")
Expand Down

0 comments on commit 72f1198

Please sign in to comment.