diff --git a/R/inferCNV_HMM.R b/R/inferCNV_HMM.R index 0e125ce..f0ad720 100644 --- a/R/inferCNV_HMM.R +++ b/R/inferCNV_HMM.R @@ -643,38 +643,53 @@ get_predicted_CNV_regions <- function(infercnv_obj, by=c("consensus", "subcluste else { stop("Error, shouldn't get here ... bug") } - - cnv_regions = list() - cnv_counter_start = 0 - for (cell_group_name in names(cell_groups)) { - + mc.cores = infercnv.env$GLOBAL_NUM_THREADS + futile.logger::flog.info(paste("Processing cnv regions in parallel with", mc.cores, "cores")) + + num_cell_groups = length(cell_groups) + cell_group_names = names(cell_groups) + + state_par_func <- function(i){ + cell_group_name = cell_group_names[[i]] cell_group = cell_groups[[cell_group_name]] - #flog.info(sprintf("cell group %s -> %s", cell_group_name, cell_group)) + flog.info(sprintf("-processing cell_group_name: %s, size: %d", + cell_group_name, length(cell_group))) - flog.info(sprintf("-processing cell_group_name: %s, size: %d", cell_group_name, length(cell_group))) - cell_group_mtx = infercnv_obj@expr.data[,cell_group,drop=FALSE] - cell_group_names = colnames(cell_group_mtx) state_consensus <- .get_state_consensus(cell_group_mtx) - + names(state_consensus) <- rownames(cell_group_mtx) - cnv_gene_regions <- .define_cnv_gene_regions(state_consensus, infercnv_obj@gene_order, cnv_counter_start) - cnv_ranges <- .get_cnv_gene_region_bounds(cnv_gene_regions) + cnv_gene_regions <- .define_cnv_gene_regions(state_consensus, + infercnv_obj@gene_order) - consensus_state_list = list(cell_group_name=cell_group_name, - cells=cell_group_names, - gene_regions=cnv_gene_regions, - cnv_ranges=cnv_ranges) - - cnv_regions[[length(cnv_regions)+1]] = consensus_state_list + cnv_region_list = list(cell_group_name=cell_group_name, + cells=colnames(cell_group_mtx), + gene_regions=cnv_gene_regions) + cnv_region_list + + } + unnamed_cnv_regions <- parallel::mclapply(seq_along(cell_groups), + FUN = state_par_func, + mc.cores = mc.cores) + + # updates regions with unique name + named_cnv_regions = list() + cnv_counter_start = 0 + for (i in 1:num_cell_groups) { + cnv_gene_regions <- .rename_cnv_gene_regions(unnamed_cnv_regions[[i]]$gene_regions, cnv_counter_start) cnv_counter_start = cnv_counter_start + length(cnv_gene_regions) - + cnv_ranges <- .get_cnv_gene_region_bounds(cnv_gene_regions) + + named_cnv_regions[[i]] = list(cell_group_name=unnamed_cnv_regions[[i]]$cell_group_name, + cells=unnamed_cnv_regions[[i]]$cells, + gene_regions=cnv_gene_regions, + cnv_ranges=cnv_ranges) } - return(cnv_regions) + return(named_cnv_regions) } @@ -713,6 +728,7 @@ generate_cnv_region_reports <- function(infercnv_obj, ## cell clusters defined. cell_clusters_outfile = paste(out_dir, paste0(output_filename_prefix, ".cell_groupings"), sep="/") + flog.info(sprintf("-before writing cell clusters file: %s", cell_clusters_outfile)) cell_clusters_df = lapply(cnv_regions, function(x) { cell_group_name = x$cell_group_name @@ -901,25 +917,89 @@ adjust_genes_regions_report <- function(hmm.infercnv_obj, return(consensus) } +#' @title .get_num_cnv_gene_regions +#' +#' @description gets the number of cnv regions for a given state consensus vector +#' +#' @param state_consensus state consensus vector +#' +#' @param gene_order the infercnv_obj@gene_order info +#'#' +#' @return cnv_region_counter used to provide unique region names. +#' +#' @noRd + +.get_num_cnv_gene_regions <- function(state_consensus, gene_order) { + cnv_region_counter = 0 + chrs = unique(gene_order$chr) + for (chr in chrs) { + gene_idx = which(gene_order$chr==chr) + if (length(gene_idx) < 2) { next } + + chr_states = state_consensus[gene_idx] + prev_state = chr_states[1] + cnv_region_counter = cnv_region_counter + 1 + + for (i in seq(2,length(gene_idx))) { + state = chr_states[i] + if (state != prev_state) { + ## state transition + ## start new cnv region + cnv_region_counter = cnv_region_counter + 1 + } + + prev_state = state + } + + } + + return(cnv_region_counter) +} + +#' @title .rename_cnv_gene_regions +#' +#' @description Rename cnv regions with global consistent names +#' +#' @param unnamed_regions list of regions returned by .define_cnv_gene_regions +#' +#' @param cnv_counter_start starting index of cnv counter +#' +#' @return regions list containing the cnv regions defined. +#' +#' @noRd + +.rename_cnv_gene_regions <- function(unnamed_regions, cnv_counter_start) { + + named_regions = list() + + for (ii in seq_along(unnamed_regions)) { + chr = as.character(unnamed_regions[[ii]]$chr)[1] + cnv_index = cnv_counter_start + ii + cnv_region_name = sprintf("%s-region_%d", chr, cnv_index) + named_regions[[cnv_region_name]] = unnamed_regions[[ii]] + } + + return(named_regions) +} #' @title .define_cnv_gene_regions #' #' @description Given the state consensus vector and gene order info, defines cnv regions -#' based on consistent ordering and cnv state +#' based on local ordering and cnv state #' #' @param state_consensus state consensus vector #' #' @param gene_order the infercnv_obj@gene_order info #' -#' @param cnv_region_counter number x where counting starts at x+1, used to provide unique region names. -#' #' @return regions list containing the cnv regions defined. #' #' @noRd -.define_cnv_gene_regions <- function(state_consensus, gene_order, cnv_region_counter) { +.define_cnv_gene_regions <- function(state_consensus, gene_order) { + + unnamed_regions = list() - regions = list() + local_cnv_region_counter = 0 gene_names = rownames(gene_order) @@ -933,15 +1013,14 @@ adjust_genes_regions_report <- function(hmm.infercnv_obj, ## pos_begin = paste(gene_order[gene_idx[1],,drop=TRUE], collapse=",") pos_begin = gene_order[gene_idx[1],,drop=TRUE] - cnv_region_counter = cnv_region_counter + 1 + local_cnv_region_counter = local_cnv_region_counter + 1 - cnv_region_name = sprintf("%s-region_%d", chr, cnv_region_counter) current_cnv_region = data.frame(state=prev_state, gene=gene_names[gene_idx[1]], chr=pos_begin$chr, start=pos_begin$start, end=pos_begin$stop) - regions[[cnv_region_name]] = current_cnv_region + unnamed_regions[[local_cnv_region_counter]] = current_cnv_region for (i in seq(2,length(gene_idx))) { state = chr_states[i] @@ -955,12 +1034,11 @@ adjust_genes_regions_report <- function(hmm.infercnv_obj, if (state != prev_state) { ## state transition ## start new cnv region - cnv_region_counter = cnv_region_counter + 1 - cnv_region_name = sprintf("%s-region_%d", chr, cnv_region_counter) - regions[[cnv_region_name]] = next_gene_entry + cnv_regionlocal_cnv_region_counter_counter = local_cnv_region_counter + 1 + unnamed_regions[[local_cnv_region_counter]] = next_gene_entry } else { ## append gene to current cnv region - regions[[cnv_region_name]] = rbind(regions[[cnv_region_name]], next_gene_entry) + unnamed_regions[[local_cnv_region_counter]] = rbind(unnamed_regions[[local_cnv_region_counter]], next_gene_entry) } prev_state = state @@ -968,7 +1046,7 @@ adjust_genes_regions_report <- function(hmm.infercnv_obj, } - return(regions) + return(unnamed_regions) }