diff --git a/DESCRIPTION b/DESCRIPTION
index e61e7ee..af636e5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,19 @@
Package: KGExplorer
Type: Package
Title: Biomedical Knowledge Network Construction and Analysis
-Version: 0.99.03
+Version: 0.99.05
Authors@R:
c(
person(given = "Brian",
family = "Schilder",
role = c("aut","cre"),
email = "brian_schilder@alumni.brown.edu",
- comment = c(ORCID = "0000-0001-5949-2191"))
+ comment = c(ORCID = "0000-0001-5949-2191")),
+ person(given = "Hiranyamaya",
+ family = "Dash",
+ role = c("ctb"),
+ email = "hdash.work@gmail.com",
+ comment = c(ORCID = "0009-0005-5514-505X"))
)
Description: Query, construct, and analyse large-scale biomedical knowledge graphs and ontologies.
URL: https://github.com/neurogenomics/KGExplorer
@@ -75,15 +80,16 @@ Suggests:
tidyr,
DiagrammeR,
forcats,
- arrow
+ arrow,
+ curl
Remotes:
- github::charlieccarey/monarchr,
+ github::monarch-initiative/monarchr,
github::phenoscape/rphenoscape,
github::vjcitn/biocBiocypher,
github::RajLabMSSM/echogithub,
github::RajLabMSSM/downloadR,
github::kwstat/pals
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
VignetteBuilder: knitr
License: GPL-3
Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 8111e39..20f5822 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -22,6 +22,7 @@ export(get_gencc)
export(get_gene_lengths)
export(get_genes_disease)
export(get_graph_colnames)
+export(get_hpo)
export(get_monarch)
export(get_monarch_files)
export(get_monarch_kg)
diff --git a/NEWS.md b/NEWS.md
index c114c0b..569e822 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,35 @@
+# KGExplorer 0.99.05
+
+## New features
+* `get_hpo`
+ - Port function from `HPOExplorer` package to prevent circular dependency.
+
+## Bug fixes
+* `DESCRIPTION`
+ - Update remote for `monarchr`.
+* Tests
+ - Add `skip_if_offline` to tests that (may) require internet access.
+* `ontology_to`
+ - `igraph::as_adj` (deprecated) -> `igraph::as_adjacency_matrix`.
+
+# KGExplorer 0.99.04
+
+## Bug fixes
+* `test-get_ontology_levels`
+ - Check for range rather than fixed values.
+* `filter_ontology`
+ - Move `terms` processing block to after check for character, as appropriate.
+* `get_ontology_dict`
+ - Add error handling for missing `alternative_terms` when
+ `include_alternative_terms=TRUE`.
+* `plot_ontology_heatmap`
+ - Fix default value for argument `annot`-- cast one@elementMetadata to
+ data.frame first.
+* `prune_ancestors`
+ - Add value for argument `id_col` in example.
+* `set_cores`
+ - Reduce workers during `R CMD CHECK` if required.
+
# KGExplorer 0.99.03
## New features
diff --git a/R/filter_ontology.R b/R/filter_ontology.R
index 7bad367..03d3432 100644
--- a/R/filter_ontology.R
+++ b/R/filter_ontology.R
@@ -16,7 +16,7 @@ filter_ontology <- function(ont,
include_self = TRUE,
use_simona=FALSE,
...){
- #### Check remove_terms ####
+ #### Check remove_terms ####
terms <- terms[!terms %in% remove_terms]
#### Use simona ####
if(isTRUE(use_simona)){
@@ -30,14 +30,14 @@ filter_ontology <- function(ont,
to = "id") |> stats::na.omit()
if(length(keep_descendants)>0){
messager("Keeping descendants of",length(keep_descendants),"term(s).")
- ont <- simona::dag_filter(ont,
+ ont <- simona::dag_filter(ont,
root=as.character(keep_descendants),
...)
messager(formatC(ont@n_terms,big.mark = ","),
"terms remain after filtering.")
} else {
messager("keep_descendants: No descendants found.")
- }
+ }
}
#### remove_descendants ####
if(!is.null(remove_descendants)){
@@ -50,8 +50,8 @@ filter_ontology <- function(ont,
include_self = include_self,
term = remove_descendants)
keep_terms <- ont@terms[!ont@terms %in% remove_descendants]
- ont <- simona::dag_filter(ont,
- terms=keep_terms,
+ ont <- simona::dag_filter(ont,
+ terms=keep_terms,
...)
messager(formatC(ont@n_terms,big.mark = ","),
"terms remain after filtering.")
@@ -61,29 +61,30 @@ filter_ontology <- function(ont,
}
#### Use custom filtering methods ####
if(!is.null(terms)){
- terms <- map_ontology_terms(ont = ont,
- terms = terms,
- to = "id") |> stats::na.omit()
- ## Characters
+
+ ## Characters
if(is.character(terms)){
+ terms <- map_ontology_terms(ont = ont,
+ terms = terms,
+ to = "id") |> stats::na.omit()
terms <- terms[simona::dag_has_terms(dag=ont, terms = unique(terms))]
if(length(terms)==0) {
stopper("None of the supplied terms found in the ontology.")
- }
+ }
ont <- ont[,terms]
-
+
} else if (is.numeric(terms)){
messager("Randomly sampling",terms,"term(s).")
if(terms>length(ont@terms)){
messager(
"Number of terms requested exceeds number of terms in the ontology.",
"Returning original ontology object without filtering.")
- return(ont)
- }
+ return(ont)
+ }
if(terms==0) stopper("Terms must be >0 if numeric.")
term_ids <- sample(ont@terms,terms, replace = FALSE)
ont <- ont[,term_ids]
- }
+ }
}
return(ont)
-}
\ No newline at end of file
+}
diff --git a/R/get_hpo.R b/R/get_hpo.R
new file mode 100644
index 0000000..00f526c
--- /dev/null
+++ b/R/get_hpo.R
@@ -0,0 +1,43 @@
+#' @describeIn get_ get_
+#' Get Human Phenotype Ontology (HPO)
+#'
+#' Updated version of Human Phenotype Ontology (HPO).
+#' Created from the OBO files distributed by the HPO project's
+#' \href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}.
+#' Adapted from \link[HPOExplorer]{get_hpo}.
+#'
+#' By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016.
+#' Note that the maximum ontology level depth in the 2016 version was 14,
+#' whereas in the 2023 version the maximum ontology level depth is 16
+#' (due to an expansion of the HPO).
+#' @inheritParams get_ontology
+#' @inheritDotParams get_ontology
+#' @returns \link[simona]{ontology_DAG} object.
+#'
+#' @export
+#' @examples
+#' hpo <- get_hpo()
+get_hpo <- function(lvl = 2,
+ force_new = FALSE,
+ terms=NULL,
+ ## rols imports the international version for some reason
+ method="github",
+ save_dir=cache_dir(package = "KGExplorer"),
+ ...){
+
+ file <- file.path(save_dir,"hp.rds")
+ if(!file.exists(file) || isTRUE(force_new)){
+ ont <- get_ontology(name = "hp",
+ lvl = lvl,
+ force_new = force_new,
+ terms = terms,
+ method = method,
+ save_dir = save_dir,
+ ...)
+ saveRDS(ont,file)
+ } else {
+ ont <- readRDS(file)
+ }
+ ont <- filter_ontology(ont = ont, terms = terms)
+ return(ont)
+}
diff --git a/R/get_ontology_dict.R b/R/get_ontology_dict.R
index c441bec..522cee9 100644
--- a/R/get_ontology_dict.R
+++ b/R/get_ontology_dict.R
@@ -1,22 +1,22 @@
#' @describeIn get_ get_
-#'
+#'
#' @param as_datatable Return as a data.table instead of a named vector.
#' @param include_alternative_terms Include alternative terms in the dictionary.
#' @export
#' @examples
#' ont <- get_ontology("hp", terms=10)
#' dict <- get_ontology_dict(ont)
-get_ontology_dict <- function(ont,
+get_ontology_dict <- function(ont,
from="short_id",
to=c("name","label","term"),
include_self=FALSE,
- include_alternative_terms=TRUE,
+ include_alternative_terms=FALSE,
as_datatable=FALSE){
to <- intersect(to,colnames(ont@elementMetadata))[1]
-
+
if(from=="id") from <- "short_id"
if(to=="id") to <- "short_id"
-
+
## Check from col exists
if(!from %in% colnames(ont@elementMetadata)){
stopper("Column",from,"not found in ontology metadata.")
@@ -25,7 +25,7 @@ get_ontology_dict <- function(ont,
if(!to %in% colnames(ont@elementMetadata)){
stopper("Column",to,"not found in ontology metadata.")
}
-
+
if(isTRUE(as_datatable)){
#### As data.table ####
dict <- data.table::as.data.table(
@@ -33,7 +33,10 @@ get_ontology_dict <- function(ont,
)[,from:=get(from)][,to:=get(to)][,c("from","to")]
if(isTRUE(include_alternative_terms) &&
"alternative_terms" %in% methods::slotNames(ont)){
- data.table::setkeyv(dict, c("from"))
+ data.table::setkeyv(dict, c("from"))
+ if(length(ontnn@alternative_terms)==0){
+ stopper("No alternative terms found in ontology.")
+ }
tmp <- data.table::data.table(
from=gsub("_",":",basename(names(ont@alternative_terms))),
to=dict[unname(ont@alternative_terms)]$to)
@@ -46,7 +49,7 @@ get_ontology_dict <- function(ont,
)[,from:=get(to)][,to:=get(to)][,c("from","to")])
}
dict <- unique(dict)
- data.table::setkeyv(dict, c("from"))
+ data.table::setkeyv(dict, c("from"))
} else {
#### As named vector ####
dict <- stats::setNames(ont@elementMetadata[[to]],
@@ -57,6 +60,6 @@ get_ontology_dict <- function(ont,
ont@elementMetadata[[to]])
)
}
- }
+ }
return(dict)
}
diff --git a/R/map_upheno_data_i.R b/R/map_upheno_data_i.R
index f104977..da5104a 100644
--- a/R/map_upheno_data_i.R
+++ b/R/map_upheno_data_i.R
@@ -12,7 +12,7 @@ map_upheno_data_i <- function(pheno_map_method,
n_genes_db1 <- object <- gene_label <- db <- . <-
n_genes_db2 <- subject_taxon_label1 <- subject_taxon_label2 <-
phenotype_genotype_score <- equivalence_score <- NULL;
-
+
pheno_map_method <- pheno_map_method[1]
gene_map_method <- gene_map_method[1]
messager(paste0("map_upheno_data: pheno_map_method=",
@@ -26,10 +26,10 @@ map_upheno_data_i <- function(pheno_map_method,
names(pheno_map) <-gsub("^object","id2",names(pheno_map))
pheno_map[,db1:=gsub("*:.*","",basename(id1))]
} else if(pheno_map_method=="monarch"){
-
- hpo <- HPOExplorer::get_hpo()
+
+ hpo <- get_hpo()
out <- monarchr::monarch_search(query = NULL,
- category = "biolink:PhenotypicFeature",
+ category = "biolink:PhenotypicFeature",
limit = 500)
pheno_map <- get_monarch(queries = "phenotype_to_phenotype") |>
data.table::setnames(c("label_x","label_y"),c("label1","label2"))
@@ -47,7 +47,7 @@ map_upheno_data_i <- function(pheno_map_method,
}
}
}
-
+
## Gene-phenotype associations across 8 species
{
genes <- get_monarch(maps = list(c("phenotype","gene")),
@@ -65,7 +65,7 @@ map_upheno_data_i <- function(pheno_map_method,
## Create an db-species map for each Ontology
species_map <- genes_map[,.SD[1], keyby="db"][,.(db,subject_taxon_label)]
}
-
+
#### Map non-human genes onto human orthologs ####
{
genes_homol <- map_genes_monarch(dat=genes,
@@ -75,7 +75,7 @@ map_upheno_data_i <- function(pheno_map_method,
data.table::uniqueN(genes$subject_taxon_label),
"species remain after cross-species gene mapping.")
}
-
+
#### Map non-human phenotypes onto human phenotypes ####
#### Merge nonhuman ontology genes with human HPO genes ####
{
@@ -94,7 +94,7 @@ map_upheno_data_i <- function(pheno_map_method,
all.y = keep_nogenes,
suffixes = c(1,2),
allow.cartesian = TRUE
- )
+ )
pheno_map_genes[,db2:=id2_db]
## Fill in missing species for those without gene data
pheno_map_genes[
@@ -113,7 +113,7 @@ map_upheno_data_i <- function(pheno_map_method,
## Remove
# remove(genes_human,genes_nonhuman,pheno_map)
}
-
+
#### Count the number of overlapping genes
{
if(isFALSE(keep_nogenes)){
@@ -154,4 +154,4 @@ map_upheno_data_i <- function(pheno_map_method,
## less than or equal to the number of total HPO genes.
# pheno_map_genes_match[n_genes>n_genes_hpo,]
return(pheno_map_genes_match)
-}
\ No newline at end of file
+}
diff --git a/R/ontology_to.R b/R/ontology_to.R
index 7da1724..5ed82b6 100644
--- a/R/ontology_to.R
+++ b/R/ontology_to.R
@@ -2,13 +2,13 @@
#' Convert ontology
#'
#' Convert an \link[simona]{ontology_DAG} to
-#' a number of other useful formats.
-#' @export
+#' a number of other useful formats.
+#' @export
#' @importFrom stats as.dist hclust cutree
#' @examples
#' ont <- get_ontology()
#' obj <- ontology_to(ont=ont, to="dendrogram")
-ontology_to <- function(ont,
+ontology_to <- function(ont,
to=c("adjacency",
"adjacency_dist",
"adjacency_dist_hclust",
@@ -28,7 +28,7 @@ ontology_to <- function(ont,
"list"),
terms=ont@terms,
remove_terms=grep(":",terms,
- invert = TRUE,
+ invert = TRUE,
value = TRUE),
as_sparse=FALSE,
...){
@@ -38,7 +38,7 @@ ontology_to <- function(ont,
remove_terms = remove_terms)
if(to=="adjacency"){
g <- ontology_to_graph(ont)
- obj <- igraph::as_adj(g)
+ obj <- igraph::as_adjacency_matrix(g)
} else if(to=="adjacency_dist"){
adj <- ontology_to(ont, to="adjacency")
# obj <- stats::dist(adj) ### seems to take forever
@@ -56,7 +56,7 @@ ontology_to <- function(ont,
obj <- simona::dag_as_DOT(ont, ...)
} else if(to=="similarity"){
obj <- simona::term_sim(ont, terms=ont@terms, ...)
- } else if(to=="adjacency_dist_hclust_clusters"){
+ } else if(to=="adjacency_dist_hclust_clusters"){
hc <- ontology_to(ont, to="adjacency_dist_hclust")
obj <- stats::cutree(hc, ...)
} else if(to=="igraph"){
@@ -74,7 +74,7 @@ ontology_to <- function(ont,
} else if(to=="igraph_dist_hclust_dendrogram"){
gdh <- ontology_to(ont, to="igraph_dist_hclust")
obj <- stats::as.dendrogram(gdh)
- } else if(to=="tbl_graph"){
+ } else if(to=="tbl_graph"){
obj <- ontology_to_graph(ont, ...)
} else if(to=="data.frame"){
g <- ontology_to_graph(ont)
@@ -84,7 +84,7 @@ ontology_to <- function(ont,
obj <- data.table::as.data.table(df)
} else if(to=="list") {
obj <- list(
- similarity=ontology_to(ont, to = "similarity"),
+ similarity=ontology_to(ont, to = "similarity"),
adjacency=ontology_to(ont, to = "adjacency"),
elementMetadata=data.table::data.table(ont@elementMetadata),
annotation=ont@annotation,
diff --git a/R/plot_ontology_heatmap.R b/R/plot_ontology_heatmap.R
index 864af52..f53296e 100644
--- a/R/plot_ontology_heatmap.R
+++ b/R/plot_ontology_heatmap.R
@@ -8,7 +8,7 @@
#' metadata annotations.
#' @param col_side_vars Variables to include in column-side
#' metadata annotations.
-#' @param fontsize Axis labels font size.
+#' @param fontsize Axis labels font size.
#' @param seed Set the seed for reproducible clustering.
#' @inheritParams map_
#' @inheritParams ComplexHeatmap::Heatmap
@@ -21,9 +21,9 @@
#' hm <- plot_ontology_heatmap(ont)
plot_ontology_heatmap <- function(ont,
annot = data.table::data.table(
- ont@elementMetadata
+ as.data.frame(ont@elementMetadata)
),
- X = ontology_to(ont, to = "similarity"),
+ X = ontology_to(ont, to = "similarity"),
fontsize = ont@n_terms*4e-4,
row_labels = ont@terms,
column_labels = row_labels,
@@ -39,7 +39,7 @@ plot_ontology_heatmap <- function(ont,
save_path = tempfile(
fileext = "plot_ontology_heatmap.pdf"),
height = 12,
- width = height*1.1,
+ width = height*1.1,
# row_km = 3,
# column_km = row_km,
# row_km_repeats = 1000,
@@ -48,13 +48,13 @@ plot_ontology_heatmap <- function(ont,
types = c("heatmaply",
"ComplexHeatmap")[2],
...
- ){
- if(!is.null(seed)) set.seed(seed)
+ ){
+ if(!is.null(seed)) set.seed(seed)
## Check if we need to add ancestors
if(any(c("ancestor","ancestor_name") %in% c(row_side_vars,col_side_vars))
&& !is.null(ont)){
ont <- add_ancestors(ont)
- }
+ }
#### Heatmaply version ####
if("heatmaply" %in% types){
requireNamespace("heatmaply")
@@ -112,9 +112,9 @@ plot_ontology_heatmap <- function(ont,
# ComplexHeatmap::row_order(hm)
#### Save plot ####
if(!is.null(save_path)){
- plot_save(plt = hm,
- save_path = save_path,
- height = height,
+ plot_save(plt = hm,
+ save_path = save_path,
+ height = height,
width = width)
}
}
diff --git a/R/prune_ancestors.R b/R/prune_ancestors.R
index 6ff999f..b36686a 100644
--- a/R/prune_ancestors.R
+++ b/R/prune_ancestors.R
@@ -1,5 +1,5 @@
#' Prune ancestor
-#'
+#'
#' Prune redundant ancestral terms from a \link{data.table}.
#' @export
#' @param dat A \link{data.table} with a column of ontology terms.
@@ -9,7 +9,7 @@
#' dat <- data.table::data.table(hpo_id=c("HP:0000001","HP:0000002","HP:0000003"),
#' name=c("term1","term2","term3"))
#' ont <- get_ontology("hp")
-#' dat2 <- prune_ancestors(dat,ont=ont)
+#' dat2 <- prune_ancestors(dat,id_col="hpo_id",ont=ont)
prune_ancestors <- function(dat,
id_col,
ont){
diff --git a/R/set_cores.R b/R/set_cores.R
index a5810a3..22b32a6 100644
--- a/R/set_cores.R
+++ b/R/set_cores.R
@@ -1,24 +1,24 @@
#' Set cores
#'
#' Assign cores automatically for parallel processing, while reserving some.
-#'
+#'
#' @param workers Number (>1) or proportion (<1) of worker cores to use.
#' @param verbose Print messages.
-#' @param progressbar logical(1) Enable progress bar
+#' @param progressbar logical(1) Enable progress bar
#' (based on \code{plyr:::progress_text}).
-#' Enabling the progress bar changes the default value of tasks to
-#' \code{.Machine$integer.max}, so that progress is reported for
+#' Enabling the progress bar changes the default value of tasks to
+#' \code{.Machine$integer.max}, so that progress is reported for
#' each element of X.
#' @returns List of core allocations.
-#'
+#'
#' @export
#' @import data.table
#' @import BiocParallel
#' @importFrom parallel detectCores
set_cores <- function(workers = .90,
progressbar = TRUE,
- verbose = TRUE) {
-
+ verbose = TRUE) {
+
# Enable parallelization of HDF5 functions
## Allocate ~10% of your available cores to non-parallelized processes
workers <- if (is.null(workers)) .90 else workers
@@ -36,12 +36,20 @@ set_cores <- function(workers = .90,
)
### Ensure data.table doesn't interfere with parallelization ####
if(workers>1) data.table::setDTthreads(threads = 1)
+ ### Handle _R_CHECK_LIMIT_CORES_ ###
+ if (nzchar(chk <- Sys.getenv("_R_CHECK_LIMIT_CORES_", ""))) {
+ if (workers > 2) {
+ workers <- 2
+ messager(paste("R_CHECK_LIMIT_CORES_' environment variable detected",
+ "BiocParallel workers reduced to 2."))
+ }
+ }
#### Handle Windows ####
if (.Platform$OS.type == "windows") {
params <- BiocParallel::SnowParam(workers = workers,
progressbar = progressbar)
} else {
- params <- BiocParallel::MulticoreParam(workers = workers,
+ params <- BiocParallel::MulticoreParam(workers = workers,
progressbar = progressbar)
}
# DelayedArray::setAutoBPPARAM(params)
diff --git a/README.md b/README.md
index 4d33fbd..a70d374 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ KGExplorer
[![License:
GPL-3](https://img.shields.io/badge/license-GPL--3-blue.svg)](https://cran.r-project.org/web/licenses/GPL-3)
-[![](https://img.shields.io/badge/devel%20version-0.99.0-black.svg)](https://github.com/neurogenomics/KGExplorer)
+[![](https://img.shields.io/badge/devel%20version-0.99.05-black.svg)](https://github.com/neurogenomics/KGExplorer)
[![](https://img.shields.io/github/languages/code-size/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer)
[![](https://img.shields.io/github/last-commit/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer/commits/master)
[![R build
@@ -12,10 +12,10 @@ status](https://github.com/neurogenomics/KGExplorer/workflows/rworkflows/badge.s