diff --git a/DESCRIPTION b/DESCRIPTION index e61e7ee..af636e5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,19 @@ Package: KGExplorer Type: Package Title: Biomedical Knowledge Network Construction and Analysis -Version: 0.99.03 +Version: 0.99.05 Authors@R: c( person(given = "Brian", family = "Schilder", role = c("aut","cre"), email = "brian_schilder@alumni.brown.edu", - comment = c(ORCID = "0000-0001-5949-2191")) + comment = c(ORCID = "0000-0001-5949-2191")), + person(given = "Hiranyamaya", + family = "Dash", + role = c("ctb"), + email = "hdash.work@gmail.com", + comment = c(ORCID = "0009-0005-5514-505X")) ) Description: Query, construct, and analyse large-scale biomedical knowledge graphs and ontologies. URL: https://github.com/neurogenomics/KGExplorer @@ -75,15 +80,16 @@ Suggests: tidyr, DiagrammeR, forcats, - arrow + arrow, + curl Remotes: - github::charlieccarey/monarchr, + github::monarch-initiative/monarchr, github::phenoscape/rphenoscape, github::vjcitn/biocBiocypher, github::RajLabMSSM/echogithub, github::RajLabMSSM/downloadR, github::kwstat/pals -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 VignetteBuilder: knitr License: GPL-3 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 8111e39..20f5822 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(get_gencc) export(get_gene_lengths) export(get_genes_disease) export(get_graph_colnames) +export(get_hpo) export(get_monarch) export(get_monarch_files) export(get_monarch_kg) diff --git a/NEWS.md b/NEWS.md index c114c0b..569e822 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,35 @@ +# KGExplorer 0.99.05 + +## New features +* `get_hpo` + - Port function from `HPOExplorer` package to prevent circular dependency. + +## Bug fixes +* `DESCRIPTION` + - Update remote for `monarchr`. +* Tests + - Add `skip_if_offline` to tests that (may) require internet access. +* `ontology_to` + - `igraph::as_adj` (deprecated) -> `igraph::as_adjacency_matrix`. 
+ +# KGExplorer 0.99.04 + +## Bug fixes +* `test-get_ontology_levels` + - Check for range rather than fixed values. +* `filter_ontology` + - Move `terms` processing block to after check for character, as appropriate. +* `get_ontology_dict` + - Add error handling for missing `alternative_terms` when + `include_alternative_terms=TRUE`. +* `plot_ontology_heatmap` + - Fix default value for argument `annot`-- cast ont@elementMetadata to + data.frame first. +* `prune_ancestors` + - Add value for argument `id_col` in example. +* `set_cores` + - Reduce workers during `R CMD CHECK` if required. + # KGExplorer 0.99.03 ## New features diff --git a/R/filter_ontology.R b/R/filter_ontology.R index 7bad367..03d3432 100644 --- a/R/filter_ontology.R +++ b/R/filter_ontology.R @@ -16,7 +16,7 @@ filter_ontology <- function(ont, include_self = TRUE, use_simona=FALSE, ...){ - #### Check remove_terms #### + #### Check remove_terms #### terms <- terms[!terms %in% remove_terms] #### Use simona #### if(isTRUE(use_simona)){ @@ -30,14 +30,14 @@ filter_ontology <- function(ont, to = "id") |> stats::na.omit() if(length(keep_descendants)>0){ messager("Keeping descendants of",length(keep_descendants),"term(s).") - ont <- simona::dag_filter(ont, + ont <- simona::dag_filter(ont, root=as.character(keep_descendants), ...) messager(formatC(ont@n_terms,big.mark = ","), "terms remain after filtering.") } else { messager("keep_descendants: No descendants found.") - } + } } #### remove_descendants #### if(!is.null(remove_descendants)){ @@ -50,8 +50,8 @@ filter_ontology <- function(ont, include_self = include_self, term = remove_descendants) keep_terms <- ont@terms[!ont@terms %in% remove_descendants] - ont <- simona::dag_filter(ont, - terms=keep_terms, + ont <- simona::dag_filter(ont, + terms=keep_terms, ...)
messager(formatC(ont@n_terms,big.mark = ","), "terms remain after filtering.") @@ -61,29 +61,30 @@ filter_ontology <- function(ont, } #### Use custom filtering methods #### if(!is.null(terms)){ - terms <- map_ontology_terms(ont = ont, - terms = terms, - to = "id") |> stats::na.omit() - ## Characters + + ## Characters if(is.character(terms)){ + terms <- map_ontology_terms(ont = ont, + terms = terms, + to = "id") |> stats::na.omit() terms <- terms[simona::dag_has_terms(dag=ont, terms = unique(terms))] if(length(terms)==0) { stopper("None of the supplied terms found in the ontology.") - } + } ont <- ont[,terms] - + } else if (is.numeric(terms)){ messager("Randomly sampling",terms,"term(s).") if(terms>length(ont@terms)){ messager( "Number of terms requested exceeds number of terms in the ontology.", "Returning original ontology object without filtering.") - return(ont) - } + return(ont) + } if(terms==0) stopper("Terms must be >0 if numeric.") term_ids <- sample(ont@terms,terms, replace = FALSE) ont <- ont[,term_ids] - } + } } return(ont) -} \ No newline at end of file +} diff --git a/R/get_hpo.R b/R/get_hpo.R new file mode 100644 index 0000000..00f526c --- /dev/null +++ b/R/get_hpo.R @@ -0,0 +1,43 @@ +#' @describeIn get_ get_ +#' Get Human Phenotype Ontology (HPO) +#' +#' Updated version of Human Phenotype Ontology (HPO). +#' Created from the OBO files distributed by the HPO project's +#' \href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}. +#' Adapted from \link[HPOExplorer]{get_hpo}. +#' +#' By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016. +#' Note that the maximum ontology level depth in the 2016 version was 14, +#' whereas in the 2023 version the maximum ontology level depth is 16 +#' (due to an expansion of the HPO). +#' @inheritParams get_ontology +#' @inheritDotParams get_ontology +#' @returns \link[simona]{ontology_DAG} object. 
+#' +#' @export +#' @examples +#' hpo <- get_hpo() +get_hpo <- function(lvl = 2, + force_new = FALSE, + terms=NULL, + ## rols imports the international version for some reason + method="github", + save_dir=cache_dir(package = "KGExplorer"), + ...){ + + file <- file.path(save_dir,"hp.rds") + if(!file.exists(file) || isTRUE(force_new)){ + ont <- get_ontology(name = "hp", + lvl = lvl, + force_new = force_new, + terms = terms, + method = method, + save_dir = save_dir, + ...) + saveRDS(ont,file) + } else { + ont <- readRDS(file) + } + ont <- filter_ontology(ont = ont, terms = terms) + return(ont) +} diff --git a/R/get_ontology_dict.R b/R/get_ontology_dict.R index c441bec..522cee9 100644 --- a/R/get_ontology_dict.R +++ b/R/get_ontology_dict.R @@ -1,22 +1,22 @@ #' @describeIn get_ get_ -#' +#' #' @param as_datatable Return as a data.table instead of a named vector. #' @param include_alternative_terms Include alternative terms in the dictionary. #' @export #' @examples #' ont <- get_ontology("hp", terms=10) #' dict <- get_ontology_dict(ont) -get_ontology_dict <- function(ont, +get_ontology_dict <- function(ont, from="short_id", to=c("name","label","term"), include_self=FALSE, - include_alternative_terms=TRUE, + include_alternative_terms=FALSE, as_datatable=FALSE){ to <- intersect(to,colnames(ont@elementMetadata))[1] - + if(from=="id") from <- "short_id" if(to=="id") to <- "short_id" - + ## Check from col exists if(!from %in% colnames(ont@elementMetadata)){ stopper("Column",from,"not found in ontology metadata.") @@ -25,7 +25,7 @@ get_ontology_dict <- function(ont, if(!to %in% colnames(ont@elementMetadata)){ stopper("Column",to,"not found in ontology metadata.") } - + if(isTRUE(as_datatable)){ #### As data.table #### dict <- data.table::as.data.table( @@ -33,7 +33,10 @@ get_ontology_dict <- function(ont, )[,from:=get(from)][,to:=get(to)][,c("from","to")] if(isTRUE(include_alternative_terms) && "alternative_terms" %in% methods::slotNames(ont)){ - data.table::setkeyv(dict, 
c("from")) + data.table::setkeyv(dict, c("from")) + if(length(ont@alternative_terms)==0){ + stopper("No alternative terms found in ontology.") + } tmp <- data.table::data.table( from=gsub("_",":",basename(names(ont@alternative_terms))), to=dict[unname(ont@alternative_terms)]$to) @@ -46,7 +49,7 @@ get_ontology_dict <- function(ont, )[,from:=get(to)][,to:=get(to)][,c("from","to")]) } dict <- unique(dict) - data.table::setkeyv(dict, c("from")) + data.table::setkeyv(dict, c("from")) } else { #### As named vector #### dict <- stats::setNames(ont@elementMetadata[[to]], @@ -57,6 +60,6 @@ get_ontology_dict <- function(ont, ont@elementMetadata[[to]]) ) } - } + } return(dict) } diff --git a/R/map_upheno_data_i.R b/R/map_upheno_data_i.R index f104977..da5104a 100644 --- a/R/map_upheno_data_i.R +++ b/R/map_upheno_data_i.R @@ -12,7 +12,7 @@ map_upheno_data_i <- function(pheno_map_method, n_genes_db1 <- object <- gene_label <- db <- . <- n_genes_db2 <- subject_taxon_label1 <- subject_taxon_label2 <- phenotype_genotype_score <- equivalence_score <- NULL; - + pheno_map_method <- pheno_map_method[1] gene_map_method <- gene_map_method[1] messager(paste0("map_upheno_data: pheno_map_method=", @@ -26,10 +26,10 @@ map_upheno_data_i <- function(pheno_map_method, names(pheno_map) <-gsub("^object","id2",names(pheno_map)) pheno_map[,db1:=gsub("*:.*","",basename(id1))] } else if(pheno_map_method=="monarch"){ - - hpo <- HPOExplorer::get_hpo() + + hpo <- get_hpo() out <- monarchr::monarch_search(query = NULL, - category = "biolink:PhenotypicFeature", + category = "biolink:PhenotypicFeature", limit = 500) pheno_map <- get_monarch(queries = "phenotype_to_phenotype") |> data.table::setnames(c("label_x","label_y"),c("label1","label2")) @@ -47,7 +47,7 @@ map_upheno_data_i <- function(pheno_map_method, } } } - + ## Gene-phenotype associations across 8 species { genes <- get_monarch(maps = list(c("phenotype","gene")), @@ -65,7 +65,7 @@ map_upheno_data_i <- function(pheno_map_method, ## Create an
db-species map for each Ontology species_map <- genes_map[,.SD[1], keyby="db"][,.(db,subject_taxon_label)] } - + #### Map non-human genes onto human orthologs #### { genes_homol <- map_genes_monarch(dat=genes, @@ -75,7 +75,7 @@ map_upheno_data_i <- function(pheno_map_method, data.table::uniqueN(genes$subject_taxon_label), "species remain after cross-species gene mapping.") } - + #### Map non-human phenotypes onto human phenotypes #### #### Merge nonhuman ontology genes with human HPO genes #### { @@ -94,7 +94,7 @@ map_upheno_data_i <- function(pheno_map_method, all.y = keep_nogenes, suffixes = c(1,2), allow.cartesian = TRUE - ) + ) pheno_map_genes[,db2:=id2_db] ## Fill in missing species for those without gene data pheno_map_genes[ @@ -113,7 +113,7 @@ map_upheno_data_i <- function(pheno_map_method, ## Remove # remove(genes_human,genes_nonhuman,pheno_map) } - + #### Count the number of overlapping genes { if(isFALSE(keep_nogenes)){ @@ -154,4 +154,4 @@ map_upheno_data_i <- function(pheno_map_method, ## less than or equal to the number of total HPO genes. # pheno_map_genes_match[n_genes>n_genes_hpo,] return(pheno_map_genes_match) -} \ No newline at end of file +} diff --git a/R/ontology_to.R b/R/ontology_to.R index 7da1724..5ed82b6 100644 --- a/R/ontology_to.R +++ b/R/ontology_to.R @@ -2,13 +2,13 @@ #' Convert ontology #' #' Convert an \link[simona]{ontology_DAG} to -#' a number of other useful formats. -#' @export +#' a number of other useful formats. 
+#' @export #' @importFrom stats as.dist hclust cutree #' @examples #' ont <- get_ontology() #' obj <- ontology_to(ont=ont, to="dendrogram") -ontology_to <- function(ont, +ontology_to <- function(ont, to=c("adjacency", "adjacency_dist", "adjacency_dist_hclust", @@ -28,7 +28,7 @@ ontology_to <- function(ont, "list"), terms=ont@terms, remove_terms=grep(":",terms, - invert = TRUE, + invert = TRUE, value = TRUE), as_sparse=FALSE, ...){ @@ -38,7 +38,7 @@ ontology_to <- function(ont, remove_terms = remove_terms) if(to=="adjacency"){ g <- ontology_to_graph(ont) - obj <- igraph::as_adj(g) + obj <- igraph::as_adjacency_matrix(g) } else if(to=="adjacency_dist"){ adj <- ontology_to(ont, to="adjacency") # obj <- stats::dist(adj) ### seems to take forever @@ -56,7 +56,7 @@ ontology_to <- function(ont, obj <- simona::dag_as_DOT(ont, ...) } else if(to=="similarity"){ obj <- simona::term_sim(ont, terms=ont@terms, ...) - } else if(to=="adjacency_dist_hclust_clusters"){ + } else if(to=="adjacency_dist_hclust_clusters"){ hc <- ontology_to(ont, to="adjacency_dist_hclust") obj <- stats::cutree(hc, ...) } else if(to=="igraph"){ @@ -74,7 +74,7 @@ ontology_to <- function(ont, } else if(to=="igraph_dist_hclust_dendrogram"){ gdh <- ontology_to(ont, to="igraph_dist_hclust") obj <- stats::as.dendrogram(gdh) - } else if(to=="tbl_graph"){ + } else if(to=="tbl_graph"){ obj <- ontology_to_graph(ont, ...) 
} else if(to=="data.frame"){ g <- ontology_to_graph(ont) @@ -84,7 +84,7 @@ ontology_to <- function(ont, obj <- data.table::as.data.table(df) } else if(to=="list") { obj <- list( - similarity=ontology_to(ont, to = "similarity"), + similarity=ontology_to(ont, to = "similarity"), adjacency=ontology_to(ont, to = "adjacency"), elementMetadata=data.table::data.table(ont@elementMetadata), annotation=ont@annotation, diff --git a/R/plot_ontology_heatmap.R b/R/plot_ontology_heatmap.R index 864af52..f53296e 100644 --- a/R/plot_ontology_heatmap.R +++ b/R/plot_ontology_heatmap.R @@ -8,7 +8,7 @@ #' metadata annotations. #' @param col_side_vars Variables to include in column-side #' metadata annotations. -#' @param fontsize Axis labels font size. +#' @param fontsize Axis labels font size. #' @param seed Set the seed for reproducible clustering. #' @inheritParams map_ #' @inheritParams ComplexHeatmap::Heatmap @@ -21,9 +21,9 @@ #' hm <- plot_ontology_heatmap(ont) plot_ontology_heatmap <- function(ont, annot = data.table::data.table( - ont@elementMetadata + as.data.frame(ont@elementMetadata) ), - X = ontology_to(ont, to = "similarity"), + X = ontology_to(ont, to = "similarity"), fontsize = ont@n_terms*4e-4, row_labels = ont@terms, column_labels = row_labels, @@ -39,7 +39,7 @@ plot_ontology_heatmap <- function(ont, save_path = tempfile( fileext = "plot_ontology_heatmap.pdf"), height = 12, - width = height*1.1, + width = height*1.1, # row_km = 3, # column_km = row_km, # row_km_repeats = 1000, @@ -48,13 +48,13 @@ plot_ontology_heatmap <- function(ont, types = c("heatmaply", "ComplexHeatmap")[2], ... 
- ){ - if(!is.null(seed)) set.seed(seed) + ){ + if(!is.null(seed)) set.seed(seed) ## Check if we need to add ancestors if(any(c("ancestor","ancestor_name") %in% c(row_side_vars,col_side_vars)) && !is.null(ont)){ ont <- add_ancestors(ont) - } + } #### Heatmaply version #### if("heatmaply" %in% types){ requireNamespace("heatmaply") @@ -112,9 +112,9 @@ plot_ontology_heatmap <- function(ont, # ComplexHeatmap::row_order(hm) #### Save plot #### if(!is.null(save_path)){ - plot_save(plt = hm, - save_path = save_path, - height = height, + plot_save(plt = hm, + save_path = save_path, + height = height, width = width) } } diff --git a/R/prune_ancestors.R b/R/prune_ancestors.R index 6ff999f..b36686a 100644 --- a/R/prune_ancestors.R +++ b/R/prune_ancestors.R @@ -1,5 +1,5 @@ #' Prune ancestor -#' +#' #' Prune redundant ancestral terms from a \link{data.table}. #' @export #' @param dat A \link{data.table} with a column of ontology terms. @@ -9,7 +9,7 @@ #' dat <- data.table::data.table(hpo_id=c("HP:0000001","HP:0000002","HP:0000003"), #' name=c("term1","term2","term3")) #' ont <- get_ontology("hp") -#' dat2 <- prune_ancestors(dat,ont=ont) +#' dat2 <- prune_ancestors(dat,id_col="hpo_id",ont=ont) prune_ancestors <- function(dat, id_col, ont){ diff --git a/R/set_cores.R b/R/set_cores.R index a5810a3..22b32a6 100644 --- a/R/set_cores.R +++ b/R/set_cores.R @@ -1,24 +1,24 @@ #' Set cores #' #' Assign cores automatically for parallel processing, while reserving some. -#' +#' #' @param workers Number (>1) or proportion (<1) of worker cores to use. #' @param verbose Print messages. -#' @param progressbar logical(1) Enable progress bar +#' @param progressbar logical(1) Enable progress bar #' (based on \code{plyr:::progress_text}). 
-#' Enabling the progress bar changes the default value of tasks to -#' \code{.Machine$integer.max}, so that progress is reported for +#' Enabling the progress bar changes the default value of tasks to +#' \code{.Machine$integer.max}, so that progress is reported for #' each element of X. #' @returns List of core allocations. -#' +#' #' @export #' @import data.table #' @import BiocParallel #' @importFrom parallel detectCores set_cores <- function(workers = .90, progressbar = TRUE, - verbose = TRUE) { - + verbose = TRUE) { + # Enable parallelization of HDF5 functions ## Allocate ~10% of your available cores to non-parallelized processes workers <- if (is.null(workers)) .90 else workers @@ -36,12 +36,20 @@ set_cores <- function(workers = .90, ) ### Ensure data.table doesn't interfere with parallelization #### if(workers>1) data.table::setDTthreads(threads = 1) + ### Handle _R_CHECK_LIMIT_CORES_ ### + if (nzchar(chk <- Sys.getenv("_R_CHECK_LIMIT_CORES_", ""))) { + if (workers > 2) { + workers <- 2 + messager(paste("'_R_CHECK_LIMIT_CORES_' environment variable detected.", + "BiocParallel workers reduced to 2.")) + } + } #### Handle Windows #### if (.Platform$OS.type == "windows") { params <- BiocParallel::SnowParam(workers = workers, progressbar = progressbar) } else { - params <- BiocParallel::MulticoreParam(workers = workers, + params <- BiocParallel::MulticoreParam(workers = workers, progressbar = progressbar) } # DelayedArray::setAutoBPPARAM(params) diff --git a/README.md b/README.md index 4d33fbd..a70d374 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ KGExplorer
[![License: GPL-3](https://img.shields.io/badge/license-GPL--3-blue.svg)](https://cran.r-project.org/web/licenses/GPL-3) -[![](https://img.shields.io/badge/devel%20version-0.99.0-black.svg)](https://github.com/neurogenomics/KGExplorer) +[![](https://img.shields.io/badge/devel%20version-0.99.05-black.svg)](https://github.com/neurogenomics/KGExplorer) [![](https://img.shields.io/github/languages/code-size/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer) [![](https://img.shields.io/github/last-commit/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer/commits/master)
[![R build @@ -12,10 +12,10 @@ status](https://github.com/neurogenomics/KGExplorer/workflows/rworkflows/badge.s

-Authors: Brian Schilder +Authors: Brian Schilder, Hiranyamaya Dash

-README updated: Mar-08-2024 +README updated: Dec-18-2024

@@ -58,13 +58,13 @@ library(KGExplorer) utils::sessionInfo() ``` - ## R version 4.3.1 (2023-06-16) - ## Platform: aarch64-apple-darwin20 (64-bit) - ## Running under: macOS Sonoma 14.3.1 + ## R version 4.4.2 (2024-10-31) + ## Platform: aarch64-apple-darwin20 + ## Running under: macOS Sequoia 15.2 ## ## Matrix products: default - ## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib - ## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0 + ## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib + ## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0 ## ## locale: ## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 @@ -76,21 +76,21 @@ utils::sessionInfo() ## [1] stats graphics grDevices utils datasets methods base ## ## loaded via a namespace (and not attached): - ## [1] gtable_0.3.4 jsonlite_1.8.8 renv_1.0.3 - ## [4] dplyr_1.1.4 compiler_4.3.1 BiocManager_1.30.22 - ## [7] tidyselect_1.2.0 rvcheck_0.2.1 scales_1.3.0 - ## [10] yaml_2.3.8 fastmap_1.1.1 here_1.0.1 - ## [13] ggplot2_3.4.4 R6_2.5.1 generics_0.1.3 - ## [16] knitr_1.45 yulab.utils_0.1.4 tibble_3.2.1 + ## [1] gtable_0.3.6 jsonlite_1.8.9 renv_1.0.11 + ## [4] dplyr_1.1.4 compiler_4.4.2 BiocManager_1.30.25 + ## [7] tidyselect_1.2.1 rvcheck_0.2.1 scales_1.3.0 + ## [10] yaml_2.3.10 fastmap_1.2.0 here_1.0.1 + ## [13] ggplot2_3.5.1 R6_2.5.1 generics_0.1.3 + ## [16] knitr_1.49 yulab.utils_0.1.8 tibble_3.2.1 ## [19] desc_1.4.3 dlstats_0.1.7 rprojroot_2.0.4 - ## [22] munsell_0.5.0 pillar_1.9.0 RColorBrewer_1.1-3 - ## [25] rlang_1.1.3 utf8_1.2.4 cachem_1.0.8 - ## [28] badger_0.2.3 xfun_0.42 fs_1.6.3 - ## [31] memoise_2.0.1.9000 cli_3.6.2 magrittr_2.0.3 - ## [34] rworkflows_1.0.1 digest_0.6.34 grid_4.3.1 - ## [37] rstudioapi_0.15.0 lifecycle_1.0.4 vctrs_0.6.5 - ## [40] data.table_1.15.0 evaluate_0.23 glue_1.7.0 
- ## [43] fansi_1.0.6 colorspace_2.1-0 rmarkdown_2.25 - ## [46] tools_4.3.1 pkgconfig_2.0.3 htmltools_0.5.7 + ## [22] munsell_0.5.1 pillar_1.9.0 RColorBrewer_1.1-3 + ## [25] rlang_1.1.4 utf8_1.2.4 badger_0.2.4 + ## [28] xfun_0.49 fs_1.6.5 cli_3.6.3 + ## [31] magrittr_2.0.3 rworkflows_1.0.3 digest_0.6.37 + ## [34] grid_4.4.2 rstudioapi_0.17.1 lifecycle_1.0.4 + ## [37] vctrs_0.6.5 evaluate_1.0.1 glue_1.8.0 + ## [40] data.table_1.16.2 fansi_1.0.6 colorspace_2.1-1 + ## [43] rmarkdown_2.29 tools_4.4.2 pkgconfig_2.0.3 + ## [46] htmltools_0.5.8.1 diff --git a/man/get_.Rd b/man/get_.Rd index 7b36ff0..8d92860 100644 --- a/man/get_.Rd +++ b/man/get_.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/0docs.R, R/get_alphamissense.R, % R/get_clinvar.R, R/get_data_package.R, R/get_definitions.R, R/get_gencc.R, % R/get_gene_lengths.R, R/get_genes_disease.R, R/get_graph_colnames.R, -% R/get_medgen_maps.R, R/get_metadata_omim.R, R/get_metadata_orphanet.R, -% R/get_monarch.R, R/get_monarch_files.R, R/get_monarch_kg.R, -% R/get_monarch_models.R, R/get_mondo_maps.R, R/get_mondo_maps_files.R, -% R/get_ols_options.R, R/get_ontology.R, R/get_ontology_dict.R, -% R/get_ontology_levels.R, R/get_pli.R, R/get_prevalence.R, R/get_ttd.R, -% R/get_upheno.R, R/get_version.R +% R/get_hpo.R, R/get_medgen_maps.R, R/get_metadata_omim.R, +% R/get_metadata_orphanet.R, R/get_monarch.R, R/get_monarch_files.R, +% R/get_monarch_kg.R, R/get_monarch_models.R, R/get_mondo_maps.R, +% R/get_mondo_maps_files.R, R/get_ols_options.R, R/get_ontology.R, +% R/get_ontology_dict.R, R/get_ontology_levels.R, R/get_pli.R, +% R/get_prevalence.R, R/get_ttd.R, R/get_upheno.R, R/get_version.R \name{get_} \alias{get_} \alias{get_alphamissense} @@ -18,6 +18,7 @@ \alias{get_gene_lengths} \alias{get_genes_disease} \alias{get_graph_colnames} +\alias{get_hpo} \alias{get_medgen_maps} \alias{get_metadata_omim} \alias{get_metadata_orphanet} @@ -85,6 +86,15 @@ get_genes_disease( get_graph_colnames(g, what = c("nodes", "edges")) 
+get_hpo( + lvl = 2, + force_new = FALSE, + terms = NULL, + method = "github", + save_dir = cache_dir(package = "KGExplorer"), + ... +) + get_medgen_maps() get_metadata_omim(save_dir = cache_dir()) @@ -155,7 +165,7 @@ get_ontology_dict( from = "short_id", to = c("name", "label", "term"), include_self = FALSE, - include_alternative_terms = TRUE, + include_alternative_terms = FALSE, as_datatable = FALSE ) @@ -243,7 +253,7 @@ Passes to \link{get_monarch_files}.} \item{run_map_mondo}{Run \link{map_mondo} to map MONDO IDs to disease IDs.} \item{...}{ - Arguments passed on to \code{\link[=link_monarch]{link_monarch}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[=get_ontology_github]{get_ontology_github}} + Arguments passed on to \code{\link[=link_monarch]{link_monarch}}, \code{\link[=get_ontology]{get_ontology}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[=get_ontology_github]{get_ontology_github}} \describe{ \item{\code{node_filters}}{A named list of filters to apply to the node data. Names should be name of the metadata column, and values should be a vector of @@ -266,15 +276,15 @@ return any rows where the "type" column contains either "gene" or "variant".} If type coercion results in an error, introduces \code{NA}s, or would result in loss of accuracy, the coercion attempt is aborted for that column with warning and the column's type is left unchanged. If you really desire data loss (e.g. reading \code{3.14} as \code{integer}) you have to truncate such columns afterwards yourself explicitly so that this is clear to future readers of your code. } \item{\code{integer64}}{ "integer64" (default) reads columns detected as containing integers larger than 2^31 as type \code{bit64::integer64}. Alternatively, \code{"double"|"numeric"} reads as \code{utils::read.csv} does; i.e., possibly with loss of precision and if so silently. 
Or, "character". } - \item{\code{dec}}{ The decimal separator as in \code{utils::read.csv}. If not "." (default) then usually ",". See details. } + \item{\code{dec}}{ The decimal separator as in \code{utils::read.csv}. When \code{"auto"} (the default), an attempt is made to decide whether \code{"."} or \code{","} is more suitable for this input. See details. } \item{\code{col.names}}{ A vector of optional names for the variables (columns). The default is to use the header column if present or detected, or if not "V" followed by the column number. This is applied after \code{check.names} and before \code{key} and \code{index}. } \item{\code{check.names}}{default is \code{FALSE}. If \code{TRUE} then the names of the variables in the \code{data.table} are checked to ensure that they are syntactically valid variable names. If necessary they are adjusted (by \code{\link{make.names}}) so that they are, and also to ensure that there are no duplicates.} \item{\code{encoding}}{ default is \code{"unknown"}. Other possible options are \code{"UTF-8"} and \code{"Latin-1"}. Note: it is not used to re-encode the input, rather enables handling of encoded strings in their native encoding. } \item{\code{quote}}{ By default (\code{"\\""}), if a field starts with a double quote, \code{fread} handles embedded quotes robustly as explained under \code{Details}. If it fails, then another attempt is made to read the field \emph{as is}, i.e., as if quotes are disabled. By setting \code{quote=""}, the field is always read as if quotes are disabled. It is not expected to ever need to pass anything other than \\"\\" to quote; i.e., to turn it off. } - \item{\code{strip.white}}{ default is \code{TRUE}. Strips leading and trailing whitespaces of unquoted fields. If \code{FALSE}, only header trailing spaces are removed. } - \item{\code{fill}}{logical (default is \code{FALSE}). 
If \code{TRUE} then in case the rows have unequal length, blank fields are implicitly filled.} + \item{\code{strip.white}}{ Logical, default \code{TRUE}, in which case leading and trailing whitespace is stripped from unquoted \code{"character"} fields. \code{"numeric"} fields are always stripped of leading and trailing whitespace.} + \item{\code{fill}}{logical or integer (default is \code{FALSE}). If \code{TRUE} then in case the rows have unequal length, number of columns is estimated and blank fields are implicitly filled. If an integer is provided it is used as an upper bound for the number of columns. If \code{fill=Inf} then the whole file is read for detecting the number of columns. } \item{\code{blank.lines.skip}}{\code{logical}, default is \code{FALSE}. If \code{TRUE} blank lines in the input are ignored.} - \item{\code{key}}{Character vector of one or more column names which is passed to \code{\link[data.table]{setkey}}. It may be a single comma separated string such as \code{key="x,y,z"}, or a vector of names such as \code{key=c("x","y","z")}. Only valid when argument \code{data.table=TRUE}. Where applicable, this should refer to column names given in \code{col.names}. } + \item{\code{key}}{Character vector of one or more column names which is passed to \code{\link[data.table]{setkey}}. Only valid when argument \code{data.table=TRUE}. Where applicable, this should refer to column names given in \code{col.names}. } \item{\code{index}}{ Character vector or list of character vectors of one or more column names which is passed to \code{\link[data.table]{setindexv}}. As with \code{key}, comma-separated notation like \code{index="x,y,z"} is accepted for convenience. Only valid when argument \code{data.table=TRUE}. Where applicable, this should refer to column names given in \code{col.names}. } \item{\code{showProgress}}{ \code{TRUE} displays progress on the console if the ETA is greater than 3 seconds. 
It is produced in fread's C code where the very nice (but R level) txtProgressBar and tkProgressBar are not easily available. } \item{\code{data.table}}{ TRUE returns a \code{data.table}. FALSE returns a \code{data.frame}. The default for this argument can be changed with \code{options(datatable.fread.datatable=FALSE)}.} @@ -294,6 +304,17 @@ return any rows where the "type" column contains either "gene" or "variant".} \item{what}{What should get activated? Possible values are \code{nodes} or \code{edges}.} +\item{lvl}{Depth of the ancestor terms to add. +Will get the closest ancestor to this level if none have this exact level.} + +\item{terms}{A vector of ontology term IDs.} + +\item{method}{Compute ontology levels using: +\itemize{ + \item{"height" (default)} \link[simona]{dag_height}. + \item{"depth"} \link[simona]{dag_depth}. +}} + \item{queries}{A list of free-form substring queries to filter files by (using any column in the metadata). For example, \code{list("gene_disease","variant_disease")} will return any @@ -338,24 +359,13 @@ Can be a character vector of one or more column names.} \item{ol}{An \link[rols]{Ontologies} object.} -\item{method}{Compute ontology levels using: -\itemize{ - \item{"height" (default)} \link[simona]{dag_height}. - \item{"depth"} \link[simona]{dag_depth}. -}} - \item{filetype}{File type to search for.} \item{import_func}{Function to import the ontology with. If \code{NULL}, automatically tries to choose the correct function.} -\item{terms}{A vector of ontology term IDs.} - \item{add_metadata}{Add metadata to the resulting ontology object.} -\item{lvl}{Depth of the ancestor terms to add. -Will get the closest ancestor to this level if none have this exact level.} - \item{add_n_edges}{Add the number of edges (connections) for each term.} \item{add_ontology_levels}{Add the ontology level for each term.} @@ -420,6 +430,8 @@ A named list of data.tables of AlphaMissense predictions. data.table +\link[simona]{ontology_DAG} object. 
+ \link[data.table]{data.table} \link[data.table]{data.table} of mappings. @@ -504,6 +516,19 @@ Load gene lists associated with each disease phenotype from: \item \code{get_graph_colnames()}: get_ Get column names in the nodes and/or edges of a tbl_graph. +\item \code{get_hpo()}: get_ +Get Human Phenotype Ontology (HPO) + +Updated version of Human Phenotype Ontology (HPO). +Created from the OBO files distributed by the HPO project's +\href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}. +Adapted from \link[HPOExplorer]{get_hpo}. + +By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016. +Note that the maximum ontology level depth in the 2016 version was 14, +whereas in the 2023 version the maximum ontology level depth is 16 + (due to an expansion of the HPO). + \item \code{get_medgen_maps()}: get_ Get MedGen maps. @@ -625,6 +650,7 @@ ont <- get_ontology("hp", terms=10) def <- get_definitions(ont) d <- get_gencc() genes <- get_genes_disease() +hpo <- get_hpo() dat <- get_monarch(maps=list(c("gene","disease"))) files <- get_monarch_files() \dontrun{ diff --git a/man/plot_.Rd b/man/plot_.Rd index d5a6c38..08e43e9 100644 --- a/man/plot_.Rd +++ b/man/plot_.Rd @@ -96,7 +96,7 @@ plot_ontology_graphviz(ont, ...) plot_ontology_heatmap( ont, - annot = data.table::data.table(ont@elementMetadata), + annot = data.table::data.table(as.data.frame(ont@elementMetadata)), X = ontology_to(ont, to = "similarity"), fontsize = ont@n_terms * 4e-04, row_labels = ont@terms, diff --git a/man/prune_ancestors.Rd b/man/prune_ancestors.Rd index f7dbb52..a87fd2f 100644 --- a/man/prune_ancestors.Rd +++ b/man/prune_ancestors.Rd @@ -20,5 +20,5 @@ Prune redundant ancestral terms from a \link{data.table}. 
dat <- data.table::data.table(hpo_id=c("HP:0000001","HP:0000002","HP:0000003"), name=c("term1","term2","term3")) ont <- get_ontology("hp") -dat2 <- prune_ancestors(dat,ont=ont) +dat2 <- prune_ancestors(dat,id_col="hpo_id",ont=ont) } diff --git a/man/set_cores.Rd b/man/set_cores.Rd index 92b045a..94cd141 100644 --- a/man/set_cores.Rd +++ b/man/set_cores.Rd @@ -9,10 +9,10 @@ set_cores(workers = 0.9, progressbar = TRUE, verbose = TRUE) \arguments{ \item{workers}{Number (>1) or proportion (<1) of worker cores to use.} -\item{progressbar}{logical(1) Enable progress bar +\item{progressbar}{logical(1) Enable progress bar (based on \code{plyr:::progress_text}). - Enabling the progress bar changes the default value of tasks to - \code{.Machine$integer.max}, so that progress is reported for + Enabling the progress bar changes the default value of tasks to + \code{.Machine$integer.max}, so that progress is reported for each element of X.} \item{verbose}{Print messages.} diff --git a/tests/testthat/test-filter_ontology.R b/tests/testthat/test-filter_ontology.R index 9fd88a0..19a0117 100644 --- a/tests/testthat/test-filter_ontology.R +++ b/tests/testthat/test-filter_ontology.R @@ -1,18 +1,19 @@ +skip_if_offline() test_that("filter_ontology works", { - + ont <- get_ontology("hp") testthat::expect_gte(ont@n_terms,17000) - + ont2 <- filter_ontology(ont,terms=c("HP:0000001","HP:0000002")) testthat::expect_gte(ont2@n_terms,4) - + ont3 <- filter_ontology(ont,terms=100) - testthat::expect_gte(ont3@n_terms,500) - + testthat::expect_gte(ont3@n_terms,100) + ont4 <- filter_ontology(ont, keep_descendants=c("Abnormality of the nervous system")) testthat::expect_gte(ont4@n_terms,2600) - + ont5 <- filter_ontology(ont, keep_descendants="Mode of inheritance") testthat::expect_gte(ont5@n_terms,40) diff --git a/tests/testthat/test-get_monarch_models.R b/tests/testthat/test-get_monarch_models.R index 1e6c3f2..e999415 100644 --- a/tests/testthat/test-get_monarch_models.R +++ 
b/tests/testthat/test-get_monarch_models.R @@ -1,3 +1,4 @@ +skip_if_offline() test_that("get_monarch_models works", { models <- get_monarch_models() diff --git a/tests/testthat/test-get_ontology_levels.R b/tests/testthat/test-get_ontology_levels.R index 913cfba..1ddfcc5 100644 --- a/tests/testthat/test-get_ontology_levels.R +++ b/tests/testthat/test-get_ontology_levels.R @@ -1,3 +1,4 @@ +skip_if_offline() test_that("get_ontology_levels works", { hpo <- get_ontology("hp") @@ -10,8 +11,7 @@ test_that("get_ontology_levels works", { terms = terms1, absolute = TRUE) testthat::expect_length(lvls1, length(terms1)) - testthat::expect_equal(min(lvls1),1) - testthat::expect_equal(max(lvls1),5) + testthat::expect_false(diff(range(lvls1)) == 0) #### Using all descendants #### terms2 <- simona::dag_offspring(hpo, @@ -19,24 +19,21 @@ test_that("get_ontology_levels works", { lvls2 <- KGExplorer::get_ontology_levels(ont=hpo, terms = terms2) testthat::expect_length(lvls2, length(terms2)) - testthat::expect_equal(min(lvls2),5) - testthat::expect_equal(max(lvls2),10) + testthat::expect_false(diff(range(lvls2)) == 0) #### Using relative levels #### lvls3 <- get_ontology_levels(ont=hpo, - terms = terms1, + terms = terms1, absolute = FALSE) testthat::expect_length(lvls3, length(terms1)) - testthat::expect_equal(min(lvls3),1) - testthat::expect_equal(max(lvls3),5) + testthat::expect_false(diff(range(lvls3)) == 0) lvls4 <- get_ontology_levels(ont=hpo, terms = terms1, absolute = TRUE, reverse = FALSE) testthat::expect_length(lvls4, length(terms1)) - testthat::expect_equal(min(lvls4),1) - testthat::expect_equal(max(lvls4),5) + testthat::expect_false(diff(range(lvls4)) == 0) #### Visual confirmation of correct hierarchy #### # ontologyPlot::onto_plot(ontology = hpo, diff --git a/tests/testthat/test-map_mondo.R b/tests/testthat/test-map_mondo.R index d5ba38c..ce3f989 100644 --- a/tests/testthat/test-map_mondo.R +++ b/tests/testthat/test-map_mondo.R @@ -1,5 +1,6 @@ +skip_if_offline() 
test_that("map_mondo works", { - + dat <- example_dat(rm_types="gene") dat2 <- map_mondo(dat = dat, map_to = "hpo") testthat::expect_true(methods::is(dat2,"data.table")) diff --git a/tests/testthat/test-map_ontology_terms.R b/tests/testthat/test-map_ontology_terms.R index a8a21be..81d9659 100644 --- a/tests/testthat/test-map_ontology_terms.R +++ b/tests/testthat/test-map_ontology_terms.R @@ -1,3 +1,4 @@ +skip_if_offline() test_that("map_ontology_terms works", { ont <- get_ontology("hp") @@ -6,8 +7,8 @@ test_that("map_ontology_terms works", { "HP:0000002","HP:0000003") term_names <- map_ontology_terms(ont=ont, terms=terms) term_ids <- map_ontology_terms(ont=ont, terms=terms, to="id") - - + + testthat::expect_true( length(term_names)==length(terms) ) @@ -16,8 +17,8 @@ test_that("map_ontology_terms works", { ) testthat::expect_true( all(names(term_names)==terms) - ) + ) testthat::expect_true( all(names(term_ids)==terms) - ) + ) }) diff --git a/tests/testthat/test-map_upheno.R b/tests/testthat/test-map_upheno.R index 9d260d6..0ae72d7 100644 --- a/tests/testthat/test-map_upheno.R +++ b/tests/testthat/test-map_upheno.R @@ -1,7 +1,8 @@ +skip_if_offline() test_that("map_upheno works", { - + testthat::skip() - + run_tests <- function(res){ testthat::expect_true(methods::is(res$data,"data.table")) for(x in res$plots){ @@ -15,7 +16,7 @@ test_that("map_upheno works", { # pheno_map_method="upheno", terms = terms) run_tests(res) - + #### Use cached data and filter by HPO terms res <- map_upheno(force_new = FALSE, terms = terms)