Skip to content

Commit

Permalink
rewrite map_ontology_terms to use keyed datatables
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Mar 11, 2024
1 parent e0c2a3f commit 4dd28f7
Show file tree
Hide file tree
Showing 44 changed files with 561 additions and 286 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export(get_monarch_models)
export(get_mondo_maps)
export(get_ols_options)
export(get_ontology)
export(get_ontology_descendants)
export(get_ontology_dict)
export(get_ontology_levels)
export(get_pli)
Expand Down
49 changes: 36 additions & 13 deletions R/0docs.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
#### main_ ####
#' @title Main functions
#'
#' @description
#' Documentation for common arguments.
#' @param ont An ontology of class \link[simona]{ontology_DAG}.
#' @param terms A vector of ontology term IDs.
#' @param g \link[tidygraph]{tbl_graph} object.
#' @param force_new Create a new file instead of using any cached files.
#' @param save_dir Directory to save a file to.
#' @param save_path File name to save to.
#' @param obj R object.
#' @family main_
#' @returns R object.
#'
#' @name main_
NULL



#### utils_ ####
#' @title Utility functions
#'
Expand All @@ -16,6 +36,7 @@ NULL
#' Functions to query specific subset of data via dedicated APIs.
#' @param ids IDs to query.
#' @param batch_size Number of IDs to query at once.
#' @inheritParams main_
#'
#' @family query_
#' @returns Queried data.
Expand All @@ -28,10 +49,7 @@ NULL
#'
#' @description
#' Functions to plot data.
#' @param ont An ontology of class \link[simona]{ontology_DAG}.
#' @param types Types of graph to produce. Can be one or more.

#' @param g \link[tidygraph]{tbl_graph} object.
#' @param layout_func Layout function for the graph.
#' @param node_color_var Variable in the vertex metadata to color nodes by.
#' @param edge_color_var Variable in the edge metadata to color edges by.
Expand Down Expand Up @@ -67,6 +85,8 @@ NULL
#' @param size_var Column to scale node size by.
#' @param colour_var Column to color nodes by.
#' @param ... Additional arguments passed to plot-specific functions.
#' @inheritParams main_
#' @inheritParams filter_
#' @import simona
#' @family plot_
#' @returns A named list containing the plot and the data.
Expand All @@ -77,11 +97,6 @@ NULL
#### get_ ####
#' @title Get functions
#'
#' @param force_new Create a new file instead of using any cached files.
#' @param save_dir Directory to save a file to.
#' @param filters A named list, where each element in the list is the name of
#' a column in the data, and the vector within each element represents the
#' values to include in the final data.
#' @param maps A list of paired to/from types to filter Monarch association
#' files by. For example, \code{list(c("gene","disease"))} will return any
#' files that contains gene-disease associations.
Expand All @@ -102,6 +117,7 @@ NULL
#' @param run_map_genes Map genes to standardised HGNC symbols using
#' \link[orthogene]{map_genes}.
#' @param from The designated from column in from-to mapping or relations.
#' @inheritParams main_
#' @inheritParams add_
#' @inheritParams to_
#' @inheritParams map_
Expand Down Expand Up @@ -133,6 +149,7 @@ NULL
#' @param add_definitions logical, if TRUE, add mondo definition column.
#' @param gr A \link[GenomicRanges]{GRanges} object.
#' @param build Genome build to use when mapping genomic coordinates.
#' @inheritParams main_
#' @inheritParams to_
#' @inheritParams filter_
#' @inheritParams data.table::merge.data.table
Expand All @@ -150,12 +167,13 @@ NULL
#'
#' @description
#' Functions to filter objects
#' @param terms Term IDs to include. Can alternatively be an integer,
#' which will be used to randomly sample N terms from the data.
#' @param remove_terms Character vector of term IDs to exclude.
#' @param use_simona Use \link[simona]{dag_filter} to filter terms.
#' @param keep_chr Which chromosomes to keep.
#' @param grlist Named list of \link[GenomicRanges]{GRanges} objects.
#' @param filters A named list, where each element in the list is the name of
#' a column in the data, and the vector within each element represents the
#' values to include in the final data.
#' @param node_filters A named list of filters to apply to the node data.
#' Names should be name of the metadata column, and values should be a vector of
#' valid options. For example, \code{list("type" = c("gene","variant"))} will
Expand All @@ -167,6 +185,7 @@ NULL
#' @param remove_descendants Terms whose descendants should be removed
#' (including themselves).
#' Set to \code{NULL} (default) to skip this filtering step.
#' @inheritParams main_
#' @inheritParams plot_
#' @inheritParams get_
#' @import simona
Expand All @@ -186,6 +205,7 @@ NULL
#' @param as_graph Return the object as a \link[tidygraph]{tbl_graph}.
#' @param as_sparse Return the object as a \link[Matrix]{sparseMatrix}.
#' @param as_granges Return the object as a \link[GenomicRanges]{GRanges}.
#' @inheritParams main_
#' @inheritParams plot_
#' @inheritParams filter_
#' @import tidygraph
Expand All @@ -200,13 +220,17 @@ NULL
#'
#' @description
#' Functions to add extra metadata to an ontology or data.table object.
#' @param ont An ontology of class \link[simona]{ontology_DAG}.
#' @param terms A vector of ontology term IDs.
#' @param g \link[tidygraph]{tbl_graph} object.
#' @param force_new Add the data again even if the associated column already
#' exists.
#' @param lvl Depth of the ancestor terms to add.
#' Will get the closest ancestor to this level if none have this exact level.
#' @param add_ancestors Add ancestors for each term.
#' @param add_n_edges Add the number of edges (connections) for each term.
#' @param add_ontology_levels Add the ontology level for each term.
#' @inheritParams main_
#' @inheritParams plot_
#' @inheritParams simona::dag_ancestors
#' @import simona
Expand All @@ -220,9 +244,7 @@ NULL
#'
#' @description
#' Functions to cache objects in order to speed up processes the second time.
#' @param obj R object.
#' @param path File name to save to.
#' @param save_dir Path to cache directory.
#' @inheritParams main_
#' @inheritParams base::unlink
#' @family cache_
#' @returns Null.
Expand All @@ -234,6 +256,7 @@ NULL
#'
#' @description
#' Functions to merge data resources.
#' @inheritParams main_
#' @inheritParams get_
#' @inheritParams map_
#' @inheritParams to_
Expand Down
7 changes: 5 additions & 2 deletions R/add_ancestors.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#' @describeIn add_ add_
#' Add ancestor
#' Add ancestors
#'
#' For each term, get its ancestor at a given level
#' and add the ID and name of the ancestor to the ontology metadata.
Expand All @@ -10,6 +9,10 @@
#' to itself as its own ancestor.
#' When a term has multiple ancestors at a given level,
#' the first ancestor is arbitrarily chosen.
#' @param prefix A prefix for the ancestor column names.
#' @param fill_na If \code{TRUE}, fill any NA values in the ancestor columns
#' using the term ID and name.
#' @inheritParams add_
#' @export
#' @inheritParams simona::dag_offspring
#' @examples
Expand Down
6 changes: 4 additions & 2 deletions R/add_db.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#' @describeIn add_ add_
#' Add database
#'
#' Extract a database identifier from an ID column and add it
#' as a separate column.
#' @inheritParams add_
#' @param dat data.table or tbl_graph.
#' @param input_col Column containing IDs.
#' @param output_col Column to add.
Expand All @@ -27,4 +29,4 @@ add_db <- function(dat,
} else {
stop("dat must be a tbl_graph or data.table")
}
}
}
2 changes: 1 addition & 1 deletion R/add_hoverboxes.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#' @describeIn add_ add_
#' Add hoverboxes
#'
#' A hoverbox is a box of text that shows up when the cursor
#' hovers over something.
#' These can be useful when making interactive network plots
#' of the HPO phenotypes because we can include a hoverbox that gives
#' information and data associated with each phenotype.
#' @inheritParams add_
#' @param columns Character vector of column names to include in the hoverbox.
#' @param hoverbox_column Name of the new hoverbox column to add.
#' @param decorators Left and right decorators to add to the hoverbox titles.
Expand Down
3 changes: 2 additions & 1 deletion R/add_ontology_metadata.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' @describeIn add_ add_
#' Add ontology metadata
#'
#' Add per-term metadata to ontology.
#' @inheritParams add_
#' @export
#' @import simona
#' @importFrom Matrix colSums
Expand Down
2 changes: 1 addition & 1 deletion R/cache_dir.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @describeIn utils_ utils_
#' @describeIn cache_ cache_
#' Cache directory
#'
#' Provides the path to the package-wide cache directory.
Expand Down
10 changes: 5 additions & 5 deletions R/cache_save.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
#' @examples
#' cache_save(mtcars, tempfile())
cache_save <- function(obj,
path){
if(!is.null(path)){
dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)
messager("Caching file -->",path)
saveRDS(obj, path)
save_path){
if(!is.null(save_path)){
dir.create(dirname(save_path), showWarnings = FALSE, recursive = TRUE)
messager("Caching file -->",save_path)
saveRDS(obj, save_path)
}
}
4 changes: 2 additions & 2 deletions R/get_data.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' @describeIn utils_ utils_
#' Get remote data
#'
#' Download remotely stored data via \link[piggyback]{pb_download}.
#' @param save_dir Directory to save data to.
#' Download remotely stored data via \link[piggyback]{pb_download}.
#' @inheritParams get_
#' @param add_version Add the release version
#' to the returned object's \link[base]{attributes}
#' @returns Path to downloaded file or the object itself (when ".rds" format).
Expand Down
10 changes: 5 additions & 5 deletions R/get_monarch_kg.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,21 @@ get_monarch_kg <- function(as_graph=TRUE,
...){
files <- get_monarch_files(subdir = "monarch-kg/latest/",
queries = "\\.tsv\\.gz")
path <- file.path(
# Cache file: <save_dir>/<remote file name with ".tsv.gz" swapped for ".rds">.
save_path <- file.path(
  save_dir,
  paste0(gsub("\\.tsv\\.gz","",basename(files$url[1])),".rds"))
if(file.exists(path) &
if(file.exists(save_path) &
isFALSE(force_new)){
messager("Importing",path)
g <- readRDS(path)
messager("Importing",save_path)
g <- readRDS(save_path)
return(g)
} else {
d <- data.table::fread(files$url[1],
tmpdir = save_dir,
...)
if(isFALSE(as_graph)) return(d)
g <- dt_to_kg(d)
cache_save(g,path)
cache_save(g,save_path)
}
return(g)
}
Expand Down
14 changes: 7 additions & 7 deletions R/get_mondo_maps.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,24 +31,24 @@ get_mondo_maps <- function(map_types=c("default",
top_by=c("subject","object"),
save_dir=cache_dir()
){
path <- subject_label <- object_label <- disease_label <- map_type <- to <-
db <- NULL;
requireNamespace("downloadR")
save_path <- subject_label <- object_label <- map_type <- label <- file <-
NULL;

if(length(map_types)==1 &&
map_types=="default"){
path <- downloadR::downloader(
save_path <- downloadR::downloader(
input_url = paste0(
"https://github.com/monarch-initiative/mondo/raw/master/",
"src/ontology/mappings/mondo.sssom.tsv"),
output_dir = save_dir,
download_method = "download.file")
map <- data.table::fread(path,
map <- data.table::fread(save_path,
skip = "subject",
tmpdir = save_dir)
data.table::setnames(map,
gsub("_id$","",names(map)))
map[,file:=basename(path)]
map[,file:=basename(save_path)]
add_db(dat=map,
input_col="subject",
output_col="subject_db")
Expand All @@ -63,10 +63,10 @@ get_mondo_maps <- function(map_types=c("default",
map <- lapply(stats::setNames(files$link_raw,
basename(files$link_raw)),
function(x){
path <- downloadR::downloader(input_url = x,
save_path <- downloadR::downloader(input_url = x,
output_dir = save_dir,
download_method = "download.file")
data.table::fread(path,
data.table::fread(save_path,
skip="subject_id",
tmpdir = save_dir)
}) |> data.table::rbindlist(fill = TRUE, idcol = "file")
Expand Down
6 changes: 3 additions & 3 deletions R/get_mondo_maps_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ get_mondo_maps_files <- function(map_types,
map_to,
save_dir){
requireNamespace("echogithub")
path <- map_type <- to <- NULL;
save_path <- map_type <- to <- NULL;

files_path <- file.path(save_dir,"mondo_maps.csv.gz")
if(!file.exists(files_path)){
Expand All @@ -15,13 +15,13 @@ get_mondo_maps_files <- function(map_types,
files <- data.table::fread(files_path)
}
#### Filter map types ####
files[,map_type:=data.table::tstrsplit(basename(path),"_",keep=2)]
files[,map_type:=data.table::tstrsplit(basename(save_path),"_",keep=2)]
files[is.na(map_type),map_type:="default"]
if(!is.null(map_types)){
files <- files[map_type %in% map_types]
}
#### Filter mapping to ####
files[,to:=data.table::tstrsplit(basename(path),"_|[.]",keep=3)]
files[,to:=data.table::tstrsplit(basename(save_path),"_|[.]",keep=3)]
if(!is.null(map_to)) {
if("hpo" %in% map_to) {
map_to <- c(map_to[map_to!="hpo"],
Expand Down
30 changes: 30 additions & 0 deletions R/get_ontology_descendants.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#' Get ontology descendants
#'
#' Get the descendants of a set of ontology terms.
#' Each term is first mapped to an ontology ID with
#' \code{map_ontology_terms}; a term that cannot be mapped is reported
#' and yields \code{NULL} for that entry.
#' @inheritParams get_
#' @inheritParams simona::dag_offspring
#' @returns A named list of descendants, where the names are the input terms.
#' Entries for terms that were not found in the ontology are \code{NULL}.
#' @inheritDotParams simona::dag_offspring
#' @export
#' @examples
#' ont <- get_ontology("hp", terms=10)
#' d <- get_ontology_descendants(ont, c("HP:0000001","HP:0000002"))
get_ontology_descendants <- function(ont,
                                     terms,
                                     include_self = TRUE,
                                     ...){
  # lapply() only carries over names that already exist on its input, so an
  # unnamed vector of terms would produce an unnamed list. Name the terms by
  # themselves (keeping any user-supplied names) to return the documented
  # named list. Empty input is left alone: names cannot be set on NULL.
  if(length(terms) > 0 && is.null(names(terms))){
    terms <- stats::setNames(terms, as.character(terms))
  }
  lapply(terms, function(x){
    # Resolve the user-supplied term to an ontology ID.
    xt <- map_ontology_terms(ont = ont,
                             terms = x,
                             to = 'id')
    if(all(is.na(xt))) {
      messager("WARNING: The term",x,"was not found in the ontology.")
      return(NULL)
    }
    simona::dag_offspring(ont,
                          include_self = include_self,
                          term = xt,
                          ...)
  })
}
Loading

0 comments on commit 4dd28f7

Please sign in to comment.