Skip to content

Commit

Permalink
Set class instead of omim_type on read_omim() output
Browse files Browse the repository at this point in the history
To allow for generic functions downstream.

Tests: PASS
  • Loading branch information
allenbaron committed Nov 27, 2023
1 parent 53e1b8b commit 95af35a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 26 deletions.
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# DO.utils (development version)

## DO Management & Analysis
* `read_omim()` now additionally parses official OMIM downloads of search
results and [phenotypic series titles](https://www.omim.org/phenotypicSeriesTitles/all).
- The output includes an `omim_official` attribute indicating whether the
source was an official download.
- If the input is an official download, the output class indicates its type
(`omim_search`, `omim_PS`, or `omim_PS_titles`).


# DO.utils 0.3.0

## General
Expand Down
24 changes: 13 additions & 11 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,31 +147,32 @@ read_ga <- function(ga_file, read_all = FALSE, tidy = TRUE, keep_total = FALSE,
#' @export
read_omim <- function(file, ...) {
df <- preprocess_omim_dl(file, ...)
omim_type <- class(df)[1]

if (attr(df, "omim_type") == "search") {
if (omim_type == "omim_search") {
df <- df %>%
dplyr::mutate(
mim_symbol = stringr::str_extract(mim_number, "^ *[*+#%^]"),
mim_symbol = stringr::str_extract(.data$mim_number, "^ *[*+#%^]"),
mim_type = dplyr::case_match(
mim_symbol,
.data$mim_symbol,
"*" ~ "gene",
"+" ~ "gene, includes phenotype",
"#" ~ "phenotype",
"%" ~ "phenotype, unknown molecular basis",
"^" ~ "deprecated",
.default = "phenotype, suspected/overlap"
),
mim_number = stringr::str_remove(mim_number, "^ *[*+#%^]"),
omim = paste0("OMIM:", mim_number)
mim_number = stringr::str_remove(.data$mim_number, "^ *[*+#%^]"),
omim = paste0("OMIM:", .data$mim_number)
) %>%
dplyr::relocate(omim, mim_symbol, mim_type, .before = 1)
dplyr::relocate("omim", "mim_symbol", "mim_type", .before = 1)
}

entry_col_ordered <- c(
"location", "phenotype", "phenotype_mim_number", "inheritance",
"phenotype_mapping_key", "gene_locus", "gene_locus_mim_number"
)
if (attr(df, "omim_type") == "PS" || all(entry_col_ordered %in% names(df))) {
if (omim_type == "omim_PS" || all(entry_col_ordered %in% names(df))) {
df <- df %>%
dplyr::mutate(
omim = paste0("OMIM:", .data$phenotype_mim_number),
Expand Down Expand Up @@ -201,13 +202,14 @@ read_omim <- function(file, ...) {
)
}

if (attr(df, "omim_type") == "PS_titles") {
if (omim_type == "omim_PS_titles") {
df <- df %>%
dplyr::mutate(omim = paste0("OMIM:", phenotypic_series_number)) %>%
dplyr::relocate(omim, .before = 1)
dplyr::mutate(
omim = paste0("OMIM:", .data$phenotypic_series_number)
) %>%
dplyr::relocate("omim", .before = 1)
}

class(df) <- c("omim_tbl", class(df))
df
}

Expand Down
4 changes: 2 additions & 2 deletions R/read_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ preprocess_omim_dl <- function(file, ...) {
stringr::str_to_lower()

if (!is.na(dl_type)) {
attr(df, "omim_type") <- dl_type
class(df) <- c(paste0("omim_", dl_type), "omim_tbl", class(df))
} else {
attr(df, "omim_type") <- "generic"
class(df) <- c("omim_tbl", class(df))
}

df
Expand Down
20 changes: 7 additions & 13 deletions tests/testthat/test-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,8 @@ test_that("read_omim() works for OFFICIAL download of SEARCH results", {
)
),
row.names = c(NA, -14L),
class = c("omim_tbl", "tbl_df", "tbl", "data.frame"),
omim_official = TRUE,
omim_type = "search"
class = c("omim_search", "omim_tbl", "tbl_df", "tbl", "data.frame"),
omim_official = TRUE
)

expect_equal(read_omim("data/omim/omim-search_dl.tsv"), expected)
Expand All @@ -82,9 +81,8 @@ test_that("read_omim() works for OFFICIAL download of PHENOTYPIC SERIES TITLES",
)
),
row.names = c(NA, -4L),
class = c("omim_tbl", "tbl_df", "tbl", "data.frame"),
omim_official = TRUE,
omim_type = "PS_titles"
class = c("omim_PS_titles", "omim_tbl", "tbl_df", "tbl", "data.frame"),
omim_official = TRUE
)

expect_equal(read_omim("data/omim/omim-ps_titles_dl.tsv"), expected)
Expand All @@ -111,12 +109,8 @@ ps_df <- structure(
geno_inheritance = c(NA, rep("autosomal recessive inheritance", 3))
),
row.names = c(NA, -4L),
class = c(
"omim_tbl", "tbl_df", "tbl",
"data.frame"
),
omim_official = TRUE,
omim_type = "PS"
class = c("omim_PS", "omim_tbl", "tbl_df", "tbl", "data.frame"),
omim_official = TRUE
)

test_that("read_omim() works for OFFICIAL download of PHENOTYPIC SERIES", {
Expand All @@ -128,7 +122,7 @@ test_that("read_omim() works for COPIED data (PS or with entry info)", {
dplyr::filter(!stringr::str_detect(.data$omim, "PS")) %>%
dplyr::mutate(gene_locus = stringr::str_remove(.data$gene_locus, ",.*"))
attr(ps_df_cp, "omim_official") <- FALSE
attr(ps_df_cp, "omim_type") <- "generic"
class(ps_df_cp) <- class(ps_df)[-1]

expect_equal(read_omim("data/omim/omim-ps_cp-ps_page.csv"), ps_df_cp)
# expect_snapshot(read_omim("data/omim/omim-ps_cp-ps_page_w_ps.csv")) # not supported
Expand Down

0 comments on commit 95af35a

Please sign in to comment.