Skip to content

Commit

Permalink
Merge pull request #278 from OHDSI/issue_266
Browse files Browse the repository at this point in the history
modified description, readme, and mockOmopSketch
  • Loading branch information
catalamarti authored Dec 20, 2024
2 parents 9c644e7 + 22cd693 commit db3a67e
Show file tree
Hide file tree
Showing 14 changed files with 154 additions and 635 deletions.
7 changes: 3 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Suggests:
omock (>= 0.3.0),
covr,
ggplot2,
visOmopResults (>= 0.4.0)
visOmopResults (>= 0.5.0)
Config/testthat/edition: 3
Config/testthat/parallel: true
Imports:
Expand All @@ -65,7 +65,7 @@ Imports:
dplyr,
glue,
lifecycle,
omopgenerics (>= 0.3.1),
omopgenerics (>= 0.4.1),
PatientProfiles (>= 1.2.1),
purrr,
rlang,
Expand All @@ -77,5 +77,4 @@ Depends:
URL: https://OHDSI.github.io/OmopSketch/
BugReports: https://github.com/OHDSI/OmopSketch/issues
VignetteBuilder: knitr
Remotes:
darwin-eu/omopgenerics

40 changes: 39 additions & 1 deletion R/mockOmopSketch.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,48 @@ mockOmopSketch <- function(con = NULL,
omock::mockProcedureOccurrence(seed = seed) |>
omock::mockVisitOccurrence(seed = seed) |>
# Create device exposure table - empty (Eunomia also has it empty)
omopgenerics::emptyOmopTable("device_exposure")
omopgenerics::emptyOmopTable("device_exposure")|>
checkColumns()


# WHEN WE SUPPORT LOCAL CDMs WE WILL HAVE TO ACCOUNT FOR THAT HERE
cdm <- CDMConnector::copy_cdm_to(con = con, cdm = cdm, schema = writeSchema)

return(cdm)
}

# Add to every table of `cdm_local` the columns that
# `omopgenerics::omopTableFields()` lists for that table but that are not
# present, filling them with missing values.
#
# The filler is typed from the `cdm_datatype` of the field: "integer" gives
# `NA_integer_`, any datatype containing "varchar" gives `NA_character_`, and
# every other datatype gives a plain (logical) `NA`.
#
# @param cdm_local Named collection of tables (indexed with `[[name]]`), as
#   produced by the omock pipeline in `mockOmopSketch()`.
# @return `cdm_local` with the absent columns appended to each table. Tables
#   whose name has no "cdm_table" entry in the field reference, or that
#   already have all their columns, are left untouched.
checkColumns <- function(cdm_local) {
  # Reference of the fields expected in each standard cdm table.
  fields <- omopgenerics::omopTableFields() |>
    dplyr::filter(.data$type == "cdm_table")

  # Map a cdm datatype to the missing value used to fill the new column.
  # Built directly instead of going through eval(parse(text = ...)).
  missingValue <- function(datatype) {
    if (identical(datatype, "integer")) {
      NA_integer_
    } else if (isTRUE(grepl("varchar", datatype))) {
      NA_character_
    } else {
      NA
    }
  }

  for (nm in names(cdm_local)) {
    expected <- fields |>
      dplyr::filter(.data$cdm_table_name == nm)
    absent <- !(expected$cdm_field_name %in% colnames(cdm_local[[nm]]))

    # Nothing to add for this table.
    if (!any(absent)) {
      next
    }

    fillers <- lapply(expected$cdm_datatype[absent], missingValue)
    missing_tbl <- tibble::tibble(
      !!!rlang::set_names(fillers, expected$cdm_field_name[absent])
    )

    # `missing_tbl` has a single row; this relies on dplyr::bind_cols()
    # recycling it to the number of rows of the existing table.
    cdm_local[[nm]] <- dplyr::bind_cols(cdm_local[[nm]], missing_tbl)
  }

  return(cdm_local)
}


19 changes: 15 additions & 4 deletions R/plotConceptSetCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ plotConceptSetCounts <- function(result,
))
}

result1 <- result |> omopgenerics::splitAdditional()
result1 <- result |> omopgenerics::splitAll()
# Detect if there are several time intervals
if("time_interval" %in% colnames(result1)){
# Line plot where each concept is a different line
p <- result1 |>
dplyr::filter(.data$time_interval != "overall") |>
omopgenerics::uniteAdditional(cols = c("time_interval", "standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id")) |>
omopgenerics::pivotEstimates() |>
visOmopResults::scatterPlot(x = "time_interval",
y = "count",
line = TRUE,
Expand All @@ -71,17 +71,28 @@ plotConceptSetCounts <- function(result,
colour = colour)
}else{
if("standard_concept_name" %in% colnames(result1)){
p <- result |>
p <- result1 |>
omopgenerics::pivotEstimates() |>
visOmopResults::barPlot(x = c("standard_concept_name", "standard_concept_id"),
y = "count",
facet = facet,
colour = colour)
p$data <- p$data |>
dplyr::mutate(
standard_concept_name_standard_concept_id = factor(
.data$standard_concept_name_standard_concept_id,
levels = c("overall - overall", sort(setdiff(.data$standard_concept_name_standard_concept_id, "overall - overall")))
)
)

}else{
p <- result |>
p <- result1 |>
visOmopResults::barPlot(x = "codelist_name",
y = "count",
facet = facet,
colour = colour)
p$data <- p$data |>
dplyr::arrange(.data$codelist_name)
}
p <- p +
ggplot2::labs(
Expand Down
18 changes: 16 additions & 2 deletions R/plotInObservation.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ plotInObservation <- function(result,

# plot
if(length(unique(result$additional_level)) > 1 ){
result |>
dplyr::mutate(additional_level = as.character(gsub("-01$","",as.Date(gsub(" to.*","",.data$additional_level))))) |>
p <- result |>
dplyr::filter(.data$estimate_name == "count") |>
visOmopResults::scatterPlot(
x = "time_interval",
Expand All @@ -78,6 +77,21 @@ plotInObservation <- function(result,
y = variable,
x = "Date"
)
p$data <- p$data |>
dplyr::arrange(.data$time_interval) |>
dplyr::mutate(
show_label = seq_along(.data$time_interval) %% ceiling(nrow(p$data) / 20) == 0
)

p <- p +
ggplot2::scale_x_discrete(
breaks = p$data$time_interval[p$data$show_label]
) +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, size = 8),
plot.margin = ggplot2::margin(t = 5, r = 5, b = 30, l = 5)
)
p
}else{
result |>
dplyr::filter(.data$estimate_name == "count") |>
Expand Down
26 changes: 26 additions & 0 deletions R/plotRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,30 @@ plotRecordCount <- function(result,
y = "Number records",
x = "Date"
)
p$data <- p$data |>
dplyr::arrange(.data$time_interval) |>
dplyr::group_by(.data$omop_table) |>
dplyr::mutate(
show_label = if (dplyr::cur_group_id() == 1) {
seq_along(.data$time_interval) %% ceiling(dplyr::n() / 20) == 0
} else {
FALSE
}
) |>
dplyr::ungroup()

# Modify the plot
p <- p +
ggplot2::scale_x_discrete(
breaks = p$data$time_interval[p$data$show_label],
labels = p$data$time_interval[p$data$show_label]
) +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, size = 8),
plot.margin = ggplot2::margin(t = 5, r = 5, b = 30, l = 5)
)


}else{
p <- result |>
visOmopResults::barPlot(x = "variable_name",
Expand All @@ -70,5 +94,7 @@ plotRecordCount <- function(result,
x = ""
)
}

p
}

44 changes: 22 additions & 22 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,10 @@ knitr::opts_chunk$set(
# OmopSketch <a href="https://OHDSI.github.io/OmopSketch/"><img src="man/figures/logo.png" align="right" height="138" alt="OmopSketch website" /></a>

<!-- badges: start -->
[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
[![R-CMD-check](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml)
[![CRAN status](https://www.r-pkg.org/badges/version/OmopSketch)](https://CRAN.R-project.org/package=OmopSketch)
[![Codecov test coverage](https://codecov.io/gh/OHDSI/OmopSketch/branch/main/graph/badge.svg)](https://app.codecov.io/gh/OHDSI/OmopSketch?branch=main)
<!-- badges: end -->

### WARNING: this package is under-development and has only been tested using mock data
[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![R-CMD-check](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml) [![CRAN status](https://www.r-pkg.org/badges/version/OmopSketch)](https://CRAN.R-project.org/package=OmopSketch) [![Codecov test coverage](https://codecov.io/gh/OHDSI/OmopSketch/branch/main/graph/badge.svg)](https://app.codecov.io/gh/OHDSI/OmopSketch?branch=main)

<!-- badges: end -->

The goal of OmopSketch is to characterise and visualise an OMOP CDM instance to assess if it meets the necessary criteria to answer a specific clinical question and conduct a certain study.

Expand All @@ -48,16 +45,19 @@ con <- dbConnect(duckdb(), eunomia_dir())
cdm <- cdmFromCon(con = con, cdmSchema = "main", writeSchema = "main")
cdm
```

### Snapshot

We first create a snapshot of our database. This will allow us to track when the analysis has been conducted and capture details about the CDM version or the data release.

```{r}
summariseOmopSnapshot(cdm) |>
tableOmopSnapshot(type = "flextable")
```


### Characterise the clinical tables
Once we have collected the snapshot information, we can start characterising the clinical tables of the CDM. By using `summariseClinicalRecords()` and `tableClinicalRecords()`, we can easily visualise the main characteristics of specific clinical tables.

Once we have collected the snapshot information, we can start characterising the clinical tables of the CDM. By using `summariseClinicalRecords()` and `tableClinicalRecords()`, we can easily visualise the main characteristics of specific clinical tables.

```{r}
summariseClinicalRecords(cdm, c("condition_occurrence", "drug_exposure")) |>
Expand All @@ -67,45 +67,45 @@ summariseClinicalRecords(cdm, c("condition_occurrence", "drug_exposure")) |>
We can also explore trends in the clinical table records over time.

```{r}
summariseRecordCount(cdm, c("condition_occurrence", "drug_exposure")) |>
plotRecordCount(facet = "omop_table")
summariseRecordCount(cdm, c("condition_occurrence", "drug_exposure"), interval = "years") |>
plotRecordCount(facet = "omop_table", colour = "cdm_name")
```

### Characterise the observation period

After visualising the main characteristics of our clinical tables, we can explore the observation period details. OmopSketch provides several functions to get an overview of the dataset study period.

Using `summariseInObservation()` and `plotInObservation()`, we can gather information on the number of records per year.

```{r}
summariseInObservation(cdm$observation_period, output = "records") |>
plotInObservation()
summariseInObservation(cdm$observation_period, output = "records", interval = "years") |>
plotInObservation(colour = "cdm_name")
```
You can also visualise and explore the characteristics of the observation period per each individual in the database using `summariseObservationPeriod()`.

You can also visualise and explore the characteristics of the observation period per each individual in the database using `summariseObservationPeriod()`.

```{r}
summariseObservationPeriod(cdm$observation_period) |>
tableObservationPeriod(type = "flextable")
```

Or if visualisation is preferred, you can easily build a histogram to explore how many participants have more than one observation period.

```{r}
summariseObservationPeriod(cdm$observation_period) |>
plotObservationPeriod()
plotObservationPeriod(colour = "observation_period_ordinal")
```

### Characterise the concepts

OmopSketch also provides functions to explore some of (or all) the concepts in the dataset.

```{r}
acetaminophen <- c(1125315, 1127433, 1127078)
summariseConceptSetCounts(cdm, conceptSet = list("acetaminophen" = acetaminophen)) |>
filter(variable_name == "Number records") |>
plotConceptSetCounts()
plotConceptSetCounts(colour = "codelist_name")
```

### Characterise the population
Finally, OmopSketch can also help us to characterise the population at the start and end of the observation period.
```{r}
summarisePopulationCharacteristics(cdm) |>
tablePopulationCharacteristics(type = "flextable")
```
As seen, OmopSketch offers multiple functionalities to provide a general overview of a database. Additionally, it includes more tools and arguments that allow for deeper exploration, helping to assess the database's suitability for specific research studies. For further information, please refer to the vignettes.

Loading

0 comments on commit db3a67e

Please sign in to comment.