Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

modified description, readme, and mockOmopSketch #278

Merged
merged 7 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Suggests:
omock (>= 0.3.0),
covr,
ggplot2,
visOmopResults (>= 0.4.0)
visOmopResults (>= 0.5.0)
Config/testthat/edition: 3
Config/testthat/parallel: true
Imports:
Expand All @@ -65,7 +65,7 @@ Imports:
dplyr,
glue,
lifecycle,
omopgenerics (>= 0.3.1),
omopgenerics (>= 0.4.1),
PatientProfiles (>= 1.2.1),
purrr,
rlang,
Expand All @@ -77,5 +77,4 @@ Depends:
URL: https://OHDSI.github.io/OmopSketch/
BugReports: https://github.com/OHDSI/OmopSketch/issues
VignetteBuilder: knitr
Remotes:
darwin-eu/omopgenerics

40 changes: 39 additions & 1 deletion R/mockOmopSketch.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,48 @@ mockOmopSketch <- function(con = NULL,
omock::mockProcedureOccurrence(seed = seed) |>
omock::mockVisitOccurrence(seed = seed) |>
# Create device exposure table - empty (Eunomia also has it empty)
omopgenerics::emptyOmopTable("device_exposure")
omopgenerics::emptyOmopTable("device_exposure")|>
checkColumns()


# WHEN WE SUPPORT LOCAL CDMs WE WILL HAVE TO ACCOUNT FOR THAT HERE
cdm <- CDMConnector::copy_cdm_to(con = con, cdm = cdm, schema = writeSchema)

return(cdm)
}

#' Add missing standard columns to the tables of a local cdm
#'
#' For every element of `cdm_local`, the fields that
#' `omopgenerics::omopTableFields()` lists for a cdm table of the same name
#' are compared with the columns actually present. Each absent field is
#' appended as a column filled with `NA`.
#'
#' The type of the `NA` is chosen from the field's `cdm_datatype`:
#' `"integer"` gives `NA_integer_`, any datatype containing `"varchar"` gives
#' `NA_character_`, and everything else gives a logical `NA`.
#'
#' @param cdm_local A named list-like object of tables (presumably a local
#'   cdm reference as produced by the omock mock functions; confirm against
#'   the caller).
#'
#' @return `cdm_local`, with the missing columns appended to each table.
#' @noRd
checkColumns <- function(cdm_local) {
  # Typed NA placeholder for a single OMOP datatype. The values are produced
  # directly rather than by evaluating strings of R code.
  naForDatatype <- function(datatype) {
    if (identical(datatype, "integer")) {
      NA_integer_
    } else if (isTRUE(grepl("varchar", datatype))) {
      NA_character_
    } else {
      NA
    }
  }

  # Field definitions of the standard cdm tables.
  info <- omopgenerics::omopTableFields() |>
    dplyr::filter(.data$type == "cdm_table")

  for (table_name in names(cdm_local)) {
    existing_cols <- colnames(cdm_local[[table_name]])

    # Fields expected for this table that are not present yet.
    missing_cols <- info |>
      dplyr::filter(
        .data$cdm_table_name == table_name,
        !(.data$cdm_field_name %in% existing_cols)
      )

    if (nrow(missing_cols) > 0) {
      placeholders <- rlang::set_names(
        lapply(missing_cols$cdm_datatype, naForDatatype),
        missing_cols$cdm_field_name
      )

      # One-row tibble of typed NAs; bind_cols() recycles it to the number of
      # rows of the existing table.
      missing_tbl <- tibble::tibble(!!!placeholders)

      cdm_local[[table_name]] <- dplyr::bind_cols(
        cdm_local[[table_name]],
        missing_tbl
      )
    }
  }

  cdm_local
}


19 changes: 15 additions & 4 deletions R/plotConceptSetCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ plotConceptSetCounts <- function(result,
))
}

result1 <- result |> omopgenerics::splitAdditional()
result1 <- result |> omopgenerics::splitAll()
# Detect if there are several time intervals
if("time_interval" %in% colnames(result1)){
# Line plot where each concept is a different line
p <- result1 |>
dplyr::filter(.data$time_interval != "overall") |>
omopgenerics::uniteAdditional(cols = c("time_interval", "standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id")) |>
omopgenerics::pivotEstimates() |>
visOmopResults::scatterPlot(x = "time_interval",
y = "count",
line = TRUE,
Expand All @@ -71,17 +71,28 @@ plotConceptSetCounts <- function(result,
colour = colour)
}else{
if("standard_concept_name" %in% colnames(result1)){
p <- result |>
p <- result1 |>
omopgenerics::pivotEstimates() |>
visOmopResults::barPlot(x = c("standard_concept_name", "standard_concept_id"),
y = "count",
facet = facet,
colour = colour)
p$data <- p$data |>
dplyr::mutate(
standard_concept_name_standard_concept_id = factor(
.data$standard_concept_name_standard_concept_id,
levels = c("overall - overall", sort(setdiff(.data$standard_concept_name_standard_concept_id, "overall - overall")))
)
)

}else{
p <- result |>
p <- result1 |>
visOmopResults::barPlot(x = "codelist_name",
y = "count",
facet = facet,
colour = colour)
p$data <- p$data |>
dplyr::arrange(.data$codelist_name)
}
p <- p +
ggplot2::labs(
Expand Down
18 changes: 16 additions & 2 deletions R/plotInObservation.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ plotInObservation <- function(result,

# plot
if(length(unique(result$additional_level)) > 1 ){
result |>
dplyr::mutate(additional_level = as.character(gsub("-01$","",as.Date(gsub(" to.*","",.data$additional_level))))) |>
p <- result |>
dplyr::filter(.data$estimate_name == "count") |>
visOmopResults::scatterPlot(
x = "time_interval",
Expand All @@ -78,6 +77,21 @@ plotInObservation <- function(result,
y = variable,
x = "Date"
)
p$data <- p$data |>
dplyr::arrange(.data$time_interval) |>
dplyr::mutate(
show_label = seq_along(.data$time_interval) %% ceiling(nrow(p$data) / 20) == 0
)

p <- p +
ggplot2::scale_x_discrete(
breaks = p$data$time_interval[p$data$show_label]
) +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, size = 8),
plot.margin = ggplot2::margin(t = 5, r = 5, b = 30, l = 5)
)
p
}else{
result |>
dplyr::filter(.data$estimate_name == "count") |>
Expand Down
26 changes: 26 additions & 0 deletions R/plotRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,30 @@ plotRecordCount <- function(result,
y = "Number records",
x = "Date"
)
p$data <- p$data |>
dplyr::arrange(.data$time_interval) |>
dplyr::group_by(.data$omop_table) |>
dplyr::mutate(
show_label = if (dplyr::cur_group_id() == 1) {
seq_along(.data$time_interval) %% ceiling(dplyr::n() / 20) == 0
} else {
FALSE
}
) |>
dplyr::ungroup()

# Modify the plot
p <- p +
ggplot2::scale_x_discrete(
breaks = p$data$time_interval[p$data$show_label],
labels = p$data$time_interval[p$data$show_label]
) +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, size = 8),
plot.margin = ggplot2::margin(t = 5, r = 5, b = 30, l = 5)
)


}else{
p <- result |>
visOmopResults::barPlot(x = "variable_name",
Expand All @@ -70,5 +94,7 @@ plotRecordCount <- function(result,
x = ""
)
}

p
}

44 changes: 22 additions & 22 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,10 @@ knitr::opts_chunk$set(
# OmopSketch <a href="https://OHDSI.github.io/OmopSketch/"><img src="man/figures/logo.png" align="right" height="138" alt="OmopSketch website" /></a>

<!-- badges: start -->
[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
[![R-CMD-check](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml)
[![CRAN status](https://www.r-pkg.org/badges/version/OmopSketch)](https://CRAN.R-project.org/package=OmopSketch)
[![Codecov test coverage](https://codecov.io/gh/OHDSI/OmopSketch/branch/main/graph/badge.svg)](https://app.codecov.io/gh/OHDSI/OmopSketch?branch=main)
<!-- badges: end -->

### WARNING: this package is under-development and has only been tested using mock data
[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![R-CMD-check](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml) [![CRAN status](https://www.r-pkg.org/badges/version/OmopSketch)](https://CRAN.R-project.org/package=OmopSketch) [![Codecov test coverage](https://codecov.io/gh/OHDSI/OmopSketch/branch/main/graph/badge.svg)](https://app.codecov.io/gh/OHDSI/OmopSketch?branch=main)

<!-- badges: end -->

The goal of OmopSketch is to characterise and visualise an OMOP CDM instance to assess if it meets the necessary criteria to answer a specific clinical question and conduct a certain study.

Expand All @@ -48,16 +45,19 @@ con <- dbConnect(duckdb(), eunomia_dir())
cdm <- cdmFromCon(con = con, cdmSchema = "main", writeSchema = "main")
cdm
```

### Snapshot

We first create a snapshot of our database. This will allow us to track when the analysis has been conducted and capture details about the CDM version or the data release.

```{r}
summariseOmopSnapshot(cdm) |>
tableOmopSnapshot(type = "flextable")
```


### Characterise the clinical tables
Once we have collected the snapshot information, we can start characterising the clinical tables of the CDM. By using `summariseClinicalRecords()` and `tableClinicalRecords()`, we can easily visualise the main characteristics of specific clinical tables.

Once we have collected the snapshot information, we can start characterising the clinical tables of the CDM. By using `summariseClinicalRecords()` and `tableClinicalRecords()`, we can easily visualise the main characteristics of specific clinical tables.

```{r}
summariseClinicalRecords(cdm, c("condition_occurrence", "drug_exposure")) |>
Expand All @@ -67,45 +67,45 @@ summariseClinicalRecords(cdm, c("condition_occurrence", "drug_exposure")) |>
We can also explore trends in the clinical table records over time.

```{r}
summariseRecordCount(cdm, c("condition_occurrence", "drug_exposure")) |>
plotRecordCount(facet = "omop_table")
summariseRecordCount(cdm, c("condition_occurrence", "drug_exposure"), interval = "years") |>
plotRecordCount(facet = "omop_table", colour = "cdm_name")
```

### Characterise the observation period

After visualising the main characteristics of our clinical tables, we can explore the observation period details. OmopSketch provides several functions to have an overview of the dataset study period.

Using `summariseInObservation()` and `plotInObservation()`, we can gather information on the number of records per year.

```{r}
summariseInObservation(cdm$observation_period, output = "records") |>
plotInObservation()
summariseInObservation(cdm$observation_period, output = "records", interval = "years") |>
plotInObservation(colour = "cdm_name")
```
You can also visualise and explore the characteristics of the observation period per each individual in the database using `summariseObservationPeriod()`.

You can also visualise and explore the characteristics of the observation period per each individual in the database using `summariseObservationPeriod()`.

```{r}
summariseObservationPeriod(cdm$observation_period) |>
tableObservationPeriod(type = "flextable")
```

Or if visualisation is preferred, you can easily build a histogram to explore how many participants have more than one observation period.

```{r}
summariseObservationPeriod(cdm$observation_period) |>
plotObservationPeriod()
plotObservationPeriod(colour = "observation_period_ordinal")
```

### Characterise the concepts

OmopSketch also provides functions to explore some of (or all) the concepts in the dataset.

```{r}
acetaminophen <- c(1125315, 1127433, 1127078)
summariseConceptSetCounts(cdm, conceptSet = list("acetaminophen" = acetaminophen)) |>
filter(variable_name == "Number records") |>
plotConceptSetCounts()
plotConceptSetCounts(colour = "codelist_name")
```

### Characterise the population
Finally, OmopSketch can also help us to characterise the population at the start and end of the observation period.
```{r}
summarisePopulationCharacteristics(cdm) |>
tablePopulationCharacteristics(type = "flextable")
```
As seen, OmopSketch offers multiple functionalities to provide a general overview of a database. Additionally, it includes more tools and arguments that allow for deeper exploration, helping to assess the database's suitability for specific research studies. For further information, please refer to the vignettes.

Loading
Loading