-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from Boehringer-Ingelheim/rc
dv.loader release v2.0.0
- Loading branch information
Showing
18 changed files
with
723 additions
and
170 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,4 @@ | |
^\.github | ||
^\.lintr$ | ||
^NEWS\.md$ | ||
^inst/validation/results/\.gitempty$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
linters: linters_with_defaults( | ||
line_length_linter(120), | ||
object_usage_linter = NULL, | ||
indentation_linter = NULL, | ||
trailing_whitespace_linter = NULL | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Package: dv.loader | ||
Type: Package | ||
Title: Data loading module | ||
Version: 1.1.1 | ||
Version: 2.0.0 | ||
Authors@R: c( | ||
person( "Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")), | ||
person( given = "Ming", family = "Yang", role = c("aut", "cre"), email = "[email protected]"), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,30 @@ | ||
# Data Loading Module | ||
|
||
[![Checks 🧩](https://github.com/Boehringer-Ingelheim/dv.loader/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/Boehringer-Ingelheim/dv.loader/actions/workflows/ci.yml) | ||
# Data Loading | ||
|
||
The {dv.loader} package provides a simple interface for loading data from a network file storage folder or | ||
locally. It is designed to be used with `.RDS` and `.sas7bdat` file formats. | ||
The package provides a simple function, `load_data()`, which loads R and SAS data files into memory. | ||
Loading data from SQL databases is not yet supported. The function returns a list named by the file names passed, | ||
and containing data frames, along with metadata for that table. By default, the function will look for files in a | ||
sub-directory `sub_dir` of the base path defined by a environment variable "RXD_DATA". You can check if the base path | ||
is set by running `Sys.getenv("RXD_DATA")`. A single file or multiple files can be loaded at once. | ||
To make the loading process faster for large datasets, it is suggested that '.sas7bdat' files are converted to | ||
'.RDS' files. The function will prefer '.RDS' files over '.sas7bdat' files by default. | ||
|
||
## Installation | ||
|
||
```r | ||
install.packages("devtools") # if you have not installed "devtools" package | ||
devtools::install_github("Boehringer-Ingelheim/dv.loader") | ||
if (!require("remotes")) install.packages("remotes") | ||
remotes::install_github("Boehringer-Ingelheim/dv.loader") | ||
``` | ||
|
||
## Basic usage | ||
|
||
**Note**: `dv.loader` is only designed to be used with `.RDS` and `.sas7bdat` file formats. | ||
|
||
```r | ||
# getting data from a network file storage folder | ||
dv.loader::load_data(sub_dir = "subdir1/subdir2", file_names = c("adsl", "adae")) | ||
``` | ||
|
||
```r | ||
# getting data locally (e.g., if you have file `./data/adsl.RDS`) | ||
dv.loader::load_data(sub_dir = "data", file_names = c("adsl"), use_wd = T) | ||
dv.loader::load_data(sub_dir = "data", file_names = c("adsl"), use_wd = TRUE) | ||
``` | ||
|
||
## Contact | ||
|
||
If you have edits or suggestions, please open a PR. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,16 @@ | ||
--- | ||
url: https://boehringer-ingelheim.github.io/dv.loader | ||
|
||
template: | ||
bootstrap: 5 | ||
|
||
navbar: | ||
right: | ||
- icon: fa-github | ||
href: https://github.com/boehringer-ingelheim/dv.loader | ||
type: inverse | ||
structure: | ||
left: [intro, reference, articles, tutorials, news, qc] | ||
components: | ||
qc: | ||
text: Quality Control | ||
href: articles/qc.html | ||
home: | ||
title: dv.loader | ||
links: | ||
- text: Browse source code | ||
href: https://github.com/Boehringer-Ingelheim/dv.loader |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
pkg_name <- read.dcf("DESCRIPTION")[, "Package"] | ||
pkg_version <- read.dcf("DESCRIPTION")[, "Version"] | ||
test_results <- tibble::as_tibble(devtools::test()) | ||
|
||
local({ | ||
# This is evaluated inside a local because, otherwise, all the variables created in the chunks of the rendered | ||
# document leak into the environment | ||
|
||
validation_root <- "./inst/validation" | ||
validation_report_rmd <- file.path(validation_root, "val_report.Rmd") | ||
validation_report_html <- "val_report.html" | ||
validation_results <- file.path(validation_root, "results") | ||
val_param_rds <- file.path(validation_results, "val_param.rds") | ||
|
||
stopifnot(dir.exists(validation_root)) | ||
stopifnot(file.exists(validation_report_rmd)) | ||
|
||
stopifnot(dir.exists(validation_results)) | ||
unlink(list.files(validation_results)) | ||
|
||
saveRDS( | ||
list( | ||
package = pkg_name, | ||
tests = test_results, | ||
version = pkg_version | ||
), | ||
val_param_rds | ||
) | ||
|
||
rmarkdown::render( | ||
input = validation_report_rmd, | ||
params = list( | ||
package = pkg_name, | ||
tests = test_results, | ||
version = pkg_version | ||
), | ||
output_dir = validation_results, | ||
output_file = validation_report_html | ||
) | ||
|
||
# We use one of the leaked variables, created inside the validation report to asses if the validation is | ||
# succesful or not | ||
VALIDATION_PASSED | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Use a list to declare the specs | ||
|
||
specs_list <- list | ||
|
||
specs <- specs_list( | ||
"default_dir" = "loads data from the working directory by default", | ||
"file_names" = "returns an error if file_names is missing", | ||
"file_type" = "returns an error if the file type is not supported", | ||
"file_extensions" = "checks for valid file extensions", | ||
"data_integrity" = "ensures data is loaded correctly", | ||
"meta_data" = "reads metadata from the items of file.info()", | ||
"prefer_sas" = "loads a SAS or RDS file based on the prefer_sas flag" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
#' Setting up the validation | ||
|
||
if (!exists("package_name")) stop("package name must be in the environment when this script is sourced") | ||
|
||
#' How to link tests and specs | ||
|
||
if (FALSE) { | ||
test_that( | ||
vdoc[["add_spec"]]("my test description", specs$a_spec), | ||
{ | ||
expect_true(TRUE) | ||
} | ||
) | ||
} | ||
#' The specs variable on the call references the one declared in specs.R | ||
|
||
#' 3. For those tests covering more than one spec. | ||
#' NOTE: It must be c() and not list() | ||
#' | ||
|
||
if (FALSE) { | ||
test_that( | ||
vdoc[["add_spec"]]("my test_description", c(specs$my$hier$spec, vdoc_specs$my$hier$other_spec)), | ||
{ | ||
expect_true(TRUE) | ||
} | ||
) | ||
} | ||
|
||
#' Considerations: | ||
#' - parse_spec uses deparse(substitute()). These spec_ids are later used to check if all requirements | ||
#' are covered or not, therefore those calls cannot by substituted for: | ||
|
||
if (FALSE) { | ||
my_spec <- specs$my$hier$spec | ||
test_that(vdoc[["add_spec"]]("my test_description", my_spec), { | ||
... | ||
}) | ||
|
||
test_that(vdoc[["add_spec"]]("my test_description", specs[["my"]][["hier"]][["spec"]]), { | ||
... | ||
}) | ||
} | ||
|
||
# In this case the substitute captures my_spec and cannot be used later. | ||
# If you want to do this you must use the spec_id parameter where you pass a | ||
# character vector with the ids. | ||
# Notice that the ids in character form do no longer have the specs particle | ||
# at the beginning, only the pathing of the spec is needed. | ||
|
||
if (FALSE) { | ||
my_spec <- specs$my$hier$spec | ||
test_that(vdoc$parse_spec(my_spec, "my test_description", spec_id = c("my$hier$spec")), { | ||
... | ||
}) | ||
} | ||
|
||
# Validation code | ||
# nolint start cyclocomp_linter | ||
local({ | ||
specs <- source( | ||
system.file("validation", "specs.R", package = package_name, mustWork = TRUE), | ||
local = TRUE | ||
)[["value"]] | ||
recursive_ids <- function(x, parent = character(0)) { | ||
if (!is.list(x)) { | ||
return(parent) | ||
} | ||
unlist(mapply(recursive_ids, | ||
x, | ||
paste(parent, names(x), | ||
sep = if (identical(parent, character(0))) "" else "$" | ||
), | ||
SIMPLIFY = FALSE, USE.NAMES = FALSE | ||
)) | ||
} | ||
|
||
recursive_ids <- function(x, parent = character(0)) { | ||
if (!is.list(x)) { | ||
return(parent) | ||
} | ||
unlist(mapply(recursive_ids, x, | ||
paste(parent, names(x), | ||
sep = if (identical(parent, character(0))) "" else "$" | ||
), | ||
SIMPLIFY = FALSE, USE.NAMES = FALSE | ||
)) | ||
} | ||
|
||
|
||
spec_id_list <- recursive_ids(specs) | ||
|
||
list( | ||
specs = specs, | ||
spec_id_list = spec_id_list, | ||
add_spec = function(desc, spec, spec_id) { | ||
if (missing(spec_id)) { | ||
if (!is.character(spec) || length(spec) == 0) stop("spec must be a non-empty character vector") | ||
s_spec <- substitute(spec) | ||
if (s_spec[[1]] == "c") { | ||
spec_id <- sapply(s_spec[2:length(s_spec)], identity) | ||
} else { | ||
spec_id <- list(s_spec) # Otherwise the posterior vapply iterates over the expression | ||
} | ||
|
||
spec_id_chr <- vapply(spec_id, function(x) { | ||
sub("^[^$]*\\$", "", deparse(x)) | ||
}, FUN.VALUE = character(1)) | ||
|
||
if (!all(spec_id_chr %in% spec_id_list)) { | ||
stop("At least one spec is not declared in the spec list") | ||
} # This should be covered by pack of constants but just in case | ||
} else { | ||
spec_id_chr <- spec_id | ||
} | ||
paste0(desc, "__spec_ids{", paste0(spec_id_chr, collapse = ";"), "}") | ||
}, | ||
get_spec = function(test, specs) { | ||
spec_ids <- utils::strcapture( | ||
pattern = "__spec_ids\\{(.*)\\}", | ||
x = test, | ||
proto = list(spec = character()) | ||
)[["spec"]] | ||
|
||
spec_ids <- strsplit(spec_ids, split = ";") | ||
|
||
specs_and_id <- list() | ||
|
||
for (idx in seq_along(spec_ids)){ | ||
ids <- spec_ids[[idx]] | ||
if (all(!is.na(ids))) { | ||
this_specs <- list() | ||
for (sub_idx in seq_along(ids)) { | ||
id <- ids[[sub_idx]] | ||
this_specs[[sub_idx]] <- eval(str2expression(paste0("specs$", id))) | ||
} | ||
specs_and_id[[idx]] <- list( | ||
spec_id = ids, | ||
spec = this_specs | ||
) | ||
} else { | ||
specs_and_id[[idx]] <- list( | ||
spec_id = NULL, | ||
spec = NULL | ||
) | ||
} | ||
} | ||
specs_and_id | ||
} | ||
|
||
|
||
) | ||
}) | ||
|
||
# nolint end cyclocomp_linter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
--- | ||
title: "Quality Control" | ||
output: | ||
html_document: | ||
toc: true | ||
toc_depth: 2 | ||
code_folding: hide | ||
toc-title: "----\nIndex" | ||
|
||
params: | ||
package: NULL | ||
tests: NULL | ||
version: NULL | ||
--- | ||
|
||
```{r, child = "val_report_child.Rmd"} | ||
``` |
Oops, something went wrong.