From 71b7a7e65d6067c83d5659eb653e3f3fa709c0b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADcio=20Kury?= Date: Fri, 13 Jan 2023 22:29:01 -0500 Subject: [PATCH] 0.7.0 --- DESCRIPTION | 10 +- R/calculate_formula.R | 193 +++++++------------ R/component.R | 92 +++++---- R/plot.R | 2 +- R/record_source.R | 48 +++-- R/setup.R | 157 ++++++++++++---- docs/aki.Rmd | 28 +-- docs/aki.html | 382 +++++++++++++++++-------------------- docs/obese_ami.html | 385 ++++++++++++++++++++++++++++---------- docs/weight-increase.Rmd | 25 ++- docs/weight-increase.html | 235 +++++++++-------------- man/sqla.Rd | 2 +- 12 files changed, 861 insertions(+), 698 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 68fb781..5e36c67 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: phea Title: Phenotyping Algebra -Version: 0.6.4.0000 +Version: 0.7.0.0000 Authors@R: person("Fabrício", "Kury", , "github@kury.dev", role = c("aut", "cre"), comment = c(ORCID = "YOUR-ORCID-ID")) -Description: Provides a formula-based framework for identifying patients in time-stamped data in SQL databases. +Description: SQL query builder (based on dbplyr) that creates queries that calculate formulas using patient data as variables. 
License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) @@ -16,9 +16,9 @@ Imports: stringr, purrr, rlang, - tidyr, - plotly + tidyr Suggests: knitr, - rmarkdown + rmarkdown, + plotly VignetteBuilder: knitr diff --git a/R/calculate_formula.R b/R/calculate_formula.R index 364697e..133ef02 100644 --- a/R/calculate_formula.R +++ b/R/calculate_formula.R @@ -123,9 +123,6 @@ calculate_formula <- function(components, fml = NULL, window = NULL, export = NU # , pick = !(is.null(component$pick_by) || is.na(component$pick_by) || component$pick_by == '') # , pick_by = component$pick_by) - # if(filtering_dates) - # res <- mutate(res, date_out = comp_name %in% dates_from) - return(res) }) |> dplyr::bind_rows() @@ -234,7 +231,7 @@ calculate_formula <- function(components, fml = NULL, window = NULL, export = NU # Add extra dates ------------------------------------------------------------------------------------------------- if(!is.null(dates)) { - message('Warning: dates is yet to be properly tested.') + message('Warning: `dates` is yet to be properly tested.') dates_table <- dbplyr::copy_inline(.pheaglobalenv$con, dates) board <- dplyr::union_all(board, dates_table) } @@ -245,115 +242,61 @@ calculate_formula <- function(components, fml = NULL, window = NULL, export = NU ~rlang::exprs(!!..1 := !!dplyr::sql(..2))) |> unique() |> unlist(recursive = FALSE) - # The unique() above is just in case, but is it needed? Seems like the only way there could be duplicates is if the - # same component gets added twice to the call to calculate_formula(). + # The unique() above is just in case, but is it needed? Seems like the only way there could be duplicates is if the + # same component gets added twice to the call to calculate_formula(). ## Second, apply commands. 
- phea_row_id_sql_txt <- paste0('row_number() over (order by ', dbQuoteId('pid'), ', ', dbQuoteId('ts'), ')') - - if(F) { - # if(any(var_map$pick)) { - # var_map_picks <- var_map[var_map$pick,] - # - # picks <- purrr::map(seq(nrow(var_map_picks)), \(i) { - # phea_col_name <- var_map_picks$composed_name[i] - # orig_col_name <- var_map_picks$column[i] - # component_name <- var_map_picks$component_name[i] - # rec_name <- var_map_picks$rec_name[i] - # pick_by <- var_map_picks$pick_by[i] - # sql_txt <- paste0('case when ', - # dbQuoteId('name'), ' = ', dbQuoteStr(rec_name), ' and ', - # dbQuoteId(pick_by), ' = ', dbQuoteId(paste0(component_name, '_', pick_by)), - # ' then ', dbQuoteId(orig_col_name), ' else NULL end') - # rlang::exprs(!!phea_col_name := !!dplyr::sql(sql_txt)) - # }) |> - # unique() |> - # unlist(recursive = FALSE) - # # } - # - # # pick_sql <- list( - # # x = components, - # # y = names(components)) |> - # # purrr::pmap(\(x, y) { - # # if(x$pick) { - # # browser() - # # variables_to_hide <- - # # return(paste0('case when name != \'', x$rec_source$rec_name, - # # '\' then true else ', y, '_', x$pick_by, ' = ', x$pick_by, ' end')) - # # } else - # # return(NULL) - # # }) |> - # # purrr::discard(is.null) |> - # # paste0(collapse = ' and ') - # - # # if(pick_sql != '') { - # # Apply commands, filter, then drop unneded columns. 
- # board <- dplyr::mutate(board, - # phea_row_id = dplyr::sql(phea_row_id_sql_txt), - # !!!commands) - # - # browser() - # board <- filter(board, sql(pick_sql)) - # - # commands_names <- names(commands) - # if(filtering_dates) { - # board <- dplyr::select(board, - # phea_row_id, pid, ts, phea_date_out, all_of(commands_names)) - # } else { - # board <- dplyr::transmute(board, - # phea_row_id, pid, ts, all_of(commands_names)) - # } - # } else { - } + # Make phea_row_id + prid <- dbplyr::win_over(con = .pheaglobalenv$con, + expr = dplyr::sql('row_number()'), order = c('pid', 'ts')) # Apply commands to the board all at once, so we only generate a single layer of "SELECT ... FROM (SELECT ...)". if(filtering_dates) { board <- dplyr::transmute(board, - phea_row_id = dplyr::sql(phea_row_id_sql_txt), + phea_row_id = prid, pid, ts, name, !!!commands) } else { board <- dplyr::transmute(board, - phea_row_id = dplyr::sql(phea_row_id_sql_txt), + phea_row_id = prid, pid, ts, !!!commands) } - # } - - # browser() - ## Third and final, fill the blanks downward with the last non-blank value, within the patient. - # board <- board |> - # dbplyr::window_order(pid, ts) |> - # dplyr::group_by(pid) |> - # tidyr::fill(!any_of(c('phea_row_id', 'pid', 'ts', 'name'))) |> - # ungroup() - - # For some reason, apparently a bug in dbplyr's SQL translation, we need to "erase" an ORDER BY "pid", "ts" that is - # left over in the translated query. That ORDER BY persists even if you posteriorly do a dplyr::group_by() on the - # result of the phenotype (i.e. the board at this point). This causes the SQL server's query engine to raise an error, - # saying that "ts" must also be part of the GROUP BY. This left over ORDER BY "pid", "ts" apparently comes from the - # dbplyr::window_order() call that was necessary to guarantee the intended behavior of the call to - # tidyr::fill.lazy_tbl() above. 
- # board <- board |> - # arrange() + + if(.pheaglobalenv$compatibility_mode) { + ## Fill the blanks downward with the last non-blank value, within the patient. + board <- board |> + dbplyr::window_order(pid, ts) |> + dplyr::group_by(pid) |> + tidyr::fill(!any_of(c('phea_row_id', 'pid', 'ts', 'name'))) |> + ungroup() + + # For some reason, apparently a bug in dbplyr's SQL translation, we need to "erase" an ORDER BY "pid", "ts" that is + # left over in the translated query. That ORDER BY persists even if you posteriorly do a dplyr::group_by() on the + # result of the phenotype (i.e. the board at this point). This causes the SQL server's query engine to raise an + # error, saying that "ts" must also be part of the GROUP BY. This left over ORDER BY "pid", "ts" apparently comes + # from the dbplyr::window_order() call that was necessary to guarantee the intended behavior of the call to + # tidyr::fill.lazy_tbl() above. + board <- board |> + arrange() + } # dates_from ------------------------------------------------------------------------------------------------------ if(filtering_dates) { # Obtain `rec_name`s from the record sources of the target components. rec_names <- unique(var_map[var_map$component_name %in% dates_from,]$rec_name) - sql_txt <- paste0(dbQuoteId('name'), ' in (', paste0(dbQuoteStr(rec_names), collapse = ', '), ')') - + sql_txt <- paste0(dbQuoteId('name'), ' in (', paste0(rec_names, collapse = ', '), ')') + + # Keep only the rows coming from those `rec_name`s. board <- board |> - filter(sql(sql_txt)) + dplyr::filter(dplyr::sql(sql_txt)) } # Compute window -------------------------------------------------------------------------------------------------- window_components <- setdiff(var_map$component_name, out_window) if(!input_is_phenotype && length(window_components) > 1) { # Window only makes sense if there is > 1 component. 
- window_components_sql <- window_components |> - unique() |> - paste0('_ts') |> + window_components_sql <- paste0(unique(window_components), '_ts') |> DBI::dbQuoteIdentifier(conn = .pheaglobalenv$con) |> paste0(collapse = ', ') @@ -367,18 +310,27 @@ calculate_formula <- function(components, fml = NULL, window = NULL, export = NU sql_ts_greatest <- sql_ts_least } - # phea_ts_row is used to pick the best computation within each date. This is for the case when multiple data points - # exist on the same date. The best computation for each date is the last row within that date. - # The most complete computation is the last one in each timestamp. 'max(phea_row_id) over (partition by "pid", "ts")' - # finds the row with the largest (most complete) phea_row_id in each timestamp. last_value() could give the same - # result, and could be potentially faster (wild assumption) due to optimizations, but that's just an idea. - sql_txt <- paste0('MAX(', dbQuoteId('phea_row_id'), ') OVER (PARTITION BY ', - dbQuoteId('pid'), ', ', dbQuoteId('ts'), ')') - - board <- board |> - dplyr::mutate( - window = dplyr::sql(sql_ts_greatest) - dplyr::sql(sql_ts_least), - phea_ts_row = dplyr::sql(sql_txt)) + if(.pheaglobalenv$compatibility_mode) { + # phea_ts_row is used to pick the best computation within each date. This is for the case when multiple data points + # exist on the same date. The best computation for each date is the last row within that date. + # The most complete computation is the last one in each timestamp. 'max(phea_row_id) over (partition by "pid", + # "ts")' finds the row with the largest (most complete) phea_row_id in each timestamp. last_value() could give the + # same result, and could be potentially faster (wild assumption) due to optimizations, but that's just an idea. 
+ + # Make phea_ts_row + ptsr_txt <- paste0('max(', dbQuoteId('phea_row_id'), ')') + + board <- board |> + dplyr::mutate( + window = dplyr::sql(sql_ts_greatest) - dplyr::sql(sql_ts_least), + phea_ts_row = dbplyr::win_over(con = .pheaglobalenv$con, + expr = dplyr::sql(ptsr_txt), + partition = c('pid', 'ts'))) + } else { + board <- board |> + dplyr::mutate( + window = dplyr::sql(sql_ts_greatest) - dplyr::sql(sql_ts_least)) + } # Filter rows ----------------------------------------------------------------------------------------------------- # We also need to: @@ -392,51 +344,45 @@ calculate_formula <- function(components, fml = NULL, window = NULL, export = NU # is FALSE. required_components <- setdiff(names(components), dont_require) if(length(required_components) > 0) { - sql_txt <- required_components |> - paste0('_ts') |> + sql_txt <- paste0(required_components, '_ts') |> DBI::dbQuoteIdentifier(conn = .pheaglobalenv$con) |> - paste0(' is not null') |> - paste(collapse = ' and ') + paste0(' is not null', collapse = ' and ') if(has_content(window)) { board <- dplyr::filter(board, - phea_row_id == phea_ts_row && - dplyr::sql(sql_txt) && - window < local(window)) + window < local(window) && + dplyr::sql(sql_txt)) } else { board <- dplyr::filter(board, - phea_row_id == phea_ts_row && - dplyr::sql(sql_txt)) + dplyr::sql(sql_txt)) } } else { # No required components after all, because all were excluded by dont_require. Let's just filter by the most # complete computation. if(has_content(window)) { board <- dplyr::filter(board, - phea_row_id == phea_ts_row && - window < local(window)) - } else { - board <- dplyr::filter(board, - phea_row_id == phea_ts_row) + window < local(window)) } } } else { # No need to require all components. Let's just filter by the most complete computation. 
if(has_content(window)) { # This covers case if `window` is NULL board <- board |> - dplyr::filter(phea_row_id == phea_ts_row && - window < local(window)) - } else { - board <- board |> - dplyr::filter(phea_row_id == phea_ts_row) + dplyr::filter(window < local(window)) } } + if(.pheaglobalenv$compatibility_mode) { + # Keep only most complete computation + board <- dplyr::filter(board, + phea_row_id == phea_ts_row) + } + # Apply filters, if provided. if(!is.null(filters) && any(!is.na(filters))) { sql_txt <- paste0('(', paste0(filters[!is.na(filters)], collapse = ') AND ('), ')') board <- board |> - filter(sql(sql_txt)) + dplyr::filter(dplyr::sql(sql_txt)) } # Limit number of output rows, if requested. @@ -452,6 +398,7 @@ calculate_formula <- function(components, fml = NULL, window = NULL, export = NU # Calculate the formulas, if any. res_vars <- NULL if(!is.null(fml)) { + # is cascaded on? if(cascaded) { # Compute one at a time, so that the prior result can be used in the next formula. for(i in seq(fml)) { @@ -482,16 +429,14 @@ calculate_formula <- function(components, fml = NULL, window = NULL, export = NU # Get the name from the parent object, fml. res_vars <- c(res_vars, names(fml)[i]) - # The formula is the SQL. - # sql_txt <- cur_fml - - # Apply to the board, producing a layer of SELECT ... FROM (SELECT ...). + # Apply to the board, producing a layer of SELECT ... FROM (SELECT ...). The formula is the SQL. board <- dplyr::mutate(board, !!rlang::sym(names(fml)[i]) := dplyr::sql(cur_fml)) } } } else { # cascaded is turned off. + # Let's check if any of the formulas is itself a list, which means cascaded was supposed to be on. if(any(lapply(fml, class) == 'list')) stop('Nested formulas require cascaded = TRUE.') diff --git a/R/component.R b/R/component.R index 447c833..e933d9d 100644 --- a/R/component.R +++ b/R/component.R @@ -115,7 +115,7 @@ make_component <- function(input_source, if('tbl_lazy' %in% class(input_source)) { # Input is a lazy table. 
Make a record source from it. if(is.null(pid) || is.null(ts)) - stop('If providing a lazy table to make_component(), must also provide pid or .pid and ts or .ts.') + stop('If providing a lazy table to make_component(), must also provide `pid` or `.pid` and `ts` or `.ts`.') } else { stop('Unable to recognize input_source.') } @@ -204,7 +204,7 @@ make_component <- function(input_source, if(any(!names_mask)) # Is any name empty? default_arg <- args[min(which(!names_mask))] # Use first empty name. else - default_arg <- def_arg # 'last_value' + default_arg <- def_arg } if(!is.null(preprocess_fn)) @@ -231,11 +231,15 @@ make_component <- function(input_source, use_fn <- capture_named_args(component$fn, 'last_value', component$ts_fn, component$columns, component$passthrough) - # Replace with custom aggregate if needed + # Use custom aggregates if needed if(exists('custom_aggregate', envir = .pheaglobalenv)) { - mask <- grepl('last_value', use_fn, ignore.case = TRUE) - if(any(mask)) - use_fn[mask] <- .pheaglobalenv$custom_aggregate + for(i in 1:length(.pheaglobalenv$custom_aggregate)) { + fn_name_out <- names(.pheaglobalenv$custom_aggregate)[i] + fn_name_in <- .pheaglobalenv$custom_aggregate[[i]] + mask <- grepl(fn_name_out, use_fn, ignore.case = TRUE) + if(any(mask)) + use_fn[mask] <- fn_name_in + } } # use_arg is a vector of arguments to window functions, one for each column. @@ -267,6 +271,16 @@ make_component <- function(input_source, # use_omit_value is a vector of Boolean, one for each column. use_omit_value <- capture_named_args(component$omit_value, FALSE, component$ts_omit_value, component$columns, component$passthrough) + +# Default nulls treatment ----------------------------------------------------------------------------------------- + if(.pheaglobalenv$engine_code == 3) { + # TODO: Test this on DataBricks. 
+ nulls_treatment_mask <- sapply(use_fn, grepl, pattern = 'last_value|first_value', ignore.case = TRUE) + if(any(nulls_treatment_mask)) { + use_arg[nulls_treatment_mask] <- ', TRUE' + use_omit_value[nulls_treatment_mask] <- FALSE + } + } # Default to line = 0 if needed ----------------------------------------------------------------------------------- # At this point, if any of the parameters line/bound/delay/window/ahead/up_to were NA, they are now NULL (or their @@ -282,8 +296,7 @@ make_component <- function(input_source, # Build window function SQL --------------------------------------------------------------------------------------- # columns_sql is vectorized by the presence of `component$columns` - columns_sql <- paste0('case when ', dbQuoteId('name'), ' = ', - DBI::dbQuoteString(.pheaglobalenv$con, component$rec_source$rec_name), + columns_sql <- paste0('case when ', dbQuoteId('name'), ' = ', component$rec_source$rec_name, ' then ', dbQuoteId(component$columns), ' else null end') component$placement_sql <- columns_sql @@ -308,39 +321,39 @@ make_component <- function(input_source, if(isTRUE(is.na(line))) line <- NULL + if(.pheaglobalenv$compatibility_mode) { + # This is the "most default" case: the user just wants the most recent record of the component, without line/ + # bound/delay/window/ahead/up_to. In this case, we don't need a window function. We can just copy the column + # whenever the line comes from the correct record source, then use tidyr::fill() to fill NULLs downward. + # In other words, the SQL to access the value is merely the CASE WHEN ... statement that otherwise goes inside the + # window function call. + component$access <- 'line' + + component$access_sql <- dplyr::sql(columns_sql) + component_has_been_built <- TRUE + } + if(!component_has_been_built && (!is.null(line) || !is.null(bound))) { # Produce access via *line*. # Line access is built differently, because we can use dbplyr::win_over(). 
dbplyr::win_over() does not support # window functions' RANGE mode, forcing us to not use it when mode is not ROWS. component$access <- 'line' - # TODO: Revise below. - if(F && - is.null(bound) && !is.null(line) && line == 0) { - # Here is one optimization. - # This is the "most default" case: the user just wants the most recent record of the component, without line/ - # bound/delay/window/ahead/up_to. In this case, we don't need a window function. We can just copy the column - # whenever the line comes from the correct record source. In other words, the SQL to access the value is merely - # the CASE WHEN ... statement that otherwise goes inside the window function call. - component$access_sql <- sql(columns_sql) - } else { - params_sql <- make_params_sql() - component$access_sql <- lapply(seq(component$columns), \(i) { - sql_txt <- paste0(use_fn[i], '(', params_sql[i], ')') - - dbplyr::win_over( - expr = sql(sql_txt), - partition = c('pid'), #, 'name'), - order = 'ts', - frame = c( - ifelse(is.null(bound), -Inf, -bound), - ifelse( is.null(line), 0, -line)), - con = .pheaglobalenv$con) - }) - # Unlist the SQL objects without accidentally converting to character, which happens when we use unlist(). - component$access_sql <- do.call(c, component$access_sql) - } + params_sql <- make_params_sql() + + component$access_sql <- lapply(seq(component$columns), \(i) { + sql_txt <- paste0(use_fn[i], '(', params_sql[i], ')') + dbplyr::win_over(con = .pheaglobalenv$con, + expr = dplyr::sql(sql_txt), + partition = 'pid', + order = 'ts', + frame = c( + ifelse(is.null(bound), -Inf, -bound), + ifelse( is.null(line), 0, -line))) + }) + # Unlist the SQL objects without accidentally converting to character, which happens when we use unlist(). 
+ component$access_sql <- do.call(c, component$access_sql) component_has_been_built <- TRUE } @@ -348,7 +361,10 @@ make_component <- function(input_source, # As commented above, for access other than *line* we need to write out the window function call by ourselves, # because dbplyr::win_over() does not support RANGE mode. params_sql <- make_params_sql() - sql_start <- paste0(use_fn, '(', params_sql, ') over (', over_clause, ' range between ') + + sql_start <- paste0(use_fn, '(', params_sql, ') over (', over_clause, ' range between ') # 1 + # switch(.pheaglobalenv$engine_code, + # paste0(use_fn, '(', params_sql, ') over (', over_clause, ' range between ')) # 1 if(!is.null(delay) || !is.null(window)) { # Produce access via *delay/window*. @@ -375,14 +391,14 @@ make_component <- function(input_source, component$access <- 'ahead' sql_txts <- paste0(sql_start, - ifelse( is.null(ahead), "'0 days'::interval following", paste0(ahead, ' following')), + ifelse( is.null(ahead), 'current row', paste0(ahead, ' following')), ' and ', - ifelse(is.null(up_to) || up_to == Inf, 'unbounded', up_to), + ifelse(is.null(up_to) || up_to == Inf, 'unbounded', up_to), ' following)') } # Produce SQL objects from character - component$access_sql <- sql(sql_txts) + component$access_sql <- dplyr::sql(sql_txts) component_has_been_built <- TRUE } diff --git a/R/plot.R b/R/plot.R index 6460146..0b64102 100644 --- a/R/plot.R +++ b/R/plot.R @@ -46,7 +46,7 @@ phea_plot <- function(board, pid, plot_title = NULL, exclude = NULL, verbose = N # Plot all columns except some. chart_items <- colnames(board_data) - if(sum(c('phea_row_id', 'pid', 'ts', 'window') %in% colnames(board_data)) > 3) { + if(sum(c('phea_row_id', 'pid', 'ts', 'window') %in% colnames(board_data)) > 2) { # The board has the base columns of a phenotype result. Remove them. 
chart_items <- setdiff(chart_items, c('phea_row_id', 'pid', 'ts', 'window')) } diff --git a/R/record_source.R b/R/record_source.R index 4121390..c6023b9 100644 --- a/R/record_source.R +++ b/R/record_source.R @@ -6,12 +6,15 @@ # Random name ----------------------------------------------------------------------------------------------------- -random_name <- function(len) { - # Generate random rec_name, 6 characters long, case-insensitive, starting with a letter. - sample(letters, 1) |> - c(sample(c(letters, 0:9), len-1, replace = TRUE)) |> - as.list() |> - do.call(what = paste0) +# random_rec_name <- function(len) { +# } +random_rec_name <- function(len) { + # random rec_name, integer, 0 to `len` digits long + return(as.integer(runif(1) * 10^len)) + + # random rec_name, `len` characters long, case-insensitive, starting with a letter + # c(sample(letters, 1), sample(c(letters, 0:9), len-1, replace = TRUE)) |> + # as.list() |> do.call(what = paste0) } @@ -28,12 +31,11 @@ random_name <- function(len) { #' #' @param pid Character. Name of the colum in `records` that gives the person (patient) identifier. #' @param ts Character. Name of the colum in `records` that gives the timestamp. -#' @param .pid Unquoted characters. Optional. Use this argument to pass unquoted characters to the `pid` argument. If -#' `pid` is provided, `.pid` is ignored. See examples. -#' @param .ts Unquoted characters. Optional. Use this argument to pass unquoted characters to the `ts` argument. If `ts` -#' is provided, `.ts` is ignored. See examples. +#' @param .pid,.ts Unquoted characters. Use these argument to pass unquoted characters to the `pid` or `ts` arguments. +#' If `pid`/`ts` is provided, `.pid`/`.ts` is ignored. See examples. #' -#' @param rec_name Character. Optional. Record name. +#' @param rec_name Integer. Optional. Number to use as record name. If not provided, a random one will be generated. +#' #' @param vars Character vector. Optional. 
Name of the colums to make available from `records`. If not supplied, all #' columns are used. #' @@ -69,25 +71,39 @@ random_name <- function(len) { make_record_source <- function(records, pid = NULL, ts = NULL, vars = NULL, .pid = NULL, .ts = NULL, rec_name = NULL) { rec_source <- list() + # records rec_source$records <- records - if(is.null(rec_name)) - rec_name <- random_name(6) - - rec_source$rec_name <- rec_name - + # pid if(is.null(pid) || is.na(pid)) pid <- deparse(substitute(.pid)) rec_source$pid <- pid + # ts if(is.null(ts) || is.na(ts)) ts <- deparse(substitute(.ts)) rec_source$ts <- ts + # rec_name + if(is.null(rec_name)) { + rec_name <- random_rec_name(6) + } else { + if(is.numeric(rec_name)) + rec_name <- as.integer(rec_name) + + if(!is.integer(rec_name)) + stop('rec_name must be integer or numeric, or NULL.') + } + + rec_source$rec_name <- rec_name + + # vars if(is.null(vars)) vars <- setdiff(colnames(records), pid) # Keep all columns but `pid` + rec_source$vars <- vars + # finalize attr(rec_source, 'phea') <- 'record_source' rec_source diff --git a/R/setup.R b/R/setup.R index c4fbea1..e03d949 100644 --- a/R/setup.R +++ b/R/setup.R @@ -13,47 +13,103 @@ if(!exists('.pheaglobalenv')) # Setup Phea ------------------------------------------------------------------------------------------------------ #' Setup Phea #' -#' Configures Phea, in particular the SQL shorthands `sqlt()`, `sql0()` and `sqla()`. +#' Configures Phea, %in% particular the SQL shorthands `sqlt()`, `sql0()` and `sqla()`. #' #' @export #' @param connection DBI-compatible SQL connection (e.g. produced by DBI::dbConnect). -#' @param schema Schema to be used by default in `sqlt()`. If no schema, use `NA`. -#' @param verbose Logical. Optional. If TRUE (default), functions will print to console at times. -#' @param .fix_dbplyr_spark Logical. Optional. Very niche functionality. 
Set to `TRUE` to attempt to fix the use of -#' `IGNORE NULLS` by the OBDC driver connected to a Spark SQL server/cluster. This is the only situation where this -#' argument should be used. -setup_phea <- function(connection, schema, verbose = TRUE, .fix_dbplyr_spark = FALSE) { +#' @param schema Schema to be used by default in `sqlt()`. If no schema, use `NA`. +#' @param verbose Logical. If TRUE (default), functions will print to console at times. +#' @param engine Character. What is the flavor of your SQL server. If not provided, `setup_phea()` will try to detect it +#' from `dbplyr::db_connection_describe()`. Options are: `postgres`, `mysql`, `redshift`, `spark`, `oracle`, +#' `bigquery`, `sqlserver`. Names are case-insensitive but must otherwise match exactly. If `engine` is not provided and +#' it can't be detected, an error is raised. +#' @param compatibility_mode Logical. If `TRUE` (default is `FALSE`), all component features besides `window` are +#' deactivated, and all components become strictly _"most recently available record"_ on all columns. Turning this +#' feature on may help make Phea work on SQL flavors where it wasn't tested. 
+setup_phea <- function(connection, schema, verbose = TRUE, engine = NULL, compatibility_mode = FALSE, + custom_aggregate = NULL) { assign('con', connection, envir = .pheaglobalenv) assign('schema', schema, envir = .pheaglobalenv) assign('verbose', verbose, envir = .pheaglobalenv) - postgres_exists <- function() { - db_engine <- dbplyr::db_connection_describe(.pheaglobalenv$con) - return(grepl('postgres', db_engine, ignore.case = TRUE)) + if(is.null(engine)) { + db_desc <- dbplyr::db_connection_describe(.pheaglobalenv$con) + + if(is.null(engine) && grepl('postgres', db_desc, ignore.case = TRUE)) + engine <- 'postgres' + + if(is.null(engine) && grepl('mysql', db_desc, ignore.case = TRUE)) + engine <- 'mysql' + + if(is.null(engine) && grepl('redshift', db_desc, ignore.case = TRUE)) + engine <- 'redshift' + + if(is.null(engine) && (grepl('spark', db_desc, ignore.case = TRUE) + || isTRUE(try(connection@info$dbms.name == "Spark SQL")))) + engine <- 'spark' + + if(is.null(engine) && grepl('bigquery', db_desc, ignore.case = TRUE)) + engine <- 'bigquery' + + if(is.null(engine)) + stop(paste0("Unable to detect SQL engine. Please provide `engine` argument. Options are: postgres, mysql, ", + "redshift, spark, oracle, bigquery, sqlserver. 
If your engine is not on the list, you can try another one ", + "with similar SQL syntax.")) + } else { + # Normalize to lower case + engine <- tolower(engine) } - sql_function_exists <- function(name) { - function_check <- DBI::dbGetQuery(.pheaglobalenv$con, - paste0('select * from - pg_proc p - join pg_namespace n - on p.pronamespace = n.oid - where proname =\'', name, '\';')) |> - nrow() - return(function_check == 1) + assign('engine', engine, envir = .pheaglobalenv) + + engine_code <- NULL + + if(is.null(engine_code) && engine %in% c('mysql')) { + engine_code <- 0 + compatibility_mode <- TRUE } - if(postgres_exists()) { + if(is.null(engine_code) && engine %in% c('postgres')) + engine_code <- 1 + + if(is.null(engine_code) && engine %in% c('redshift', 'oracle', 'bigquery')) + engine_code <- 2 + + if(is.null(engine_code) && engine %in% c('spark')) + engine_code <- 3 + + if(is.null(engine_code) && engine %in% c('sqlserver')) + engine_code <- 4 + + assign('engine_code', engine_code, envir = .pheaglobalenv) + assign('compatibility_mode', compatibility_mode, envir = .pheaglobalenv) + + if(engine == 'postgres') { + sql_function_exists <- function(name) { + function_check <- DBI::dbGetQuery(.pheaglobalenv$con, + paste0('select * from + pg_proc p + join pg_namespace n + on p.pronamespace = n.oid + where proname =\'', name, '\';')) |> + nrow() + return(function_check == 1) + } - if(!sql_function_exists('phea_coalesce_r_sfunc')) { - if(verbose) - message('PostgreSQL detected in "', dbplyr::db_connection_describe(.pheaglobalenv$con), '".') - + need_to_install <- c('phea_coalesce_r_sfunc', 'phea_coalesce_nr_sfunc', + 'phea_last_value_ignore_nulls', 'phea_first_value_ignore_nulls') |> + sapply(sql_function_exists, USE.NAMES = TRUE) + need_to_install <- !need_to_install + + if(any(need_to_install) && verbose) + message('Engine configured to PostgreSQL.') + + if(need_to_install[['phea_coalesce_r_sfunc']]) { if(verbose) message('Installing phea_coalesce_r_sfunc.') 
DBI::dbExecute(.pheaglobalenv$con, - "create function phea_coalesce_r_sfunc(state anyelement, value anyelement) + "create or replace function phea_coalesce_r_sfunc(state anyelement, value anyelement) returns anyelement immutable parallel safe as @@ -62,27 +118,60 @@ setup_phea <- function(connection, schema, verbose = TRUE, .fix_dbplyr_spark = F $$ language sql;") } - if(!sql_function_exists('phea_last_value_ignore_nulls')) { + if(need_to_install[['phea_coalesce_nr_sfunc']]) { + if(verbose) { + message('Installing phea_coalesce_nr_sfunc.') + } + + DBI::dbExecute(.pheaglobalenv$con, + "create or replace function phea_coalesce_nr_sfunc(state anyelement, value anyelement) + returns anyelement + immutable parallel safe + as + $$ + select coalesce(state, value); + $$ language sql;") + } + + if(need_to_install[['phea_last_value_ignore_nulls']]) { if(verbose) message('Installing phea_last_value_ignore_nulls.') DBI::dbExecute(.pheaglobalenv$con, - "create aggregate phea_last_value_ignore_nulls(anyelement) ( + "create or replace aggregate phea_last_value_ignore_nulls(anyelement) ( sfunc = phea_coalesce_r_sfunc, stype = anyelement );") } - assign('custom_aggregate', 'phea_last_value_ignore_nulls', envir = .pheaglobalenv) + if(need_to_install[['phea_first_value_ignore_nulls']]) { + if(verbose) + message('Installing phea_first_value_ignore_nulls.') + + DBI::dbExecute(.pheaglobalenv$con, + "create or replace aggregate phea_first_value_ignore_nulls(anyelement) ( + sfunc = phea_coalesce_nr_sfunc, + stype = anyelement + );") + } + + custom_aggregate <- list( + last_value = 'phea_last_value_ignore_nulls', + first_value = 'phea_first_value_ignore_nulls') } - if(.fix_dbplyr_spark) { - if(connection@info$dbms.name == "Spark SQL") { - # Fix dbplyr's last_value() implementation. 
- `last_value_sql.Spark SQL` <<- function(con, x) { - dbplyr:::build_sql("LAST_VALUE(", ident(as.character(x)), ", true)", con = con) - } + if(engine == 'spark') { + # Insert _nulls treatment_ into dbplyr's last_value() and first_value() implementation. + `last_value_sql.Spark SQL` <<- function(con, x) { + dbplyr:::build_sql("LAST_VALUE(", ident(as.character(x)), ", true)", con = con) + } + + `first_value_sql.Spark SQL` <<- function(con, x) { + dbplyr:::build_sql("FIRST_VALUE(", ident(as.character(x)), ", true)", con = con) } } + + if(!is.null(custom_aggregate)) + assign('custom_aggregate', custom_aggregate, envir = .pheaglobalenv) } diff --git a/docs/aki.Rmd b/docs/aki.Rmd index 63df265..2331ed5 100644 --- a/docs/aki.Rmd +++ b/docs/aki.Rmd @@ -11,7 +11,7 @@ vignette: > library(knitr) options(scipen = 5e5) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") -library(credx) # contains cred$pg +library(credx) # contains fabcred$pg library(wrap) ``` ```{r setup, message = FALSE} @@ -20,10 +20,10 @@ library(dplyr) # Connect to SQL server. dbcon <- DBI::dbConnect(RPostgres::Postgres(), - host = 'localhost', port = 8765, dbname = 'fort', - user = cred$pg$user, password = cred$pg$pass) + host = fabcred$pg$host, port = fabcred$pg$port, dbname = fabcred$pg$database, + user = fabcred$pg$user, password = fabcred$pg$pass) -# Call setup_phea so we can use sqlt() and sql0(). +# Setup Phea setup_phea(dbcon, 'cdm_new_york3') ``` @@ -75,8 +75,8 @@ scr_records <- union_all(scr_records_a, scr_records_b) # Make a record source. 
scr_record_source <- make_record_source(scr_records, - ts = measurement_datetime, - pid = person_id) + ts = 'measurement_datetime', + pid = 'person_id') ``` ### Glomerular filtration rate @@ -89,8 +89,8 @@ gfr_records <- sqlt(measurement) |> filter(measurement_concept_id == 46236952) gfr_record_source <- make_record_source(gfr_records, - ts = measurement_datetime, - pid = person_id) + ts = 'measurement_datetime', + pid = 'person_id') ``` ## Calculate the phenotype @@ -105,18 +105,22 @@ scr_change <- calculate_formula( # Minimum value within 48-hour window scr_48h_min = make_component(scr_record_source, - window = '48 days', .fn = 'min'), + window = "'48 days'::interval", + fn = list(value_as_number = 'min')), # Minimum value within 7-day window scr_7d_min = make_component(scr_record_source, - window = '7 months', .fn = 'min'), + window = "'7 months'::interval", + fn = list(value_as_number = 'min')), # Current glomerular filtration rate (GFR) gfr = make_component(gfr_record_source), # Glomerular filtration rate 3 to 5 months older than phenotype date gfr_prior = make_component(gfr_record_source, - delay = '3 months', window = '5 months', .fn = 'min')), + delay = "'3 months'::interval", + window = "'5 months'::interval", + fn = list(value_as_number = 'min'))), fml = list( scr_case_a = 'scr_value_as_number - scr_48h_min_value_as_number >= 0.3', @@ -132,7 +136,7 @@ scr_change <- calculate_formula( 'gfr_measurement_datetime', 'gfr_prior_measurement_datetime'), - .cascaded = FALSE, # Because we don't need to use results of prior formulas inside other formulas. + cascaded = FALSE, # Because we don't need to use results of prior formulas inside other formulas. 
) ``` diff --git a/docs/aki.html b/docs/aki.html index 6842235..7779ed7 100644 --- a/docs/aki.html +++ b/docs/aki.html @@ -325,22 +325,18 @@ el.style.width = "100%"; el.style.height = "100%"; - var rect = cel.getBoundingClientRect(); - return { - getWidth: function() { return rect.width; }, - getHeight: function() { return rect.height; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } else { el.style.width = px(sizing.width); el.style.height = px(sizing.height); - var rect = cel.getBoundingClientRect(); - return { - getWidth: function() { return rect.width; }, - getHeight: function() { return rect.height; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } } @@ -2278,10 +2274,10 @@

Kidney injury lab presentation

# Connect to SQL server. dbcon <- DBI::dbConnect(RPostgres::Postgres(), - host = 'localhost', port = 8765, dbname = 'fort', - user = cred$pg$user, password = cred$pg$pass) + host = fabcred$pg$host, port = fabcred$pg$port, dbname = fabcred$pg$database, + user = fabcred$pg$user, password = fabcred$pg$pass) -# Call setup_phea so we can use sqlt() and sql0(). +# Setup Phea setup_phea(dbcon, 'cdm_new_york3')

In this vignette we identify:

Case A.

@@ -2342,8 +2338,8 @@

Serum creatinine

# Make a record source. scr_record_source <- make_record_source(scr_records, - ts = measurement_datetime, - pid = person_id) + ts = 'measurement_datetime', + pid = 'person_id')

Glomerular filtration rate

@@ -2355,8 +2351,8 @@

Glomerular filtration rate

filter(measurement_concept_id == 46236952) gfr_record_source <- make_record_source(gfr_records, - ts = measurement_datetime, - pid = person_id)
+ ts = 'measurement_datetime', + pid = 'person_id')
@@ -2377,48 +2373,52 @@

Calculate the phenotype

# Minimum value within 48-hour window scr_48h_min = make_component(scr_record_source, - window = '48 days', .fn = 'min'), - - # Minimum value within 7-day window - scr_7d_min = make_component(scr_record_source, - window = '7 months', .fn = 'min'), - - # Current glomerular filtration rate (GFR) - gfr = make_component(gfr_record_source), - - # Glomerular filtration rate 3 to 5 months older than phenotype date - gfr_prior = make_component(gfr_record_source, - delay = '3 months', window = '5 months', .fn = 'min')), - - fml = list( - scr_case_a = 'scr_value_as_number - scr_48h_min_value_as_number >= 0.3', - - scr_case_b = 'scr_value_as_number / scr_7d_min_value_as_number >= 1.5', - - gfr_case_c = 'gfr_value_as_number < 60 AND gfr_prior_value_as_number < 60'), - - export = c( - 'scr_measurement_datetime', - 'scr_48h_min_measurement_datetime', - 'scr_7d_min_measurement_datetime', - 'gfr_measurement_datetime', - 'gfr_prior_measurement_datetime'), - - .cascaded = FALSE, # Because we don't need to use results of prior formulas inside other formulas. -)
+ window = "'48 days'::interval", + fn = list(value_as_number = 'min')), + + # Minimum value within 7-day window + scr_7d_min = make_component(scr_record_source, + window = "'7 months'::interval", + fn = list(value_as_number = 'min')), + + # Current glomerular filtration rate (GFR) + gfr = make_component(gfr_record_source), + + # Glomerular filtration rate 3 to 5 months older than phenotype date + gfr_prior = make_component(gfr_record_source, + delay = "'3 months'::interval", + window = "'5 months'::interval", + fn = list(value_as_number = 'min'))), + + fml = list( + scr_case_a = 'scr_value_as_number - scr_48h_min_value_as_number >= 0.3', + + scr_case_b = 'scr_value_as_number / scr_7d_min_value_as_number >= 1.5', + + gfr_case_c = 'gfr_value_as_number < 60 AND gfr_prior_value_as_number < 60'), + + export = c( + 'scr_measurement_datetime', + 'scr_48h_min_measurement_datetime', + 'scr_7d_min_measurement_datetime', + 'gfr_measurement_datetime', + 'gfr_prior_measurement_datetime'), + + cascaded = FALSE, # Because we don't need to use results of prior formulas inside other formulas. +)

Let us take a small peek at 15 rows from the phenotype results.

head_shot(scr_change, 15) |>
   kable()
-+-+-+@@ -2431,7 +2431,7 @@

Calculate the phenotype

- + @@ -2452,7 +2452,7 @@

Calculate the phenotype

- + @@ -2471,7 +2471,7 @@

Calculate the phenotype

- + @@ -2490,7 +2490,7 @@

Calculate the phenotype

- + @@ -2509,7 +2509,7 @@

Calculate the phenotype

- + @@ -2528,14 +2528,14 @@

Calculate the phenotype

- + - + @@ -2557,8 +2557,8 @@

Calculate the phenotype

- - + + @@ -2566,7 +2566,7 @@

Calculate the phenotype

- + @@ -2585,7 +2585,7 @@

Calculate the phenotype

- + @@ -2604,7 +2604,7 @@

Calculate the phenotype

- + @@ -2623,7 +2623,7 @@

Calculate the phenotype

- + @@ -2642,7 +2642,7 @@

Calculate the phenotype

- + @@ -2661,7 +2661,7 @@

Calculate the phenotype

- + @@ -2680,7 +2680,7 @@

Calculate the phenotype

- + @@ -2699,7 +2699,7 @@

Calculate the phenotype

- + @@ -2710,7 +2710,7 @@

Calculate the phenotype

- + @@ -2718,20 +2718,20 @@

Calculate the phenotype

- + - + - + - + @@ -2769,9 +2769,9 @@

Plot the phenotype for a random patient

mutate( scr_case_a_or_b = scr_case_a | scr_case_b) |> phea_plot(random_patient) -#> Collecting lazy table, done. (turn this message off with `verbose = FALSE`) -
- +#> Collecting lazy table, done. (turn this message off with `verbose` or `.verbose` in setup_phea()) +
+

At the end of this report I include a large table with all the data for this patient, directly from the record sources, for maximum verification.

@@ -2783,7 +2783,7 @@

Obtain the SQL query that computes the phenotype

.clip_sql option in calculate_formula().

code_shot(scr_change)
SELECT
-  "row_id",
+  "phea_row_id",
   "pid",
   "ts",
   "window",
@@ -2803,128 +2803,88 @@ 

Obtain the SQL query that computes the phenotype

FROM ( SELECT *, - greatest(scr_ts, scr_48h_min_ts, scr_7d_min_ts, gfr_ts, gfr_prior_ts) - least(scr_ts, scr_48h_min_ts, scr_7d_min_ts, gfr_ts, gfr_prior_ts) AS "window", - last_value(row_id) over (partition by "pid", "ts") AS "phea_ts_row" + greatest("scr_ts", "scr_48h_min_ts", "scr_7d_min_ts", "gfr_ts", "gfr_prior_ts") - least("scr_ts", "scr_48h_min_ts", "scr_7d_min_ts", "gfr_ts", "gfr_prior_ts") AS "window", + MAX("phea_row_id") OVER (PARTITION BY "pid", "ts") AS "phea_ts_row" FROM ( SELECT - "row_id", + row_number() over (order by "pid", "ts") AS "phea_row_id", "pid", "ts", - MAX("scr_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_1") AS "scr_measurement_datetime", - MAX("scr_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_2") AS "scr_value_as_number", - MAX("scr_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_3") AS "scr_ts", - MAX("scr_48h_min_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_4") AS "scr_48h_min_measurement_datetime", - MAX("scr_48h_min_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_5") AS "scr_48h_min_value_as_number", - MAX("scr_48h_min_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_6") AS "scr_48h_min_ts", - MAX("scr_7d_min_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_7") AS "scr_7d_min_measurement_datetime", - MAX("scr_7d_min_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_8") AS "scr_7d_min_value_as_number", - MAX("scr_7d_min_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_9") AS "scr_7d_min_ts", - MAX("gfr_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_10") AS "gfr_measurement_datetime", - MAX("gfr_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_11") AS "gfr_value_as_number", - MAX("gfr_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_12") AS "gfr_ts", - MAX("gfr_prior_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_13") AS "gfr_prior_measurement_datetime", - 
MAX("gfr_prior_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_14") AS "gfr_prior_value_as_number", - MAX("gfr_prior_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_15") AS "gfr_prior_ts" + phea_last_value_ignore_nulls(case when "name" = 'em37qa' then "measurement_datetime" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "scr_measurement_datetime", + phea_last_value_ignore_nulls(case when "name" = 'em37qa' then "value_as_number" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "scr_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 'em37qa' then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "scr_ts", + phea_last_value_ignore_nulls(case when "name" = 'em37qa' then "measurement_datetime" else null end) over (partition by "pid" order by "ts" range between '48 days'::interval preceding and current row) AS "scr_48h_min_measurement_datetime", + min(case when "name" = 'em37qa' then "value_as_number" else null end) over (partition by "pid" order by "ts" range between '48 days'::interval preceding and current row) AS "scr_48h_min_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 'em37qa' then "ts" else null end) over (partition by "pid" order by "ts" range between '48 days'::interval preceding and current row) AS "scr_48h_min_ts", + phea_last_value_ignore_nulls(case when "name" = 'em37qa' then "measurement_datetime" else null end) over (partition by "pid" order by "ts" range between '7 months'::interval preceding and current row) AS "scr_7d_min_measurement_datetime", + min(case when "name" = 'em37qa' then "value_as_number" else null end) over (partition by "pid" order by "ts" range between '7 months'::interval preceding and current row) AS "scr_7d_min_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 'em37qa' then "ts" else null end) over (partition by "pid" order by "ts" range between '7 
months'::interval preceding and current row) AS "scr_7d_min_ts", + phea_last_value_ignore_nulls(case when "name" = 'a9xeco' then "measurement_datetime" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "gfr_measurement_datetime", + phea_last_value_ignore_nulls(case when "name" = 'a9xeco' then "value_as_number" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "gfr_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 'a9xeco' then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "gfr_ts", + phea_last_value_ignore_nulls(case when "name" = 'a9xeco' then "measurement_datetime" else null end) over (partition by "pid" order by "ts" range between '5 months'::interval preceding and '3 months'::interval preceding) AS "gfr_prior_measurement_datetime", + min(case when "name" = 'a9xeco' then "value_as_number" else null end) over (partition by "pid" order by "ts" range between '5 months'::interval preceding and '3 months'::interval preceding) AS "gfr_prior_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 'a9xeco' then "ts" else null end) over (partition by "pid" order by "ts" range between '5 months'::interval preceding and '3 months'::interval preceding) AS "gfr_prior_ts" FROM ( - SELECT - *, - SUM(CASE WHEN (("scr_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_1", - SUM(CASE WHEN (("scr_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_2", - SUM(CASE WHEN (("scr_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_3", - SUM(CASE WHEN (("scr_48h_min_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS 
"..dbplyr_partion_4", - SUM(CASE WHEN (("scr_48h_min_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_5", - SUM(CASE WHEN (("scr_48h_min_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_6", - SUM(CASE WHEN (("scr_7d_min_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_7", - SUM(CASE WHEN (("scr_7d_min_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_8", - SUM(CASE WHEN (("scr_7d_min_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_9", - SUM(CASE WHEN (("gfr_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_10", - SUM(CASE WHEN (("gfr_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_11", - SUM(CASE WHEN (("gfr_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_12", - SUM(CASE WHEN (("gfr_prior_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_13", - SUM(CASE WHEN (("gfr_prior_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_14", - SUM(CASE WHEN (("gfr_prior_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_15" - FROM ( - SELECT - row_number() over () AS "row_id", - "pid", - "ts", - last_value(case when "name" = 'xl450dcw' then 
"measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "scr_measurement_datetime", - last_value(case when "name" = 'xl450dcw' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "scr_value_as_number", - last_value(case when "name" = 'xl450dcw' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "scr_ts", - min(case when "name" = 'xl450dcw' then "measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" range between '48 days'::interval preceding and current row) AS "scr_48h_min_measurement_datetime", - min(case when "name" = 'xl450dcw' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" range between '48 days'::interval preceding and current row) AS "scr_48h_min_value_as_number", - last_value(case when "name" = 'xl450dcw' then "ts" else null end) over (partition by "pid", "name" order by "ts" range between '48 days'::interval preceding and current row) AS "scr_48h_min_ts", - min(case when "name" = 'xl450dcw' then "measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" range between '7 months'::interval preceding and current row) AS "scr_7d_min_measurement_datetime", - min(case when "name" = 'xl450dcw' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" range between '7 months'::interval preceding and current row) AS "scr_7d_min_value_as_number", - last_value(case when "name" = 'xl450dcw' then "ts" else null end) over (partition by "pid", "name" order by "ts" range between '7 months'::interval preceding and current row) AS "scr_7d_min_ts", - last_value(case when "name" = 'jfawg3sk' then "measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 
current row) AS "gfr_measurement_datetime", - last_value(case when "name" = 'jfawg3sk' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "gfr_value_as_number", - last_value(case when "name" = 'jfawg3sk' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "gfr_ts", - min(case when "name" = 'jfawg3sk' then "measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" range between '5 months'::interval preceding and '3 months days'::interval preceding) AS "gfr_prior_measurement_datetime", - min(case when "name" = 'jfawg3sk' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" range between '5 months'::interval preceding and '3 months days'::interval preceding) AS "gfr_prior_value_as_number", - last_value(case when "name" = 'jfawg3sk' then "ts" else null end) over (partition by "pid", "name" order by "ts" range between '5 months'::interval preceding and '3 months days'::interval preceding) AS "gfr_prior_ts" - FROM ( - ( - SELECT - 'xl450dcw' AS "name", - "person_id" AS "pid", - "measurement_datetime" AS "ts", - "measurement_datetime", - "value_as_number" - FROM ( - ( - SELECT * - FROM "cdm_new_york3"."measurement" - WHERE ("measurement_concept_id" IN (3051825.0, 3016723.0) AND "unit_source_value" = 'mg/dL') - ) - UNION ALL - ( - SELECT - "measurement_id", - "person_id", - "measurement_concept_id", - "measurement_date", - "measurement_datetime", - "measurement_time", - "measurement_type_concept_id", - "operator_concept_id", - "value_as_number" / 88.42 AS "value_as_number", - "value_as_concept_id", - "unit_concept_id", - "range_low", - "range_high", - "provider_id", - "visit_occurrence_id", - "visit_detail_id", - "measurement_source_value", - "measurement_source_concept_id", - 'mg/dL' AS "unit_source_value", - "unit_source_concept_id", - "value_source_value", 
- "measurement_event_id", - "meas_event_field_concept_id" - FROM "cdm_new_york3"."measurement" - WHERE ("measurement_concept_id" IN (3051825.0, 3016723.0) AND "unit_source_value" = 'µmol/L') - ) - ) "q01" - ) - UNION ALL - ( - SELECT - 'jfawg3sk' AS "name", - "person_id" AS "pid", - "measurement_datetime" AS "ts", - "measurement_datetime", - "value_as_number" - FROM "cdm_new_york3"."measurement" - WHERE ("measurement_concept_id" = 46236952.0) - ) - ) "q02" - ) "q03" - ) "q04" - ) "q05" -) "q06" -WHERE ("row_id" = "phea_ts_row")
+ ( + SELECT + 'em37qa' AS "name", + "person_id" AS "pid", + "measurement_datetime" AS "ts", + "measurement_datetime", + "value_as_number" + FROM ( + ( + SELECT * + FROM "cdm_new_york3"."measurement" + WHERE ("measurement_concept_id" IN (3051825.0, 3016723.0) AND "unit_source_value" = 'mg/dL') + ) + UNION ALL + ( + SELECT + "measurement_id", + "person_id", + "measurement_concept_id", + "measurement_date", + "measurement_datetime", + "measurement_time", + "measurement_type_concept_id", + "operator_concept_id", + "value_as_number" / 88.42 AS "value_as_number", + "value_as_concept_id", + "unit_concept_id", + "range_low", + "range_high", + "provider_id", + "visit_occurrence_id", + "visit_detail_id", + "measurement_source_value", + "measurement_source_concept_id", + 'mg/dL' AS "unit_source_value", + "unit_source_concept_id", + "value_source_value", + "measurement_event_id", + "meas_event_field_concept_id" + FROM "cdm_new_york3"."measurement" + WHERE ("measurement_concept_id" IN (3051825.0, 3016723.0) AND "unit_source_value" = 'µmol/L') + ) + ) "q01" + ) + UNION ALL + ( + SELECT + 'a9xeco' AS "name", + "person_id" AS "pid", + "measurement_datetime" AS "ts", + "measurement_datetime", + "value_as_number" + FROM "cdm_new_york3"."measurement" + WHERE ("measurement_concept_id" = 46236952.0) + ) + ) "q02" + ) "q03" +) "q04" +WHERE ("phea_row_id" = "phea_ts_row")

See the raw data that went into @@ -3548,18 +3508,18 @@

Glomerular filtration rate records

- + - - + + - + - - + + @@ -3813,65 +3773,65 @@

Produce events for Atlas

- + - + - - - - - - - + - + - + - + - + - + - + + + + + + +
row_idphea_row_id pid ts window
12 1 2014-03-07 00:00:00 FALSE
24 1 2016-03-11 00:00:00 FALSE
36 1 2018-03-16 00:00:00 FALSE
128 1 2020-03-20 00:00:00 FALSE
512 1 2022-02-11 00:00:00 1.5 1.5 1.584.8132.3 NA 2022-02-11 2022-02-11 105.7 NA 2022-03-252022-02-112022-02-112022-03-252022-03-25 2022-03-25 NA FALSE FALSE
1718 3 2004-11-26 00:00:00 FALSE
4720 3 2005-12-02 00:00:00 FALSE
1922 3 2006-12-08 00:00:00 FALSE
4924 3 2007-12-14 00:00:00 FALSE
5026 3 2008-12-19 00:00:00 FALSE
2228 3 2009-12-25 00:00:00 FALSE
2330 3 2010-12-31 00:00:00 FALSE
2432 3 2011-04-15 105 days 132.3 2011-04-15 2011-04-152010-12-312011-04-15 2011-04-15 2010-12-31 FALSE FALSE
5434 3 2012-01-06371 days00:00:00 1.5 1.5 1.5 91.6132.3NA 2012-01-06 2012-01-06 2012-01-06 2012-01-062010-12-31NA FALSE FALSE FALSE mL/min/{1.73_m2}
63666402 19 2014-08-1056.6mL/min/{1.73_m2}89.2mL/min
64026366 19 2014-08-1089.2mL/min56.6mL/min/{1.73_m2}
5508
2016-01-292016-11-11 30.51.7 2000000001
2014-08-10190.72000000001
2016-11-27 19 1.6 2000000001
2018-12-16 19 0.8 2000000001
2019-11-03 19 1.0 2000000001
2020-11-22 19 0.6 2000000001
2021-11-14 19 0.8 2000000001
2021-02-21 30 0.4 2000000001
2021-02-23 30 0.3 2000000001
2021-02-24 30 0.3 2000000001
2021-02-25300.32000000001
diff --git a/docs/obese_ami.html b/docs/obese_ami.html index ed9a4e0..1b69d59 100644 --- a/docs/obese_ami.html +++ b/docs/obese_ami.html @@ -325,22 +325,18 @@ el.style.width = "100%"; el.style.height = "100%"; - var rect = cel.getBoundingClientRect(); - return { - getWidth: function() { return rect.width; }, - getHeight: function() { return rect.height; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } else { el.style.width = px(sizing.width); el.style.height = px(sizing.height); - var rect = cel.getBoundingClientRect(); - return { - getWidth: function() { return rect.width; }, - getHeight: function() { return rect.height; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } } @@ -2393,7 +2389,224 @@

Calculate the phenotype

ami_obese = 'has_ami and is_obese' ) -)
+) +#> Called from: calculate_formula(components = list(weight = weight_component, +#> height = height_component, ami = ami_component), fml = list(height_in_meters = "height_value_as_number / 100", +#> a = list(bmi = "weight_value_as_number / (height_in_meters * height_in_meters)", +#> has_ami = "ami_condition_start_datetime is not null"), +#> b = list(has_bmi = "bmi is not null", is_obese = "bmi > 30"), +#> ami_obese = "has_ami and is_obese")) +#> debug: board <- dplyr::mutate(board, window = dplyr::sql(sql_ts_greatest) - +#> dplyr::sql(sql_ts_least), phea_ts_row = dplyr::sql(sql_txt)) +#> debug: if (require_all || !is.null(dont_require)) { +#> required_components <- setdiff(names(components), dont_require) +#> if (length(required_components) > 0) { +#> sql_txt <- paste0(DBI::dbQuoteIdentifier(paste0(required_components, +#> "_ts"), conn = .pheaglobalenv$con), " is not null", +#> collapse = " and ") +#> if (has_content(window)) { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row && +#> dplyr::sql(sql_txt) && window < local(window)) +#> } +#> else { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row && +#> dplyr::sql(sql_txt)) +#> } +#> } +#> else { +#> if (has_content(window)) { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row && +#> window < local(window)) +#> } +#> else { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row) +#> } +#> } +#> } else { +#> if (has_content(window)) { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row && +#> window < local(window)) +#> } +#> else { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row) +#> } +#> } +#> debug: if (has_content(window)) { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row && +#> window < local(window)) +#> } else { +#> board <- dplyr::filter(board, phea_row_id == phea_ts_row) +#> } +#> debug: board <- dplyr::filter(board, phea_row_id == phea_ts_row) +#> debug: if (!is.null(filters) && any(!is.na(filters))) { +#> sql_txt <- paste0("(", 
paste0(filters[!is.na(filters)], collapse = ") AND ("), +#> ")") +#> board <- filter(board, sql(sql_txt)) +#> } +#> debug: if (isFALSE(is.na(limit))) board <- head(board, n = lim) +#> debug: board <- dplyr::select(board, phea_row_id, pid, ts, window, !!!g_vars) +#> debug: res_vars <- NULL +#> debug: if (!is.null(fml)) { +#> if (cascaded) { +#> for (i in seq(fml)) { +#> cur_fml <- fml[[i]] +#> if (class(cur_fml) == "list") { +#> if (any(lapply(cur_fml, class) == "list")) +#> stop("Formulas cannot be nested deeper than 1 level.") +#> res_vars <- c(res_vars, names(cur_fml)) +#> commands <- unlist(purrr::map2(names(cur_fml), +#> cur_fml, ~rlang::exprs(`:=`(!!..1, dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } +#> else { +#> res_vars <- c(res_vars, names(fml)[i]) +#> board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> } +#> } +#> } +#> else { +#> if (any(lapply(fml, class) == "list")) +#> stop("Nested formulas require cascaded = TRUE.") +#> res_vars <- c(res_vars, names(fml)) +#> commands <- unlist(purrr::map2(names(fml), fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } +#> } +#> debug: if (cascaded) { +#> for (i in seq(fml)) { +#> cur_fml <- fml[[i]] +#> if (class(cur_fml) == "list") { +#> if (any(lapply(cur_fml, class) == "list")) +#> stop("Formulas cannot be nested deeper than 1 level.") +#> res_vars <- c(res_vars, names(cur_fml)) +#> commands <- unlist(purrr::map2(names(cur_fml), cur_fml, +#> ~rlang::exprs(`:=`(!!..1, dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } +#> else { +#> res_vars <- c(res_vars, names(fml)[i]) +#> board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> } +#> } +#> } else { +#> if (any(lapply(fml, class) == "list")) +#> stop("Nested formulas require cascaded = TRUE.") +#> res_vars <- c(res_vars, names(fml)) +#> commands <- 
unlist(purrr::map2(names(fml), fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } +#> debug: for (i in seq(fml)) { +#> cur_fml <- fml[[i]] +#> if (class(cur_fml) == "list") { +#> if (any(lapply(cur_fml, class) == "list")) +#> stop("Formulas cannot be nested deeper than 1 level.") +#> res_vars <- c(res_vars, names(cur_fml)) +#> commands <- unlist(purrr::map2(names(cur_fml), cur_fml, +#> ~rlang::exprs(`:=`(!!..1, dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } +#> else { +#> res_vars <- c(res_vars, names(fml)[i]) +#> board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> } +#> } +#> debug: cur_fml <- fml[[i]] +#> debug: if (class(cur_fml) == "list") { +#> if (any(lapply(cur_fml, class) == "list")) +#> stop("Formulas cannot be nested deeper than 1 level.") +#> res_vars <- c(res_vars, names(cur_fml)) +#> commands <- unlist(purrr::map2(names(cur_fml), cur_fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } else { +#> res_vars <- c(res_vars, names(fml)[i]) +#> board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> } +#> debug: res_vars <- c(res_vars, names(fml)[i]) +#> debug: board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> debug: cur_fml <- fml[[i]] +#> debug: if (class(cur_fml) == "list") { +#> if (any(lapply(cur_fml, class) == "list")) +#> stop("Formulas cannot be nested deeper than 1 level.") +#> res_vars <- c(res_vars, names(cur_fml)) +#> commands <- unlist(purrr::map2(names(cur_fml), cur_fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } else { +#> res_vars <- c(res_vars, names(fml)[i]) +#> board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> } +#> debug: if (any(lapply(cur_fml, class) == "list")) 
stop("Formulas cannot be nested deeper than 1 level.") +#> debug: res_vars <- c(res_vars, names(cur_fml)) +#> debug: commands <- unlist(purrr::map2(names(cur_fml), cur_fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> debug: board <- dplyr::mutate(board, !!!commands) +#> debug: cur_fml <- fml[[i]] +#> debug: if (class(cur_fml) == "list") { +#> if (any(lapply(cur_fml, class) == "list")) +#> stop("Formulas cannot be nested deeper than 1 level.") +#> res_vars <- c(res_vars, names(cur_fml)) +#> commands <- unlist(purrr::map2(names(cur_fml), cur_fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } else { +#> res_vars <- c(res_vars, names(fml)[i]) +#> board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> } +#> debug: if (any(lapply(cur_fml, class) == "list")) stop("Formulas cannot be nested deeper than 1 level.") +#> debug: res_vars <- c(res_vars, names(cur_fml)) +#> debug: commands <- unlist(purrr::map2(names(cur_fml), cur_fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> debug: board <- dplyr::mutate(board, !!!commands) +#> debug: cur_fml <- fml[[i]] +#> debug: if (class(cur_fml) == "list") { +#> if (any(lapply(cur_fml, class) == "list")) +#> stop("Formulas cannot be nested deeper than 1 level.") +#> res_vars <- c(res_vars, names(cur_fml)) +#> commands <- unlist(purrr::map2(names(cur_fml), cur_fml, ~rlang::exprs(`:=`(!!..1, +#> dplyr::sql(!!..2))))) +#> board <- dplyr::mutate(board, !!!commands) +#> } else { +#> res_vars <- c(res_vars, names(fml)[i]) +#> board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> } +#> debug: res_vars <- c(res_vars, names(fml)[i]) +#> debug: board <- dplyr::mutate(board, `:=`(!!rlang::sym(names(fml)[i]), +#> dplyr::sql(cur_fml))) +#> debug: if (class(kco) == "logical") { +#> if (length(kco) > 1) +#> stop("If logical, kco must be of length 1.") +#> if (kco && !is.null(res_vars)) +#> 
board <- keep_change_of(board, res_vars, partition = "pid", +#> order = "ts") +#> } else { +#> if (class(kco) != "character") +#> stop("kco must be logical or character vector.") +#> board <- keep_change_of(board, kco, partition = "pid", order = "ts") +#> } +#> debug: if (length(kco) > 1) stop("If logical, kco must be of length 1.") +#> debug: if (kco && !is.null(res_vars)) board <- keep_change_of(board, +#> res_vars, partition = "pid", order = "ts") +#> debug: board <- dplyr::collapse(board, cte = TRUE) +#> debug: attr(board, "phea") <- "phenotype" +#> debug: attr(board, "phea_res_vars") <- res_vars +#> debug: attr(board, "phea_out_vars") <- g_vars +#> debug: if (get_sql) { +#> return(dbplyr::sql_render(board)) +#> } else { +#> return(board) +#> } +#> debug: return(board)

Keep only the records closest to each other

@@ -2426,16 +2639,16 @@

Keep only the records closest to each other

head_shot(ami_obese_f) |> kable()
- +
-+--++@@ -2444,7 +2657,7 @@

Keep only the records closest to each other

- + @@ -2623,7 +2836,7 @@

Plot the phenotype for a random patient

random_patient <- sample(ami_obese_patients, 1)message('Sampled patient: ', random_patient) -#> Sampled patient: 21270 +#> Sampled patient: 13001
ami_obese |>
   select(
     -has_bmi,
@@ -2631,8 +2844,8 @@ 

Plot the phenotype for a random patient

-ami_condition_start_datetime) |> phea_plot(pid = random_patient) #> Collecting lazy table, done. (turn this message off with `verbose` or `.verbose` in setup_phea())
-
- +
+

Obtain the SQL query that computes the phenotype

@@ -2650,7 +2863,7 @@

Obtain the SQL query that computes the phenotype

ami_condition_start_datetime is not null AS "has_ami" FROM ( SELECT - "row_id", + "phea_row_id", "pid", "ts", "window", @@ -2662,90 +2875,68 @@

Obtain the SQL query that computes the phenotype

SELECT *, greatest("weight_ts", "height_ts", "ami_ts") - least("weight_ts", "height_ts", "ami_ts") AS "window", - last_value("row_id") OVER (PARTITION BY "pid", "ts") AS "phea_ts_row" + MAX("phea_row_id") OVER (PARTITION BY "pid", "ts") AS "phea_ts_row" FROM ( SELECT - "row_id", + row_number() over (order by "pid", "ts") AS "phea_row_id", "pid", "ts", - MAX("weight_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_1") AS "weight_value_as_number", - MAX("weight_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_2") AS "weight_ts", - MAX("height_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_3") AS "height_value_as_number", - MAX("height_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_4") AS "height_ts", - MAX("ami_condition_start_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_5") AS "ami_condition_start_datetime", - MAX("ami_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_6") AS "ami_ts" + phea_last_value_ignore_nulls(case when "name" = 184746 then "value_as_number" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 184746 then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_ts", + phea_last_value_ignore_nulls(case when "name" = 476286 then "value_as_number" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "height_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 476286 then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "height_ts", + phea_last_value_ignore_nulls(case when "name" = 144573 then "condition_start_datetime" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "ami_condition_start_datetime", + phea_last_value_ignore_nulls(case when "name" = 144573 then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS 
"ami_ts" FROM ( - SELECT - *, - SUM(CASE WHEN (("weight_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_1", - SUM(CASE WHEN (("weight_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_2", - SUM(CASE WHEN (("height_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_3", - SUM(CASE WHEN (("height_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_4", - SUM(CASE WHEN (("ami_condition_start_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_5", - SUM(CASE WHEN (("ami_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_6" - FROM ( - SELECT - row_number() over (order by "pid", "ts") AS "row_id", - "pid", - "ts", - case when "name" = 'xctik8nd' then "value_as_number" else null end AS "weight_value_as_number", - case when "name" = 'xctik8nd' then "ts" else null end AS "weight_ts", - case when "name" = 'lbi4f6j9' then "value_as_number" else null end AS "height_value_as_number", - case when "name" = 'lbi4f6j9' then "ts" else null end AS "height_ts", - case when "name" = 'rjkfahms' then "condition_start_datetime" else null end AS "ami_condition_start_datetime", - case when "name" = 'rjkfahms' then "ts" else null end AS "ami_ts" - FROM ( - ( - SELECT *, NULL AS "condition_start_datetime" - FROM ( - ( - SELECT - 'xctik8nd' AS "name", - "person_id" AS "pid", - "measurement_datetime" AS "ts", - "value_as_number" - FROM "cdm_new_york3"."measurement" - WHERE ("measurement_concept_id" = 3025315.0) - ) - UNION ALL - ( - SELECT - 'lbi4f6j9' AS "name", - "person_id" AS "pid", - "measurement_datetime" AS "ts", - 
"value_as_number" - FROM "cdm_new_york3"."measurement" - WHERE ("measurement_concept_id" = 3036277.0) - ) - ) "q01" - ) - UNION ALL - ( - SELECT - "name", - "pid", - "ts", - NULL AS "value_as_number", - "condition_start_datetime" - FROM ( - SELECT - 'rjkfahms' AS "name", - "person_id" AS "pid", - "condition_start_datetime" AS "ts", - "condition_start_datetime" - FROM "cdm_new_york3"."condition_occurrence" - WHERE ("condition_concept_id" = 4329847.0) - ) "q02" - ) - ) "q03" - ) "q04" - ) "q05" - ) "q06" - ) "q07" - WHERE ("row_id" = "phea_ts_row") - ) "q08" - ) "q09" -) "q10"
+ ( + SELECT *, NULL AS "condition_start_datetime" + FROM ( + ( + SELECT + 184746 AS "name", + "person_id" AS "pid", + "measurement_datetime" AS "ts", + "value_as_number" + FROM "cdm_new_york3"."measurement" + WHERE ("measurement_concept_id" = 3025315.0) + ) + UNION ALL + ( + SELECT + 476286 AS "name", + "person_id" AS "pid", + "measurement_datetime" AS "ts", + "value_as_number" + FROM "cdm_new_york3"."measurement" + WHERE ("measurement_concept_id" = 3036277.0) + ) + ) "q01" + ) + UNION ALL + ( + SELECT + "name", + "pid", + "ts", + NULL AS "value_as_number", + "condition_start_datetime" + FROM ( + SELECT + 144573 AS "name", + "person_id" AS "pid", + "condition_start_datetime" AS "ts", + "condition_start_datetime" + FROM "cdm_new_york3"."condition_occurrence" + WHERE ("condition_concept_id" = 4329847.0) + ) "q02" + ) + ) "q03" + ) "q04" + ) "q05" + WHERE ("phea_row_id" = "phea_ts_row") + ) "q06" + ) "q07" +) "q08" diff --git a/docs/weight-increase.Rmd b/docs/weight-increase.Rmd index 4cd6098..7ecb150 100644 --- a/docs/weight-increase.Rmd +++ b/docs/weight-increase.Rmd @@ -13,17 +13,15 @@ knitr::opts_chunk$set( comment = "#>" ) -library(credx) # contains cred$pg ``` ```{r setup} library(knitr) -library(phea) +library(credx) # local package, contains dbConnectFort +# library(phea) +devtools::load_all() suppressPackageStartupMessages(library(dplyr)) -# Connect to SQL server. -dbcon <- DBI::dbConnect(RPostgres::Postgres(), - host = 'localhost', port = 8765, - dbname = 'fort', user = cred$pg$user, password = cred$pg$pass) +dbcon <- dbConnectFort() # Provide the connection to Phea so we can use the sqlt() and sql0() shorthands. 
setup_phea(dbcon, 'cdm_new_york3') @@ -43,16 +41,16 @@ While the _record source_ is the same as `weight_current`, the _component_ is di weight_record_source = sqlt(measurement) |> filter(measurement_concept_id == 3025315) |> make_record_source( - ts = measurement_datetime, - pid = person_id) + ts = 'measurement_datetime', + pid = 'person_id') # Most recent weight count record weight_current = make_component(weight_record_source) # weight count record at least 24 hours older than phenotype date, up to limit of 48 hours weight_previous = make_component(weight_record_source, - delay = '2 year', - window = '3 years') + delay = "'2 years'::interval", + window = "'3 years'::interval") ``` _`delay` picks the most recent record that is **at least as old** as the specified amount of time._ @@ -68,8 +66,8 @@ To facilitate interpretation of our phenotype, let us add the age of the patient ```{r, age} age_component <- sqlt(person) |> make_component( - .ts = birth_datetime, - .pid = person_id) + ts = 'birth_datetime', + pid = 'person_id') ``` When you supply a lazy table to `make_component()`, it silently creates a record source from it, then uses that record source to create a component. You must provide varibales `.ts` e `.pid` for that. @@ -150,7 +148,8 @@ phen |> -weight_current_measurement_datetime, -weight_previous_measurement_datetime, -person_birth_datetime) |> - phea_plot(pid = sample_patient) + phea_plot(pid = sample_patient, + titles_font_size = 8) ``` Notice that the chart for `weight_previous_value_as_number` tracks `weight_current_measurement_datetime`, but 2-3 years behind. diff --git a/docs/weight-increase.html b/docs/weight-increase.html index fe37315..a4edd73 100644 --- a/docs/weight-increase.html +++ b/docs/weight-increase.html @@ -2274,13 +2274,13 @@

Calculate increase in body weight

records shown in this example are synthetic data from Synthea Patient Generator.

library(knitr)
-library(phea)
-suppressPackageStartupMessages(library(dplyr))
-
-# Connect to SQL server.
-dbcon <- DBI::dbConnect(RPostgres::Postgres(),
-    host = 'localhost', port = 8765,
-    dbname = 'fort', user = cred$pg$user, password = cred$pg$pass)
+library(credx) # local package, contains dbConnectFort
+# library(phea)
+devtools::load_all()
+#> ℹ Loading phea
+suppressPackageStartupMessages(library(dplyr))
+
+dbcon <- dbConnectFort()
 
 # Provide the connection to Phea so we can use the sqlt() and sql0() shorthands.
 setup_phea(dbcon, 'cdm_new_york3')
@@ -2302,16 +2302,16 @@

Calculate increase in body weight

weight_record_source = sqlt(measurement) |> filter(measurement_concept_id == 3025315) |> make_record_source( - ts = measurement_datetime, - pid = person_id) + ts = 'measurement_datetime', + pid = 'person_id')# Most recent weight count recordweight_current = make_component(weight_record_source)# weight count record at least 24 hours older than phenotype date, up to limit of 48 hoursweight_previous = make_component(weight_record_source, - delay = '2 year', - window = '3 years') + delay = "'2 years'::interval", + window = "'3 years'::interval")

delay picks the most recent record that is at least as old as the specified amount of time.

@@ -2330,8 +2330,8 @@

Adding patient age

timestamp.

age_component <- sqlt(person) |>
   make_component(
-    .ts = birth_datetime,
-    .pid = person_id)
+ ts = 'birth_datetime', + pid = 'person_id')

When you supply a lazy table to make_component(), it silently creates a record source from it, then uses that record source to create a component. You must provide variables .ts and @@ -2418,7 +2418,7 @@

Filter patients whose increase is at least 50%

# Sample one patient at random, from among those with ≥50% weight increase at least once in their history.
 sample_patient <- sample(summary_per_patient$pid, 1)
 message('Sampled patient: ', sample_patient)
-#> Sampled patient: 14158
+#> Sampled patient: 21762
   
 # Plot the phenotype for the sampled patient.
 phen |>
@@ -2426,10 +2426,11 @@ 

Filter patients whose increase is at least 50%

-weight_current_measurement_datetime, -weight_previous_measurement_datetime, -person_birth_datetime) |> - phea_plot(pid = sample_patient) -#> Collecting lazy table, done. (turn this message off with `verbose = FALSE`)
-
- + phea_plot(pid = sample_patient, + titles_font_size = 8) +#> Collecting lazy table, done. (turn this message off with `verbose` or `.verbose` in setup_phea()) +
+

Notice that the chart for weight_previous_value_as_number tracks weight_current_measurement_datetime, but 2-3 years @@ -2442,134 +2443,76 @@

Obtain the SQL query that computes the phenotype

.clip_sql option in calculate_formula().

code_shot(phen)
SELECT
-  "row_id",
-  "pid",
-  "ts",
-  "window",
-  "person_birth_datetime",
-  "weight_current_value_as_number",
-  "weight_previous_value_as_number",
-  "weight_current_measurement_datetime",
-  "weight_previous_measurement_datetime",
-  "patient_age",
-  "weight_change"
-FROM (
-  SELECT
-    *,
-    lag(phea_kco_var1) OVER (PARTITION BY "pid" ORDER BY "ts") AS "phea_kco_lag1",
-    lag(phea_kco_var2) OVER (PARTITION BY "pid" ORDER BY "ts") AS "phea_kco_lag2",
-    lag(phea_kco_var3) OVER (PARTITION BY "pid" ORDER BY "ts") AS "phea_kco_lag3"
-  FROM (
-    SELECT
-      *,
-      pid AS "phea_kco_var1",
-      patient_age AS "phea_kco_var2",
-      weight_change AS "phea_kco_var3"
-    FROM (
-      SELECT
-        *,
-        (weight_current_value_as_number - weight_previous_value_as_number) /
-      weight_previous_value_as_number AS "weight_change"
-      FROM (
-        SELECT
-          "row_id",
-          "pid",
-          "ts",
-          "window",
-          "person_birth_datetime",
-          "weight_current_value_as_number",
-          "weight_previous_value_as_number",
-          "weight_current_measurement_datetime",
-          "weight_previous_measurement_datetime",
-          extract(year from age(ts, person_birth_datetime)) AS "patient_age"
-        FROM (
-          SELECT
-            *,
-            greatest(person_ts, weight_current_ts, weight_previous_ts) - least(person_ts, weight_current_ts, weight_previous_ts) AS "window",
-            last_value(row_id) over (partition by "pid", "ts") AS "phea_ts_row"
-          FROM (
-            SELECT
-              "row_id",
-              "pid",
-              "ts",
-              MAX("person_birth_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_1") AS "person_birth_datetime",
-              MAX("person_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_2") AS "person_ts",
-              MAX("weight_current_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_3") AS "weight_current_measurement_datetime",
-              MAX("weight_current_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_4") AS "weight_current_value_as_number",
-              MAX("weight_current_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_5") AS "weight_current_ts",
-              MAX("weight_previous_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_6") AS "weight_previous_measurement_datetime",
-              MAX("weight_previous_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_7") AS "weight_previous_value_as_number",
-              MAX("weight_previous_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_8") AS "weight_previous_ts"
-            FROM (
-              SELECT
-                *,
-                SUM(CASE WHEN (("person_birth_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_1",
-                SUM(CASE WHEN (("person_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_2",
-                SUM(CASE WHEN (("weight_current_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_3",
-                SUM(CASE WHEN (("weight_current_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_4",
-                SUM(CASE WHEN (("weight_current_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_5",
-                SUM(CASE WHEN (("weight_previous_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_6",
-                SUM(CASE WHEN (("weight_previous_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_7",
-                SUM(CASE WHEN (("weight_previous_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_8"
-              FROM (
-                SELECT
-                  row_number() over () AS "row_id",
-                  "pid",
-                  "ts",
-                  last_value(case when "name" = 'vztoubyfanpq' then "birth_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "person_birth_datetime",
-                  last_value(case when "name" = 'vztoubyfanpq' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "person_ts",
-                  last_value(case when "name" = 'o2typvxa9fki' then "measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "weight_current_measurement_datetime",
-                  last_value(case when "name" = 'o2typvxa9fki' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "weight_current_value_as_number",
-                  last_value(case when "name" = 'o2typvxa9fki' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "weight_current_ts",
-                  last_value(case when "name" = 'o2typvxa9fki' then "measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" range between '3 years'::interval preceding and '2 year days'::interval preceding) AS "weight_previous_measurement_datetime",
-                  last_value(case when "name" = 'o2typvxa9fki' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" range between '3 years'::interval preceding and '2 year days'::interval preceding) AS "weight_previous_value_as_number",
-                  last_value(case when "name" = 'o2typvxa9fki' then "ts" else null end) over (partition by "pid", "name" order by "ts" range between '3 years'::interval preceding and '2 year days'::interval preceding) AS "weight_previous_ts"
-                FROM (
-                  (
-                    SELECT
-                      *,
-                      NULL AS "measurement_datetime",
-                      NULL AS "value_as_number"
-                    FROM (
-                      SELECT
-                        'vztoubyfanpq' AS "name",
-                        "person_id" AS "pid",
-                        "birth_datetime" AS "ts",
-                        "birth_datetime"
-                      FROM "cdm_new_york3"."person"
-                    ) "q01"
-                  )
-                  UNION ALL
-                  (
-                    SELECT
-                      "name",
-                      "pid",
-                      "ts",
-                      NULL AS "birth_datetime",
-                      "measurement_datetime",
-                      "value_as_number"
-                    FROM (
-                      SELECT
-                        'o2typvxa9fki' AS "name",
-                        "person_id" AS "pid",
-                        "measurement_datetime" AS "ts",
-                        "measurement_datetime",
-                        "value_as_number"
-                      FROM "cdm_new_york3"."measurement"
-                      WHERE ("measurement_concept_id" = 3025315.0)
-                    ) "q02"
-                  )
-                ) "q03"
-              ) "q04"
-            ) "q05"
-          ) "q06"
-        ) "q07"
-        WHERE ("row_id" = "phea_ts_row")
-      ) "q08"
-    ) "q09"
-  ) "q10"
-) "q11"
-WHERE ((("phea_kco_lag1" IS NULL) AND NOT(("phea_kco_var1" IS NULL))) OR "phea_kco_lag1" != "phea_kco_var1" OR (("phea_kco_lag2" IS NULL) AND NOT(("phea_kco_var2" IS NULL))) OR "phea_kco_lag2" != "phea_kco_var2" OR (("phea_kco_lag3" IS NULL) AND NOT(("phea_kco_var3" IS NULL))) OR "phea_kco_lag3" != "phea_kco_var3")
+ *, + (weight_current_value_as_number - weight_previous_value_as_number) / + weight_previous_value_as_number AS "weight_change" +FROM ( + SELECT + "phea_row_id", + "pid", + "ts", + "window", + "person_birth_datetime", + "weight_current_value_as_number", + "weight_previous_value_as_number", + "weight_current_measurement_datetime", + "weight_previous_measurement_datetime", + extract(year from age(ts, person_birth_datetime)) AS "patient_age" + FROM ( + SELECT + *, + greatest("person_ts", "weight_current_ts", "weight_previous_ts") - least("person_ts", "weight_current_ts", "weight_previous_ts") AS "window", + MAX("phea_row_id") OVER (PARTITION BY "pid", "ts") AS "phea_ts_row" + FROM ( + SELECT + row_number() over (order by "pid", "ts") AS "phea_row_id", + "pid", + "ts", + phea_last_value_ignore_nulls(case when "name" = 'qamnxu' then "birth_datetime" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "person_birth_datetime", + phea_last_value_ignore_nulls(case when "name" = 'qamnxu' then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "person_ts", + phea_last_value_ignore_nulls(case when "name" = 'ywlxe9' then "measurement_datetime" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_current_measurement_datetime", + phea_last_value_ignore_nulls(case when "name" = 'ywlxe9' then "value_as_number" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_current_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 'ywlxe9' then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_current_ts", + phea_last_value_ignore_nulls(case when "name" = 'ywlxe9' then "measurement_datetime" else null end) over (partition by "pid" order by "ts" range between '3 years'::interval preceding and '2 years'::interval preceding) AS "weight_previous_measurement_datetime", + 
phea_last_value_ignore_nulls(case when "name" = 'ywlxe9' then "value_as_number" else null end) over (partition by "pid" order by "ts" range between '3 years'::interval preceding and '2 years'::interval preceding) AS "weight_previous_value_as_number", + phea_last_value_ignore_nulls(case when "name" = 'ywlxe9' then "ts" else null end) over (partition by "pid" order by "ts" range between '3 years'::interval preceding and '2 years'::interval preceding) AS "weight_previous_ts" + FROM ( + ( + SELECT *, NULL AS "measurement_datetime", NULL AS "value_as_number" + FROM ( + SELECT + 'qamnxu' AS "name", + "person_id" AS "pid", + "birth_datetime" AS "ts", + "birth_datetime" + FROM "cdm_new_york3"."person" + ) "q01" + ) + UNION ALL + ( + SELECT + "name", + "pid", + "ts", + NULL AS "birth_datetime", + "measurement_datetime", + "value_as_number" + FROM ( + SELECT + 'ywlxe9' AS "name", + "person_id" AS "pid", + "measurement_datetime" AS "ts", + "measurement_datetime", + "value_as_number" + FROM "cdm_new_york3"."measurement" + WHERE ("measurement_concept_id" = 3025315.0) + ) "q02" + ) + ) "q03" + ) "q04" + ) "q05" + WHERE ("phea_row_id" = "phea_ts_row") +) "q06" diff --git a/man/sqla.Rd b/man/sqla.Rd index 1610208..6f7339d 100644 --- a/man/sqla.Rd +++ b/man/sqla.Rd @@ -4,7 +4,7 @@ \alias{sqla} \title{SQL query with arguments} \usage{ -sqla(args, ...) +sqla(args, ..., no_check = FALSE) } \arguments{ \item{args}{}
row_idphea_row_id pid ts window