-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added population simulation, random age generator and Halley bands.
- Loading branch information
1 parent
9bc704d
commit f874f2b
Showing
9 changed files
with
463 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
#' Halley band of the mortality profile of a skeletal population | ||
#' | ||
#' In a series of papers, M. A. Luy and U. Wittwer-Backofen \emph{(2005; 2008)} | ||
#' proposed a method they called 'Halley band' as alternative for other | ||
#' methods of sampling from an skeletal population. It basically involves | ||
#' sampling n times from the age-estimation of each individual and then | ||
#' only taking the 2.5th and 97.5th percentile into account. The space | ||
#' between they dubbed 'Halley band' but pointed out that it | ||
#' is not to be confused with confidence intervals. | ||
#' | ||
#' @param x a data.frame with individuals and age estimations. | ||
#' | ||
#' @param n number of runs, default: 1000. | ||
#' | ||
#' @param uncert level of uncertainty, default: 0.95. | ||
#' | ||
#' @param agebeg numeric. Starting age of the respective individual. | ||
#' | ||
#' @param ageend numeric. Closing age of the respective individual. | ||
#' | ||
#' @param agerange character. Determination if the closing | ||
#' age leaves a gap to the following age category. If yes (= "excluded"), | ||
#' "1" is added to avoid gaps, default: "excluded". | ||
#' | ||
#' @return | ||
#' One data.frame with the following items: | ||
#' | ||
#' \itemize{ | ||
#' \item \bold{age}: age in years. | ||
#' \item \bold{lower_dx}: Lower boundary of uncertainty for dx. | ||
#' \item \bold{upper_dx}: Upper boundary of uncertainty for dx. | ||
#' \item \bold{lower_qx}: Lower boundary of uncertainty for qx. | ||
#' \item \bold{upper_qx}: Upper boundary of uncertainty for qx. | ||
#' \item \bold{lower_lx}: Lower boundary of uncertainty for lx. | ||
#' \item \bold{upper_lx}: Upper boundary of uncertainty for lx. | ||
#' } | ||
#' | ||
#' @references | ||
#' | ||
#' \insertRef{Luy_Wittwer-Backofen_2005}{mortAAR} | ||
#' | ||
#' \insertRef{Luy_Wittwer-Backofen_2008}{mortAAR} | ||
#' | ||
#' @examples | ||
#' | ||
#'# create simulated population with artifical coarsening first | ||
#' pop_sim <- pop.sim.gomp(n = 1000) | ||
#' sim_ranges <- random.cat() | ||
#' | ||
#' # apply random age categories to simulated ages | ||
#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to") | ||
#' | ||
#' # create halley bands | ||
#' demo <- halley.band(sim_appl, n = 1000, uncert = 0.95, agebeg = "from", ageend = "to", agerange = "excluded") | ||
#' | ||
#' # plot band with ggplot | ||
#' library(ggplot2) | ||
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_dx, ymax = upper_dx), linetype = 0, fill = "grey") | ||
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_lx, ymax = upper_lx), linetype = 0, fill = "grey") | ||
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_qx, ymax = upper_qx), linetype = 0, fill = "grey") | ||
|
||
#' @rdname halley.band | ||
#' @export | ||
halley.band <- function(x, n = 1000, uncert = 0.95, agebeg, ageend, agerange = "excluded") { | ||
asd <- data.frame(x) | ||
|
||
# Change the names of agebeg and ageend for further processes to "beg" and "ende". | ||
names(asd)[which(names(asd)==agebeg)] <- "beg" | ||
if (!is.na(ageend)) { | ||
names(asd)[which(names(asd)==ageend)] <- "ende" | ||
} else { | ||
asd$ende <- asd$beg | ||
} | ||
|
||
# Defines if the max of the age ranges is inclusive or exclusive. | ||
if(agerange == "excluded"){ | ||
asd$ende = asd$ende + 1 | ||
} | ||
|
||
low_q <- ( 1 - uncert ) / 2 | ||
up_q <- 1 - ( 1 - uncert ) / 2 | ||
|
||
demo_sim_list <- list() | ||
for (i in 1:n) { | ||
demo_sim_sim <- data.frame(ind = 1:nrow(asd)) %>% | ||
dplyr::mutate(age = (round(runif(dplyr::n(), min = asd$beg, asd$ende) ) ) ) | ||
necdf <- data.frame(a = 1, demo_sim_sim %>% dplyr::group_by(age) %>% dplyr::summarize(Dx = dplyr::n())) | ||
|
||
necdf['dx'] <- necdf['Dx'] / sum(necdf['Dx']) * 100 | ||
necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']))[1:nrow(necdf)] | ||
necdf['qx'] <- necdf['dx'] / necdf['lx'] * 100 | ||
necdf['Ax'] <- necdf[, 'a'] / 2 | ||
necdf['Lx'] <- necdf['a']* necdf['lx'] - ((necdf['a'] - necdf['Ax']) * necdf['dx']) | ||
demo_sim_list[[i]] <- necdf | ||
} | ||
demo_sim_all <- dplyr::bind_rows(demo_sim_list) | ||
output <- demo_sim_all %>% dplyr::group_by(age) %>% dplyr::summarize( | ||
lower_dx = quantile(dx, probs = low_q), upper_dx = quantile(dx, probs = up_q) , | ||
lower_qx = quantile(qx, probs = low_q), upper_qx = quantile(qx, probs = up_q) , | ||
lower_lx = quantile(lx, probs = low_q), upper_lx = quantile(lx, probs = up_q)) | ||
return(output) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#' Generation of random age ranges | ||
#' | ||
#' Helper function that generates random age categories of absolute ages. | ||
#' It is mainly used together with the functions \code{pop.sim.gomp} | ||
#' and \code{random.cat.apply}. | ||
#' | ||
#' @param n_cat numeric. Number of categories, default: 20. | ||
#' | ||
#' @param min_age numeric. Minimum age, default: 15. | ||
#' | ||
#' @param max_age numeric. Maximum age, default: 75. | ||
#' | ||
#' @param max_cat_low numeric. Lower boundary of highest age categoriy, default: 60. | ||
#' | ||
#' @return | ||
#' One data.frame with the following items: | ||
#' | ||
#' \itemize{ | ||
#' \item \bold{from}: Lower boundary of age category. | ||
#' \item \bold{to}: Upper boundary of age category. | ||
#' } | ||
#' | ||
#' @examples | ||
#' sim_ranges <- random.cat() | ||
|
||
# generate random age ranges with 5 year ranges | ||
random.cat <- function(n_cat = 20, min_age = 15, max_cat_low = 60, max_age = 75) { | ||
n_sim_ranges <- 0 | ||
sim_ranges <- data.frame() | ||
while (n_sim_ranges < n_cat){ | ||
range_from <- round(runif(1, min = min_age, max = max_age)/5) * 5 | ||
if(range_from > max_cat_low) {range_from <- max_cat_low} | ||
|
||
#define probabilities | ||
beta_1 <- range_from/40 | ||
if(beta_1 == 0){beta_1 <- 0.01} | ||
beta_2 <- 3 - beta_1 | ||
range_probs <- dbeta(seq(1, 8, 1)/8.1, beta_1, beta_2) | ||
|
||
range_to <- range_from + sample(seq(1, 8, 1), 1 , prob = range_probs) *5 -1 | ||
if(range_to > max_cat_low) {range_to <- max_age} | ||
sim_ranges <- rbind(sim_ranges, data.frame(from = range_from, to = range_to)) | ||
sim_ranges <- sim_ranges %>% dplyr::distinct() | ||
n_sim_ranges <- nrow(sim_ranges) | ||
} | ||
return(sim_ranges) | ||
} | ||
|
||
|
||
|
||
#' Applying random age ranges to individuals with absolute ages | ||
#' | ||
#' Helper function that applies random age categories to "known" absolute ages. | ||
#' It is mainly used together with the functions \code{pop.sim.gomp} | ||
#' and \code{random.cat}. | ||
#' | ||
#' @param x a data.frame with individual absolute ages. | ||
#' | ||
#' @param age_ranges a data.frame with age ranges. | ||
#' | ||
#' @param from numeric. Column name for the begin of an age range. | ||
#' | ||
#' @param to numeric. Column name for the end of an age range. | ||
#' | ||
#' @return | ||
#' The original data.frame \code{x} with two additional columns: | ||
#' | ||
#' \itemize{ | ||
#' \item \bold{from}: Lower boundary of age category. | ||
#' \item \bold{to}: Upper boundary of age category. | ||
#' } | ||
#' | ||
#' @examples | ||
#' | ||
#' # Simulate population and age ranges first | ||
#' pop_sim <- pop.sim.gomp(n = 10000) | ||
#' sim_ranges <- random.cat() | ||
#' | ||
#' # apply random age categories to simulated ages | ||
#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to") | ||
|
||
random.cat.apply <- function(x, age, age_ranges, from, to) { | ||
asd <- data.frame(x) | ||
max_age <- max(age_ranges['to']) | ||
a_r <- data.frame(from = age_ranges['from'], to = age_ranges['to']) | ||
|
||
for (j in 1:nrow(asd)){ | ||
this_age <- asd[j,'age'] | ||
if(this_age > max_age) | ||
{ this_age <- max_age} | ||
possible_age_cat <- subset(a_r, from <= this_age & to >= this_age) | ||
selected_age_index <- sample.int(nrow(possible_age_cat), 1) | ||
selected_age_cat <- possible_age_cat[selected_age_index,] | ||
asd$from[j] <- selected_age_cat$from | ||
asd$to[j] <- selected_age_cat$to | ||
} | ||
return(asd) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.