Added population simulation, random age generator and Halley bands.

ISAAKiel · Mar 10, 2024 · f874f2b · f874f2b
1 parent 9bc704d
commit f874f2b
Show file tree

Hide file tree

Showing 9 changed files with 463 additions and 31 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -30,6 +30,7 @@ S3method(print,mortaar_life_table)
 S3method(print,mortaar_life_table_list)
 export(as.mortaar_life_table)
 export(as.mortaar_life_table_list)
+export(halley.band)
 export(is.mortaar_life_table)
 export(is.mortaar_life_table_list)
 export(life.table)
@@ -39,6 +40,7 @@ export(lt.population_size)
 export(lt.representativity)
 export(lt.reproduction)
 export(lt.sexrelation)
+export(pop.sim.gomp)
 export(prep.life.table)
 importFrom(Rdpack,reprompt)
 importFrom(graphics,axis)

diff --git a/R/halley_band.R b/R/halley_band.R
@@ -0,0 +1,102 @@
+#' Halley band of the mortality profile of a skeletal population
+#'
+#' In a series of papers, M. A. Luy and U. Wittwer-Backofen \emph{(2005; 2008)}
+#' proposed a method they called 'Halley band' as alternative for other
+#' methods of sampling from an skeletal population. It basically involves
+#' sampling n times from the age-estimation of each individual and then
+#' only taking the 2.5th and 97.5th percentile into account. The space
+#' between they dubbed 'Halley band' but pointed out that it
+#' is not to be confused with confidence intervals.
+#'
+#' @param x a data.frame with individuals and age estimations.
+#'
+#' @param n number of runs, default: 1000.
+#'
+#' @param uncert level of uncertainty, default: 0.95.
+#'
+#' @param agebeg numeric. Starting age of the respective individual.
+#'
+#' @param ageend numeric. Closing age of the respective individual.
+#'
+#' @param agerange character. Determination if the closing
+#' age leaves a gap to the following age category. If yes (= "excluded"),
+#' "1" is added to avoid gaps, default: "excluded".
+#'
+#' @return
+#' One data.frame with the following items:
+#'
+#' \itemize{
+#'   \item \bold{age}:  age in years.
+#'   \item \bold{lower_dx}: Lower boundary of uncertainty for dx.
+#'   \item \bold{upper_dx}: Upper boundary of uncertainty for dx.
+#'   \item \bold{lower_qx}: Lower boundary of uncertainty for qx.
+#'   \item \bold{upper_qx}: Upper boundary of uncertainty for qx.
+#'   \item \bold{lower_lx}: Lower boundary of uncertainty for lx.
+#'   \item \bold{upper_lx}: Upper boundary of uncertainty for lx.
+#'  }
+#'
+#' @references
+#'
+#' \insertRef{Luy_Wittwer-Backofen_2005}{mortAAR}
+#'
+#' \insertRef{Luy_Wittwer-Backofen_2008}{mortAAR}
+#'
+#' @examples
+#'
+#'# create simulated population with artifical coarsening first
+#' pop_sim <- pop.sim.gomp(n = 1000)
+#' sim_ranges <- random.cat()
+#'
+#' # apply random age categories to simulated ages
+#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to")
+#'
+#' # create halley bands
+#' demo <- halley.band(sim_appl, n = 1000, uncert = 0.95, agebeg = "from", ageend = "to", agerange = "excluded")
+#'
+#' # plot band with ggplot
+#' library(ggplot2)
+#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_dx, ymax = upper_dx), linetype = 0, fill = "grey")
+#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_lx, ymax = upper_lx), linetype = 0, fill = "grey")
+#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_qx, ymax = upper_qx), linetype = 0, fill = "grey")
+
+#' @rdname halley.band
+#' @export
+halley.band <- function(x, n = 1000, uncert = 0.95, agebeg, ageend, agerange = "excluded") {
+  asd <- data.frame(x)
+
+  # Change the names of agebeg and ageend for further processes to "beg" and "ende".
+  names(asd)[which(names(asd)==agebeg)] <- "beg"
+  if (!is.na(ageend)) {
+    names(asd)[which(names(asd)==ageend)] <- "ende"
+  } else {
+    asd$ende <- asd$beg
+  }
+
+  # Defines if the max of the age ranges is inclusive or exclusive.
+  if(agerange == "excluded"){
+    asd$ende = asd$ende + 1
+  }
+
+  low_q <- ( 1 - uncert ) / 2
+  up_q <- 1 - ( 1 - uncert ) / 2
+
+  demo_sim_list <- list()
+  for (i in 1:n) {
+    demo_sim_sim <- data.frame(ind = 1:nrow(asd)) %>%
+      dplyr::mutate(age = (round(runif(dplyr::n(), min = asd$beg, asd$ende) ) ) )
+    necdf <- data.frame(a = 1, demo_sim_sim %>% dplyr::group_by(age) %>% dplyr::summarize(Dx = dplyr::n()))
+
+    necdf['dx'] <- necdf['Dx'] / sum(necdf['Dx']) * 100
+    necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']))[1:nrow(necdf)]
+    necdf['qx'] <- necdf['dx'] / necdf['lx'] * 100
+    necdf['Ax'] <- necdf[, 'a'] / 2
+    necdf['Lx'] <- necdf['a']* necdf['lx'] - ((necdf['a'] - necdf['Ax']) * necdf['dx'])
+    demo_sim_list[[i]] <- necdf
+  }
+  demo_sim_all <- dplyr::bind_rows(demo_sim_list)
+  output <- demo_sim_all %>% dplyr::group_by(age) %>% dplyr::summarize(
+    lower_dx = quantile(dx, probs = low_q), upper_dx = quantile(dx, probs = up_q) ,
+    lower_qx = quantile(qx, probs = low_q), upper_qx = quantile(qx, probs = up_q) ,
+    lower_lx = quantile(lx, probs = low_q), upper_lx = quantile(lx, probs = up_q))
+  return(output)
+}
diff --git a/R/population_simulation.R b/R/population_simulation.R
@@ -1,10 +1,10 @@
-#' Simulates an adult population with Gompertz distribution
+#' Simulation of a population of adults with Gompertz distribution
 #'
 #' In many instances, it is useful to calculate with a population with
 #' known parameters. To generate a population with realistic
 #' characteristics is less obvious than it seems. We operate here
 #' with the Gompertz distribution which provides a reasonable
-#' approximation of human mortality for adult mortality, that is
+#' approximation of human mortality for \bold{adult} mortality, that is
 #' for the ages >= 15 years. The user has to specify
 #' either the parameter b or the modal age M. The modal age M is
 #' particular useful as it provides an intuitive understanding of
@@ -13,52 +13,46 @@
 #' \emph{Sasaki and Kondo 2016}. If neither is given, a population
 #' with random parameters realistic for pre-modern times is generated.
 #'
-#' @param x number of individuals to be simulated.
+#' @param n number of individuals to be simulated.
 #'
 #' @param b numeric, optional. Gompertz parameter controlling the
 #' level of mortality.
 #'
 #' @param M numeric, optional. Modal age M.
 #'
-#' @param start_age numeric, optional. Start age, default: 15 years.
+#' @param start_age numeric. Start age, default: 15 years.
 #'
 #' @return
 #' A list of two data.frames with the following items:
 #'
+#' First data.frame
 #' \itemize{
-#'  \item \bold{First data.frame}
 #'   \item \bold{N}:  Number of individuals.
-#'   \item \bold{b}: Gompertz parameter controlling mortality.
+#'   \item \bold{b}:  Gompertz parameter controlling mortality.
 #'   \item \bold{M}:  Modal age.
-#'   \item \bold{a}: Gompertz parameter controlling hazard of the
+#'   \item \bold{a}:  Gompertz parameter controlling hazard of the
 #'   youngest age group.
 #'  }
 #'
+#'Second data.frame
+#' \itemize{
+#'   \item \bold{ind}:  ID of individuals.
+#'   \item \bold{age}:  Simulated absolute age.
+#'  }
+#'
 #' @references
 #'
-#' \insertRef{sasaki and kondo 2016}{mortAAR}
+#' \insertRef{sasaki_kondo_2016}{mortAAR}
 #'
 #' @examples
 #'
-#' pop_lt <- pop.sim.gomp(10000, M = 35)
-
-#' @rdname pop.sim.gomp
-#' @export
-pop.sim.gomp <- function(x, b, M, start_age) {
-  UseMethod("pop.sim.gomp")
-}
-
-#' @rdname pop.sim.gomp
-#' @export
-#' @noRd
-pop.sim.gomp.default <- function(x, b, M, start_age) {
-  stop("x must be a numeric value.")
-}
+#' pop_sim <- pop.sim.gomp(n = 10000, M = 35)
+#' pop_sim <- pop.sim.gomp(n = 10000, b = 0.03)
+#' pop_sim <- pop.sim.gomp(n = 10000)
 
 #' @rdname pop.sim.gomp
 #' @export
-#' @noRd
-pop.sim.gomp.df <- function(x, b = NULL, M = NULL, start_age = 15) {
+pop.sim.gomp <- function(n, b = NULL, M = NULL, start_age = 15) {
   if ( length(M) > 0) {
     M_1 <- 0
     M_2 <- 0
@@ -71,12 +65,16 @@ pop.sim.gomp.df <- function(x, b = NULL, M = NULL, start_age = 15) {
     }
   } else if (length(b) > 0) {
     a <- exp(rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
+    M <- 1 / b * log (b/a) + start_age
   } else {
     b <- runif(n = 1, min = 0.02, max = 0.05)
     a <- exp(rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
+    M <- 1 / b * log (b/a) + start_age
   }
 
-  lt_result <- data.frame(ind = 1:x) %>%
-    mutate(age = round(flexsurv::rgompertz(n(), b, a) ) + start_age)
-  return(lt_result)
+  lt_result <- data.frame(ind = 1:n) %>%
+    dplyr::mutate(age = round(flexsurv::rgompertz(dplyr::n(), b, a) ) + start_age)
+  lt_values <- data.frame(n = n, b = b, a = a, M = M, start_age = start_age)
+  lt_list <- list(values = lt_values, result = lt_result)
+  return(lt_list)
 }
diff --git a/R/random_categories.R b/R/random_categories.R
@@ -0,0 +1,98 @@
+#' Generation of random age ranges
+#'
+#' Helper function that generates random age categories of absolute ages.
+#' It is mainly used together with the functions \code{pop.sim.gomp}
+#' and \code{random.cat.apply}.
+#'
+#' @param n_cat numeric. Number of categories, default: 20.
+#'
+#' @param min_age numeric. Minimum age, default: 15.
+#'
+#' @param max_age numeric. Maximum age, default: 75.
+#'
+#' @param max_cat_low numeric. Lower boundary of highest age categoriy, default: 60.
+#'
+#' @return
+#' One data.frame with the following items:
+#'
+#' \itemize{
+#'   \item \bold{from}:  Lower boundary of age category.
+#'   \item \bold{to}: Upper boundary of age category.
+#'  }
+#'
+#' @examples
+#' sim_ranges <- random.cat()
+
+# generate random age ranges with 5 year ranges
+random.cat <- function(n_cat = 20, min_age = 15, max_cat_low = 60, max_age = 75) {
+  n_sim_ranges <- 0
+  sim_ranges <- data.frame()
+  while (n_sim_ranges < n_cat){
+    range_from <- round(runif(1, min = min_age, max = max_age)/5) * 5
+    if(range_from > max_cat_low) {range_from <- max_cat_low}
+
+    #define probabilities
+    beta_1 <- range_from/40
+    if(beta_1 == 0){beta_1 <- 0.01}
+    beta_2 <- 3 - beta_1
+    range_probs <- dbeta(seq(1, 8, 1)/8.1, beta_1, beta_2)
+
+    range_to <- range_from + sample(seq(1, 8, 1), 1 , prob = range_probs) *5 -1
+    if(range_to > max_cat_low) {range_to <- max_age}
+    sim_ranges <- rbind(sim_ranges, data.frame(from = range_from, to = range_to))
+    sim_ranges <- sim_ranges %>% dplyr::distinct()
+    n_sim_ranges <- nrow(sim_ranges)
+  }
+  return(sim_ranges)
+}
+
+
+
+#' Applying random age ranges to individuals with absolute ages
+#'
+#' Helper function that applies random age categories to "known" absolute ages.
+#' It is mainly used together with the functions \code{pop.sim.gomp}
+#' and \code{random.cat}.
+#'
+#' @param x a data.frame with individual absolute ages.
+#'
+#' @param age_ranges a data.frame with age ranges.
+#'
+#' @param from numeric. Column name for the begin of an age range.
+#'
+#' @param to numeric. Column name for the end of an age range.
+#'
+#' @return
+#' The original data.frame \code{x} with two additional columns:
+#'
+#' \itemize{
+#'   \item \bold{from}:  Lower boundary of age category.
+#'   \item \bold{to}: Upper boundary of age category.
+#'  }
+#'
+#' @examples
+#'
+#' # Simulate population and age ranges first
+#' pop_sim <- pop.sim.gomp(n = 10000)
+#' sim_ranges <- random.cat()
+#'
+#' # apply random age categories to simulated ages
+#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to")
+
+random.cat.apply <- function(x, age, age_ranges, from, to) {
+  asd <- data.frame(x)
+  max_age <- max(age_ranges['to'])
+  a_r <- data.frame(from = age_ranges['from'], to = age_ranges['to'])
+
+  for (j in 1:nrow(asd)){
+    this_age <- asd[j,'age']
+    if(this_age > max_age)
+    { this_age <- max_age}
+    possible_age_cat <- subset(a_r, from <= this_age & to >= this_age)
+    selected_age_index <- sample.int(nrow(possible_age_cat), 1)
+    selected_age_cat <- possible_age_cat[selected_age_index,]
+    asd$from[j] <- selected_age_cat$from
+    asd$to[j] <- selected_age_cat$to
+  }
+  return(asd)
+}
diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
@@ -282,6 +282,30 @@ @article{kokkotidis_graberfeld-_1991
 	year = {1991}
 }
 
+@Article{Luy_Wittwer-Backofen_2005,
+author = {Luy, Marc A. and Wittwer-Backofen, Ursula},
+title = {{Das Halley-Band für paläodemographische Mortalitätsanalysen}},
+journal = {{Zeitschrift für Bevölkerungswissenschaft}},
+volume = {30},
+number = {2–3},
+doi = {},
+pages = {219–244},
+year = {2005}
+}
+
+@Incollection{Luy_Wittwer-Backofen_2008,
+author = {Luy, Marc A. and Wittwer-Backofen, Ursula},
+title = {{The Halley band for paleodemographic mortality analysis}},
+booktitle = {{Recent advances in paleodemography. Data, techniques, patterns}},
+series = {Cambridge Stud. Biol. and Evolutionary Anthr.},
+volume = {31},
+editor = {Bocquet-Appel, J.-P.},
+publisher = {Springer},
+address = {Dordrecht},
+pages = {119-141},
+year = {2008}
+}
+
 @book{martin_lehrbuch_1928,
 	location = {Jena},
 	edition = {2.},
@@ -321,8 +345,6 @@ @article{mcfadden_oxenham_2018b
 year = {2018}
 }
 
-
-
 @Article{mcfadden_oxenham_2019,
 author = {McFadden, Clare and Oxenham, Marc F.},
 title = {{The Paleodemographic Measure of Maternal Mortality and a Multifaceted Approach to Maternal Health}},
@@ -345,8 +367,6 @@ @Article{mcfadden_et_al_2020
 abstract = {}
 }
 
-
-
 @article{moghaddam_et_al_muensingen_2016,
   title = {{Social stratigraphy in Late Iron Age Switzerland: stable carbon, nitrogen and sulphur isotope analysis
   of human remains from Münsingen}},
@@ -408,6 +428,19 @@ @article{rinne_odagsen_2002
 	url = {http://www.jna.uni-kiel.de/index.php/jna/article/view/52}
 }
 
+@Article{sasaki_kondo_2016,
+author = {Sasaki, Tomohiko and Kondo, Osamu},
+title = {{An informative prior probability distribution of the gompertz parameters for bayesian approaches in paleodemography}},
+journal = {{American Journal of Physical Anthropology}},
+volume = {159},
+number = {3},
+doi = {10.1002/ajpa.22891},
+pages = {523–533},
+year = {2016},
+}
+
+
+
 @Incollection{scheeres_et_al_investigations_in_press,
 author = {Scheeres, M. and Knipper, C. and Schoenfelder, M. and Hauschild, M. and Siebel, W. and Alt, K.W.},
 title = {{Bioarchaeometric investigations (87Sr/86Sr and d18O) of the La Tène burial community of Münsingen-Rain, Switzerland}},