Skip to content

Commit

Permalink
Added population simulation, random age generator and Halley bands.
Browse files Browse the repository at this point in the history
  • Loading branch information
nmueller18 committed Mar 10, 2024
1 parent 9bc704d commit f874f2b
Show file tree
Hide file tree
Showing 9 changed files with 463 additions and 31 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ S3method(print,mortaar_life_table)
S3method(print,mortaar_life_table_list)
export(as.mortaar_life_table)
export(as.mortaar_life_table_list)
export(halley.band)
export(is.mortaar_life_table)
export(is.mortaar_life_table_list)
export(life.table)
Expand All @@ -39,6 +40,7 @@ export(lt.population_size)
export(lt.representativity)
export(lt.reproduction)
export(lt.sexrelation)
export(pop.sim.gomp)
export(prep.life.table)
importFrom(Rdpack,reprompt)
importFrom(graphics,axis)
Expand Down
102 changes: 102 additions & 0 deletions R/halley_band.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#' Halley band of the mortality profile of a skeletal population
#'
#' In a series of papers, M. A. Luy and U. Wittwer-Backofen \emph{(2005; 2008)}
#' proposed a method they called 'Halley band' as alternative for other
#' methods of sampling from an skeletal population. It basically involves
#' sampling n times from the age-estimation of each individual and then
#' only taking the 2.5th and 97.5th percentile into account. The space
#' between they dubbed 'Halley band' but pointed out that it
#' is not to be confused with confidence intervals.
#'
#' @param x a data.frame with individuals and age estimations.
#'
#' @param n number of runs, default: 1000.
#'
#' @param uncert level of uncertainty, default: 0.95.
#'
#' @param agebeg numeric. Starting age of the respective individual.
#'
#' @param ageend numeric. Closing age of the respective individual.
#'
#' @param agerange character. Determination if the closing
#' age leaves a gap to the following age category. If yes (= "excluded"),
#' "1" is added to avoid gaps, default: "excluded".
#'
#' @return
#' One data.frame with the following items:
#'
#' \itemize{
#' \item \bold{age}: age in years.
#' \item \bold{lower_dx}: Lower boundary of uncertainty for dx.
#' \item \bold{upper_dx}: Upper boundary of uncertainty for dx.
#' \item \bold{lower_qx}: Lower boundary of uncertainty for qx.
#' \item \bold{upper_qx}: Upper boundary of uncertainty for qx.
#' \item \bold{lower_lx}: Lower boundary of uncertainty for lx.
#' \item \bold{upper_lx}: Upper boundary of uncertainty for lx.
#' }
#'
#' @references
#'
#' \insertRef{Luy_Wittwer-Backofen_2005}{mortAAR}
#'
#' \insertRef{Luy_Wittwer-Backofen_2008}{mortAAR}
#'
#' @examples
#'
#'# create simulated population with artifical coarsening first
#' pop_sim <- pop.sim.gomp(n = 1000)
#' sim_ranges <- random.cat()
#'
#' # apply random age categories to simulated ages
#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to")
#'
#' # create halley bands
#' demo <- halley.band(sim_appl, n = 1000, uncert = 0.95, agebeg = "from", ageend = "to", agerange = "excluded")
#'
#' # plot band with ggplot
#' library(ggplot2)
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_dx, ymax = upper_dx), linetype = 0, fill = "grey")
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_lx, ymax = upper_lx), linetype = 0, fill = "grey")
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_qx, ymax = upper_qx), linetype = 0, fill = "grey")

#' @rdname halley.band
#' @export
halley.band <- function(x, n = 1000, uncert = 0.95, agebeg, ageend, agerange = "excluded") {
asd <- data.frame(x)

# Change the names of agebeg and ageend for further processes to "beg" and "ende".
names(asd)[which(names(asd)==agebeg)] <- "beg"
if (!is.na(ageend)) {
names(asd)[which(names(asd)==ageend)] <- "ende"
} else {
asd$ende <- asd$beg
}

# Defines if the max of the age ranges is inclusive or exclusive.
if(agerange == "excluded"){
asd$ende = asd$ende + 1
}

low_q <- ( 1 - uncert ) / 2
up_q <- 1 - ( 1 - uncert ) / 2

demo_sim_list <- list()
for (i in 1:n) {
demo_sim_sim <- data.frame(ind = 1:nrow(asd)) %>%
dplyr::mutate(age = (round(runif(dplyr::n(), min = asd$beg, asd$ende) ) ) )
necdf <- data.frame(a = 1, demo_sim_sim %>% dplyr::group_by(age) %>% dplyr::summarize(Dx = dplyr::n()))

necdf['dx'] <- necdf['Dx'] / sum(necdf['Dx']) * 100
necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']))[1:nrow(necdf)]
necdf['qx'] <- necdf['dx'] / necdf['lx'] * 100
necdf['Ax'] <- necdf[, 'a'] / 2
necdf['Lx'] <- necdf['a']* necdf['lx'] - ((necdf['a'] - necdf['Ax']) * necdf['dx'])
demo_sim_list[[i]] <- necdf
}
demo_sim_all <- dplyr::bind_rows(demo_sim_list)
output <- demo_sim_all %>% dplyr::group_by(age) %>% dplyr::summarize(
lower_dx = quantile(dx, probs = low_q), upper_dx = quantile(dx, probs = up_q) ,
lower_qx = quantile(qx, probs = low_q), upper_qx = quantile(qx, probs = up_q) ,
lower_lx = quantile(lx, probs = low_q), upper_lx = quantile(lx, probs = up_q))
return(output)
}
52 changes: 25 additions & 27 deletions R/population_simulation.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' Simulates an adult population with Gompertz distribution
#' Simulation of a population of adults with Gompertz distribution
#'
#' In many instances, it is useful to calculate with a population with
#' known parameters. To generate a population with realistic
#' characteristics is less obvious than it seems. We operate here
#' with the Gompertz distribution which provides a reasonable
#' approximation of human mortality for adult mortality, that is
#' approximation of human mortality for \bold{adult} mortality, that is
#' for the ages >= 15 years. The user has to specify
#' either the parameter b or the modal age M. The modal age M is
#' particular useful as it provides an intuitive understanding of
Expand All @@ -13,52 +13,46 @@
#' \emph{Sasaki and Kondo 2016}. If neither is given, a population
#' with random parameters realistic for pre-modern times is generated.
#'
#' @param x number of individuals to be simulated.
#' @param n number of individuals to be simulated.
#'
#' @param b numeric, optional. Gompertz parameter controlling the
#' level of mortality.
#'
#' @param M numeric, optional. Modal age M.
#'
#' @param start_age numeric, optional. Start age, default: 15 years.
#' @param start_age numeric. Start age, default: 15 years.
#'
#' @return
#' A list of two data.frames with the following items:
#'
#' First data.frame
#' \itemize{
#' \item \bold{First data.frame}
#' \item \bold{N}: Number of individuals.
#' \item \bold{b}: Gompertz parameter controlling mortality.
#' \item \bold{b}: Gompertz parameter controlling mortality.
#' \item \bold{M}: Modal age.
#' \item \bold{a}: Gompertz parameter controlling hazard of the
#' \item \bold{a}: Gompertz parameter controlling hazard of the
#' youngest age group.
#' }
#'
#'Second data.frame
#' \itemize{
#' \item \bold{ind}: ID of individuals.
#' \item \bold{age}: Simulated absolute age.
#' }
#'
#' @references
#'
#' \insertRef{sasaki and kondo 2016}{mortAAR}
#' \insertRef{sasaki_kondo_2016}{mortAAR}
#'
#' @examples
#'
#' pop_lt <- pop.sim.gomp(10000, M = 35)

#' @rdname pop.sim.gomp
#' @export
pop.sim.gomp <- function(x, b, M, start_age) {
UseMethod("pop.sim.gomp")
}

#' @rdname pop.sim.gomp
#' @export
#' @noRd
pop.sim.gomp.default <- function(x, b, M, start_age) {
stop("x must be a numeric value.")
}
#' pop_sim <- pop.sim.gomp(n = 10000, M = 35)
#' pop_sim <- pop.sim.gomp(n = 10000, b = 0.03)
#' pop_sim <- pop.sim.gomp(n = 10000)

#' @rdname pop.sim.gomp
#' @export
#' @noRd
pop.sim.gomp.df <- function(x, b = NULL, M = NULL, start_age = 15) {
pop.sim.gomp <- function(n, b = NULL, M = NULL, start_age = 15) {
if ( length(M) > 0) {
M_1 <- 0
M_2 <- 0
Expand All @@ -71,12 +65,16 @@ pop.sim.gomp.df <- function(x, b = NULL, M = NULL, start_age = 15) {
}
} else if (length(b) > 0) {
a <- exp(rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
M <- 1 / b * log (b/a) + start_age
} else {
b <- runif(n = 1, min = 0.02, max = 0.05)
a <- exp(rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
M <- 1 / b * log (b/a) + start_age
}

lt_result <- data.frame(ind = 1:x) %>%
mutate(age = round(flexsurv::rgompertz(n(), b, a) ) + start_age)
return(lt_result)
lt_result <- data.frame(ind = 1:n) %>%
dplyr::mutate(age = round(flexsurv::rgompertz(dplyr::n(), b, a) ) + start_age)
lt_values <- data.frame(n = n, b = b, a = a, M = M, start_age = start_age)
lt_list <- list(values = lt_values, result = lt_result)
return(lt_list)
}
98 changes: 98 additions & 0 deletions R/random_categories.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#' Generation of random age ranges
#'
#' Helper function that generates random age categories of absolute ages.
#' It is mainly used together with the functions \code{pop.sim.gomp}
#' and \code{random.cat.apply}.
#'
#' @param n_cat numeric. Number of categories, default: 20.
#'
#' @param min_age numeric. Minimum age, default: 15.
#'
#' @param max_age numeric. Maximum age, default: 75.
#'
#' @param max_cat_low numeric. Lower boundary of highest age categoriy, default: 60.
#'
#' @return
#' One data.frame with the following items:
#'
#' \itemize{
#' \item \bold{from}: Lower boundary of age category.
#' \item \bold{to}: Upper boundary of age category.
#' }
#'
#' @examples
#' sim_ranges <- random.cat()

# generate random age ranges with 5 year ranges
random.cat <- function(n_cat = 20, min_age = 15, max_cat_low = 60, max_age = 75) {
n_sim_ranges <- 0
sim_ranges <- data.frame()
while (n_sim_ranges < n_cat){
range_from <- round(runif(1, min = min_age, max = max_age)/5) * 5
if(range_from > max_cat_low) {range_from <- max_cat_low}

#define probabilities
beta_1 <- range_from/40
if(beta_1 == 0){beta_1 <- 0.01}
beta_2 <- 3 - beta_1
range_probs <- dbeta(seq(1, 8, 1)/8.1, beta_1, beta_2)

range_to <- range_from + sample(seq(1, 8, 1), 1 , prob = range_probs) *5 -1
if(range_to > max_cat_low) {range_to <- max_age}
sim_ranges <- rbind(sim_ranges, data.frame(from = range_from, to = range_to))
sim_ranges <- sim_ranges %>% dplyr::distinct()
n_sim_ranges <- nrow(sim_ranges)
}
return(sim_ranges)
}



#' Applying random age ranges to individuals with absolute ages
#'
#' Helper function that applies random age categories to "known" absolute ages.
#' It is mainly used together with the functions \code{pop.sim.gomp}
#' and \code{random.cat}.
#'
#' @param x a data.frame with individual absolute ages.
#'
#' @param age_ranges a data.frame with age ranges.
#'
#' @param from numeric. Column name for the begin of an age range.
#'
#' @param to numeric. Column name for the end of an age range.
#'
#' @return
#' The original data.frame \code{x} with two additional columns:
#'
#' \itemize{
#' \item \bold{from}: Lower boundary of age category.
#' \item \bold{to}: Upper boundary of age category.
#' }
#'
#' @examples
#'
#' # Simulate population and age ranges first
#' pop_sim <- pop.sim.gomp(n = 10000)
#' sim_ranges <- random.cat()
#'
#' # apply random age categories to simulated ages
#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to")

random.cat.apply <- function(x, age, age_ranges, from, to) {
asd <- data.frame(x)
max_age <- max(age_ranges['to'])
a_r <- data.frame(from = age_ranges['from'], to = age_ranges['to'])

for (j in 1:nrow(asd)){
this_age <- asd[j,'age']
if(this_age > max_age)
{ this_age <- max_age}
possible_age_cat <- subset(a_r, from <= this_age & to >= this_age)
selected_age_index <- sample.int(nrow(possible_age_cat), 1)
selected_age_cat <- possible_age_cat[selected_age_index,]
asd$from[j] <- selected_age_cat$from
asd$to[j] <- selected_age_cat$to
}
return(asd)
}
41 changes: 37 additions & 4 deletions inst/REFERENCES.bib
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,30 @@ @article{kokkotidis_graberfeld-_1991
year = {1991}
}

@Article{Luy_Wittwer-Backofen_2005,
author = {Luy, Marc A. and Wittwer-Backofen, Ursula},
title = {{Das Halley-Band für paläodemographische Mortalitätsanalysen}},
journal = {{Zeitschrift für Bevölkerungswissenschaft}},
volume = {30},
number = {2–3},
doi = {},
pages = {219–244},
year = {2005}
}

@Incollection{Luy_Wittwer-Backofen_2008,
author = {Luy, Marc A. and Wittwer-Backofen, Ursula},
title = {{The Halley band for paleodemographic mortality analysis}},
booktitle = {{Recent advances in paleodemography. Data, techniques, patterns}},
series = {Cambridge Stud. Biol. and Evolutionary Anthr.},
volume = {31},
editor = {Bocquet-Appel, J.-P.},
publisher = {Springer},
address = {Dordrecht},
pages = {119-141},
year = {2008}
}

@book{martin_lehrbuch_1928,
location = {Jena},
edition = {2.},
Expand Down Expand Up @@ -321,8 +345,6 @@ @article{mcfadden_oxenham_2018b
year = {2018}
}



@Article{mcfadden_oxenham_2019,
author = {McFadden, Clare and Oxenham, Marc F.},
title = {{The Paleodemographic Measure of Maternal Mortality and a Multifaceted Approach to Maternal Health}},
Expand All @@ -345,8 +367,6 @@ @Article{mcfadden_et_al_2020
abstract = {}
}



@article{moghaddam_et_al_muensingen_2016,
title = {{Social stratigraphy in Late Iron Age Switzerland: stable carbon, nitrogen and sulphur isotope analysis
of human remains from Münsingen}},
Expand Down Expand Up @@ -408,6 +428,19 @@ @article{rinne_odagsen_2002
url = {http://www.jna.uni-kiel.de/index.php/jna/article/view/52}
}

@Article{sasaki_kondo_2016,
author = {Sasaki, Tomohiko and Kondo, Osamu},
title = {{An informative prior probability distribution of the gompertz parameters for bayesian approaches in paleodemography}},
journal = {{American Journal of Physical Anthropology}},
volume = {159},
number = {3},
doi = {10.1002/ajpa.22891},
pages = {523–533},
year = {2016},
}



@Incollection{scheeres_et_al_investigations_in_press,
author = {Scheeres, M. and Knipper, C. and Schoenfelder, M. and Hauschild, M. and Siebel, W. and Alt, K.W.},
title = {{Bioarchaeometric investigations (87Sr/86Sr and d18O) of the La Tène burial community of Münsingen-Rain, Switzerland}},
Expand Down
Loading

0 comments on commit f874f2b

Please sign in to comment.