-
Notifications
You must be signed in to change notification settings - Fork 6
/
Model_Population.R
503 lines (368 loc) · 19.9 KB
/
Model_Population.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
# Population Simulation by 3D array for actuarial valuation
# Yimeng Yin
# 2/3/2015
get_Population <- function(.init_pop = init_pop,
.entrants_dist= entrants_dist,
.paramlist = paramlist,
.Global_paramlist = Global_paramlist){
## Issues
# growth of workforce(controlled by growth rate or pre-defined path of growth.)
## Inputs
# - range_ea: all possible entry ages
# - range_age: range of age
# - nyear: number of years in simulation
# - wf_growth: growth rate of the size of workforce
# - no_entrance: no new entrants into the workforce if set "TRUE". Overrides "wf_growth"
# - Decrement table: from Model_Decrements.R
# - Initial workforce for each type:
# - init_pop$actives: matrix, max ea by max age
# - init_pop$retirees: matrix, max ea by max age
## An array is created for each of the 6 status:
# (1)Active (dim = 3)
# (2)Terminated (dim = 4)
# (3)Retired (dim = 4)
# (4)Disabled (dim = 3) later will be expanded to dim = 4, with additional dim being year.disb
# (5)Dead (dim = 3) We do not really need an array for dead, what's needed is only the total number of dead.
# Run the section below when developing new features.
# .init_pop = init_pop
# .entrants_dist = entrants_dist
# .paramlist = paramlist
# .Global_paramlist = Global_paramlist
# #
#
assign_parmsList(.Global_paramlist, envir = environment())
assign_parmsList(.paramlist, envir = environment())
#*************************************************************************************************************
# Creating arrays for each status ####
#*************************************************************************************************************
## In each 3D array, dimension 1(row) represents entry age, dimension 2(column) represents attained age,
# dimension 3(depth) represents number of year, dimension 4(terms only) represents the termination year.
wf_dim <- c(length(range_ea), length(range_age), nyear)
wf_dimnames <- list(range_ea, range_age, 1:nyear)
# The array of terminated has 4 dimensions: ea x age x year x year of termination
wf_dim.term <- c(length(range_ea), length(range_age), nyear, nyear)
wf_dimnames.term <- list(range_ea, range_age, 1:nyear, 1:nyear)
# # The array of retirees has 4 dimensions: ea x age x year x year of retirement
wf_dim.retiree <- c(length(range_ea), length(range_age), nyear, nyear)
wf_dimnames.retiree <- list(range_ea, range_age, 1:nyear, 1:nyear)
wf_active <- array(0, wf_dim, dimnames = wf_dimnames)
wf_disb <- array(0, wf_dim, dimnames = wf_dimnames)
wf_dead <- array(0, wf_dim, dimnames = wf_dimnames)
wf_term <- array(0, wf_dim.term, dimnames = wf_dimnames.term)
wf_retired <- array(0, wf_dim.retiree, dimnames = wf_dimnames.retiree)
newDeath.act <- numeric(nyear)
newDeath.ret <- numeric(nyear)
newDeath.term <- numeric(nyear)
newDisb.act <- numeric(nyear)
#*************************************************************************************************************
# Setting initial population ####
#*************************************************************************************************************
# Setting inital distribution of workforce and retirees.
# Note on initial retirees: It is assumed that all initial retirees entered the workforce at age 54 and retireed in year 1.
# Altough this may produce yos greater than r.max - ea.min, it is irrelevant to the calculation since we do not care about initial retirees' yos.
wf_active[, , 1] <- .init_pop$actives
wf_retired[, , 1, 1] <- .init_pop$retirees
#*************************************************************************************************************
# Defining population dynamics ####
#*************************************************************************************************************
## Transition matrices ####
# Assume the actual decrement rates are the same as the rates in decrement tables.
# Later we may allow the actual decrement rates to differ from the assumed rates.
decrement_wf <- sapply(decrement, function(x){x[is.na(x)] <- 0; return(x)}) %>% data.frame
# Adjustment to retirement rates.
# The time line used in this model assume retirement occurs at the beginning of every period. In other words the
# switch of retirement status happens at the beginning of each period. However, the transition process here requires that
# any switch of status between t - 1(beginning) and t(beginning) must be defined in period t - 1. Hence we need to make following
# adjustment to the decrement table:
# Move qxr.a backward by 1 period.(qxr at t is now assigned to t - 1), the probability of retirement at t - 1 is lead(qxr.a(t))*(1 - qxt.a(t-1) - qxm.a(t-1) - qxd.a(t-1))
# For the age right before the max retirement age (r.max - 1), probability of retirement is 1 - qxm.a - qxd.a - qxt.a,
# which means all active members who survive all other risks at (r.max - 1) will enter the status "retired" for sure at age r.max (and collect the benefit regardless
# whether they will die at r.max)
# decrement_wf %<>% group_by(ea) %>%
# mutate(qxr.a = ifelse(age == r.max - 1,
# 1 - qxt.a - qxm.a - qxd.a,
# lead(qxr.a)*(1 - qxt.a - qxm.a - qxd.a)))
# decrement_wf %>% filter(age == 64)
# Define a function that produce transition matrices from decrement table.
make_dmat <- function(qx, df = decrement_wf) {
# inputs:
# qx: character, name of the transition probability to be created.
# df: data frame, decrement table.
# returns:
# a transtion matrix
df %<>% select_("age", "ea", qx) %>% ungroup %>% spread_("ea", qx, fill = 0) %>% select(-age) %>% t # need to keep "age" when use spread
dimnames(df) <- wf_dimnames[c(1,2)]
return(df)
}
#(p_active2term_nv <- make_dmat("qxtnv.a")) # test the function
# The transition matrices are defined below. The probabilities (eg. qxr for retirement) of flowing
# from the current status to the target status for a cell(age and ea combo) are given in the corresponding
# cell in the transtition matrices.
# Where do the active go
p_active2term <- make_dmat("qxt.a")
p_active2disb <- make_dmat("qxd.a")
p_active2dead <- make_dmat("qxm.a")
p_active2retired <- make_dmat("qxr.a")
# Where do the terminated go
p_term2dead <- make_dmat("qxm.t")
# p_term2retired <- make_dmat("qxr.v")
# Where do the disabled go
#p_disb2retired <- make_dmat("qxr.d")
p_disb2dead <- make_dmat("qxm.d")
# Where do the retired go
p_retired2dead <- make_dmat("qxm.r")
# In each iteration, a flow matrix for each possible transition(eg. active to retired) is created
# (if we wanted to track the flow in each period, we create flow arrays instead of flow matrices)
# Define the shifting matrix. When left mutiplied by a workforce matrix, it shifts each element one cell rightward(i.e. age + 1)
# A square matrix with the dimension length(range_age)
# created by a diagal matrix without 1st row and last coloumn
A <- diag(length(range_age) + 1)[-1, -(length(range_age) + 1)]
#*************************************************************************************************************
# Creating a function to calculate new entrants ####
#*************************************************************************************************************
# define function for determining the number of new entrants
calc_entrants <- function(wf0, wf1, delta, dist, no.entrants = FALSE){
# This function deterimine the number of new entrants based on workforce before and after decrement and workforce
# growth rate.
# inputs:
# wf0: a matrix of workforce before decrement. Typically a slice from wf_active
# wf1: a matrix of workforce after decrement.
# delta: growth rate of workforce
# returns:
# a matrix with the same dimension of wf0 and wf1, with the number of new entrants in the corresponding cells,
# and 0 in all other cells.
# working age
working_age <- min(range_age):(r.max - 1)
# age distribution of new entrants
# dist <- rep(1/nrow(wf0), nrow(wf0)) # equally distributed for now.
# compute the size of workforce before and after decrement
size0 <- sum(wf0[,as.character(working_age)], na.rm = TRUE)
size1 <- sum(wf1[,as.character(working_age)], na.rm = TRUE)
# computing new entrants
size_target <- size0*(1 + delta) # size of the workforce next year
size_hire <- size_target - size1 # number of workers need to hire
ne <- size_hire*dist # vector, number of new entrants by age
# Create the new entrant matrix
NE <- wf0; NE[ ,] <- 0
if (no.entrants){
return(NE)
} else {
NE[, rownames(NE)] <- diag(ne) # place ne on the matrix of new entrants
return(NE)
}
}
# test the function
# wf0 <- wf_active[, , 1]
# wf1 <- wf_active[, , 1]*(1 - p_active2term)
# sum(wf0, na.rm = T) - sum(wf1, na.rm = T)
# sum(calc_entrants(wf0, wf1, 0), na.rm = T)
#*************************************************************************************************************
# Simulating the evolution of population ####
#*************************************************************************************************************
# Now the next slice of the array (array[, , i + 1]) is defined
# wf_active[, , i + 1] <- (wf_active[, , i] + inflow_active[, , i] - outflow_active[, , i]) %*% A + wf_new[, , i + 1]
# i runs from 2 to nyear.
for (j in 1:(nyear - 1)){
#i <- 1
# compute the inflow to and outflow
active2term <- wf_active[, , j] * p_active2term # This will join wf_term[, , j + 1, j], note that workers who terminate in year j won't join the terminated group until j+1.
active2disb <- wf_active[, , j] * p_active2disb
active2dead <- wf_active[, , j] * p_active2dead
active2retired <- wf_active[, , j] * p_active2retired # This will join wf_retired[, , j + 1, j + 1].
# Where do the terminated_vested go
term2dead <- wf_term[, , j, ] * as.vector(p_term2dead) # a 3D array, each slice(3rd dim) contains the # of death in a termination age group
# Where do the retired go
retired2dead <- wf_retired[, , j, ] * as.vector(p_retired2dead) # a 3D array, each slice(3rd dim) contains the # of death in a retirement age group
# Where do the disabled go
disb2dead <- wf_disb[, , j] * p_disb2dead
# Total inflow and outflow for each status
out_active <- active2term + active2disb + active2retired + active2dead
new_entrants <- calc_entrants(wf_active[, , j], wf_active[, , j] - out_active, wf_growth, dist = .entrants_dist, no.entrants = no_entrance) # new entrants
out_term <- term2dead # This is a 3D array
in_term <- active2term # This is a matrix
out_disb <- disb2dead
in_disb <- active2disb
out_retired <- retired2dead # This is a 3D array (ea x age x year.retire)
in_retired <- active2retired # This is a matrix
in_dead <- active2dead +
apply(term2dead, c(1,2), sum) + apply(retired2dead, c(1,2), sum) + # get a matirix of ea x age by summing over year.term/year.retiree
disb2dead
# Calculate workforce for next year.
wf_active[, , j + 1] <- (wf_active[, , j] - out_active) %*% A + new_entrants
wf_term[, , j + 1, ] <- apply((wf_term[, , j, ] - out_term), 3, function(x) x %*% A) %>% array(wf_dim.term[-3])
wf_term[, , j + 1, j] <- in_term %*% A
wf_retired[, ,j + 1, ] <- apply((wf_retired[, , j, ] - out_retired), 3, function(x) x %*% A) %>% array(wf_dim.retiree[-3])
wf_retired[, , j + 1, j + 1] <- in_retired %*% A
wf_disb[, , j + 1] <- (wf_disb[, , j] + in_disb - out_disb) %*% A
wf_dead[, , j + 1] <- (wf_dead[, , j] + in_dead) %*% A
newDeath.act[j] <- sum(active2dead)
newDeath.ret[j] <- sum(retired2dead)
# newDeath.term[j] <- sum()
newDisb.act[j] <- sum(active2disb)
}
#*************************************************************************************************************
# Transform Demographic Data to Data Frames ####
#*************************************************************************************************************
## Convert 3D arrays of actives, retired and terms to data frame, to be joined by liability data frames
wf_active <- adply(wf_active, 3, function(x) {df = as.data.frame(x); df$ea = as.numeric(rownames(x));df}) %>%
rename(year = X1) %>%
gather(age, number.a, -ea, -year) %>%
mutate(year = f2n(year), age = as.numeric(age)) %>%
filter(age >= ea)
wf_retired <- data.frame(expand.grid(ea = range_ea, age = range_age, year = 1:nyear, year.retire = 1:nyear),
number.r = as.vector(wf_retired)) %>%
filter(age >= ea)
wf_term <- data.frame(expand.grid(ea = range_ea, age = range_age, year = 1:nyear, year.term = 1:nyear),
number.v = as.vector(wf_term)) %>%
filter(age >= ea)
# summarize term across termination year. Resulting data frame will join .Liab$active as part of the output.
term_reduced <- wf_term %>% group_by(year, age) %>% summarise(number.v = sum(number.v, na.rm = TRUE))
#*************************************************************************************************************
# Calculate Demographic Summary Statistics ####
#*************************************************************************************************************
# This is a digress from the main purpose of this function. Demographic statistics are useful in analyzing
# the plan characteristics, and the most convenient way to construct these statistics is by using data frames.
demo_summary <-
Reduce(merge, list(
# Average age of workforce
# Average year of service of workforce
# Average entry age of workforce
wf_active %>% group_by(year) %>% summarise(actives_age.avg = weighted.mean(age, number.a)),
wf_active %>% group_by(year) %>% summarise(actives_ea.avg = weighted.mean(ea, number.a)),
wf_active %>% group_by(year) %>% mutate(yos = age - ea) %>% summarise(actives_yos.avg = weighted.mean(yos, number.a)),
# Average age of retirees
wf_retired %>% group_by(year) %>% summarise(retirees_age.avg = weighted.mean(age, number.r)),
# Total actives, retirees and terminated members. (terms in all status, including not vested.)
wf_active %>% group_by(year) %>% summarise(tot_actives = sum(number.a)),
wf_retired %>% group_by(year) %>% summarise(tot_retirees = sum(number.r)),
wf_term %>% group_by(year) %>% summarise(tot_terms = sum(number.v)),
# Total vested terms in benefit status and total vested terms not in benefit status. Note: year - (age - ea) gives the year of entrance.
wf_term %>% filter(age >= r.full & year.term - (year - (age - ea)) >= v.yos) %>% group_by(year) %>% summarise(tot_termsBen = sum(number.v)),
wf_term %>% filter(age < r.full & year.term - (year - (age - ea)) >= v.yos) %>% group_by(year) %>% summarise(tot_termsInact = sum(number.v)),
# New entrants
wf_active %>% filter(age == ea) %>% group_by(year) %>% summarise(new_entrants = sum(number.a)),
# New retirees
wf_retired %>% filter(year == year.retire) %>% group_by(year) %>% summarise(new_retirees = sum(number.r)),
# New terms (in all status, including not vested.)
wf_term %>% filter(year == 1 | year == year.term + 1) %>% group_by(year) %>% summarise(new_terms = sum(number.v)),
# New terms in benefit status.
wf_term %>% filter(age == r.full & year.term - (year - (age - ea)) >= v.yos) %>% group_by(year) %>% summarise(new_termsBen = sum(number.v)),
# New terms in not in benefit status Note: year - (age - ea) gives the year of entrance.
wf_term %>% filter(year == 1 | (year == year.term + 1 & age < r.full & year.term - (year - (age - ea)) >= v.yos)) %>% group_by(year) %>% summarise(new_termsInact = sum(number.v)),
# Number of new death in actives and retirees
data.frame(year = 1:nyear, newDeath.act = newDeath.act),
data.frame(year = 1:nyear, newDeath.ret = newDeath.ret),
# Number of new disabled in actives
data.frame(year = 1:nyear, newDisb.act = newDisb.act)
)) %>%
mutate(# Ratios
newEnt_actives = 100 * new_entrants / tot_actives,
newRet_actives = 100 * new_retirees / tot_actives,
newTerm_actives = 100 * new_terms / tot_actives,
newTermsInact_actives = 100 * new_termsInact / tot_actives,
newTermsBen_termsInact = 100 * new_termsBen / tot_termsInact,
newDeath.act_actives = 100 * newDeath.act / tot_actives,
newDeath.ret_retirees = 100 * newDeath.ret / tot_retirees,
newDisb.act_actives = 100 * newDisb.act / tot_actives,
ar.ratio = tot_actives / tot_retirees, # Active-to-service retiree ratio
ab.ratio = tot_actives / (tot_retirees + tot_termsBen), # Active-to-beneficiary ratio
runname = runname) %>%
select(runname, everything())
return(list(active = wf_active, term = wf_term, disb = wf_disb, retired = wf_retired, dead = wf_dead,
newDeath.act = newDeath.act,
newDeath.ret = newDeath.ret,
newDisb.act = newDisb.act,
term_reduced = term_reduced,
demo_summary = demo_summary
))
}
start_time_wf <- proc.time()
pop <- get_Population()
end_time_wf <- proc.time()
Time_wf <- end_time_wf - start_time_wf
pop$active
#*************************************************************************************************************
# Checking the results ####
#*************************************************************************************************************
#
# # The workforce arrays
# wf_active
# wf_retired
# wf_term_nv
# wf_term_v
# wf_disb
# wf_dead
#
# # Test the loop with no new entrants, set no.entrants = TRUE in cal_entrants()
#
# apply(pop$active, 3, sum) +
# apply(pop$retired, 3, sum)+
# apply(pop$term, 3, sum) +
# apply(pop$disb, 3, sum) +
# apply(pop$dead, 3, sum)
#
#
# # summarizing the results
#
# # popluation by status
# apply(wf_active, 3, sum) %>% plot(type = "b")
# apply(wf_retired, 3, sum) %>% plot(type = "b")
# apply(wf_term_v, 3, sum) %>% plot(type = "b")
# apply(wf_term_nv, 3, sum) %>% plot(type = "b")
# apply(wf_disb, 3, sum) %>% plot(type = "b")
# apply(wf_dead, 3, sum) %>% plot(type = "b")
#
#
# # Total population
# (apply(wf_active, 3, sum) +
# apply(wf_retired, 3, sum) +
# apply(wf_term_v, 3, sum) +
# apply(wf_term_nv, 3, sum) +
# apply(wf_disb, 3, sum)) %>% plot(type = "b")
#
# # Look at workforce by entry age
# apply(wf_active, c(1,3), sum)
#
# # Look at workforce by age
# # options(digits = 4, scipen = 99)
# apply(wf_active, c(2,3), sum)
# # Potential problem, values for age over 65 are not exact 0s, although may be computationally equivalent to 0s.
#
#
# x <- array(0, c(3,3,3,3))
# for(i in 1:3) x[,,,i] <- i
# x[,,1,]
# x[,,1,] * as.vector(matrix(1:9, 3, 3))
#
# matrix(1, 3,3) * as.vector(matrix(1:9, 3, 3))
#
# x <- array(0, c(2,2,2,2))
# for(i in 1:2) x[,,,i] <- i
# microbenchmark(
# alply(x[,,1,], 3, function(x) (x %*% matrix(1:4, 2))) %>% unlist %>% array(c(2,2,2)),
# apply(x[,,1,], 3, function(x) (x %*% matrix(1:4, 2))) %>% array(c(2,2,2))
# )
#
#
# x[,,1,]
#
#
# x[,,1,] %>% apply(c(1,2), sum)
# wf_term[,,2,1]
# x <- data.frame(age = 40:120)
# x %>% mutate(ea = 39,
# year.retire = 1,
# year = 1,
# start.year = 1 - (age - 39),
# age.retire = age)
# # Explore shrinking workforce in AZ-PERS
#
# pop$active %>% apply(3, sum)
#
# pop$active[,"120",41] %>% sum
#
# pop$active[,,1]
#
#
# actives %>% filter(planname == "LA-CERA-43.fillin.yos", age >=75)
pop$active[,,]