From dcf42c3b3a737ea4401a25bd2bdfcaffd4e84ad5 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 27 May 2020 17:23:13 +1000 Subject: [PATCH 01/29] add mlogit stuffs --- DESCRIPTION | 1 + R/ModelMLogit.R | 32 +++++++++++++++++++ R/Transition.R | 2 +- R/TransitionClassification.R | 18 +++++++++++ tests/testthat/test-ModelMlogit.R | 31 ++++++++++++++++++ .../testthat/test-TransitionClassification.R | 15 +++++++++ 6 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 R/ModelMLogit.R create mode 100644 tests/testthat/test-ModelMlogit.R diff --git a/DESCRIPTION b/DESCRIPTION index 277f6dfd..02e286bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Suggests: testthat (>= 2.1.0), fastmatch (>= 1.1.0), caret (>= 6.0), + mlogit (>= 1.0), pryr (>= 0.1.4), sf (>= 0.8), dodgr (>= 0.2.5), diff --git a/R/ModelMLogit.R b/R/ModelMLogit.R new file mode 100644 index 00000000..27bb180f --- /dev/null +++ b/R/ModelMLogit.R @@ -0,0 +1,32 @@ +ModelMlogit <- R6::R6Class( + classname = "ModelMlogit", + inherit = Model, + public = list( + simulate = function(world = NULL, chooser_data, alternative_data, chooser_id_col, alternative_id_col, chosen_col, n_choices = 30) { + checkmate::assert_r6(world, classes = "World", null.ok = TRUE) + checkmate::assert_r6(self, classes = "Model") + checkmate::assert_r6(private, classes = "Model") + checkmate::assert_data_table(chooser_data) + checkmate::assert_data_table(alternative_data) + checkmate::assert_names(names(chooser_data), must.include = c(chooser_id_col, chosen_col)) + checkmate::assert_names(names(alternative_data), must.include = alternative_id_col) + checkmate::assert_count(n_choices, positive = T, null.ok = FALSE, na.ok = FALSE) + } + ) +) + +# To construct choiceset for mlogit see https://stackoverflow.com/questions/51458223/multinomial-logit-estimation-on-a-subset-of-alternatives-in-r + +# To manually calculate the probs see https://monashdatafluency.github.io/r-linear/topics/linear_models.html +# This maybe requires as mlogit's predict only returns a data.frame with all columns +# that correspond to the universal choiceset. + + +# } + + +simulate( + preprocess = list(do1, do2, do3), + subset = +) + diff --git a/R/Transition.R b/R/Transition.R index ccb780e8..99213837 100644 --- a/R/Transition.R +++ b/R/Transition.R @@ -373,7 +373,7 @@ SupportedTransitionModels <- function() { #' @rdname SupportedTransitionModels #' @export get_supported_models <- function() { - return(c("train", "list", "data.table", "numeric", "glm", "lm")) + return(c("train", "list", "data.table", "numeric", "glm", "lm", "mlogit")) } monte_carlo_sim <- function(prediction, target) { diff --git a/R/TransitionClassification.R b/R/TransitionClassification.R index 4f3eff51..4786780e 100644 --- a/R/TransitionClassification.R +++ b/R/TransitionClassification.R @@ -201,6 +201,24 @@ TransitionClassification <- R6Class( ) ) +simulate_classification_mlogit <- function(self, private) { + # +.simulate.mlogit <- function(self, private, chooser_data, alternative_data, chooser_id_col, alternative_id_col, chosen_col) { + # checkmate::assert_r6(self, classes = "Model") + # checkmate::assert_r6(private, classes = "Model") + # checkmate::assert_data_table(chooser_data) + # checkmate::assert_data_table(alternative_data) + # checkmate::assert_names(names(chooser_data), must.include = c(chooser_id_col, chosen_col)) + # checkmate::assert_names(names(alternative_data), must.include = alternative_id_col) + # + # To construct choiceset for mlogit see https://stackoverflow.com/questions/51458223/multinomial-logit-estimation-on-a-subset-of-alternatives-in-r + + # To manually calculate the probs see https://monashdatafluency.github.io/r-linear/topics/linear_models.html + # This maybe requires as mlogit's predict only returns a data.frame with all columns + # that correspond to the universal choiceset. + + + # } +} # Simulate classification functions ---------------------------------------------------------- simulate_classification_train <- function(self, private) { diff --git a/tests/testthat/test-ModelMlogit.R b/tests/testthat/test-ModelMlogit.R new file mode 100644 index 00000000..dfbf718f --- /dev/null +++ b/tests/testthat/test-ModelMlogit.R @@ -0,0 +1,31 @@ +test_that("multiplication works", { + if (require("mlogit")) { + + toy_individual_mlogit_data <- + data.table::CJ(pid = toy_individuals[["pid"]], + choice_var = toy_individuals[, unique(marital_status)]) %>% + merge(., toy_individuals, by = "pid") %>% + .[, chosen := ifelse(choice_var == marital_status, TRUE, FALSE)] %>% + mlogit::mlogit.data( + data = ., + choice = "chosen", + shape = "long", + id.var = "pid", + alt.var = "choice_var" + ) + + m <- mlogit::mlogit(chosen ~ 0 | sex + age, data = toy_individual_mlogit_data) + checkmate::expect_matrix(predict(m, toy_individual_mlogit_data)) + mnl <- ModelMlogit$new(m) + mnl$simulate() + + + #' an equipvalent model in nnet + # library("nnet") + # mnn <- multinom(marital_status ~ age + sex, data = toy_individuals) + # predict(mnn, toy_individuals, type = "prob") %>% colMeans() + + } + + +}) diff --git a/tests/testthat/test-TransitionClassification.R b/tests/testthat/test-TransitionClassification.R index 2f9a6ab5..5d1ff847 100644 --- a/tests/testthat/test-TransitionClassification.R +++ b/tests/testthat/test-TransitionClassification.R @@ -304,3 +304,18 @@ test_that("dynamic target", { regexp = "Must be a subset of set \\{yes,no\\}.") }) + +# mlogit ---------------- +test_that("mlogit", { + if (require(mlogit)) { + create_toy_world() + + dwl_data <- world$entities$BuildingResidential$get_data() + dwl_data + + + + residential_location <- mlogit() + + } +}) From a256e0145aa07f563ed9692b3d47743dc505296a Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Thu, 28 May 2020 02:18:42 +1000 Subject: [PATCH 02/29] create ModelCustom --- NAMESPACE | 2 + NEWS.md | 1 + R/ModelMLogit.R | 9 --- R/utils.R | 33 +++++++++++ _pkgdown.yml | 2 +- man/ModelCustom.Rd | 11 ++++ man/unnest_dt.Rd | 22 ++++++++ tests/testthat/test-ModelCustom.R | 55 +++++++++++++++++++ tests/testthat/test-ModelMlogit.R | 31 ----------- .../testthat/test-TransitionClassification.R | 15 ----- tests/testthat/test-utils.R | 15 +++++ 11 files changed, 140 insertions(+), 56 deletions(-) create mode 100644 man/ModelCustom.Rd create mode 100644 man/unnest_dt.Rd create mode 100644 tests/testthat/test-ModelCustom.R delete mode 100644 tests/testthat/test-ModelMlogit.R diff --git a/NAMESPACE b/NAMESPACE index fb44c16c..d6087006 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,7 @@ export(MatchingMarket) export(MatchingMarketOptimal) export(MatchingMarketStochastic) export(Model) +export(ModelCustom) export(Network) export(Pipeline) export(Population) @@ -119,6 +120,7 @@ export(test_target) export(test_transition_supported_model) export(transition) export(unnest_datatable) +export(unnest_dt) export(use_event) export(use_module) export(use_module_readme) diff --git a/NEWS.md b/NEWS.md index b8966bc4..3c1d5d07 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # dymiumCore (development version) +- introduced `ModelCustom` a model class that let users specify its parameters and `predict` function. # dymiumCore 0.1.8 - Added `sim()` for compiling and executing a microsimulation pipeline. diff --git a/R/ModelMLogit.R b/R/ModelMLogit.R index 27bb180f..a6d4bf67 100644 --- a/R/ModelMLogit.R +++ b/R/ModelMLogit.R @@ -15,18 +15,9 @@ ModelMlogit <- R6::R6Class( ) ) -# To construct choiceset for mlogit see https://stackoverflow.com/questions/51458223/multinomial-logit-estimation-on-a-subset-of-alternatives-in-r -# To manually calculate the probs see https://monashdatafluency.github.io/r-linear/topics/linear_models.html -# This maybe requires as mlogit's predict only returns a data.frame with all columns -# that correspond to the universal choiceset. # } -simulate( - preprocess = list(do1, do2, do3), - subset = -) - diff --git a/R/utils.R b/R/utils.R index e503ccbc..058185a8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -439,3 +439,36 @@ get_current_git_branch <- function() { gsub("\\*|\\ ", "", .) return(current_branch) } + +#' Unnest data.table +#' +#' !! Note that this is only working when all list columns in the given +#' data.frame are to be unnested. See https://github.com/dymium-org/dymiumCore/issues/79. +#' +#' @param dt :: (`data.frame()`)\cr +#' a data.frame object. +#' @param cols :: (`character()`)\cr +#' a character vector denoting columns to be unnested. +#' +#' @return a data.table +#' @export +unnest_dt <- function(dt, cols) { + + if (!is.data.frame(dt)) { + stop("`dt` must be a data.frame or a data.table.") + } + + if (!is.data.table(dt)) { + dt <- as.data.table(dt) + } + + clnms <- setdiff(colnames(dt), cols) + + dt <- eval( + rlang::expr(dt[, lapply(.SD, unlist), by = c(clnms), .SDcols = cols]) + ) + + colnames(dt) <- c(as.character(clnms), as.character(cols)) + + dt +} diff --git a/_pkgdown.yml b/_pkgdown.yml index dab1e068..872d6a73 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -68,7 +68,7 @@ reference: - Container - World - Population - - Model + - matches("^Model.") - pop_register - household_formation - title: Simulation tools & Alignment diff --git a/man/ModelCustom.Rd b/man/ModelCustom.Rd new file mode 100644 index 00000000..67bc5b3f --- /dev/null +++ b/man/ModelCustom.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Model.R +\name{ModelCustom} +\alias{ModelCustom} +\title{ModelCustom} +\format{ +\link[R6:R6Class]{R6::R6Class} object inheriting \link{Model}. +} +\description{ +ModelCustom +} diff --git a/man/unnest_dt.Rd b/man/unnest_dt.Rd new file mode 100644 index 00000000..2b7df343 --- /dev/null +++ b/man/unnest_dt.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{unnest_dt} +\alias{unnest_dt} +\title{Unnest data.table} +\usage{ +unnest_dt(dt, cols) +} +\arguments{ +\item{dt}{:: (\code{data.frame()})\cr +a data.frame object.} + +\item{cols}{:: (\code{character()})\cr +a character vector denoting columns to be unnested.} +} +\value{ +a data.table +} +\description{ +!! Note that this is only working when all list columns in the given +data.frame are to be unnested. See https://github.com/dymium-org/dymiumCore/issues/79. +} diff --git a/tests/testthat/test-ModelCustom.R b/tests/testthat/test-ModelCustom.R new file mode 100644 index 00000000..c24c5ab6 --- /dev/null +++ b/tests/testthat/test-ModelCustom.R @@ -0,0 +1,55 @@ +test_that("ModelCustom - linear regression and binary logit", { + + num_rows <- 100 + + test_data <- data.frame( + id = 1:num_rows, + sex = sample(c("male", "female"), size = num_rows, replace = T), + age = sample(1:100, num_rows, replace = T), + x1 = runif(num_rows), + x2 = runif(num_rows) + ) + + my_formula <- ~ x1 + x2 + I(x1^2) + x1:x2 + params = c(`(Intercept)` = 1, x1 = 2.5, x2 = -3, `I(x1^2)` = 0.5 , `x1:x2` = 1) + + m <- ModelCustom$new(params = params, formula = my_formula, type = "regression") + checkmate::expect_numeric(m$predict(test_data), finite = T, any.missing = FALSE, len = num_rows) + + m <- ModelCustom$new(params = params, formula = my_formula, type = "binary") + checkmate::expect_numeric(m$predict(test_data),lower = 0, upper = 1, finite = T, any.missing = FALSE, len = num_rows) + +}) + +test_that("ModelCustom - multinomial logit", { + + num_rows <- 100 + num_choices = 30 + + my_formula <- ~ x1 + x2 + I(x1^2) + x1:x2 + params = c(`(Intercept)` = 1, x1 = 2.5, x2 = 3, `I(x1^2)` = 0.5 , `x1:x2` = 1) + + test_chooser_data <- data.table( + id = 1:num_rows, + sex = sample(c("male", "female"), size = num_rows, replace = T), + age = sample(1:100, num_rows, replace = T) + ) %>% + .[, choiceset := list(list(sample(1:num_choices, size = 5, replace = FALSE))), by = id] + + test_alternative_data <- data.table( + choice_id = 1:num_choices, + x1 = runif(num_choices), + x2 = runif(num_choices) + ) + + multinomial_test_data <- + test_chooser_data %>% + unnest_dt(., cols = "choiceset") %>% + .[, `:=`(choice_id = choiceset, choiceset = NULL)] %>% + merge(., test_alternative_data, by = "choice_id") + + m <- ModelCustom$new(params = params, formula = my_formula, type = "multinomial") + res <- m$predict(multinomial_test_data, chooser_id_col = "id", choice_id_col = "choice_id") + checkmate::expect_data_table(res, any.missing = FALSE, col.names = "strict", ncols = 4) + +}) diff --git a/tests/testthat/test-ModelMlogit.R b/tests/testthat/test-ModelMlogit.R deleted file mode 100644 index dfbf718f..00000000 --- a/tests/testthat/test-ModelMlogit.R +++ /dev/null @@ -1,31 +0,0 @@ -test_that("multiplication works", { - if (require("mlogit")) { - - toy_individual_mlogit_data <- - data.table::CJ(pid = toy_individuals[["pid"]], - choice_var = toy_individuals[, unique(marital_status)]) %>% - merge(., toy_individuals, by = "pid") %>% - .[, chosen := ifelse(choice_var == marital_status, TRUE, FALSE)] %>% - mlogit::mlogit.data( - data = ., - choice = "chosen", - shape = "long", - id.var = "pid", - alt.var = "choice_var" - ) - - m <- mlogit::mlogit(chosen ~ 0 | sex + age, data = toy_individual_mlogit_data) - checkmate::expect_matrix(predict(m, toy_individual_mlogit_data)) - mnl <- ModelMlogit$new(m) - mnl$simulate() - - - #' an equipvalent model in nnet - # library("nnet") - # mnn <- multinom(marital_status ~ age + sex, data = toy_individuals) - # predict(mnn, toy_individuals, type = "prob") %>% colMeans() - - } - - -}) diff --git a/tests/testthat/test-TransitionClassification.R b/tests/testthat/test-TransitionClassification.R index fe15851d..00082821 100644 --- a/tests/testthat/test-TransitionClassification.R +++ b/tests/testthat/test-TransitionClassification.R @@ -348,18 +348,3 @@ test_that("The scale of World only affect Target object", { # it is a global variable of the package env world$set_scale(1) }) - -# mlogit ---------------- -test_that("mlogit", { - if (require(mlogit)) { - create_toy_world() - - dwl_data <- world$entities$BuildingResidential$get_data() - dwl_data - - - - residential_location <- mlogit() - - } -}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 78515ccd..9b5bb2b6 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -110,3 +110,18 @@ test_that("dsample", { expect_equal(dsample(10, size = 10, replace = T, prob = 1), rep(10, 10)) }) + +test_that("unnest_dt and unnestv_dt works", { + dt <- data.table::data.table( + id = 1:3, + list_col_a = list(c("a","b","c"), c("a","b","c"), c("a","b","c"))) + res <- unnest_dt(dt, "list_col_a") + checkmate::expect_data_table(res, any.missing = FALSE) + expect_equal(res[["id"]],c(rep(1,3), rep(2,3), rep(3,3))) + expect_equal(res[["list_col_a"]], rep(c("a", "b", "c"), 3)) + dt <- data.table::data.table( + id = 1:3, + list_col_a = list(c("a","b","c"), c("a","b","c"), c("a","b","c")), + list_col_b = list(c("a","b","c"), c("a","b","c"), c("a","b","c"))) + expect_error(unnest_dt(dt, "list_col_a"), "column or expression 2 of 'by' or 'keyby' is type list") +}) From bf920a31a558beb455bd295f4e2b569245f902c4 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Thu, 28 May 2020 11:30:50 +1000 Subject: [PATCH 03/29] remove old Rd files --- man/Trans.Rd | 77 ---------------- man/transition.Rd | 177 ------------------------------------ tests/testthat/test-utils.R | 15 +++ 3 files changed, 15 insertions(+), 254 deletions(-) delete mode 100644 man/Trans.Rd delete mode 100644 man/transition.Rd diff --git a/man/Trans.Rd b/man/Trans.Rd deleted file mode 100644 index 169a2b3e..00000000 --- a/man/Trans.Rd +++ /dev/null @@ -1,77 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Transition.R -\name{Trans} -\alias{Trans} -\title{Transition class} -\description{ -A class that perform Monte Carlo simulation on agents using a probabilistic model. -Work flow: \code{initialise()} -> \code{filter()} -> \code{mutate()} -> \code{simulate()} -> \code{postprocess()}. -Note that, to swap the run order of \code{filter()} and \code{mutate()} you need to change the -\code{mutate_first} public field to \code{TRUE}. -} -\note{ -\code{target} can be static or dynamic depending on the data structure of it. A static -target can be a named list or an integer value depending its usage in each -event function. -} -\section{Construction}{ -\preformatted{Trans$new(x, model, target = NULL, targeted_agents = NULL) -} -\itemize{ -\item \code{x} :: \code{\link{R6}}\cr -A Agent class inheritance object. -\item \code{model} :: \code{object}\cr -A model -\item \code{target} :: [\code{integer()}|\link{Target}]\cr -(Default as NULL). A number that forces the number of micro events to occur. For example, if -`10`` is speficied, there will be 10 agents that under go the event. However, -if a integer vector is given it must be the same length as the classes in the model. -This only works for classification models. If a \link{Target} object is given, the -target values will be scaled down by the scaling factor of that was set by -the \link{World} object in your currently active environment. -\item \code{targeted_agent} :: \code{\link[=integer]{integer()}}\cr -(Default as NULL) A integer vectors that contains ids of agents in \code{x} to undergo the event. -} -} - -\section{Fields}{ - -\itemize{ -\item \code{mutate_first}:: \code{logical(1)}\cr -Default as FALSE, this flag is used to indicate whether the attribute data from -the Agent in \code{x} should be mutated (\verb{$mutate(.data)}) before filtered (\verb{$filter(.data)}). -See the description section for more details about the processing steps of \link{Trans}. -} -} - -\section{Methods}{ - -\itemize{ -\item \code{filter(.data)}\cr -(\code{\link[data.table:data.table]{data.table::data.table()}}) -> \verb{[data.table::data.table()]}\cr -\strong{(By default, first of the preprocessing steps)}\cr -By default this method returns the input \code{.data}. This method can be overwrite -to give the user the flexibility to 'filter' the data prior to making prediction -by the given model. Filtering for eligible agents for this transition can be done in this step. -\item \code{mutate(.data)}\cr -(\code{\link[data.table:data.table]{data.table::data.table()}}) -> \verb{[data.table::data.table()]}\cr -\strong{(By default, second of the preprocessing steps)}\cr -By default this method returns the input \code{.data}. This method can be overwrite -to give the user the flexibility to 'mutate' the data prior to making prediction -by the given model. Adding derived variables and historical life course of the agents -can be done in this step. -\item \code{update_agents(attr)}\cr -(\code{character(1)})\cr -Update the attribute data of the agents that undergo the transition event. -\item \code{get_result(ids)}\cr -(\code{integer()}) -> \link[data.table:data.table]{data.table::data.table}\cr -Returns the simulation result in a \link[data.table:data.table]{data.table::data.table} format with two -columns \code{id} and \code{response}. -\item \code{get_nrow_result()}\cr -Returns the number of rows in the simulation result. -\item \code{get_decision_maker_ids(response_filter = NULL)}\cr -(\code{character()}) -> (\code{integer()})\cr -Returns ids of the agents that have their response equal to \code{response_filter}. -} -} - diff --git a/man/transition.Rd b/man/transition.Rd deleted file mode 100644 index 51425b64..00000000 --- a/man/transition.Rd +++ /dev/null @@ -1,177 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transition-fnc.R -\name{transition} -\alias{transition} -\alias{get_transition} -\title{Simulate state transition (Markov chain)} -\usage{ -transition( - world, - entity, - model, - target = NULL, - targeted_ids = NULL, - preprocessing_fn = NULL, - attr = NULL, - values = NULL, - verbose = FALSE -) - -get_transition( - world, - entity, - model, - target = NULL, - targeted_ids = NULL, - preprocessing_fn = NULL -) -} -\arguments{ -\item{world}{a \link{World} object} - -\item{entity}{a character indicating the entity class to apply the transition to.} - -\item{model}{a \link{Model} object or an object in \code{\link[=SupportedTransitionModels]{SupportedTransitionModels()}}.} - -\item{target}{a \link{Target} object or a named list or \code{NULL}.} - -\item{targeted_ids}{a integer vector containing ids of entities in \code{entity} -to undergo the transition or \code{NULL}.} - -\item{preprocessing_fn}{a function that accepts one argument or \code{NULL}. This allows preprocessing of -\code{entity}'s attribute data, for example, if only records with certain conditions -should undergo the transition or a new variable should be added to the data. See -the Note section below for how to create a preprocessing function.} - -\item{attr}{a character denoting which of the attribute if \code{entity} should be -updated as the result of the transition or \code{NULL}.} - -\item{values}{named \code{vector()}\cr -A named vector that is used to replace the outcomes of the model in the field -specified in \code{attr}. See the example section.} - -\item{verbose}{\code{logical()}\cr -Default as \code{FALSE}.} -} -\value{ -\link{transition} returns the first argument which is the \link{World} object, while -\link{get_transition} a data.table objec that contains the transition outcomes with -two columns: id and response. -} -\description{ -The \code{transition} function can be used to evaluate a model againts the attribute -data of an \link{Entity} object stored inside the input \link{World} object and update -an attribute of that \link{Entity} using random draws from the prediction result. -It allows simple state transitions to be simulated directly inside a -microsimulation pipeline, as it always returns the input \code{World} object. -} -\note{ -In general, dymiumCore detects variables in entity data of an \link{Entity} object that -has a dot prefix as derived variables. Meaning, those derived variables are not -check against new entity data that are getting added to the \link{Entity} object. But -when entity data are used in \code{get_transition}, the dot prefix of the derived variables -will be removed. This is to make it convenient when naming variables during -the model estimation step. - -To create a pre-processing function you can use \code{dplyr} or \code{data.table}. You can -even combine multiple functions with \link[magrittr:\%>\%]{magrittr::\%>\%}. As an example, if you only -want to filter just the male population then you can choose one of the -following options to create your preprocessing function.\preformatted{# as a function using base R -filter_male <- function(.data) \{ - .data[.data$sex == "male"] -\} - -# dplyr's way -filter_male <- function(.data) \{ - dplyr::filter(.data, sex == "male") -\} - -# data.table's way -filter_male <- function(.data) \{ - .data[sex == "male", ] -\} - -# magrittr's way + dplyr's way -filter_male <- - . \%>\% - dplyr::filter(., sex == "male") - -# magrittr's way + data.table's way + additional conditions -filter_male <- - . \%>\% - .[sex == "male", ] \%>\% - .[age >= 50, ] - -} - -New variables can also be added to the entity data to be used in predicting -their transition probability. Again we can use \code{dplyr}'s or \code{data.table}'s way. -The examples below show how you can add a 5-year age group variable, called \code{age5}, -to the entity data.\preformatted{# magrittr's way + data.table's way -filter_male <- - . \%>\% - .[sex == "male" & age >= 50, ] \%>\% - .[, age5 := cut(age, breaks = c(seq(0,80,5), Inf), include.lowest = TRUE, right = FALSE)] - -# magrittr's way + dplyr's way -filter_male <- - . \%>\% - dplyr::filter(., sex == "male" & age >= 50) \%>\% - dplyr::mutate(., age5 = cut(age, breaks = c(seq(0,80,5), Inf), include.lowest = TRUE, right = FALSE)) -} - -Note that, new variables added inside preprocessing_fn won't -change the attribute data of the \code{entity} object that is undergoing a transition. -These variables only appear temporary within the context of the transition. -} -\examples{ - -# create a filter function -library(caret) - -filter_male <- - . \%>\% - .[sex == "male", ] - -filter_not_dead <- - . \%>\% - .[age != -1] - -# create a multinomial logit model using `caret` -mnl <- caret::train(marital_status ~ age + sex, - data = toy_individuals, - method = "multinom", - trace = FALSE) -# this model denotes that there is a 10\% chance that an individual will decease -death_model <- list(yes = 0.1, no = 0.9) - -# create a toy world -create_toy_world() - -# simulate marital status transition and update the attribute. -transition(world, - entity = "Individual", - model = mnl, - preprocessing_fn = filter_male, - attr = "marital_status") - -# get a transition result -get_transition(world, - entity = "Individual", - model = mnl, - preprocessing_fn = filter_male) - -# lets make a pipeline of transitions -world \%>\% - transition(entity = "Individual", - model = mnl, - preprocessing_fn = . \%>\% filter_male \%>\% filter_not_dead, - attr = "marital_status") \%>\% - transition(entity = "Individual", - model = death_model, - preprocessing_fn = filter_not_dead, - attr = "age", - values = c(yes = -1L)) -# print the attributes of the individual agents -world$entities$Individual$get_data() -} diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 9b5bb2b6..27b3a391 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -125,3 +125,18 @@ test_that("unnest_dt and unnestv_dt works", { list_col_b = list(c("a","b","c"), c("a","b","c"), c("a","b","c"))) expect_error(unnest_dt(dt, "list_col_a"), "column or expression 2 of 'by' or 'keyby' is type list") }) + + +test_that("which_max_n and which_min_x work", { + + x = 1:4 + n = 2 + + expect_equal(which_max_n(x, n), 3:4) + expect_equal(which_min_n(x, n), 1:2) + + expect_error(which_max_n(x, length(x) + 1), "outside bounds") + expect_error(which_min_n(x, length(x) + 1), "outside bounds") + +}) + From a1ea005a7ca46f2733f0a233f0c190dca153efd7 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Fri, 29 May 2020 12:48:31 +1000 Subject: [PATCH 04/29] add the first prototype of ModelCustom --- R/Model.R | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/R/Model.R b/R/Model.R index 0bd4488a..e1502067 100644 --- a/R/Model.R +++ b/R/Model.R @@ -137,3 +137,74 @@ summary.Model <- function(object, ...) { } summary(object$model) } + + +predict.ModelCustom <- function(object, newdata, ...) { + object$predict(newdata) +} + +#' @title ModelCustom +#' +#' @usage NULL +#' @format [R6::R6Class] object inheriting [Model]. +#' +#' @export +ModelCustom <- R6::R6Class( + classname = "ModelCustom", + inherit = Model, + public = list( + + params = NULL, + type = NULL, + formula = NULL, + terms = NULL, + + initialize = function(params, formula, type = c("regression", "binary", "multinomial", "custom")) { + + self$params = checkmate::assert_numeric(params, finite = T, any.missing = FALSE, names = "unique") + self$type = match.arg(type) + self$formula = checkmate::assert_formula(formula, null.ok = FALSE) + self$terms = terms(formula) + + self$predict <- switch(type, + "regression" = private$.default_predict, + "binary" = private$.binary_predict, + "multinomial" = private$.multinomial_predict, + "custom" = NULL) + + invisible(NULL) + }, + + print = function() { + cat('Model type: ', self$type, "\n") + print(self$params) + }, + + predict = NULL + ), + + private = list( + + .default_predict = function(newdata) { + compute_linear_combination(self$params, self$formula, newdata) + }, + + .binary_predict = function(newdata) { + linear_comb <- private$.default_predict(newdata) + return(1 / (1 + exp(-linear_comb))) + }, + + .multinomial_predict = function(newdata, chooser_id_col, choice_id_col) { + data.table(chooser_id = newdata[[chooser_id_col]], + choice_id = newdata[[choice_id_col]], + linear_comb = private$.default_predict(newdata) + ) %>% + data.table::setorder(chooser_id) %>% + .[, prob := exp(linear_comb)/sum(exp(linear_comb)), by = chooser_id] + } + ) +) + +compute_linear_combination <- function(params, formula, newdata) { + mm <- model.matrix(formula, newdata) + as.numeric(params %*% t(mm)) From c930c3ab3da396673c6f88f3ba2d4a3846ccdad1 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Fri, 29 May 2020 13:50:26 +1000 Subject: [PATCH 05/29] add: Generic$.abstract --- R/Generic.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/Generic.R b/R/Generic.R index 37a11b85..b0aff8f2 100644 --- a/R/Generic.R +++ b/R/Generic.R @@ -127,6 +127,14 @@ Generic <- R6Class( ), private = list( + .abstract = function(msg) { + # this is a method for abstract methods + if (!missing(msg)) { + lg$fatal(msg) + } + stop("This is an abstract method which is to be implemented.") + }, + abstract = function(msg) { # this is a method for abstract methods if (!missing(msg)) { From 0fbf4bd419dcbe19b80483af9a73fb935cbd7c79 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Fri, 29 May 2020 13:52:55 +1000 Subject: [PATCH 06/29] add ModelCustom and its implementations --- NEWS.md | 3 +- R/Model.R | 71 ------------------------ R/ModelBinaryChoice.R | 44 +++++++++++++++ R/ModelCustom.R | 91 +++++++++++++++++++++++++++++++ R/ModelLinear.R | 26 +++++++++ R/ModelMultinomialLogit.R | 53 ++++++++++++++++++ tests/testthat/test-ModelCustom.R | 43 +++++++++++---- 7 files changed, 247 insertions(+), 84 deletions(-) create mode 100644 R/ModelBinaryChoice.R create mode 100644 R/ModelCustom.R create mode 100644 R/ModelLinear.R create mode 100644 R/ModelMultinomialLogit.R diff --git a/NEWS.md b/NEWS.md index 3c1d5d07..fe76e5b7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # dymiumCore (development version) -- introduced `ModelCustom` a model class that let users specify its parameters and `predict` function. +- introduced `ModelCustom` a model class that let users specify its parameters and `predict` function. We also add `ModelMultinomialLogit`, `ModelBinaryChoice` and `ModelLinear` which are implementations of `ModelCustom`. + # dymiumCore 0.1.8 - Added `sim()` for compiling and executing a microsimulation pipeline. diff --git a/R/Model.R b/R/Model.R index e1502067..0bd4488a 100644 --- a/R/Model.R +++ b/R/Model.R @@ -137,74 +137,3 @@ summary.Model <- function(object, ...) { } summary(object$model) } - - -predict.ModelCustom <- function(object, newdata, ...) { - object$predict(newdata) -} - -#' @title ModelCustom -#' -#' @usage NULL -#' @format [R6::R6Class] object inheriting [Model]. -#' -#' @export -ModelCustom <- R6::R6Class( - classname = "ModelCustom", - inherit = Model, - public = list( - - params = NULL, - type = NULL, - formula = NULL, - terms = NULL, - - initialize = function(params, formula, type = c("regression", "binary", "multinomial", "custom")) { - - self$params = checkmate::assert_numeric(params, finite = T, any.missing = FALSE, names = "unique") - self$type = match.arg(type) - self$formula = checkmate::assert_formula(formula, null.ok = FALSE) - self$terms = terms(formula) - - self$predict <- switch(type, - "regression" = private$.default_predict, - "binary" = private$.binary_predict, - "multinomial" = private$.multinomial_predict, - "custom" = NULL) - - invisible(NULL) - }, - - print = function() { - cat('Model type: ', self$type, "\n") - print(self$params) - }, - - predict = NULL - ), - - private = list( - - .default_predict = function(newdata) { - compute_linear_combination(self$params, self$formula, newdata) - }, - - .binary_predict = function(newdata) { - linear_comb <- private$.default_predict(newdata) - return(1 / (1 + exp(-linear_comb))) - }, - - .multinomial_predict = function(newdata, chooser_id_col, choice_id_col) { - data.table(chooser_id = newdata[[chooser_id_col]], - choice_id = newdata[[choice_id_col]], - linear_comb = private$.default_predict(newdata) - ) %>% - data.table::setorder(chooser_id) %>% - .[, prob := exp(linear_comb)/sum(exp(linear_comb)), by = chooser_id] - } - ) -) - -compute_linear_combination <- function(params, formula, newdata) { - mm <- model.matrix(formula, newdata) - as.numeric(params %*% t(mm)) diff --git a/R/ModelBinaryChoice.R b/R/ModelBinaryChoice.R new file mode 100644 index 00000000..2bceffd0 --- /dev/null +++ b/R/ModelBinaryChoice.R @@ -0,0 +1,44 @@ +#' @title ModelBinaryChoice +#' +#' @usage NULL +#' @format [R6::R6Class] object inheriting [ModelCustom]. +#' +#' @export +ModelBinaryChoice <- R6::R6Class( + classname = "ModelBinaryChoice", + inherit = ModelCustom, + public = list( + + + #' @description + #' + #' Initialisation function + #' + #' @param params a `data.frame` object. + #' @param formula a `formula` object. + #' + #' @return NULL + initialize = function(params, formula) { + super$initialize(params, formula, type = "binary_choice") + invisible(NULL) + }, + + #' @description + #' + #' This predict method returns probabilities generated from the parameters + #' of this [Model] object. + #' + #' @param newdata a `data.frame` object. + #' @param link_function :: `character(1)`\cr + #' default as 'logit' using [stats::binomial(link = "logit")]. Choice of + #' 'logit' and 'probit'. TODO: implement 'probit' option. + #' + #' @return + #' @export + predict = function(newdata, link_function = c("logit")) { + link_function <- match.arg(link_function) + linear_comb <- private$.compute_linear_combination(newdata) + 1 / (1 + exp(-linear_comb)) + } + ) +) diff --git a/R/ModelCustom.R b/R/ModelCustom.R new file mode 100644 index 00000000..4ceed900 --- /dev/null +++ b/R/ModelCustom.R @@ -0,0 +1,91 @@ +#' ModelCustom +#' +#' @description +#' +#' A ModelCustom class. +#' +#' @export +ModelCustom <- R6::R6Class( + classname = "ModelCustom", + inherit = Model, + public = list( + + #' @field params named `numeric()`\cr + #' a named numerical vector containing parameter values of the model object. + params = NULL, + #' @field type `character(1)`\cr + #' type of the model. + type = NULL, + #' @field type `formula()`\cr + #' model formula. + formula = NULL, + #' @field type `character(1)`\cr + #' terms of the model. This gets generated using `stats::terms` on `formula` + #' during initialisation. + terms = NULL, + + #' @description + #' + #' Constructor function. + #' + #' @param params + #' @param formula + #' @param type + #' + #' @return `NULL` + initialize = function(params, formula, type = "custom") { + + self$params = checkmate::assert_numeric(params, finite = T, any.missing = FALSE, names = "unique") + self$formula = checkmate::assert_formula(formula, null.ok = FALSE) + self$type = checkmate::assert_string(type, na.ok = FALSE) + self$terms = terms(formula) + private$.model = self + + invisible(NULL) + }, + + #' @description + #' + #' print method. + print = function() { + cat('Model type: ', self$type, "\n") + print(self$params) + }, + + #' @description + #' + #' an abstract method. Once implemented it should accept `newdata` as the first + #' argument and returns a `numeric()` vector or a `data.frame()` that contains + #' the predicted probabilities calculated using `self$params` and `newdata`. + predict = function() { + private$.abstract() + }, + + #' @description + #' + #' summary method. + summary = function() { + self$print() + } + ), + + private = list( + .compute_linear_combination = function(newdata) { + mm <- model.matrix(self$formula, newdata) + as.numeric(self$params %*% t(mm)) + } + ) +) + +compute_linear_combination <- function(params, formula, newdata) { + mm <- model.matrix(formula, newdata) + as.numeric(params %*% t(mm)) +} + +predict.ModelCustom <- function(object, newdata) { + object$predict(newdata) +} + +summary.ModelCustom <- function(x) { + x$summary() +} diff --git a/R/ModelLinear.R b/R/ModelLinear.R new file mode 100644 index 00000000..50ee6817 --- /dev/null +++ b/R/ModelLinear.R @@ -0,0 +1,26 @@ +#' @title ModelLinear +#' +#' @export +ModelLinear <- R6::R6Class( + classname = "ModelLinear", + inherit = ModelCustom, + public = list( + + #' @description + #' + #' Initialisation function + #' + #' @param params a `data.frame` object. + #' @param formula a `formula` object. + #' + #' @return NULL + initialize = function(params, formula) { + super$initialize(params, formula, type = "linear") + invisible(NULL) + }, + + predict = function(newdata) { + private$.compute_linear_combination(newdata) + } + ) +) diff --git a/R/ModelMultinomialLogit.R b/R/ModelMultinomialLogit.R new file mode 100644 index 00000000..89868e31 --- /dev/null +++ b/R/ModelMultinomialLogit.R @@ -0,0 +1,53 @@ +#' ModelMultinomialLogit +#' +#' @description +#' A multinomial logit model +#' +#' @details +#' This model object is use to create a multinomial model using already estimated +#' parameters. +#' +#' @export +ModelMultinomialLogit <- R6::R6Class( + classname = "ModelMultinomialLogit", + inherit = ModelCustom, + public = list( + + #' @description + #' + #' Initialisation function + #' + #' @param params + #' @param formula a `formula` or [mlogit::mFormula()] object. + #' + #' @return NULL + initialize = function(params, formula) { + super$initialize(params, formula, type = "multinomial") + invisible(NULL) + }, + + #' @description + #' + #' This predict method returns probabilities generated from the parameters + #' of this [Model] object. + #' + #' @param newdata (`data.frame()`) \cr + #' new data to generate probabilities conditioned on its explanatory variables. + #' @param chooser_id_col (`character(1)`)\cr + #' column name of the chooser id + #' @param choice_id_col (`character(1)`)\cr + #' column name of the choice id + #' + #' @return a `data.frame` object with three columns: chooser_id (`integer()`), + #' choice_id (`integer()`), linear_comb (`numeric()`), prob (`numeric()`). Note + #' that, 'linear_comb' stands for linear combination (i.e. $$B1 * x1 + B2 * x2$$). + predict = function(newdata, chooser_id_col, choice_id_col) { + data.table(chooser_id = newdata[[chooser_id_col]], + choice_id = newdata[[choice_id_col]], + linear_comb = private$.compute_linear_combination(newdata) + ) %>% + data.table::setorder(chooser_id) %>% + .[, prob := exp(linear_comb)/sum(exp(linear_comb)), by = chooser_id] + } + ) +) diff --git a/tests/testthat/test-ModelCustom.R b/tests/testthat/test-ModelCustom.R index c24c5ab6..e44712c9 100644 --- a/tests/testthat/test-ModelCustom.R +++ b/tests/testthat/test-ModelCustom.R @@ -1,4 +1,4 @@ -test_that("ModelCustom - linear regression and binary logit", { +test_that("ModelLinear and ModelBinaryChoice", { num_rows <- 100 @@ -13,28 +13,28 @@ test_that("ModelCustom - linear regression and binary logit", { my_formula <- ~ x1 + x2 + I(x1^2) + x1:x2 params = c(`(Intercept)` = 1, x1 = 2.5, x2 = -3, `I(x1^2)` = 0.5 , `x1:x2` = 1) - m <- ModelCustom$new(params = params, formula = my_formula, type = "regression") - checkmate::expect_numeric(m$predict(test_data), finite = T, any.missing = FALSE, len = num_rows) + mLinear <- ModelLinear$new(params = params, formula = my_formula) + checkmate::expect_numeric(mLinear$predict(test_data), finite = T, any.missing = FALSE, len = num_rows) - m <- ModelCustom$new(params = params, formula = my_formula, type = "binary") - checkmate::expect_numeric(m$predict(test_data),lower = 0, upper = 1, finite = T, any.missing = FALSE, len = num_rows) + mBinaryChoice <- ModelBinaryChoice$new(params = params, formula = my_formula) + checkmate::expect_numeric(mBinaryChoice$predict(test_data),lower = 0, upper = 1, finite = T, any.missing = FALSE, len = num_rows) }) -test_that("ModelCustom - multinomial logit", { +test_that("ModelMultinomialLogit", { num_rows <- 100 num_choices = 30 - my_formula <- ~ x1 + x2 + I(x1^2) + x1:x2 - params = c(`(Intercept)` = 1, x1 = 2.5, x2 = 3, `I(x1^2)` = 0.5 , `x1:x2` = 1) + my_formula <- chosen ~ x1 + x2 + I(x1^2) + x1:x2 + 0 + params = c(x1 = 2.5, x2 = 3, `I(x1^2)` = 0.5 , `x1:x2` = 1) test_chooser_data <- data.table( id = 1:num_rows, sex = sample(c("male", "female"), size = num_rows, replace = T), age = sample(1:100, num_rows, replace = T) ) %>% - .[, choiceset := list(list(sample(1:num_choices, size = 5, replace = FALSE))), by = id] + .[, choiceset := list(list(sample(1:num_choices, size = sample(2:10, 1), replace = FALSE))), by = id] test_alternative_data <- data.table( choice_id = 1:num_choices, @@ -46,10 +46,29 @@ test_that("ModelCustom - multinomial logit", { test_chooser_data %>% unnest_dt(., cols = "choiceset") %>% .[, `:=`(choice_id = choiceset, choiceset = NULL)] %>% - merge(., test_alternative_data, by = "choice_id") + merge(., test_alternative_data, by = "choice_id") %>% + .[, chosen := sample(c(T, rep(F, .N - 1))), by = "id"] %>% + data.table::setcolorder(c("id", "choice_id")) %>% + data.table::setorder("id") - m <- ModelCustom$new(params = params, formula = my_formula, type = "multinomial") + m <- ModelMultinomialLogit$new(params = params, formula = my_formula) res <- m$predict(multinomial_test_data, chooser_id_col = "id", choice_id_col = "choice_id") - checkmate::expect_data_table(res, any.missing = FALSE, col.names = "strict", ncols = 4) + + if (requireNamespace('mlogit')) { + require("mlogit") + # no intercept! + my_formula <- mlogit::mFormula(update(my_formula, ~ . + 0)) + mlogit_model <- mlogit::mlogit(my_formula, data = multinomial_test_data, + choice="chosen", + chid.var="id", + alt.var = "choice_id", + shape = "long") + m <- ModelMultinomialLogit$new(params = params, formula = my_formula) + res_mlogit <- m$predict(multinomial_test_data, chooser_id_col = "id", choice_id_col = "choice_id") + checkmate::expect_data_table(res_mlogit, any.missing = FALSE, col.names = "strict", ncols = 4) + } + + expect_equal(res_mlogit, res) + }) From df16cc9fec549f17f03c0a79ccc9b3359ee828bf Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Fri, 29 May 2020 13:58:02 +1000 Subject: [PATCH 07/29] Delete ModelMLogit.R --- R/ModelMLogit.R | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 R/ModelMLogit.R diff --git a/R/ModelMLogit.R b/R/ModelMLogit.R deleted file mode 100644 index a6d4bf67..00000000 --- a/R/ModelMLogit.R +++ /dev/null @@ -1,23 +0,0 @@ -ModelMlogit <- R6::R6Class( - classname = "ModelMlogit", - inherit = Model, - public = list( - simulate = function(world = NULL, chooser_data, alternative_data, chooser_id_col, alternative_id_col, chosen_col, n_choices = 30) { - checkmate::assert_r6(world, classes = "World", null.ok = TRUE) - checkmate::assert_r6(self, classes = "Model") - checkmate::assert_r6(private, classes = "Model") - checkmate::assert_data_table(chooser_data) - checkmate::assert_data_table(alternative_data) - checkmate::assert_names(names(chooser_data), must.include = c(chooser_id_col, chosen_col)) - checkmate::assert_names(names(alternative_data), must.include = alternative_id_col) - checkmate::assert_count(n_choices, positive = T, null.ok = FALSE, na.ok = FALSE) - } - ) -) - - - - -# } - - From d0e4eaac5812e77e8b0c82099d19b09f64d7d321 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Fri, 29 May 2020 14:02:14 +1000 Subject: [PATCH 08/29] patch(ModelMultinomialLogit): remove the setorder step in the predict method easy for the eyes but take time to compute which can add up significantly when running this repeatedly. --- R/ModelMultinomialLogit.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/ModelMultinomialLogit.R b/R/ModelMultinomialLogit.R index 89868e31..7ce4d77d 100644 --- a/R/ModelMultinomialLogit.R +++ b/R/ModelMultinomialLogit.R @@ -44,9 +44,7 @@ ModelMultinomialLogit <- R6::R6Class( predict = function(newdata, chooser_id_col, choice_id_col) { data.table(chooser_id = newdata[[chooser_id_col]], choice_id = newdata[[choice_id_col]], - linear_comb = private$.compute_linear_combination(newdata) - ) %>% - data.table::setorder(chooser_id) %>% + linear_comb = private$.compute_linear_combination(newdata)) %>% .[, prob := exp(linear_comb)/sum(exp(linear_comb)), by = chooser_id] } ) From d367547646ec6949c2e714f61e7744679f8b14ff Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Tue, 2 Jun 2020 08:35:50 +1000 Subject: [PATCH 09/29] update function docs --- man/ModelBinaryChoice.Rd | 28 ++++++ man/ModelCustom.Rd | 38 +++++++- man/ModelLinear.Rd | 13 +++ man/ModelMultinomialLogit.Rd | 34 +++++++ man/Trans.Rd | 77 +++++++++++++++ man/transition.Rd | 177 +++++++++++++++++++++++++++++++++++ man/which_max_n.Rd | 30 ++++++ 7 files changed, 393 insertions(+), 4 deletions(-) create mode 100644 man/ModelBinaryChoice.Rd create mode 100644 man/ModelLinear.Rd create mode 100644 man/ModelMultinomialLogit.Rd create mode 100644 man/Trans.Rd create mode 100644 man/transition.Rd create mode 100644 man/which_max_n.Rd diff --git a/man/ModelBinaryChoice.Rd b/man/ModelBinaryChoice.Rd new file mode 100644 index 00000000..238ee32f --- /dev/null +++ b/man/ModelBinaryChoice.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModelBinaryChoice.R +\name{ModelBinaryChoice} +\alias{ModelBinaryChoice} +\title{ModelBinaryChoice} +\format{ +\link[R6:R6Class]{R6::R6Class} object inheriting \link{ModelCustom}. +} +\arguments{ +\item{params}{a \code{data.frame} object.} + +\item{formula}{a \code{formula} object.} + +\item{newdata}{a \code{data.frame} object.} + +\item{link_function}{:: \code{character(1)}\cr +default as 'logit' using \link[stats:binomial(link = "logit")]{stats::binomial(link = "logit")}. Choice of +'logit' and 'probit'. TODO: implement 'probit' option.} +} +\value{ + +} +\description{ +Initialisation function + +This predict method returns probabilities generated from the parameters +of this \link{Model} object. +} diff --git a/man/ModelCustom.Rd b/man/ModelCustom.Rd index 67bc5b3f..848359f5 100644 --- a/man/ModelCustom.Rd +++ b/man/ModelCustom.Rd @@ -1,11 +1,41 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Model.R +% Please edit documentation in R/ModelCustom.R \name{ModelCustom} \alias{ModelCustom} \title{ModelCustom} -\format{ -\link[R6:R6Class]{R6::R6Class} object inheriting \link{Model}. +\arguments{ +\item{type}{} +} +\value{ +\code{NULL} } \description{ -ModelCustom +A ModelCustom class. + +Constructor function. + +print method. + +an abstract method. Once implemented it should accept \code{newdata} as the first +argument and returns a \code{numeric()} vector or a \code{data.frame()} that contains +the predicted probabilities calculated using \code{self$params} and \code{newdata}. + +summary method. } +\section{Fields}{ + +\describe{ +\item{\code{params}}{named \code{numeric()}\cr +a named numerical vector containing parameter values of the model object.} + +\item{\code{type}}{\code{character(1)}\cr +type of the model.} + +\item{\code{type}}{\code{formula()}\cr +model formula.} + +\item{\code{type}}{\code{character(1)}\cr +terms of the model. This gets generated using \code{stats::terms} on \code{formula} +during initialisation.} +}} + diff --git a/man/ModelLinear.Rd b/man/ModelLinear.Rd new file mode 100644 index 00000000..b055074e --- /dev/null +++ b/man/ModelLinear.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModelLinear.R +\name{ModelLinear} +\alias{ModelLinear} +\title{ModelLinear} +\arguments{ +\item{params}{a \code{data.frame} object.} + +\item{formula}{a \code{formula} object.} +} +\description{ +Initialisation function +} diff --git a/man/ModelMultinomialLogit.Rd b/man/ModelMultinomialLogit.Rd new file mode 100644 index 00000000..ab849202 --- /dev/null +++ b/man/ModelMultinomialLogit.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModelMultinomialLogit.R +\name{ModelMultinomialLogit} +\alias{ModelMultinomialLogit} +\title{ModelMultinomialLogit} +\arguments{ +\item{formula}{a \code{formula} or \code{\link[mlogit:mFormula]{mlogit::mFormula()}} object.} + +\item{newdata}{(\code{data.frame()}) \cr +new data to generate probabilities conditioned on its explanatory variables.} + +\item{chooser_id_col}{(\code{character(1)})\cr +column name of the chooser id} + +\item{choice_id_col}{(\code{character(1)})\cr +column name of the choice id} +} +\value{ +a \code{data.frame} object with three columns: chooser_id (\code{integer()}), +choice_id (\code{integer()}), linear_comb (\code{numeric()}), prob (\code{numeric()}). Note +that, 'linear_comb' stands for linear combination (i.e. $$B1 * x1 + B2 * x2$$). +} +\description{ +A multinomial logit model + +Initialisation function + +This predict method returns probabilities generated from the parameters +of this \link{Model} object. +} +\details{ +This model object is use to create a multinomial model using already estimated +parameters. +} diff --git a/man/Trans.Rd b/man/Trans.Rd new file mode 100644 index 00000000..169a2b3e --- /dev/null +++ b/man/Trans.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Transition.R +\name{Trans} +\alias{Trans} +\title{Transition class} +\description{ +A class that perform Monte Carlo simulation on agents using a probabilistic model. +Work flow: \code{initialise()} -> \code{filter()} -> \code{mutate()} -> \code{simulate()} -> \code{postprocess()}. +Note that, to swap the run order of \code{filter()} and \code{mutate()} you need to change the +\code{mutate_first} public field to \code{TRUE}. +} +\note{ +\code{target} can be static or dynamic depending on the data structure of it. A static +target can be a named list or an integer value depending its usage in each +event function. +} +\section{Construction}{ +\preformatted{Trans$new(x, model, target = NULL, targeted_agents = NULL) +} +\itemize{ +\item \code{x} :: \code{\link{R6}}\cr +A Agent class inheritance object. +\item \code{model} :: \code{object}\cr +A model +\item \code{target} :: [\code{integer()}|\link{Target}]\cr +(Default as NULL). A number that forces the number of micro events to occur. For example, if +`10`` is speficied, there will be 10 agents that under go the event. However, +if a integer vector is given it must be the same length as the classes in the model. +This only works for classification models. If a \link{Target} object is given, the +target values will be scaled down by the scaling factor of that was set by +the \link{World} object in your currently active environment. +\item \code{targeted_agent} :: \code{\link[=integer]{integer()}}\cr +(Default as NULL) A integer vectors that contains ids of agents in \code{x} to undergo the event. +} +} + +\section{Fields}{ + +\itemize{ +\item \code{mutate_first}:: \code{logical(1)}\cr +Default as FALSE, this flag is used to indicate whether the attribute data from +the Agent in \code{x} should be mutated (\verb{$mutate(.data)}) before filtered (\verb{$filter(.data)}). +See the description section for more details about the processing steps of \link{Trans}. +} +} + +\section{Methods}{ + +\itemize{ +\item \code{filter(.data)}\cr +(\code{\link[data.table:data.table]{data.table::data.table()}}) -> \verb{[data.table::data.table()]}\cr +\strong{(By default, first of the preprocessing steps)}\cr +By default this method returns the input \code{.data}. This method can be overwrite +to give the user the flexibility to 'filter' the data prior to making prediction +by the given model. Filtering for eligible agents for this transition can be done in this step. +\item \code{mutate(.data)}\cr +(\code{\link[data.table:data.table]{data.table::data.table()}}) -> \verb{[data.table::data.table()]}\cr +\strong{(By default, second of the preprocessing steps)}\cr +By default this method returns the input \code{.data}. This method can be overwrite +to give the user the flexibility to 'mutate' the data prior to making prediction +by the given model. Adding derived variables and historical life course of the agents +can be done in this step. +\item \code{update_agents(attr)}\cr +(\code{character(1)})\cr +Update the attribute data of the agents that undergo the transition event. +\item \code{get_result(ids)}\cr +(\code{integer()}) -> \link[data.table:data.table]{data.table::data.table}\cr +Returns the simulation result in a \link[data.table:data.table]{data.table::data.table} format with two +columns \code{id} and \code{response}. +\item \code{get_nrow_result()}\cr +Returns the number of rows in the simulation result. +\item \code{get_decision_maker_ids(response_filter = NULL)}\cr +(\code{character()}) -> (\code{integer()})\cr +Returns ids of the agents that have their response equal to \code{response_filter}. +} +} + diff --git a/man/transition.Rd b/man/transition.Rd new file mode 100644 index 00000000..51425b64 --- /dev/null +++ b/man/transition.Rd @@ -0,0 +1,177 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transition-fnc.R +\name{transition} +\alias{transition} +\alias{get_transition} +\title{Simulate state transition (Markov chain)} +\usage{ +transition( + world, + entity, + model, + target = NULL, + targeted_ids = NULL, + preprocessing_fn = NULL, + attr = NULL, + values = NULL, + verbose = FALSE +) + +get_transition( + world, + entity, + model, + target = NULL, + targeted_ids = NULL, + preprocessing_fn = NULL +) +} +\arguments{ +\item{world}{a \link{World} object} + +\item{entity}{a character indicating the entity class to apply the transition to.} + +\item{model}{a \link{Model} object or an object in \code{\link[=SupportedTransitionModels]{SupportedTransitionModels()}}.} + +\item{target}{a \link{Target} object or a named list or \code{NULL}.} + +\item{targeted_ids}{a integer vector containing ids of entities in \code{entity} +to undergo the transition or \code{NULL}.} + +\item{preprocessing_fn}{a function that accepts one argument or \code{NULL}. This allows preprocessing of +\code{entity}'s attribute data, for example, if only records with certain conditions +should undergo the transition or a new variable should be added to the data. See +the Note section below for how to create a preprocessing function.} + +\item{attr}{a character denoting which of the attribute if \code{entity} should be +updated as the result of the transition or \code{NULL}.} + +\item{values}{named \code{vector()}\cr +A named vector that is used to replace the outcomes of the model in the field +specified in \code{attr}. See the example section.} + +\item{verbose}{\code{logical()}\cr +Default as \code{FALSE}.} +} +\value{ +\link{transition} returns the first argument which is the \link{World} object, while +\link{get_transition} a data.table objec that contains the transition outcomes with +two columns: id and response. +} +\description{ +The \code{transition} function can be used to evaluate a model againts the attribute +data of an \link{Entity} object stored inside the input \link{World} object and update +an attribute of that \link{Entity} using random draws from the prediction result. +It allows simple state transitions to be simulated directly inside a +microsimulation pipeline, as it always returns the input \code{World} object. +} +\note{ +In general, dymiumCore detects variables in entity data of an \link{Entity} object that +has a dot prefix as derived variables. Meaning, those derived variables are not +check against new entity data that are getting added to the \link{Entity} object. But +when entity data are used in \code{get_transition}, the dot prefix of the derived variables +will be removed. This is to make it convenient when naming variables during +the model estimation step. + +To create a pre-processing function you can use \code{dplyr} or \code{data.table}. You can +even combine multiple functions with \link[magrittr:\%>\%]{magrittr::\%>\%}. As an example, if you only +want to filter just the male population then you can choose one of the +following options to create your preprocessing function.\preformatted{# as a function using base R +filter_male <- function(.data) \{ + .data[.data$sex == "male"] +\} + +# dplyr's way +filter_male <- function(.data) \{ + dplyr::filter(.data, sex == "male") +\} + +# data.table's way +filter_male <- function(.data) \{ + .data[sex == "male", ] +\} + +# magrittr's way + dplyr's way +filter_male <- + . \%>\% + dplyr::filter(., sex == "male") + +# magrittr's way + data.table's way + additional conditions +filter_male <- + . \%>\% + .[sex == "male", ] \%>\% + .[age >= 50, ] + +} + +New variables can also be added to the entity data to be used in predicting +their transition probability. Again we can use \code{dplyr}'s or \code{data.table}'s way. +The examples below show how you can add a 5-year age group variable, called \code{age5}, +to the entity data.\preformatted{# magrittr's way + data.table's way +filter_male <- + . \%>\% + .[sex == "male" & age >= 50, ] \%>\% + .[, age5 := cut(age, breaks = c(seq(0,80,5), Inf), include.lowest = TRUE, right = FALSE)] + +# magrittr's way + dplyr's way +filter_male <- + . \%>\% + dplyr::filter(., sex == "male" & age >= 50) \%>\% + dplyr::mutate(., age5 = cut(age, breaks = c(seq(0,80,5), Inf), include.lowest = TRUE, right = FALSE)) +} + +Note that, new variables added inside preprocessing_fn won't +change the attribute data of the \code{entity} object that is undergoing a transition. +These variables only appear temporary within the context of the transition. +} +\examples{ + +# create a filter function +library(caret) + +filter_male <- + . \%>\% + .[sex == "male", ] + +filter_not_dead <- + . \%>\% + .[age != -1] + +# create a multinomial logit model using `caret` +mnl <- caret::train(marital_status ~ age + sex, + data = toy_individuals, + method = "multinom", + trace = FALSE) +# this model denotes that there is a 10\% chance that an individual will decease +death_model <- list(yes = 0.1, no = 0.9) + +# create a toy world +create_toy_world() + +# simulate marital status transition and update the attribute. +transition(world, + entity = "Individual", + model = mnl, + preprocessing_fn = filter_male, + attr = "marital_status") + +# get a transition result +get_transition(world, + entity = "Individual", + model = mnl, + preprocessing_fn = filter_male) + +# lets make a pipeline of transitions +world \%>\% + transition(entity = "Individual", + model = mnl, + preprocessing_fn = . \%>\% filter_male \%>\% filter_not_dead, + attr = "marital_status") \%>\% + transition(entity = "Individual", + model = death_model, + preprocessing_fn = filter_not_dead, + attr = "age", + values = c(yes = -1L)) +# print the attributes of the individual agents +world$entities$Individual$get_data() +} diff --git a/man/which_max_n.Rd b/man/which_max_n.Rd new file mode 100644 index 00000000..5a00783c --- /dev/null +++ b/man/which_max_n.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{which_max_n} +\alias{which_max_n} +\alias{which_min_n} +\title{Return the indices of n maximum or minimum values} +\usage{ +which_max_n(x, n = 1) + +which_min_n(x, n = 1) +} +\arguments{ +\item{x}{:: (\code{numeric()})\cr +an numeric vector} + +\item{n}{:: (\code{integer(1)})\cr +number of values to return.} +} +\value{ +an \code{integer()} vector. +} +\description{ +Return the indices of n maximum or minimum values +} +\examples{ + +which_max_n(1:4, 2) +which_min_n(1:4, 2) + +} From 81f692305c3ed609f786cd2a7c0f4943a943347b Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Tue, 2 Jun 2020 08:38:11 +1000 Subject: [PATCH 10/29] feat: add which_max_n and which_min_n these return n indices of maximum and minimum values in a numeric vector --- NEWS.md | 2 ++ R/utils.R | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/NEWS.md b/NEWS.md index fe76e5b7..a56965aa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # dymiumCore (development version) +- renamed `unnest_datatable` to `unnest_dt` and add `unnestv_dt` which can take names of the list columns as a character vector. - introduced `ModelCustom` a model class that let users specify its parameters and `predict` function. We also add `ModelMultinomialLogit`, `ModelBinaryChoice` and `ModelLinear` which are implementations of `ModelCustom`. +- add helper functions `which_min_n` and `which_max_n`. # dymiumCore 0.1.8 diff --git a/R/utils.R b/R/utils.R index 058185a8..221eb134 100644 --- a/R/utils.R +++ b/R/utils.R @@ -472,3 +472,28 @@ unnest_dt <- function(dt, cols) { dt } +#' Return the indices of n maximum or minimum values +#' +#' @param x :: (`numeric()`)\cr +#' an numeric vector +#' @param n :: (`integer(1)`)\cr +#' number of values to return. +#' +#' @return an `integer()` vector. +#' @export +#' +#' @examples +#' +#' which_max_n(1:4, 2) +#' which_min_n(1:4, 2) +#' +which_max_n <- function(x, n = 1) { + which(x >= -sort(-x, partial = n)[n]) +} + + +#' @rdname which_max_n +#' @export +which_min_n <- function(x, n = 1) { + which(x < -sort(-x, partial = n)[n]) +} From aaf8523499ec2f93c89ab27faed43e036cd1ecd0 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Tue, 2 Jun 2020 09:29:30 +1000 Subject: [PATCH 11/29] unexport unnest_datatable --- NAMESPACE | 1 - R/utils.R | 55 +++++++++++++++++++++++------------------ _pkgdown.yml | 2 +- man/unnest_datatable.Rd | 12 ++++++--- man/unnest_dt.Rd | 2 +- 5 files changed, 41 insertions(+), 31 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d6087006..f38579a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,7 +119,6 @@ export(test_subset2) export(test_target) export(test_transition_supported_model) export(transition) -export(unnest_datatable) export(unnest_dt) export(use_event) export(use_module) diff --git a/R/utils.R b/R/utils.R index 221eb134..717c7fa9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -279,6 +279,10 @@ lookup_and_replace2 <- function(x, cols, mapping) { #' .lookup_and_replace_list_cols #' +#' @description +#' +#' This unnest function keeps +#' #' @param data data.table #' @param lookup_table lookup_table #' @param id_col NA @@ -305,9 +309,8 @@ lookup_and_replace2 <- function(x, cols, mapping) { if (!is.data.table(lookup_table)) lookup_table <- as.data.table(lookup_table) - .data <- - unnest_datatable(.data, by_col = id_col) %>% + unnest_datatable(.data, id_col) %>% .[lookup_table, on = paste0(list_cols, "==.key"), eval((list_cols)) := value] %>% @@ -336,27 +339,6 @@ lookup_and_replace2 <- function(x, cols, mapping) { return(data) } -#' unnest_datatable -#' -#' unnest data.table object -#' -#' @param dt data.table object -#' @param by_col reference column -#' -#' @export -unnest_datatable = function(dt, by_col) { - stopifnot(is.data.table(dt)) - stopifnot(all(by_col %in% names(dt))) - - # unnest the list column - # https://stackoverflow.com/questions/34692260/how-to-ungroup-list-columns-in-data-table - dt <- dt[, - lapply(.SD, unlist), - by = eval((by_col))] - - return(dt) -} - #' Group a column into a list column and sort by the group-by column #' #' @param x :: [data.table::data.table()] @@ -440,7 +422,7 @@ get_current_git_branch <- function() { return(current_branch) } -#' Unnest data.table +#' Unnest data.table by list columns #' #' !! Note that this is only working when all list columns in the given #' data.frame are to be unnested. See https://github.com/dymium-org/dymiumCore/issues/79. @@ -458,6 +440,10 @@ unnest_dt <- function(dt, cols) { stop("`dt` must be a data.frame or a data.table.") } + if (sum(sapply(dt, is.list)) != length(cols)) { + stop("This unnest function only works if all list columns are to be unnested.") + } + if (!is.data.table(dt)) { dt <- as.data.table(dt) } @@ -472,6 +458,27 @@ unnest_dt <- function(dt, cols) { dt } + +#' unnest data.table by reference column(s). +#' +#' for internal use only. This will be replaced with a more native approach once +#' https://github.com/Rdatatable/data.table/pull/4156 is officially included in +#' data.table. +#' +#' @param dt [data.table::data.table()]\cr +#' a data.table object. +#' @param by_col (`character(1)`)\cr +#' A reference column. +unnest_datatable = function(dt, by_col) { + stopifnot(is.data.table(dt)) + stopifnot(all(by_col %in% names(dt))) + + # unnest the list column + # https://stackoverflow.com/questions/34692260/how-to-ungroup-list-columns-in-data-table + dt[, lapply(.SD, unlist), by = c(by_col)] +} + + #' Return the indices of n maximum or minimum values #' #' @param x :: (`numeric()`)\cr diff --git a/_pkgdown.yml b/_pkgdown.yml index 872d6a73..312c3bbf 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -146,7 +146,7 @@ reference: - lookup_and_replace2 - condense_rows - element_wise_expand_lists - - unnest_datatable + - unnest_dt - dt_group_and_sort - validate_linkages - is_dymium_class diff --git a/man/unnest_datatable.Rd b/man/unnest_datatable.Rd index fcc23dac..79d0968d 100644 --- a/man/unnest_datatable.Rd +++ b/man/unnest_datatable.Rd @@ -2,15 +2,19 @@ % Please edit documentation in R/utils.R \name{unnest_datatable} \alias{unnest_datatable} -\title{unnest_datatable} +\title{unnest data.table by reference column(s).} \usage{ unnest_datatable(dt, by_col) } \arguments{ -\item{dt}{data.table object} +\item{dt}{\code{\link[data.table:data.table]{data.table::data.table()}}\cr +a data.table object.} -\item{by_col}{reference column} +\item{by_col}{(\code{character(1)})\cr +A reference column.} } \description{ -unnest data.table object +for internal use only. This will be replaced with a more native approach once +https://github.com/Rdatatable/data.table/pull/4156 is officially included in +data.table. } diff --git a/man/unnest_dt.Rd b/man/unnest_dt.Rd index 2b7df343..3fec44ed 100644 --- a/man/unnest_dt.Rd +++ b/man/unnest_dt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{unnest_dt} \alias{unnest_dt} -\title{Unnest data.table} +\title{Unnest data.table by list columns} \usage{ unnest_dt(dt, cols) } From 9feb485e93b31e24679ad97056685e652c76c2c8 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Tue, 2 Jun 2020 09:29:45 +1000 Subject: [PATCH 12/29] update package files --- DESCRIPTION | 5 +++++ NAMESPACE | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 9af0dc29..73841ab5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -83,6 +83,10 @@ Collate: 'MatchingMarketOptimal.R' 'MatchingMarketStochastic.R' 'Model.R' + 'ModelBinaryChoice.R' + 'ModelCustom.R' + 'ModelLinear.R' + 'ModelMultinomialLogit.R' 'Network.R' 'Population.R' 'Pipeline.R' @@ -96,6 +100,7 @@ Collate: 'alignment.R' 'checkmate.R' 'checks.R' + 'choice-simulation.R' 'constants.R' 'create-world.R' 'data.R' diff --git a/NAMESPACE b/NAMESPACE index f38579a7..363d19b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,7 +38,10 @@ export(MatchingMarket) export(MatchingMarketOptimal) export(MatchingMarketStochastic) export(Model) +export(ModelBinaryChoice) export(ModelCustom) +export(ModelLinear) +export(ModelMultinomialLogit) export(Network) export(Pipeline) export(Population) @@ -124,6 +127,8 @@ export(use_event) export(use_module) export(use_module_readme) export(validate_linkages) +export(which_max_n) +export(which_min_n) import(R6) import(data.table) importFrom(checkmate,makeExpectation) From f386dcb16928d9c3ae9490b88ec1e9d9c0f093c9 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Thu, 4 Jun 2020 02:13:27 +1000 Subject: [PATCH 13/29] test(unnest_dt): update error msg in a unit test --- tests/testthat/test-utils.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 27b3a391..37feb683 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -123,7 +123,8 @@ test_that("unnest_dt and unnestv_dt works", { id = 1:3, list_col_a = list(c("a","b","c"), c("a","b","c"), c("a","b","c")), list_col_b = list(c("a","b","c"), c("a","b","c"), c("a","b","c"))) - expect_error(unnest_dt(dt, "list_col_a"), "column or expression 2 of 'by' or 'keyby' is type list") + expect_error(unnest_dt(dt, "list_col_a"), + "This unnest function only works if all list columns are to be unnested") }) From fc33958ddd48a666a2e43a50b44618993cb247fe Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Tue, 16 Jun 2020 11:04:52 +1000 Subject: [PATCH 14/29] add preprocessing_fn as a constructor arg for new Model classes --- R/ModelBinaryChoice.R | 9 +++++++-- R/ModelCustom.R | 18 +++++++++++++----- R/ModelLinear.R | 9 +++++++-- R/ModelMultinomialLogit.R | 9 +++++++-- man/ModelBinaryChoice.Rd | 3 +++ man/ModelCustom.Rd | 9 ++++++++- man/ModelLinear.Rd | 3 +++ man/ModelMultinomialLogit.Rd | 3 +++ 8 files changed, 51 insertions(+), 12 deletions(-) diff --git a/R/ModelBinaryChoice.R b/R/ModelBinaryChoice.R index 2bceffd0..f8459744 100644 --- a/R/ModelBinaryChoice.R +++ b/R/ModelBinaryChoice.R @@ -16,10 +16,15 @@ ModelBinaryChoice <- R6::R6Class( #' #' @param params a `data.frame` object. #' @param formula a `formula` object. + #' @param preprocessing_fn a pre-processing function that gets applied to the + #' data given to the `predict` method before making the prediction. #' #' @return NULL - initialize = function(params, formula) { - super$initialize(params, formula, type = "binary_choice") + initialize = function(params, formula, preprocessing_fn) { + super$initialize(params = params, + formula = formula, + type = "binary_choice", + preprocessing_fn = preprocessing_fn) invisible(NULL) }, diff --git a/R/ModelCustom.R b/R/ModelCustom.R index 4ceed900..c93dbb4c 100644 --- a/R/ModelCustom.R +++ b/R/ModelCustom.R @@ -28,16 +28,24 @@ ModelCustom <- R6::R6Class( #' #' Constructor function. #' - #' @param params - #' @param formula - #' @param type + #' @param params a named `numeric()`. + #' @param formula a model `formula()`. + #' @param type type of the model. + #' @param preprocessing_fn a pre-processing function that gets applied to the + #' data given to the `predict` method before making the prediction. #' #' @return `NULL` - initialize = function(params, formula, type = "custom") { + initialize = function(params, formula, type = "custom", preprocessing_fn) { - self$params = checkmate::assert_numeric(params, finite = T, any.missing = FALSE, names = "unique") + self$params = checkmate::assert_numeric(params, + finite = T, + any.missing = FALSE, + names = "unique") self$formula = checkmate::assert_formula(formula, null.ok = FALSE) self$type = checkmate::assert_string(type, na.ok = FALSE) + self$preprocessing_fn = checkmate::assert_function(preprocessing_fn, + nargs = 1, + null.ok = TRUE) self$terms = terms(formula) private$.model = self diff --git a/R/ModelLinear.R b/R/ModelLinear.R index 50ee6817..ede7c396 100644 --- a/R/ModelLinear.R +++ b/R/ModelLinear.R @@ -12,10 +12,15 @@ ModelLinear <- R6::R6Class( #' #' @param params a `data.frame` object. #' @param formula a `formula` object. + #' @param preprocessing_fn a pre-processing function that gets applied to the + #' data given to the `predict` method before making the prediction. #' #' @return NULL - initialize = function(params, formula) { - super$initialize(params, formula, type = "linear") + initialize = function(params, formula, preprocessing_fn) { + super$initialize(params = params, + formula = formula, + type = "linear", + preprocessing_fn = preprocessing_fn) invisible(NULL) }, diff --git a/R/ModelMultinomialLogit.R b/R/ModelMultinomialLogit.R index 7ce4d77d..d1d36afe 100644 --- a/R/ModelMultinomialLogit.R +++ b/R/ModelMultinomialLogit.R @@ -19,10 +19,15 @@ ModelMultinomialLogit <- R6::R6Class( #' #' @param params #' @param formula a `formula` or [mlogit::mFormula()] object. + #' @param preprocessing_fn a pre-processing function that gets applied to the + #' data given to the `predict` method before making the prediction. #' #' @return NULL - initialize = function(params, formula) { - super$initialize(params, formula, type = "multinomial") + initialize = function(params, formula, preprocessing_fn) { + super$initialize(params = params, + formula = formula, + type = "multinomial", + preprocessing_fn = preprocessing_fn) invisible(NULL) }, diff --git a/man/ModelBinaryChoice.Rd b/man/ModelBinaryChoice.Rd index 238ee32f..d7d73197 100644 --- a/man/ModelBinaryChoice.Rd +++ b/man/ModelBinaryChoice.Rd @@ -11,6 +11,9 @@ \item{formula}{a \code{formula} object.} +\item{preprocessing_fn}{a pre-processing function that gets applied to the +data given to the \code{predict} method before making the prediction.} + \item{newdata}{a \code{data.frame} object.} \item{link_function}{:: \code{character(1)}\cr diff --git a/man/ModelCustom.Rd b/man/ModelCustom.Rd index 848359f5..c5fb691a 100644 --- a/man/ModelCustom.Rd +++ b/man/ModelCustom.Rd @@ -4,7 +4,14 @@ \alias{ModelCustom} \title{ModelCustom} \arguments{ -\item{type}{} +\item{params}{a named \code{numeric()}.} + +\item{formula}{a model \code{formula()}.} + +\item{type}{type of the model.} + +\item{preprocessing_fn}{a pre-processing function that gets applied to the +data given to the \code{predict} method before making the prediction.} } \value{ \code{NULL} diff --git a/man/ModelLinear.Rd b/man/ModelLinear.Rd index b055074e..0fac0290 100644 --- a/man/ModelLinear.Rd +++ b/man/ModelLinear.Rd @@ -7,6 +7,9 @@ \item{params}{a \code{data.frame} object.} \item{formula}{a \code{formula} object.} + +\item{preprocessing_fn}{a pre-processing function that gets applied to the +data given to the \code{predict} method before making the prediction.} } \description{ Initialisation function diff --git a/man/ModelMultinomialLogit.Rd b/man/ModelMultinomialLogit.Rd index ab849202..037a08ee 100644 --- a/man/ModelMultinomialLogit.Rd +++ b/man/ModelMultinomialLogit.Rd @@ -6,6 +6,9 @@ \arguments{ \item{formula}{a \code{formula} or \code{\link[mlogit:mFormula]{mlogit::mFormula()}} object.} +\item{preprocessing_fn}{a pre-processing function that gets applied to the +data given to the \code{predict} method before making the prediction.} + \item{newdata}{(\code{data.frame()}) \cr new data to generate probabilities conditioned on its explanatory variables.} From 73795bf3ed3f04f4bf44967d92bf57cd45d820e2 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Tue, 16 Jun 2020 11:18:29 +1000 Subject: [PATCH 15/29] add makeModel function currently on a glm model fitted with caret is supported --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 1 + R/makeModel.R | 62 +++++++++++++++++++++++++++++++++ man/makeModel.Rd | 25 +++++++++++++ tests/testthat/test-makeModel.R | 12 +++++++ 6 files changed, 102 insertions(+) create mode 100644 R/makeModel.R create mode 100644 man/makeModel.Rd create mode 100644 tests/testthat/test-makeModel.R diff --git a/DESCRIPTION b/DESCRIPTION index 73841ab5..eb7ca253 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -105,6 +105,7 @@ Collate: 'create-world.R' 'data.R' 'dymiumCore-package.R' + 'makeModel.R' 'module.R' 'mutate-entity.R' 'population-register.R' diff --git a/NAMESPACE b/NAMESPACE index 363d19b7..2538625e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -102,6 +102,7 @@ export(is_dymium_class) export(is_scheduled) export(lookup_and_replace) export(lookup_and_replace2) +export(makeModel) export(mutate_entity) export(normalise_derived_vars) export(omit_derived_varnames) diff --git a/NEWS.md b/NEWS.md index a56965aa..90257b56 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ - renamed `unnest_datatable` to `unnest_dt` and add `unnestv_dt` which can take names of the list columns as a character vector. - introduced `ModelCustom` a model class that let users specify its parameters and `predict` function. We also add `ModelMultinomialLogit`, `ModelBinaryChoice` and `ModelLinear` which are implementations of `ModelCustom`. - add helper functions `which_min_n` and `which_max_n`. +- add `makeModel` to create a light weight Model object that can be used in the transition functions and classes. This basically creates an appropriate Model class from the given model object. # dymiumCore 0.1.8 diff --git a/R/makeModel.R b/R/makeModel.R new file mode 100644 index 00000000..a1906ec2 --- /dev/null +++ b/R/makeModel.R @@ -0,0 +1,62 @@ +#' make Model +#' +#' @description +#' Make a light weight model object that can be used by `transition()`, `TransitionClassification`, `TransitionRegression`. +#' +#' @param model a model object. See `SupportedTransitionModels()`. +#' @param ... dots. +#' +#' @return a `Model` object. +#' @export +makeModel <- function(model, ...) { + UseMethod("makeModel") +} + +#' @rdname makeModel +makeModel.train <- function(model, preprocessing_fn = NULL) { + + compatible_methods = c("glm") + + if (!checkmate::test_subset(x = model$method, choices = compatible_methods, empty.ok = FALSE)) { + stop( + "ModelCaretTrain only works with: ", paste(compatible_methods, collapse = ", "), + ".", " Consider using `Model` instead if the given model is not currently supported." + ) + } + + if (model$method == "glm") { + return(ModelBinaryChoice$new(params = model$finalModel$coefficients, + formula = model$terms, + preprocessing_fn = preprocessing_fn)) + } + + # if (model$method == "multinom") { + # + # stop("Not") + # browser() + # + # params <- model[["finalModel"]][["wts"]] + # + # names(params) <- + # lapply(model[["finalModel"]][["lab"]], function(x) { + # c(x, model[["finalModel"]][["coefnames"]]) + # }) %>% unlist() + # + # model.matrix(model$terms) + # + # m <- model.frame(formula = model$terms, toy_individuals) + # Terms <- delete.response(model$terms) + # x <- model.matrix(Terms, m, contrasts = model$contrasts) + # + # return(ModelMultinomialLogit$new(params = params, + # formula = model$terms, + # preprocessing_fn = preprocessing_fn)) + # } + + stop("something went wrong.") +} + +#' @rdname makeModel +makeModel.WrappedModel <- function() { + stop("Not implemented yet.") +} diff --git a/man/makeModel.Rd b/man/makeModel.Rd new file mode 100644 index 00000000..bce642fc --- /dev/null +++ b/man/makeModel.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/makeModel.R +\name{makeModel} +\alias{makeModel} +\alias{makeModel.train} +\alias{makeModel.WrappedModel} +\title{make Model} +\usage{ +makeModel(model, ...) + +\method{makeModel}{train}(model, preprocessing_fn = NULL) + +\method{makeModel}{WrappedModel}() +} +\arguments{ +\item{model}{a model object. See \code{SupportedTransitionModels()}.} + +\item{...}{dots.} +} +\value{ +a \code{Model} object. +} +\description{ +Make a light weight model object that can be used by \code{transition()}, \code{TransitionClassification}, \code{TransitionRegression}. +} diff --git a/tests/testthat/test-makeModel.R b/tests/testthat/test-makeModel.R new file mode 100644 index 00000000..322c4b59 --- /dev/null +++ b/tests/testthat/test-makeModel.R @@ -0,0 +1,12 @@ +test_that("makeModel", { + + # binary choice model + bc_model <- create_caret_binary_model() + Mod <- makeModel(bc_model) + checkmate::expect_r6(Mod,classes = "ModelBinaryChoice") + + # multinomial choice model + # mnl_model <- create_caret_multinomial_model() + # Mod <- makeModel(mnl_model) + +}) From 0c843c6a4c9edc888388185ccd991d1b08f09182 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Tue, 16 Jun 2020 11:19:29 +1000 Subject: [PATCH 16/29] fix: make preprocessing_fn of the new Model classes default as NULL --- R/ModelBinaryChoice.R | 2 +- R/ModelLinear.R | 2 +- R/ModelMultinomialLogit.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/ModelBinaryChoice.R b/R/ModelBinaryChoice.R index f8459744..8bcc3afe 100644 --- a/R/ModelBinaryChoice.R +++ b/R/ModelBinaryChoice.R @@ -20,7 +20,7 @@ ModelBinaryChoice <- R6::R6Class( #' data given to the `predict` method before making the prediction. #' #' @return NULL - initialize = function(params, formula, preprocessing_fn) { + initialize = function(params, formula, preprocessing_fn = NULL) { super$initialize(params = params, formula = formula, type = "binary_choice", diff --git a/R/ModelLinear.R b/R/ModelLinear.R index ede7c396..af3bd78e 100644 --- a/R/ModelLinear.R +++ b/R/ModelLinear.R @@ -16,7 +16,7 @@ ModelLinear <- R6::R6Class( #' data given to the `predict` method before making the prediction. #' #' @return NULL - initialize = function(params, formula, preprocessing_fn) { + initialize = function(params, formula, preprocessing_fn = NULL) { super$initialize(params = params, formula = formula, type = "linear", diff --git a/R/ModelMultinomialLogit.R b/R/ModelMultinomialLogit.R index d1d36afe..0f246a50 100644 --- a/R/ModelMultinomialLogit.R +++ b/R/ModelMultinomialLogit.R @@ -23,7 +23,7 @@ ModelMultinomialLogit <- R6::R6Class( #' data given to the `predict` method before making the prediction. #' #' @return NULL - initialize = function(params, formula, preprocessing_fn) { + initialize = function(params, formula, preprocessing_fn = NULL) { super$initialize(params = params, formula = formula, type = "multinomial", From 38358314974d4a0853ca919d0688fc7366f31c4a Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 00:25:20 +1000 Subject: [PATCH 17/29] fix(ModelMultinomialLogit): add support for the changes introduced in mlogit v1.1.0 #84 --- DESCRIPTION | 2 +- R/ModelMultinomialLogit.R | 39 +++++++++++++++++++- man/ModelMultinomialLogit.Rd | 9 ++++- tests/testthat/test-ModelMultinomialLogit.R | 41 +++++++++++++++++++++ 4 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-ModelMultinomialLogit.R diff --git a/DESCRIPTION b/DESCRIPTION index eb7ca253..afe77755 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,7 @@ Suggests: furrr (>= 0.1.0), testthat (>= 2.1.0), fastmatch (>= 1.1.0), - mlogit (>= 1.0), + mlogit (>= 1.1.0), caret (>= 6.0.0), mlr (>= 2.17.0), nnet (>= 7.3.0), diff --git a/R/ModelMultinomialLogit.R b/R/ModelMultinomialLogit.R index 0f246a50..55f52452 100644 --- a/R/ModelMultinomialLogit.R +++ b/R/ModelMultinomialLogit.R @@ -8,6 +8,13 @@ #' parameters. #' #' @export +#' +#' @examples +#' +#' if (requireNamespace('mlogit')) { +#' +#' +#' } ModelMultinomialLogit <- R6::R6Class( classname = "ModelMultinomialLogit", inherit = ModelCustom, @@ -18,16 +25,27 @@ ModelMultinomialLogit <- R6::R6Class( #' Initialisation function #' #' @param params - #' @param formula a `formula` or [mlogit::mFormula()] object. + #' @param formula a `formula` object of class [mlogit::mFormula()], [Formula::Formula], or `formula`. #' @param preprocessing_fn a pre-processing function that gets applied to the #' data given to the `predict` method before making the prediction. #' #' @return NULL initialize = function(params, formula, preprocessing_fn = NULL) { + + required_pkgs <- c("mlogit") + # required_versions <- c("1.1.0") + + for (i in seq_along(required_pkgs)) { + if (!requireNamespace(required_pkgs[[i]])) { + stop("Required ", required_pkgs[[i]], " to be installed.") + } + } + super$initialize(params = params, formula = formula, type = "multinomial", preprocessing_fn = preprocessing_fn) + invisible(NULL) }, @@ -47,10 +65,27 @@ ModelMultinomialLogit <- R6::R6Class( #' choice_id (`integer()`), linear_comb (`numeric()`), prob (`numeric()`). Note #' that, 'linear_comb' stands for linear combination (i.e. $$B1 * x1 + B2 * x2$$). predict = function(newdata, chooser_id_col, choice_id_col) { + checkmate::expect_data_frame(newdata) data.table(chooser_id = newdata[[chooser_id_col]], choice_id = newdata[[choice_id_col]], - linear_comb = private$.compute_linear_combination(newdata)) %>% + linear_comb = private$.compute_linear_combination(newdata, chooser_id_col, choice_id_col)) %>% .[, prob := exp(linear_comb)/sum(exp(linear_comb)), by = chooser_id] } + ), + + private = list( + .compute_linear_combination = function(newdata, chooser_id_col, choice_id_col) { + if (inherits(newdata, "dfidx")) { + checkmate::expect_names(x = names(newdata$idx), + identical.to = c(chooser_id_col, choice_id_col)) + } else { + newdata <- + dfidx::dfidx(newdata, idx = c(chooser_id_col, choice_id_col)) + } + mf <- model.frame(newdata, self$formula) + # see https://github.com/dymium-org/dymiumCore/issues/84 + mm <- mlogit:::model.matrix.dfidx_mlogit(mf) + return(as.numeric(self$params %*% t(mm))) + } ) ) diff --git a/man/ModelMultinomialLogit.Rd b/man/ModelMultinomialLogit.Rd index 037a08ee..fdfc3fdc 100644 --- a/man/ModelMultinomialLogit.Rd +++ b/man/ModelMultinomialLogit.Rd @@ -4,7 +4,7 @@ \alias{ModelMultinomialLogit} \title{ModelMultinomialLogit} \arguments{ -\item{formula}{a \code{formula} or \code{\link[mlogit:mFormula]{mlogit::mFormula()}} object.} +\item{formula}{a \code{formula} object of class \code{\link[mlogit:mFormula]{mlogit::mFormula()}}, \link[Formula:Formula]{Formula::Formula}, or \code{formula}.} \item{preprocessing_fn}{a pre-processing function that gets applied to the data given to the \code{predict} method before making the prediction.} @@ -35,3 +35,10 @@ of this \link{Model} object. This model object is use to create a multinomial model using already estimated parameters. } +\examples{ + +if (requireNamespace('mlogit')) { + + +} +} diff --git a/tests/testthat/test-ModelMultinomialLogit.R b/tests/testthat/test-ModelMultinomialLogit.R new file mode 100644 index 00000000..3b229aab --- /dev/null +++ b/tests/testthat/test-ModelMultinomialLogit.R @@ -0,0 +1,41 @@ +test_that("ModelMultinomialLogit", { + if (requireNamespace('mlogit')) { + + data("Fishing", package = "mlogit") + + # fitting + .data_dfidx <- dfidx::dfidx(Fishing, varying = 2:9, shape = "wide", choice = "mode") + mod <- mlogit::mlogit(mode ~ price + catch, data = .data_dfidx) + + # data for prediction + .data <- dfidx::unfold_idx(.data_dfidx) + + Mod <- ModelMultinomialLogit$new(params = mod$coefficients, formula = mod$formula) + + Mod_formula <- ModelMultinomialLogit$new(params = mod$coefficients, formula = mode ~ price + catch) + + + # compare predictions + prediction_from_mlogit <- + predict(mod, .data_dfidx) %>% + as.data.table() + prediction_from_Mod <- + Mod$predict(.data, chooser_id_col = "id1", choice_id_col = "id2") %>% + data.table::dcast(chooser_id ~ choice_id, value.var = "prob") %>% + .[, -"chooser_id"] + prediction_from_Mod_formula <- Mod_formula$predict(.data, + chooser_id_col = "id1", + choice_id_col = "id2") %>% + data.table::dcast(chooser_id ~ choice_id, value.var = "prob") %>% + .[, -"chooser_id"] + prediction_from_Mod_using_dfidx <- + Mod$predict(.data_dfidx, chooser_id_col = "id1", choice_id_col = "id2") %>% + data.table::dcast(chooser_id ~ choice_id, value.var = "prob") %>% + .[, -"chooser_id"] + + expect_true(all.equal(prediction_from_mlogit, prediction_from_Mod)) + expect_true(all.equal(prediction_from_mlogit, prediction_from_Mod_formula)) + expect_true(all.equal(prediction_from_mlogit, prediction_from_Mod_using_dfidx)) + + } +}) From afcae7ad1386d2db7258607a8617b1051685ddda Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 00:25:59 +1000 Subject: [PATCH 18/29] add mlogit to setup-models.R --- tests/testthat/setup-models.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/testthat/setup-models.R b/tests/testthat/setup-models.R index a32fc058..3246a7f2 100644 --- a/tests/testthat/setup-models.R +++ b/tests/testthat/setup-models.R @@ -59,3 +59,8 @@ create_glm_binary_model <- function() { family = "binomial" ) } + +create_mlogit_model <- function() { + .data_dfidx <- dfidx::dfidx(Fishing, varying = 2:9, shape = "wide", choice = "mode") + mod <- mlogit::mlogit(mode ~ price + catch, data = .data_dfidx) +} From 4878f7a1ee07b3eced378dcee73d9b699d626b21 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 00:26:12 +1000 Subject: [PATCH 19/29] add mlogit to makeModel --- R/makeModel.R | 9 ++++++++- man/makeModel.Rd | 5 ++++- tests/testthat/test-makeModel.R | 9 +++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/R/makeModel.R b/R/makeModel.R index a1906ec2..fb7c4a03 100644 --- a/R/makeModel.R +++ b/R/makeModel.R @@ -13,6 +13,7 @@ makeModel <- function(model, ...) { } #' @rdname makeModel +#' @export makeModel.train <- function(model, preprocessing_fn = NULL) { compatible_methods = c("glm") @@ -57,6 +58,12 @@ makeModel.train <- function(model, preprocessing_fn = NULL) { } #' @rdname makeModel -makeModel.WrappedModel <- function() { +makeModel.WrappedModel <- function(model) { stop("Not implemented yet.") } + +#' @rdname makeModel +#' @export +makeModel.mlogit <- function(model) { + ModelMultinomialLogit$new(params = model[['coefficients']], formula = model[['formula']]) +} diff --git a/man/makeModel.Rd b/man/makeModel.Rd index bce642fc..088cbaa4 100644 --- a/man/makeModel.Rd +++ b/man/makeModel.Rd @@ -4,13 +4,16 @@ \alias{makeModel} \alias{makeModel.train} \alias{makeModel.WrappedModel} +\alias{makeModel.mlogit} \title{make Model} \usage{ makeModel(model, ...) \method{makeModel}{train}(model, preprocessing_fn = NULL) -\method{makeModel}{WrappedModel}() +\method{makeModel}{WrappedModel}(model) + +\method{makeModel}{mlogit}(model) } \arguments{ \item{model}{a model object. See \code{SupportedTransitionModels()}.} diff --git a/tests/testthat/test-makeModel.R b/tests/testthat/test-makeModel.R index 322c4b59..0b675234 100644 --- a/tests/testthat/test-makeModel.R +++ b/tests/testthat/test-makeModel.R @@ -3,10 +3,11 @@ test_that("makeModel", { # binary choice model bc_model <- create_caret_binary_model() Mod <- makeModel(bc_model) - checkmate::expect_r6(Mod,classes = "ModelBinaryChoice") + checkmate::expect_r6(Mod, classes = "ModelBinaryChoice") - # multinomial choice model - # mnl_model <- create_caret_multinomial_model() - # Mod <- makeModel(mnl_model) + # mlogit model + mlogit_model <- create_mlogit_model() + Mod <- makeModel(mlogit_model) + checkmate::expect_r6(Mod, classes = "ModelMultinamialLogit") }) From 1058e98a1a5b89138709ae649eeadd1687738578 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 00:29:06 +1000 Subject: [PATCH 20/29] test: move ModelMultinomialLogit test --- tests/testthat/test-ModelCustom.R | 54 +-------------------- tests/testthat/test-ModelMultinomialLogit.R | 36 ++++++++++++++ 2 files changed, 37 insertions(+), 53 deletions(-) diff --git a/tests/testthat/test-ModelCustom.R b/tests/testthat/test-ModelCustom.R index e44712c9..2c433b6f 100644 --- a/tests/testthat/test-ModelCustom.R +++ b/tests/testthat/test-ModelCustom.R @@ -19,56 +19,4 @@ test_that("ModelLinear and ModelBinaryChoice", { mBinaryChoice <- ModelBinaryChoice$new(params = params, formula = my_formula) checkmate::expect_numeric(mBinaryChoice$predict(test_data),lower = 0, upper = 1, finite = T, any.missing = FALSE, len = num_rows) -}) - -test_that("ModelMultinomialLogit", { - - num_rows <- 100 - num_choices = 30 - - my_formula <- chosen ~ x1 + x2 + I(x1^2) + x1:x2 + 0 - params = c(x1 = 2.5, x2 = 3, `I(x1^2)` = 0.5 , `x1:x2` = 1) - - test_chooser_data <- data.table( - id = 1:num_rows, - sex = sample(c("male", "female"), size = num_rows, replace = T), - age = sample(1:100, num_rows, replace = T) - ) %>% - .[, choiceset := list(list(sample(1:num_choices, size = sample(2:10, 1), replace = FALSE))), by = id] - - test_alternative_data <- data.table( - choice_id = 1:num_choices, - x1 = runif(num_choices), - x2 = runif(num_choices) - ) - - multinomial_test_data <- - test_chooser_data %>% - unnest_dt(., cols = "choiceset") %>% - .[, `:=`(choice_id = choiceset, choiceset = NULL)] %>% - merge(., test_alternative_data, by = "choice_id") %>% - .[, chosen := sample(c(T, rep(F, .N - 1))), by = "id"] %>% - data.table::setcolorder(c("id", "choice_id")) %>% - data.table::setorder("id") - - m <- ModelMultinomialLogit$new(params = params, formula = my_formula) - res <- m$predict(multinomial_test_data, chooser_id_col = "id", choice_id_col = "choice_id") - - if (requireNamespace('mlogit')) { - require("mlogit") - # no intercept! - my_formula <- mlogit::mFormula(update(my_formula, ~ . + 0)) - mlogit_model <- mlogit::mlogit(my_formula, data = multinomial_test_data, - choice="chosen", - chid.var="id", - alt.var = "choice_id", - shape = "long") - m <- ModelMultinomialLogit$new(params = params, formula = my_formula) - res_mlogit <- m$predict(multinomial_test_data, chooser_id_col = "id", choice_id_col = "choice_id") - checkmate::expect_data_table(res_mlogit, any.missing = FALSE, col.names = "strict", ncols = 4) - } - - expect_equal(res_mlogit, res) - - -}) +}) \ No newline at end of file diff --git a/tests/testthat/test-ModelMultinomialLogit.R b/tests/testthat/test-ModelMultinomialLogit.R index 3b229aab..94313a67 100644 --- a/tests/testthat/test-ModelMultinomialLogit.R +++ b/tests/testthat/test-ModelMultinomialLogit.R @@ -39,3 +39,39 @@ test_that("ModelMultinomialLogit", { } }) + +test_that("ModelMultinomialLogit - different alternatives", { + num_rows <- 100 + num_choices = 30 + + my_formula <- chosen ~ x1 + x2 + I(x1^2) + x1:x2 + 0 + params = c(x1 = 2.5, x2 = 3, `I(x1^2)` = 0.5 , `x1:x2` = 1) + + test_chooser_data <- data.table( + id = 1:num_rows, + sex = sample(c("male", "female"), size = num_rows, replace = T), + age = sample(1:100, num_rows, replace = T) + ) %>% + .[, choiceset := list(list(sample(1:num_choices, size = sample(2:10, 1), replace = FALSE))), by = id] + + test_alternative_data <- data.table( + choice_id = 1:num_choices, + x1 = runif(num_choices), + x2 = runif(num_choices) + ) + + multinomial_test_data <- + test_chooser_data %>% + unnest_dt(., cols = "choiceset") %>% + .[, `:=`(choice_id = choiceset, choiceset = NULL)] %>% + merge(., test_alternative_data, by = "choice_id") %>% + .[, chosen := sample(c(T, rep(F, .N - 1))), by = "id"] %>% + data.table::setcolorder(c("id", "choice_id")) %>% + data.table::setorder("id") %>% + as.data.frame() + + m <- ModelMultinomialLogit$new(params = params, formula = my_formula) + prediction <- m$predict(multinomial_test_data, chooser_id_col = "id", choice_id_col = "choice_id") + checkmate::expect_data_table(prediction, any.missing = FALSE, col.names = "strict", ncols = 4) + +}) From 544db57fc8e9b8ea54775117acf3d10f9c9af0aa Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 00:34:18 +1000 Subject: [PATCH 21/29] Update NAMESPACE --- NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 2538625e..c4027aec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,8 @@ S3method(get_history,Entity) S3method(get_log,Container) S3method(get_log,Generic) S3method(get_log,World) +S3method(makeModel,mlogit) +S3method(makeModel,train) S3method(simulate_choice,data.frame) S3method(simulate_choice,glm) S3method(simulate_choice,train) From 0a92b1446b4798c72a491602a34580f416fdff2a Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 01:31:12 +1000 Subject: [PATCH 22/29] minor clean up --- DESCRIPTION | 1 - R/ModelMultinomialLogit.R | 2 +- man/ModelMultinomialLogit.Rd | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index afe77755..98b07e7e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -100,7 +100,6 @@ Collate: 'alignment.R' 'checkmate.R' 'checks.R' - 'choice-simulation.R' 'constants.R' 'create-world.R' 'data.R' diff --git a/R/ModelMultinomialLogit.R b/R/ModelMultinomialLogit.R index 55f52452..dd2166ac 100644 --- a/R/ModelMultinomialLogit.R +++ b/R/ModelMultinomialLogit.R @@ -24,7 +24,7 @@ ModelMultinomialLogit <- R6::R6Class( #' #' Initialisation function #' - #' @param params + #' @param params a named numeric vector. #' @param formula a `formula` object of class [mlogit::mFormula()], [Formula::Formula], or `formula`. #' @param preprocessing_fn a pre-processing function that gets applied to the #' data given to the `predict` method before making the prediction. diff --git a/man/ModelMultinomialLogit.Rd b/man/ModelMultinomialLogit.Rd index fdfc3fdc..86176df5 100644 --- a/man/ModelMultinomialLogit.Rd +++ b/man/ModelMultinomialLogit.Rd @@ -4,6 +4,8 @@ \alias{ModelMultinomialLogit} \title{ModelMultinomialLogit} \arguments{ +\item{params}{a named numeric vector.} + \item{formula}{a \code{formula} object of class \code{\link[mlogit:mFormula]{mlogit::mFormula()}}, \link[Formula:Formula]{Formula::Formula}, or \code{formula}.} \item{preprocessing_fn}{a pre-processing function that gets applied to the From 4368d9a14ce81dea813ab868128b473412f6cef4 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 02:19:15 +1000 Subject: [PATCH 23/29] fix rcmdcheck warnings and errors --- DESCRIPTION | 3 ++- R/makeModel.R | 32 ++++++-------------------------- man/makeModel.Rd | 4 +++- tests/testthat/setup-models.R | 1 + tests/testthat/test-makeModel.R | 2 +- 5 files changed, 13 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 98b07e7e..0c401c2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,7 +54,8 @@ Suggests: scales (>= 1.1.0), prettydoc, ggthemes (>= 4.2.0), - visNetwork (>= 2.0.0) + visNetwork (>= 2.0.0), + dfidx Roxygen: list(markdown = TRUE, r6 = FALSE) RoxygenNote: 7.1.0 StagedInstall: no diff --git a/R/makeModel.R b/R/makeModel.R index fb7c4a03..26fdd44a 100644 --- a/R/makeModel.R +++ b/R/makeModel.R @@ -4,11 +4,12 @@ #' Make a light weight model object that can be used by `transition()`, `TransitionClassification`, `TransitionRegression`. #' #' @param model a model object. See `SupportedTransitionModels()`. +#' @param preprocessing_fn a preprocessing function. #' @param ... dots. #' #' @return a `Model` object. #' @export -makeModel <- function(model, ...) { +"makeModel" <- function(model, ...) { UseMethod("makeModel") } @@ -31,29 +32,6 @@ makeModel.train <- function(model, preprocessing_fn = NULL) { preprocessing_fn = preprocessing_fn)) } - # if (model$method == "multinom") { - # - # stop("Not") - # browser() - # - # params <- model[["finalModel"]][["wts"]] - # - # names(params) <- - # lapply(model[["finalModel"]][["lab"]], function(x) { - # c(x, model[["finalModel"]][["coefnames"]]) - # }) %>% unlist() - # - # model.matrix(model$terms) - # - # m <- model.frame(formula = model$terms, toy_individuals) - # Terms <- delete.response(model$terms) - # x <- model.matrix(Terms, m, contrasts = model$contrasts) - # - # return(ModelMultinomialLogit$new(params = params, - # formula = model$terms, - # preprocessing_fn = preprocessing_fn)) - # } - stop("something went wrong.") } @@ -64,6 +42,8 @@ makeModel.WrappedModel <- function(model) { #' @rdname makeModel #' @export -makeModel.mlogit <- function(model) { - ModelMultinomialLogit$new(params = model[['coefficients']], formula = model[['formula']]) +makeModel.mlogit <- function(model, preprocessing_fn = NULL) { + ModelMultinomialLogit$new(params = model[['coefficients']], + formula = model[['formula']], + preprocessing_fn = preprocessing_fn) } diff --git a/man/makeModel.Rd b/man/makeModel.Rd index 088cbaa4..8bda892f 100644 --- a/man/makeModel.Rd +++ b/man/makeModel.Rd @@ -13,12 +13,14 @@ makeModel(model, ...) \method{makeModel}{WrappedModel}(model) -\method{makeModel}{mlogit}(model) +\method{makeModel}{mlogit}(model, preprocessing_fn = NULL) } \arguments{ \item{model}{a model object. See \code{SupportedTransitionModels()}.} \item{...}{dots.} + +\item{preprocessing_fn}{a preprocessing function.} } \value{ a \code{Model} object. diff --git a/tests/testthat/setup-models.R b/tests/testthat/setup-models.R index 3246a7f2..9a62a3de 100644 --- a/tests/testthat/setup-models.R +++ b/tests/testthat/setup-models.R @@ -61,6 +61,7 @@ create_glm_binary_model <- function() { } create_mlogit_model <- function() { + data("Fishing", package = "mlogit") .data_dfidx <- dfidx::dfidx(Fishing, varying = 2:9, shape = "wide", choice = "mode") mod <- mlogit::mlogit(mode ~ price + catch, data = .data_dfidx) } diff --git a/tests/testthat/test-makeModel.R b/tests/testthat/test-makeModel.R index 0b675234..2d00024e 100644 --- a/tests/testthat/test-makeModel.R +++ b/tests/testthat/test-makeModel.R @@ -8,6 +8,6 @@ test_that("makeModel", { # mlogit model mlogit_model <- create_mlogit_model() Mod <- makeModel(mlogit_model) - checkmate::expect_r6(Mod, classes = "ModelMultinamialLogit") + checkmate::expect_r6(Mod, classes = "ModelMultinomialLogit") }) From 1bb60341c12e54b843c7cce024adfb58d74170a5 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 08:26:04 +1000 Subject: [PATCH 24/29] fix(makeModel): S3 generic/method consistency warning --- R/makeModel.R | 6 +++--- man/makeModel.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/makeModel.R b/R/makeModel.R index 26fdd44a..814d7747 100644 --- a/R/makeModel.R +++ b/R/makeModel.R @@ -15,7 +15,7 @@ #' @rdname makeModel #' @export -makeModel.train <- function(model, preprocessing_fn = NULL) { +makeModel.train <- function(model, preprocessing_fn = NULL, ...) { compatible_methods = c("glm") @@ -36,13 +36,13 @@ makeModel.train <- function(model, preprocessing_fn = NULL) { } #' @rdname makeModel -makeModel.WrappedModel <- function(model) { +makeModel.WrappedModel <- function(model, preprocessing_fn = NULL, ...) { stop("Not implemented yet.") } #' @rdname makeModel #' @export -makeModel.mlogit <- function(model, preprocessing_fn = NULL) { +makeModel.mlogit <- function(model, preprocessing_fn = NULL, ...) { ModelMultinomialLogit$new(params = model[['coefficients']], formula = model[['formula']], preprocessing_fn = preprocessing_fn) diff --git a/man/makeModel.Rd b/man/makeModel.Rd index 8bda892f..c78e0456 100644 --- a/man/makeModel.Rd +++ b/man/makeModel.Rd @@ -9,11 +9,11 @@ \usage{ makeModel(model, ...) -\method{makeModel}{train}(model, preprocessing_fn = NULL) +\method{makeModel}{train}(model, preprocessing_fn = NULL, ...) -\method{makeModel}{WrappedModel}(model) +\method{makeModel}{WrappedModel}(model, preprocessing_fn = NULL, ...) -\method{makeModel}{mlogit}(model, preprocessing_fn = NULL) +\method{makeModel}{mlogit}(model, preprocessing_fn = NULL, ...) } \arguments{ \item{model}{a model object. See \code{SupportedTransitionModels()}.} From e978a237372f870ab1e349624e700b83f325fc73 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 08:26:22 +1000 Subject: [PATCH 25/29] fix(ModelBinaryChoice: Missing link or links in documentation object --- R/ModelBinaryChoice.R | 2 +- man/ModelBinaryChoice.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ModelBinaryChoice.R b/R/ModelBinaryChoice.R index 8bcc3afe..ff315078 100644 --- a/R/ModelBinaryChoice.R +++ b/R/ModelBinaryChoice.R @@ -35,7 +35,7 @@ ModelBinaryChoice <- R6::R6Class( #' #' @param newdata a `data.frame` object. #' @param link_function :: `character(1)`\cr - #' default as 'logit' using [stats::binomial(link = "logit")]. Choice of + #' default as 'logit' using `stats::binomial(link = "logit")`. Choice of #' 'logit' and 'probit'. TODO: implement 'probit' option. #' #' @return diff --git a/man/ModelBinaryChoice.Rd b/man/ModelBinaryChoice.Rd index d7d73197..ec8ae7b4 100644 --- a/man/ModelBinaryChoice.Rd +++ b/man/ModelBinaryChoice.Rd @@ -17,7 +17,7 @@ data given to the \code{predict} method before making the prediction.} \item{newdata}{a \code{data.frame} object.} \item{link_function}{:: \code{character(1)}\cr -default as 'logit' using \link[stats:binomial(link = "logit")]{stats::binomial(link = "logit")}. Choice of +default as 'logit' using \code{stats::binomial(link = "logit")}. Choice of 'logit' and 'probit'. TODO: implement 'probit' option.} } \value{ From 6db054bef4ff4382ae2f7defa76358e69dd6a395 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 10:13:31 +1000 Subject: [PATCH 26/29] doc: fix Non-file package-anchored link --- R/ModelMultinomialLogit.R | 2 +- man/ModelMultinomialLogit.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ModelMultinomialLogit.R b/R/ModelMultinomialLogit.R index dd2166ac..f27f98ee 100644 --- a/R/ModelMultinomialLogit.R +++ b/R/ModelMultinomialLogit.R @@ -25,7 +25,7 @@ ModelMultinomialLogit <- R6::R6Class( #' Initialisation function #' #' @param params a named numeric vector. - #' @param formula a `formula` object of class [mlogit::mFormula()], [Formula::Formula], or `formula`. + #' @param formula a `formula` object of class [mlogit::mFormula], [Formula::Formula], or `formula`. #' @param preprocessing_fn a pre-processing function that gets applied to the #' data given to the `predict` method before making the prediction. #' diff --git a/man/ModelMultinomialLogit.Rd b/man/ModelMultinomialLogit.Rd index 86176df5..e953dd9d 100644 --- a/man/ModelMultinomialLogit.Rd +++ b/man/ModelMultinomialLogit.Rd @@ -6,7 +6,7 @@ \arguments{ \item{params}{a named numeric vector.} -\item{formula}{a \code{formula} object of class \code{\link[mlogit:mFormula]{mlogit::mFormula()}}, \link[Formula:Formula]{Formula::Formula}, or \code{formula}.} +\item{formula}{a \code{formula} object of class \link[mlogit:mFormula]{mlogit::mFormula}, \link[Formula:Formula]{Formula::Formula}, or \code{formula}.} \item{preprocessing_fn}{a pre-processing function that gets applied to the data given to the \code{predict} method before making the prediction.} From dd367f27df6ab9f34f6d8fb58d0aa585a278ae91 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 11:10:47 +1000 Subject: [PATCH 27/29] Increment version number --- DESCRIPTION | 2 +- NEWS.md | 6 +++--- tests/testthat/test-utils.R | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4ba797cf..b709ab47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: dymiumCore -Version: 0.1.8.9000 +Version: 0.1.9 Title: A Toolkit for Building a Dynamic Microsimulation Model for Integrated Urban Modelling Description: A modular microsimulation modelling framework for integrated urban modelling. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 90257b56..ab3b057d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,9 @@ -# dymiumCore (development version) +# dymiumCore 0.1.9 -- renamed `unnest_datatable` to `unnest_dt` and add `unnestv_dt` which can take names of the list columns as a character vector. - introduced `ModelCustom` a model class that let users specify its parameters and `predict` function. We also add `ModelMultinomialLogit`, `ModelBinaryChoice` and `ModelLinear` which are implementations of `ModelCustom`. -- add helper functions `which_min_n` and `which_max_n`. - add `makeModel` to create a light weight Model object that can be used in the transition functions and classes. This basically creates an appropriate Model class from the given model object. +- add helper functions `which_min_n` and `which_max_n`. +- renamed `unnest_datatable` to `unnest_dt` which can take names of the list columns as a character vector. # dymiumCore 0.1.8 diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 37feb683..f088e96d 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -111,7 +111,7 @@ test_that("dsample", { }) -test_that("unnest_dt and unnestv_dt works", { +test_that("unnest_dt", { dt <- data.table::data.table( id = 1:3, list_col_a = list(c("a","b","c"), c("a","b","c"), c("a","b","c"))) From 350c20974312ba37b7807edb96539bd8e9bbe428 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 11:59:52 +1000 Subject: [PATCH 28/29] Update _pkgdown.yml --- _pkgdown.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 312c3bbf..e7148393 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -58,6 +58,7 @@ reference: - Agent - Individual - Household + - Firm - Asset - starts_with("Building") - Environment @@ -70,8 +71,8 @@ reference: - Population - matches("^Model.") - pop_register - - household_formation -- title: Simulation tools & Alignment + - starts_with("makeModel") +- title: Simulation tools contents: - matches("^Trans.") - transition @@ -85,6 +86,7 @@ reference: - Pipeline - register - starts_with("Match") + - starts_with("simulate_choice") - title: Logging and inspection contents: - add_log @@ -113,7 +115,6 @@ reference: desc: creates an event script and handy functions to use within event functions. contents: - use_event - - assign_reference - get_models - pick_models - is_scheduled @@ -150,6 +151,8 @@ reference: - dt_group_and_sort - validate_linkages - is_dymium_class + - sim + - dsample - title: Base classes contents: From b8952ca6d68991c7bf3a8efc6a59451debafaf89 Mon Sep 17 00:00:00 2001 From: amarin <17020181+asiripanich@users.noreply.github.com> Date: Wed, 17 Jun 2020 12:27:09 +1000 Subject: [PATCH 29/29] Update create_toy_world.Rd --- man/create_toy_world.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/create_toy_world.Rd b/man/create_toy_world.Rd index 250f1b21..16435b8e 100644 --- a/man/create_toy_world.Rd +++ b/man/create_toy_world.Rd @@ -7,7 +7,7 @@ create_toy_world(add_toy_zones = TRUE) } \arguments{ -\item{add_zone}{add \code{toy_zones} to world (default TRUE).} +\item{add_toy_zones}{add \code{toy_zones} to world (default TRUE).} } \description{ Create a toy world (assigned to the global environment as \code{world}) for running tests