diff --git a/NAMESPACE b/NAMESPACE index 38cfb743f..7119f6dd7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(createDefaultExecuteSettings) export(createDefaultSplitSetting) export(createExecuteSettings) export(createFeatureEngineeringSettings) +export(createGlmModel) export(createLearningCurve) export(createLogSettings) export(createModelDesign) @@ -82,6 +83,7 @@ export(plotSparseCalibration2) export(plotSparseRoc) export(plotVariableScatterplot) export(predictCyclops) +export(predictGlm) export(predictPlp) export(recalibratePlp) export(recalibratePlpRefit) diff --git a/R/AdditionalCovariates.R b/R/AdditionalCovariates.R index f19f30803..dfb8abca8 100644 --- a/R/AdditionalCovariates.R +++ b/R/AdditionalCovariates.R @@ -67,10 +67,29 @@ getCohortCovariateData <- function( group by a.@row_id_field; " ) + if (is.null(covariateSettings$cohortTable) && + (is.null(covariateSettings$cohortDatabaseSchema))) { + ParallelLogger::logInfo("cohortTable and cohortDatabaseSchema not specified in + cohort covariateSettings. Attempting to fetch from databaseDetails") + # use settings from databaseDetails which is two frames up + # in the call stack + tryCatch(databaseDetails <- get("databaseDetails", parent.frame(n = 2)), + error = function(e) { + stop("cohortTable and cohortDatabaseSchema not specified in + cohort covariateSettings. Attempt to fetch databaseDetails from parent + frame failed with error: ", e$message) + }) + cohortCovariateTable <- databaseDetails$cohortTable + cohortCovariateDatabaseSchema <- databaseDetails$cohortDatabaseSchema + } else { + cohortCovariateTable <- covariateSettings$cohortTable + cohortCovariateDatabaseSchema <- covariateSettings$cohortDatabaseSchema + } + sql <- SqlRender::render( sql, - covariate_cohort_schema = covariateSettings$cohortDatabaseSchema, - covariate_cohort_table = covariateSettings$cohortTable, + covariate_cohort_schema = cohortCovariateDatabaseSchema, + covariate_cohort_table = cohortCovariateTable, covariate_cohort_id = covariateSettings$cohortIds, cohort_temp_table = cohortTable, row_id_field = rowIdField, @@ -154,8 +173,10 @@ getCohortCovariateData <- function( #' #' @param cohortName Name for the cohort #' @param settingId A unique id for the covariate time and -#' @param cohortDatabaseSchema The schema of the database with the cohort -#' @param cohortTable the table name that contains the covariate cohort +#' @param cohortDatabaseSchema The schema of the database with the cohort. If +#' nothing is specified then the cohortDatabaseSchema from databaseDetails at runtime is used. +#' @param cohortTable the table name that contains the covariate cohort. If +#' nothing is specified then the cohortTable from databaseDetails at runtime is used. #' @param cohortId cohort id for the covariate cohort #' @param startDay The number of days prior to index to start observing the cohort #' @param endDay The number of days prior to index to stop observing the cohort @@ -173,8 +194,8 @@ getCohortCovariateData <- function( createCohortCovariateSettings <- function( cohortName, settingId, - cohortDatabaseSchema, - cohortTable, + cohortDatabaseSchema=NULL, + cohortTable=NULL, cohortId, startDay = -30, endDay = 0, @@ -205,4 +226,4 @@ createCohortCovariateSettings <- function( attr(covariateSettings, "fun") <- "PatientLevelPrediction::getCohortCovariateData" class(covariateSettings) <- "covariateSettings" return(covariateSettings) -} \ No newline at end of file +} diff --git a/R/ExternalValidatePlp.R b/R/ExternalValidatePlp.R index f690be1a0..a3a59b32b 100644 --- a/R/ExternalValidatePlp.R +++ b/R/ExternalValidatePlp.R @@ -350,27 +350,37 @@ createValidationSettings <- function(recalibrate = NULL, #' #' @param targetId The targetId of the target cohort to validate on #' @param outcomeId The outcomeId of the outcome cohort to validate on -#' @param populationSettings A list of population restriction settings created by \code{createPopulationSettings} -#' @param restrictPlpDataSettings A list of plpData restriction settings created by \code{createRestrictPlpDataSettings} +#' @param populationSettings A list of population restriction settings created +#' by \code{createPopulationSettings}. Default is NULL and then this is taken +#' from the model +#' @param restrictPlpDataSettings A list of plpData restriction settings +#' created by \code{createRestrictPlpDataSettings}. Default is NULL and then +#' this is taken from the model. #' @param plpModelList A list of plpModels objects created by \code{runPlp} or a path to such objects #' @param recalibrate A vector of characters specifying the recalibration method to apply, #' @param runCovariateSummary whether to run the covariate summary for the validation data +#' @return A validation design object of class \code{validationDesign} #' @export createValidationDesign <- function(targetId, outcomeId, - populationSettings, - restrictPlpDataSettings, plpModelList, + populationSettings = NULL, + restrictPlpDataSettings = NULL, recalibrate = NULL, runCovariateSummary = TRUE) { checkIsClass(targetId, c("numeric", "integer")) checkIsClass(outcomeId, c("numeric", "integer")) - checkIsClass(populationSettings, c("populationSettings")) - checkIsClass(restrictPlpDataSettings, "restrictPlpDataSettings") + if (!is.null(populationSettings)) { + checkIsClass(populationSettings, c("populationSettings")) + } + if (!is.null(restrictPlpDataSettings)) { + checkIsClass(restrictPlpDataSettings, "restrictPlpDataSettings") + } checkIsClass(plpModelList, "list") - lapply(plpModelList, function(x) - checkIsClass(x, c("plpModel", "character"))) + lapply(plpModelList, function(x) { + checkIsClass(x, c("plpModel", "character")) + }) checkIsClass(recalibrate, c("character", "NULL")) checkIsClass(runCovariateSummary, "logical") @@ -404,33 +414,22 @@ validateExternal <- function(validationDesignList, outputFolder) { # Input checks #======= - if (inherits(validationDesignList, 'list')) { - lapply(validationDesignList, function(x) - checkIsClass(x, 'validationDesign')) - } else { - checkIsClass(validationDesignList, 'validationDesign') - validationDesignList <- list(validationDesignList) - } - - # check the class and make a list if a single database setting - if (inherits(databaseDetails, 'list')) { - lapply(databaseDetails, function(x) - checkIsClass(x, 'databaseDetails')) - } else { - checkIsClass(databaseDetails, 'databaseDetails') - databaseDetails <- list(databaseDetails) - } - + changedInputs <- checkValidateExternalInputs(validationDesignList, + databaseDetails, + logSettings, + outputFolder) + validationDesignList <- changedInputs[["validationDesignList"]] + databaseDetails <- changedInputs[["databaseDetails"]] # create results list with the names of the databases to validate across result <- list() length(result) <- length(databaseDetails) names(result) <- - unlist(lapply(databaseDetails, function(x) - attr(x, 'cdmDatabaseName'))) + unlist(lapply(databaseDetails, function(x) { + attr(x, "cdmDatabaseName")})) # Need to keep track of incremental analysisId's for each database - databaseNames <- unlist(lapply(databaseDetails, function(x) - x$cdmDatabaseName)) + databaseNames <- unlist(lapply(databaseDetails, function(x) { + x$cdmDatabaseName})) analysisInfo <- list() for (name in databaseNames) { analysisInfo[name] <- 1 @@ -438,7 +437,7 @@ validateExternal <- function(validationDesignList, # initiate log logSettings$saveDirectory <- outputFolder - logSettings$logFileName <- 'validationLog' + logSettings$logFileName <- "validationLog" logger <- do.call(createLog, logSettings) ParallelLogger::registerLogger(logger) on.exit(closeLog(logger)) @@ -448,78 +447,26 @@ validateExternal <- function(validationDesignList, for (database in databaseDetails) { databaseName <- database$cdmDatabaseName - ParallelLogger::logInfo(paste('Validating model on', database$cdmDatabaseName)) + ParallelLogger::logInfo(paste("Validating model on", database$cdmDatabaseName)) database$targetId <- design$targetId database$outcomeIds <- design$outcomeId - allCovSettings <- - lapply(design$plpModelList, function(plpModel) { - if (is.character(plpModel)) { - modelDesign <- ParallelLogger::loadSettingsFromJson( - normalizePath(file.path(plpModel, 'modelDesign.json')) - ) - return(modelDesign$covariateSettings) - } else { - plpModel$modelDesign$covariateSettings - } - }) - # compare all to first covSettings, if not the same stop - if (!Reduce(function(x, y) - x && - identical(y, allCovSettings[[1]]), - allCovSettings[-1], - init = TRUE)) { - stop("covariateSettings are not the same across models which is not supported yet") - } - plpDataName <- - paste0("targetId_", design$targetId, "_L", "1") # Is the 1 for how many targetIds in file ? - plpDataLocation <- - file.path(outputFolder, databaseName, plpDataName) - if (!dir.exists(plpDataLocation)) { - plpData <- tryCatch({ - do.call( - getPlpData, - list( - databaseDetails = database, - restrictPlpDataSettings = design$restrictPlpDataSettings, - covariateSettings = allCovSettings[[1]] - ) - ) - }, - error = function(e) { - ParallelLogger::logError(e) - return(NULL) - }) + modelDesigns <- extractModelDesigns(design$plpModelList) + allCovSettings <- lapply(modelDesigns, function(x) x$covariateSettings) + design <- fromDesignOrModel(design, modelDesigns, "restrictPlpDataSettings") + checkAllSameInModels(allCovSettings, "covariateSettings") + + # get plpData + plpData <- getData(design, database, outputFolder, allCovSettings) if (is.null(plpData)) { - ParallelLogger::logInfo("Couldn't extract plpData for the given design and database, proceding to the next one.") + ParallelLogger::logInfo("Couldn't extract plpData for the given design and database, proceeding to the next one.") next } - if (!dir.exists(file.path(outputFolder, databaseName))) { - dir.create(file.path(outputFolder, databaseName), recursive = TRUE) - } - savePlpData(plpData, file = plpDataLocation) - } else { - ParallelLogger::logInfo(paste0("Data already extracted for ", - plpDataName, ": Loading from disk")) - plpData <- loadPlpData(plpDataLocation) - } # create study population - population <- tryCatch({ - do.call( - createStudyPopulation, - list( - plpData = plpData, - outcomeId = design$outcomeId, - populationSettings = design$populationSettings - ) - ) - }, - error = function(e) { - ParallelLogger::logError(e) - return(NULL) - }) + population <- getPopulation(design, modelDesigns, plpData) + results <- lapply(design$plpModelList, function(model) { analysisName <- paste0("Analysis_", analysisInfo[databaseName]) analysisDone <- file.exists( @@ -527,23 +474,23 @@ validateExternal <- function(validationDesignList, outputFolder, databaseName, analysisName, - 'validationResult', - 'runPlp.rds' + "validationResult", + "runPlp.rds" ) ) if (!analysisDone) { - validateModel( - plpModel = model, - plpData = plpData, - population = population, - recalibrate = design$recalibrate, - runCovariateSummary = design$runCovariateSummary, - outputFolder = outputFolder, - databaseName = databaseName, - analysisName = analysisName) + validateModel( + plpModel = model, + plpData = plpData, + population = population, + recalibrate = design$recalibrate, + runCovariateSummary = design$runCovariateSummary, + outputFolder = outputFolder, + databaseName = databaseName, + analysisName = analysisName) } else { - ParallelLogger::logInfo(paste0("Analysis ", analysisName, " already done", - ", Proceeding to the next one.")) + ParallelLogger::logInfo(paste0("Analysis ", analysisName, " already done", + ", Proceeding to the next one.")) } analysisInfo[[databaseName]] <<- analysisInfo[[databaseName]] + 1 }) @@ -552,7 +499,7 @@ validateExternal <- function(validationDesignList, for (database in databaseDetails) { databaseName <- database$cdmDatabaseName sqliteLocation <- - file.path(outputFolder, 'sqlite') + file.path(outputFolder, "sqlite") tryCatch({ insertResultsToSqlite( resultLocation = file.path(outputFolder, databaseName), @@ -595,7 +542,153 @@ validateModel <- outputFolder, databaseName, analysisName, - 'validationResult' + "validationResult" )) return(result) } + +#' checkAllSameInModels - Check if all settings are the same across models +#' @param settingsList A list of settings to check +#' @param settingName The name of the setting to check +checkAllSameInModels <- function(settingsList, settingName) { + if (!Reduce(function(x, y) { + x && + identical(y, settingsList[[1]])}, + settingsList[-1], + init = TRUE)) { + stop(paste0(settingName, "are not the same across models which is not supported yet")) + } +} + +#' extractModelDesigns - Extract all modelDesigns from a list of plpModels +#' @param plpModelList A list of plpModels +#' @return A list of modelDesigns +extractModelDesigns <- function(plpModelList) { + lapply(plpModelList, function(plpModel) { + if (is.character(plpModel)) { + modelDesign <- ParallelLogger::loadSettingsFromJson( + normalizePath(file.path(plpModel, "modelDesign.json")) + ) + return(modelDesign) + } else { + plpModel$modelDesign + } + }) +} + +#' checkValidateExternalInputs - Check the inputs for validateExternal +#' @param validationDesignList A list of validationDesign objects +#' @param databaseDetails A list of databaseDetails objects +#' @param logSettings An object of logSettings +#' @param outputFolder The directory to save the validation results to +#' @return A list of inputs that were modified +checkValidateExternalInputs <- function(validationDesignList, + databaseDetails, + logSettings, + outputFolder) { + if (inherits(validationDesignList, "list")) { + lapply(validationDesignList, function(x) { + checkIsClass(x, "validationDesign")}) + } else { + checkIsClass(validationDesignList, "validationDesign") + validationDesignList <- list(validationDesignList) + } + + # check the class and make a list if a single database setting + if (inherits(databaseDetails, "list")) { + lapply(databaseDetails, function(x) { + checkIsClass(x, "databaseDetails")}) + } else { + checkIsClass(databaseDetails, "databaseDetails") + databaseDetails <- list(databaseDetails) + } + results <- list(validationDesignList = validationDesignList, + databaseDetails = databaseDetails) + return(results) +} + +#' fromDesignOrModel - Check if the design has the setting, if not use the model's +#' @param validationDesign The validationDesign object +#' @param modelDesigns A list of modelDesign objects +#' @param settingName The name of the setting to check +#' @return The updated design +fromDesignOrModel <- function(validationDesign, modelDesigns, settingName) { + settingsFromModel <- lapply(modelDesigns, function(x) x[[settingName]]) + if (is.null(validationDesign[[settingName]])) { + checkAllSameInModels(settingsFromModel, settingName) + validationDesign[[settingName]] <- settingsFromModel[[1]] + ParallelLogger::logInfo(paste0(settingName, " not set in design, using model's")) + } else { + if (any(lapply(modelDesigns, function(x) { + x[[settingName]] != validationDesign[[settingName]] + }))) { + ParallelLogger::logWarn(settingName, " are not the same in models and validationDesign") + } + } + return(validationDesign) +} + +#' getData - Get the plpData for the validation +#' @param design The validationDesign object +#' @param database The databaseDetails object +#' @param outputFolder The directory to save the validation results to +#' @param allCovSettings A list of covariateSettings from the models +#' @return The plpData object +getData <- function(design, database, outputFolder, allCovSettings) { + databaseName <- database$cdmDatabaseName + plpDataName <- + paste0("targetId_", design$targetId, "_L", "1") + plpDataLocation <- + file.path(outputFolder, databaseName, plpDataName) + if (!dir.exists(plpDataLocation)) { + plpData <- tryCatch({ + do.call( + getPlpData, + list( + databaseDetails = database, + restrictPlpDataSettings = design$restrictPlpDataSettings, + covariateSettings = allCovSettings[[1]] + ) + ) + }, + error = function(e) { + ParallelLogger::logError(e) + return(NULL) + }) + if (!is.null(plpData)) { + if (!dir.exists(file.path(outputFolder, databaseName))) { + dir.create(file.path(outputFolder, databaseName), recursive = TRUE) + } + savePlpData(plpData, file = plpDataLocation) + } + } else { + ParallelLogger::logInfo(paste0("Data already extracted for ", + plpDataName, ": Loading from disk")) + plpData <- loadPlpData(plpDataLocation) + } + return(plpData) +} + +#' getPopulation - Get the population for the validationDesign +#' @param validationDesign The validationDesign objects +#' @param modelDesigns A list of modelDesign objects +#' @param plpData The plpData object +#' @return The population dataframe +getPopulation <- function(validationDesign, modelDesigns, plpData) { + design <- fromDesignOrModel(validationDesign, modelDesigns, "populationSettings") + population <- tryCatch({ + do.call( + createStudyPopulation, + list( + plpData = plpData, + outcomeId = design$outcomeId, + populationSettings = design$populationSettings + ) + ) + }, + error = function(e) { + ParallelLogger::logError(e) + return(NULL) + }) + return(population) +} diff --git a/R/Glm.R b/R/Glm.R new file mode 100644 index 000000000..2f7295139 --- /dev/null +++ b/R/Glm.R @@ -0,0 +1,112 @@ +# @file Glm.R +# +# Copyright 2024 Observational Health Data Sciences and Informatics +# +# This file is part of PatientLevelPrediction +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#' predict using a logistic regression model +#' +#' @description +#' Predict risk with a given plpModel containing a generalized linear model. +#' +#' @param plpModel An object of type \code{plpModel} - a patient level +#' prediction model +#' @param data An object of type \code{plpData} - the patient level prediction +#' data extracted from the CDM. +#' @param cohort The population dataframe created using +#' /code{createStudyPopulation} who will have their risks predicted or a cohort +#' without the outcome known +#' @export +#' @return A dataframe containing the prediction for each person in the +#' population +#' @export +predictGlm <- function(plpModel, data, cohort) { + start <- Sys.time() + + ParallelLogger::logInfo("predict risk probabilities using predictGlm") + + data$covariateData$coefficients <- plpModel$model$coefficients + on.exit(data$covariateData$coefficients <- NULL) + + prediction <- data$covariateData$covariates %>% + dplyr::inner_join(data$covariateData$coefficients, by = "covariateId") %>% + dplyr::mutate(values = .data$covariateValue * .data$coefficient) %>% + dplyr::group_by(.data$rowId) %>% + dplyr::summarise(value = sum(.data$values, na.rm = TRUE)) %>% + dplyr::select("rowId", "value") + + prediction <- as.data.frame(prediction) + prediction <- merge(cohort, prediction, by = "rowId", all.x = TRUE, fill = 0) + prediction$value[is.na(prediction$value)] <- 0 + prediction$value <- prediction$value + plpModel$model$intercept + + if (plpModel$model$finalMapping == "linear") { + prediction$value <- prediction$value + } else if (plpModel$model$finalMapping == "logistic") { + prediction$value <- 1 / (1 + exp(-prediction$value)) + } else if (plpModel$model$finalMapping == "square") { + prediction$value <- prediction$value^2 + } else if (plpModel$model$finalMapping == "exponential") { + prediction$value <- exp(prediction$value) + } + + attr(prediction, "metaData")$modelType <- "binary" + + delta <- Sys.time() - start + ParallelLogger::logInfo("Prediction took ", signif(delta, 3), " ", attr(delta, "units")) + return(prediction) +} + +#' createGlmModel +#' +#' @description +#' Create a generalized linear model that can be used in the +#' PatientLevelPrediction package. +#' @param coefficients A dataframe containing two columns, coefficients and +#' covariateId, both of type numeric. The covariateId column must contain +#' valid covariateIds that match those used in the /code{FeatureExtraction} +#' package. +#' @param intercept A numeric value representing the intercept of the model. +#' @param finalMapping A string representing the final mapping from the +#' linear predictors to outcome probabilities. For generalized linear models +#' this is the inverse of the link function. Supported values is only +#' "logistic" for logistic regression model at the moment. +#' @return A model object containing the model and the prediction function. +#' @export +createGlmModel <- function(coefficients, + intercept = 0, + finalMapping = "logistic") { + checkIsClass(coefficients, c("data.frame")) + if (!all(c("covariateId", "coefficient") %in% colnames(coefficients))) { + stop("coefficients must contain columns covariateId and coefficient") + } + checkIsClass(coefficients$covariateId, c("numeric")) + checkIsClass(coefficients$coefficient, c("numeric")) + checkHigherEqual(coefficients$covariateId, 0) + checkIsClass(intercept, c("numeric")) + + checkIsClass(finalMapping, c("character")) + if (finalMapping != "logistic") { + stop("finalMapping must be 'logistic'") + } + + plpModel <- list( + intercept = intercept, + coefficients = coefficients, + finalMapping = finalMapping, + predictionFunction = "PatientLevelPrediction::predictGlm" + ) + plpModel$modelType <- "GLM" + return(plpModel) +} diff --git a/man/checkAllSameInModels.Rd b/man/checkAllSameInModels.Rd new file mode 100644 index 000000000..8ab842639 --- /dev/null +++ b/man/checkAllSameInModels.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExternalValidatePlp.R +\name{checkAllSameInModels} +\alias{checkAllSameInModels} +\title{checkAllSameInModels - Check if all settings are the same across models} +\usage{ +checkAllSameInModels(settingsList, settingName) +} +\arguments{ +\item{settingsList}{A list of settings to check} + +\item{settingName}{The name of the setting to check} +} +\description{ +checkAllSameInModels - Check if all settings are the same across models +} diff --git a/man/checkValidateExternalInputs.Rd b/man/checkValidateExternalInputs.Rd new file mode 100644 index 000000000..24d9ba0d8 --- /dev/null +++ b/man/checkValidateExternalInputs.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExternalValidatePlp.R +\name{checkValidateExternalInputs} +\alias{checkValidateExternalInputs} +\title{checkValidateExternalInputs - Check the inputs for validateExternal} +\usage{ +checkValidateExternalInputs( + validationDesignList, + databaseDetails, + logSettings, + outputFolder +) +} +\arguments{ +\item{validationDesignList}{A list of validationDesign objects} + +\item{databaseDetails}{A list of databaseDetails objects} + +\item{logSettings}{An object of logSettings} + +\item{outputFolder}{The directory to save the validation results to} +} +\value{ +A list of inputs that were modified +} +\description{ +checkValidateExternalInputs - Check the inputs for validateExternal +} diff --git a/man/createCohortCovariateSettings.Rd b/man/createCohortCovariateSettings.Rd index 98d3480eb..37ec35177 100644 --- a/man/createCohortCovariateSettings.Rd +++ b/man/createCohortCovariateSettings.Rd @@ -7,8 +7,8 @@ createCohortCovariateSettings( cohortName, settingId, - cohortDatabaseSchema, - cohortTable, + cohortDatabaseSchema = NULL, + cohortTable = NULL, cohortId, startDay = -30, endDay = 0, @@ -23,9 +23,11 @@ createCohortCovariateSettings( \item{settingId}{A unique id for the covariate time and} -\item{cohortDatabaseSchema}{The schema of the database with the cohort} +\item{cohortDatabaseSchema}{The schema of the database with the cohort. If +nothing is specified then the cohortDatabaseSchema from databaseDetails at runtime is used.} -\item{cohortTable}{the table name that contains the covariate cohort} +\item{cohortTable}{the table name that contains the covariate cohort. If +nothing is specified then the cohortTable from databaseDetails at runtime is used.} \item{cohortId}{cohort id for the covariate cohort} diff --git a/man/createGlmModel.Rd b/man/createGlmModel.Rd new file mode 100644 index 000000000..6a6509fd7 --- /dev/null +++ b/man/createGlmModel.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Glm.R +\name{createGlmModel} +\alias{createGlmModel} +\title{createGlmModel} +\usage{ +createGlmModel(coefficients, intercept = 0, finalMapping = "logistic") +} +\arguments{ +\item{coefficients}{A dataframe containing two columns, coefficients and +covariateId, both of type numeric. The covariateId column must contain +valid covariateIds that match those used in the /code{FeatureExtraction} +package.} + +\item{intercept}{A numeric value representing the intercept of the model.} + +\item{finalMapping}{A string representing the final mapping from the +linear predictors to outcome probabilities. For generalized linear models +this is the inverse of the link function. Supported values is only +"logistic" for logistic regression model at the moment.} +} +\value{ +A model object containing the model and the prediction function. +} +\description{ +Create a generalized linear model that can be used in the +PatientLevelPrediction package. +} diff --git a/man/createValidationDesign.Rd b/man/createValidationDesign.Rd index f54b6aa78..311b8b48f 100644 --- a/man/createValidationDesign.Rd +++ b/man/createValidationDesign.Rd @@ -7,9 +7,9 @@ createValidationDesign( targetId, outcomeId, - populationSettings, - restrictPlpDataSettings, plpModelList, + populationSettings = NULL, + restrictPlpDataSettings = NULL, recalibrate = NULL, runCovariateSummary = TRUE ) @@ -19,16 +19,23 @@ createValidationDesign( \item{outcomeId}{The outcomeId of the outcome cohort to validate on} -\item{populationSettings}{A list of population restriction settings created by \code{createPopulationSettings}} +\item{plpModelList}{A list of plpModels objects created by \code{runPlp} or a path to such objects} -\item{restrictPlpDataSettings}{A list of plpData restriction settings created by \code{createRestrictPlpDataSettings}} +\item{populationSettings}{A list of population restriction settings created +by \code{createPopulationSettings}. Default is NULL and then this is taken +from the model} -\item{plpModelList}{A list of plpModels objects created by \code{runPlp} or a path to such objects} +\item{restrictPlpDataSettings}{A list of plpData restriction settings +created by \code{createRestrictPlpDataSettings}. Default is NULL and then +this is taken from the model.} \item{recalibrate}{A vector of characters specifying the recalibration method to apply,} \item{runCovariateSummary}{whether to run the covariate summary for the validation data} } +\value{ +A validation design object of class \code{validationDesign} +} \description{ createValidationDesign - Define the validation design for external validation } diff --git a/man/extractModelDesigns.Rd b/man/extractModelDesigns.Rd new file mode 100644 index 000000000..72b0c1e65 --- /dev/null +++ b/man/extractModelDesigns.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExternalValidatePlp.R +\name{extractModelDesigns} +\alias{extractModelDesigns} +\title{extractModelDesigns - Extract all modelDesigns from a list of plpModels} +\usage{ +extractModelDesigns(plpModelList) +} +\arguments{ +\item{plpModelList}{A list of plpModels} +} +\value{ +A list of modelDesigns +} +\description{ +extractModelDesigns - Extract all modelDesigns from a list of plpModels +} diff --git a/man/fromDesignOrModel.Rd b/man/fromDesignOrModel.Rd new file mode 100644 index 000000000..8d92b2f98 --- /dev/null +++ b/man/fromDesignOrModel.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExternalValidatePlp.R +\name{fromDesignOrModel} +\alias{fromDesignOrModel} +\title{fromDesignOrModel - Check if the design has the setting, if not use the model's} +\usage{ +fromDesignOrModel(validationDesign, modelDesigns, settingName) +} +\arguments{ +\item{validationDesign}{The validationDesign object} + +\item{modelDesigns}{A list of modelDesign objects} + +\item{settingName}{The name of the setting to check} +} +\value{ +The updated design +} +\description{ +fromDesignOrModel - Check if the design has the setting, if not use the model's +} diff --git a/man/getData.Rd b/man/getData.Rd new file mode 100644 index 000000000..b702affa1 --- /dev/null +++ b/man/getData.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExternalValidatePlp.R +\name{getData} +\alias{getData} +\title{getData - Get the plpData for the validation} +\usage{ +getData(design, database, outputFolder, allCovSettings) +} +\arguments{ +\item{design}{The validationDesign object} + +\item{database}{The databaseDetails object} + +\item{outputFolder}{The directory to save the validation results to} + +\item{allCovSettings}{A list of covariateSettings from the models} +} +\value{ +The plpData object +} +\description{ +getData - Get the plpData for the validation +} diff --git a/man/getPopulation.Rd b/man/getPopulation.Rd new file mode 100644 index 000000000..d59d96235 --- /dev/null +++ b/man/getPopulation.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExternalValidatePlp.R +\name{getPopulation} +\alias{getPopulation} +\title{getPopulation - Get the population for the validationDesign} +\usage{ +getPopulation(validationDesign, modelDesigns, plpData) +} +\arguments{ +\item{validationDesign}{The validationDesign objects} + +\item{modelDesigns}{A list of modelDesign objects} + +\item{plpData}{The plpData object} +} +\value{ +The population dataframe +} +\description{ +getPopulation - Get the population for the validationDesign +} diff --git a/man/predictGlm.Rd b/man/predictGlm.Rd new file mode 100644 index 000000000..e7e09a57b --- /dev/null +++ b/man/predictGlm.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Glm.R +\name{predictGlm} +\alias{predictGlm} +\title{predict using a logistic regression model} +\usage{ +predictGlm(plpModel, data, cohort) +} +\arguments{ +\item{plpModel}{An object of type \code{plpModel} - a patient level +prediction model} + +\item{data}{An object of type \code{plpData} - the patient level prediction +data extracted from the CDM.} + +\item{cohort}{The population dataframe created using +/code{createStudyPopulation} who will have their risks predicted or a cohort +without the outcome known} +} +\value{ +A dataframe containing the prediction for each person in the +population +} +\description{ +Predict risk with a given plpModel containing a generalized linear model. +}