From c17ae5eb4f10cca2be48b868ed0c31b785b42c91 Mon Sep 17 00:00:00 2001 From: egillax Date: Thu, 12 Dec 2024 11:59:03 +0100 Subject: [PATCH] new docs --- man/createNormalization.Rd | 17 +++++++++++++++++ man/createRareFeatureRemover.Rd | 18 ++++++++++++++++++ man/deDuplicateCovariateData.Rd | 22 ++++++++++++++++++++++ man/minMaxNormalize.Rd | 24 ++++++++++++++++++++++++ man/pmmFit.Rd | 20 ++++++++++++++++++++ man/removeRareFeatures.Rd | 21 +++++++++++++++++++++ man/robustNormalize.Rd | 27 +++++++++++++++++++++++++++ 7 files changed, 149 insertions(+) create mode 100644 man/createNormalization.Rd create mode 100644 man/createRareFeatureRemover.Rd create mode 100644 man/deDuplicateCovariateData.Rd create mode 100644 man/minMaxNormalize.Rd create mode 100644 man/pmmFit.Rd create mode 100644 man/removeRareFeatures.Rd create mode 100644 man/robustNormalize.Rd diff --git a/man/createNormalization.Rd b/man/createNormalization.Rd new file mode 100644 index 00000000..fa20c6df --- /dev/null +++ b/man/createNormalization.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PreprocessingData.R +\name{createNormalization} +\alias{createNormalization} +\title{Create the settings for normalizing the data} +\usage{ +createNormalization(type = "minmax") +} +\arguments{ +\item{type}{The type of normalization to use, either "minmax" or "robust"} +} +\value{ +An object of class \code{featureEngineeringSettings} +} +\description{ +Create the settings for normalizing the data +} diff --git a/man/createRareFeatureRemover.Rd b/man/createRareFeatureRemover.Rd new file mode 100644 index 00000000..c1e70b87 --- /dev/null +++ b/man/createRareFeatureRemover.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PreprocessingData.R +\name{createRareFeatureRemover} +\alias{createRareFeatureRemover} +\title{Create the settings for removing rare features} +\usage{ +createRareFeatureRemover(ratio = 0.001) +} 
+\arguments{ +\item{ratio}{The minimum fraction of the training data that must have a +feature for it to be included} +} +\value{ +An object of class \code{featureEngineeringSettings} +} +\description{ +Create the settings for removing rare features +} diff --git a/man/deDuplicateCovariateData.Rd b/man/deDuplicateCovariateData.Rd new file mode 100644 index 00000000..e1f94213 --- /dev/null +++ b/man/deDuplicateCovariateData.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExternalValidatePlp.R +\name{deDuplicateCovariateData} +\alias{deDuplicateCovariateData} +\title{deDuplicateCovariateData - Remove duplicate covariate data. +When downloading data with multiple different covariateSettings, sometimes +there will be duplicated analysisIds which need to be removed} +\usage{ +deDuplicateCovariateData(covariateData) +} +\arguments{ +\item{covariateData}{The covariate data Andromeda object} +} +\value{ +The deduplicated covariate data +} +\description{ +deDuplicateCovariateData - Remove duplicate covariate data. +When downloading data with multiple different covariateSettings, sometimes +there will be duplicated analysisIds which need to be removed +} +\keyword{internal} diff --git a/man/minMaxNormalize.Rd b/man/minMaxNormalize.Rd new file mode 100644 index 00000000..c4624293 --- /dev/null +++ b/man/minMaxNormalize.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PreprocessingData.R +\name{minMaxNormalize} +\alias{minMaxNormalize} +\title{A function that normalizes continuous features to have values between 0 and 1} +\usage{ +minMaxNormalize(trainData, featureEngineeringSettings, normalized = FALSE) +} +\arguments{ +\item{trainData}{The training data to be normalized} + +\item{featureEngineeringSettings}{The settings for the normalization} + +\item{normalized}{Whether the data has already been normalized (bool)} +} +\value{ +The normalized data +} +\description{ +A function that 
normalizes continuous features to have values between 0 and 1 +} +\details{ +uses (value - min) / (max - min) to normalize the data +} diff --git a/man/pmmFit.Rd b/man/pmmFit.Rd new file mode 100644 index 00000000..3cadc1df --- /dev/null +++ b/man/pmmFit.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Imputation.R +\name{pmmFit} +\alias{pmmFit} +\title{Predictive mean matching using lasso} +\usage{ +pmmFit(data, k = 5) +} +\arguments{ +\item{k}{The number of donors to use for matching (default 5)} + +\item{data}{An Andromeda object with the following fields: +xObs: covariates table for observed data +xMiss: covariates table for missing data +yObs: outcome variable that we want to impute} +} +\description{ +Predictive mean matching using lasso +} +\keyword{internal} diff --git a/man/removeRareFeatures.Rd b/man/removeRareFeatures.Rd new file mode 100644 index 00000000..0a62ea54 --- /dev/null +++ b/man/removeRareFeatures.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PreprocessingData.R +\name{removeRareFeatures} +\alias{removeRareFeatures} +\title{A function that removes rare features from the data} +\usage{ +removeRareFeatures(trainData, featureEngineeringSettings, findRare = FALSE) +} +\arguments{ +\item{trainData}{The data to remove rare features from} + +\item{featureEngineeringSettings}{The settings for the rare feature removal} + +\item{findRare}{Whether to find and remove rare features or remove them only (bool)} +} +\description{ +A function that removes rare features from the data +} +\details{ +removes features that are present in less than a certain fraction of the population +} diff --git a/man/robustNormalize.Rd b/man/robustNormalize.Rd new file mode 100644 index 00000000..cc3d558e --- /dev/null +++ b/man/robustNormalize.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PreprocessingData.R +\name{robustNormalize} 
+\alias{robustNormalize} +\title{A function that normalizes continuous features by the interquartile range and forces +the resulting values to be between -3 and 3 with f(x) = x / sqrt(1 + (x/3)^2)} +\usage{ +robustNormalize(trainData, featureEngineeringSettings, normalized = FALSE) +} +\arguments{ +\item{trainData}{The training data to be normalized} + +\item{featureEngineeringSettings}{The settings for the normalization} + +\item{normalized}{Whether the data has already been normalized (bool)} +} +\description{ +A function that normalizes continuous features by the interquartile range and forces +the resulting values to be between -3 and 3 with f(x) = x / sqrt(1 + (x/3)^2) +} +\details{ +uses (value - median) / iqr to normalize the data and then +applies the function f(x) = x / sqrt(1 + (x/3)^2) to the normalized values. +This forces the values to be between -3 and 3 while preserving the relative +ordering of the values. +Based on https://arxiv.org/abs/2407.04491; see the paper for more details +}