diff --git a/R/scaler.R b/R/scaler.R
index 62cb646..ddbc105 100644
--- a/R/scaler.R
+++ b/R/scaler.R
@@ -1,20 +1,64 @@
 #' Scaling a dataframe
 #'
-#' This function scales numerical features based on scaling requirement in a data.frame
+#' This function scales numerical features in a data.frame using the requested
+#' scaling method (standardization or min-max scaling). The scaling parameters
+#' are learned from X_train only and then applied to all three data sets.
 #'
 #' @param X_train data.frame
-#' @param X_Valid data.frame
+#' @param X_valid data.frame
 #' @param X_test data.frame
-#' @param scale_features character vector
-#' @param scaler_type character
+#' @param scaler_type character, either 'standardization' or 'minmax'
 #'
-#' @return data.frame of data.frames
+#' @return named list of data.frames with elements train, test and valid
 #' @export
 #'
 #' @examples
-#' X_train<- data.frame('name' = c('pandaman', 'doorman', 'eve'), 'age' = c(15,20,25), 'networth' = c(100000,100,100000))
-#' X_Valid<- data.frame('name' = c('pandaman_v', 'doorman_v', 'eve_v'), 'age' = c(15,56, 43) , 'networth' = c(123124, 352334,645645)
-#' X_test <- data.frame('name' = c('pandaman_t', 'doorman_t', 'eve_t'), 'age' =c(14,15,56), 'networth' = c(123124,90914, 124124)
-#' scaled_df <- scaler(X_train, X_Valid, X_test, c('age','networth'), scaler_type='standardscaler')
-scaler <- function(X_train, X_Valid, X_test, scale_features, scaler_type){
-}
\ No newline at end of file
+#' X_train <- data.frame(age = c(15, 20, 25), networth = c(100000, 100, 100000))
+#' X_valid <- data.frame(age = c(15, 56, 43), networth = c(123124, 352334, 645645))
+#' X_test <- data.frame(age = c(14, 15, 56), networth = c(123124, 90914, 124124))
+#' scaled_df <- scaler(X_train, X_valid, X_test, scaler_type = 'standardization')
+scaler <- function(X_train, X_valid, X_test, scaler_type){
+
+  # checking if the scaling method is a single, valid method name
+  if(!is.character(scaler_type) || length(scaler_type) != 1 ||
+     !scaler_type %in% c('standardization', 'minmax')){
+    stop('Scaling method must be standardization or minmax')
+  }
+
+  # Checking input data type; is.data.frame() also rejects plain lists
+  if(!is.data.frame(X_train)){
+    stop('X_train should be a dataframe object')
+  }
+  if(!is.data.frame(X_valid)){
+    stop('X_valid should be a dataframe object')
+  }
+  if(!is.data.frame(X_test)){
+    stop('X_test should be a dataframe object')
+  }
+
+  # Checking if all data is numeric; vapply() always returns a logical
+  # vector, whereas sapply() returns a list for a data.frame without columns
+  if(!all(vapply(X_train, is.numeric, logical(1)))){
+    stop('X_train must contain all numeric values')
+  }
+  if(!all(vapply(X_valid, is.numeric, logical(1)))){
+    stop('X_valid must contain all numeric values')
+  }
+  if(!all(vapply(X_test, is.numeric, logical(1)))){
+    stop('X_test must contain all numeric values')
+  }
+
+  # Fit the scaler on the training data only, then apply it to all sets
+  if(scaler_type == 'standardization'){
+    pp_method <- c("center", "scale")
+  } else {
+    pp_method <- "range"
+  }
+  pp <- caret::preProcess(X_train, method = pp_method)
+
+  out <- list()
+  out$train <- predict(pp, X_train)
+  out$test <- predict(pp, X_test)
+  out$valid <- predict(pp, X_valid)
+  out
+}
diff --git a/man/scaler.Rd b/man/scaler.Rd
index 60e5b69..3c3b297 100644
--- a/man/scaler.Rd
+++ b/man/scaler.Rd
@@ -4,28 +4,28 @@
 \alias{scaler}
 \title{Scaling a dataframe}
 \usage{
-scaler(X_train, X_Valid, X_test, scale_features, scaler_type)
+scaler(X_train, X_valid, X_test, scaler_type)
 }
 \arguments{
 \item{X_train}{data.frame}
 
-\item{X_Valid}{data.frame}
+\item{X_valid}{data.frame}
 
 \item{X_test}{data.frame}
 
-\item{scale_features}{character vector}
-
-\item{scaler_type}{character}
+\item{scaler_type}{character, either 'standardization' or 'minmax'}
 }
 \value{
-data.frame of data.frames
+named list of data.frames with elements train, test and valid
 }
 \description{
-This function scales numerical features based on scaling requirement in a data.frame
+This function scales numerical features in a data.frame using the requested
+scaling method (standardization or min-max scaling). The scaling parameters
+are learned from X_train only and then applied to all three data sets.
 }
 \examples{
-X_train<- data.frame('name' = c('pandaman', 'doorman', 'eve'), 'age' = c(15,20,25), 'networth' = c(100000,100,100000))
-X_Valid<- data.frame('name' = c('pandaman_v', 'doorman_v', 'eve_v'), 'age' = c(15,56, 43) , 'networth' = c(123124, 352334,645645)
-X_test <- data.frame('name' = c('pandaman_t', 'doorman_t', 'eve_t'), 'age' =c(14,15,56), 'networth' = c(123124,90914, 124124)
-scaled_df <- scaler(X_train, X_Valid, X_test, c('age','networth'), scaler_type='standardscaler')
+X_train <- data.frame(age = c(15, 20, 25), networth = c(100000, 100, 100000))
+X_valid <- data.frame(age = c(15, 56, 43), networth = c(123124, 352334, 645645))
+X_test <- data.frame(age = c(14, 15, 56), networth = c(123124, 90914, 124124))
+scaled_df <- scaler(X_train, X_valid, X_test, scaler_type = 'standardization')
 }
diff --git a/tests/testthat/test-scaler.R b/tests/testthat/test-scaler.R
new file mode 100644
index 0000000..4469d04
--- /dev/null
+++ b/tests/testthat/test-scaler.R
@@ -0,0 +1,39 @@
+df <- data.frame(a = 1:3, b = 10:12)
+df_nonnum <- data.frame(a = 1:3, b = 'hello')
+df_minmax <- data.frame(a=c(0.0,0.5,1.0), b=c(0.0,0.5,1.0))
+df_sd <- data.frame(a=c(-1,0,1), b=c(-1,0,1))
+out_minmax <- list()
+out_minmax$train <- df_minmax
+out_minmax$test <- df_minmax
+out_minmax$valid <- df_minmax
+out_sd <- list()
+out_sd$train <- df_sd
+out_sd$test <- df_sd
+out_sd$valid <- df_sd
+
+test_that('tests for exceptions', {
+
+  #Checking data in input
+  expect_error(scaler(df,df,df_nonnum, 'standardization'))
+  expect_error(scaler(df,df_nonnum,df, 'standardization'))
+  expect_error(scaler(df_nonnum,df,df, 'standardization'))
+
+  #Checking Input Type
+  expect_error(scaler(41,df,df, 'standardization'))
+  expect_error(scaler(df,41,df, 'standardization'))
+  expect_error(scaler(df,df,41, 'standardization'))
+
+  #Checking that a plain list is not accepted as a dataframe
+  expect_error(scaler(list(a = 1:3), df, df, 'standardization'),
+               'X_train should be a dataframe object')
+
+  #Checking method
+  expect_error(scaler(df,df,df, 'skscaler'))
+})
+
+test_that("Tests to check outputs", {
+  # test standardization
+  expect_equal(scaler(df, df, df, 'standardization'), out_sd)
+  # test minmax
+  expect_equal(scaler(df, df, df, 'minmax'), out_minmax)
+})