Skip to content

Commit

Permalink
Merge pull request #17 from UBC-MDS/scaler
Browse files Browse the repository at this point in the history
scaler complete
  • Loading branch information
BruhatMusunuru authored Mar 13, 2021
2 parents b5004b0 + 19b2d3c commit 1d7b3f7
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 19 deletions.
66 changes: 56 additions & 10 deletions R/scaler.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,66 @@
#' Scaling a dataframe
#'
#' This function scales numerical features based on scaling requirement in a data.frame
#' This function scales the numeric features of a data.frame using the requested method (standardization or min-max scaling).
#'
#' @param X_train data.frame
#' @param X_Valid data.frame
#' @param X_valid data.frame
#' @param X_test data.frame
#' @param scale_features character vector
#' @param scaler_type character
#' @param scaler_type character
#'
#' @return list of data.frames with elements train, test and valid
#' @export
#'
#' @examples
#' X_train<- data.frame('name' = c('pandaman', 'doorman', 'eve'), 'age' = c(15,20,25), 'networth' = c(100000,100,100000))
#' X_Valid<- data.frame('name' = c('pandaman_v', 'doorman_v', 'eve_v'), 'age' = c(15,56, 43) , 'networth' = c(123124, 352334,645645))
#' X_test <- data.frame('name' = c('pandaman_t', 'doorman_t', 'eve_t'), 'age' =c(14,15,56), 'networth' = c(123124,90914, 124124))
#' scaled_df <- scaler(X_train, X_Valid, X_test, c('age','networth'), scaler_type='standardscaler')
scaler <- function(X_train, X_Valid, X_test, scale_features, scaler_type){
# Placeholder: the body is empty, so no scaling is performed and a call
# returns NULL regardless of the arguments supplied.
}
#' scaled_df <- scaler(X_train[c('age','networth')], X_Valid[c('age','networth')], X_test[c('age','networth')], scaler_type='standardization')
scaler <- function(X_train, X_valid, X_test, scaler_type){
  # Scale three numeric data frames with a transformation fitted on X_train
  # only, then applied unchanged to X_train, X_valid and X_test.
  #
  # Arguments:
  #   X_train, X_valid, X_test: data frames whose columns are all numeric.
  #   scaler_type: a single string, 'standardization' (caret "center" +
  #     "scale") or 'minmax' (caret "range").
  # Returns:
  #   A list with elements `train`, `test` and `valid` (in that order) holding
  #   the transformed data frames.
  # Errors:
  #   When scaler_type is not a supported method, when an input is not a data
  #   frame, or when an input contains a non-numeric column.

  # Supported methods mapped to the caret::preProcess `method` argument.
  methods <- list(
    standardization = c("center", "scale"),
    minmax = "range"
  )

  # Require exactly one supported method name. A NULL or multi-element value
  # would otherwise make the `if` condition itself fail with an unrelated
  # error instead of the message below.
  if (!is.character(scaler_type) || length(scaler_type) != 1L ||
      !scaler_type %in% names(methods)) {
    stop('Scaling method must be standardization or minmax')
  }

  inputs <- list(X_train = X_train, X_valid = X_valid, X_test = X_test)

  # Check the container type of every input first. is.data.frame() is used
  # rather than typeof(x) == 'list' because a plain list also has type 'list'
  # and would slip through to a confusing failure further down.
  for (nm in names(inputs)) {
    if (!is.data.frame(inputs[[nm]])) {
      stop(nm, ' should be a dataframe object')
    }
  }

  # Then check that every column of every input is numeric.
  for (nm in names(inputs)) {
    if (!all(vapply(inputs[[nm]], is.numeric, logical(1)))) {
      stop(nm, ' must contain all numeric values')
    }
  }

  # Fit on the training data only so validation/test data do not influence
  # the scaling parameters.
  pp <- caret::preProcess(X_train, method = methods[[scaler_type]])

  list(
    train = predict(pp, X_train),
    test = predict(pp, X_test),
    valid = predict(pp, X_valid)
  )
}
13 changes: 4 additions & 9 deletions man/scaler.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions tests/testthat/test-scaler.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Shared fixtures for the scaler tests.
# `df` is an all-numeric input; `df_nonnum` carries a character column and is
# used to trigger the numeric-content check.
df <- data.frame(a = 1:3, b = 10:12)
df_nonnum <- data.frame(a = 1:3, b = 'hello')

# Expected scaled versions of `df` for each method.
df_minmax <- data.frame(a = c(0.0, 0.5, 1.0), b = c(0.0, 0.5, 1.0))
df_sd <- data.frame(a = c(-1, 0, 1), b = c(-1, 0, 1))

# Expected return values when the same frame is passed as train, valid and
# test; element order matches what scaler() returns.
out_minmax <- list(train = df_minmax, test = df_minmax, valid = df_minmax)
out_sd <- list(train = df_sd, test = df_sd, valid = df_sd)

test_that('tests for exceptions', {

  # Each expectation pins the message of the check it targets, so an unrelated
  # failure (for example a typo or a missing dependency) cannot make the test
  # pass by accident.

  # Checking data in input: a non-numeric column in any one of the inputs
  expect_error(scaler(df, df, df_nonnum, 'standardization'),
               'X_test must contain all numeric values', fixed = TRUE)
  expect_error(scaler(df, df_nonnum, df, 'standardization'),
               'X_valid must contain all numeric values', fixed = TRUE)
  expect_error(scaler(df_nonnum, df, df, 'standardization'),
               'X_train must contain all numeric values', fixed = TRUE)

  # Checking Input Type: a bare number instead of a data frame
  expect_error(scaler(41, df, df, 'standardization'),
               'X_train should be a dataframe object', fixed = TRUE)
  expect_error(scaler(df, 41, df, 'standardization'),
               'X_valid should be a dataframe object', fixed = TRUE)
  expect_error(scaler(df, df, 41, 'standardization'),
               'X_test should be a dataframe object', fixed = TRUE)

  # Checking method: an unsupported scaler name
  expect_error(scaler(df, df, df, 'skscaler'),
               'Scaling method must be standardization or minmax', fixed = TRUE)
})

test_that("Tests to check outputs", {
  # The same frame is supplied as train, valid and test, so every element of
  # the result should equal the expected fixture for the chosen method.

  # Standardization
  scaled_sd <- scaler(df, df, df, 'standardization')
  expect_equal(scaled_sd, out_sd)

  # Min-max scaling
  scaled_minmax <- scaler(df, df, df, 'minmax')
  expect_equal(scaled_minmax, out_minmax)
})

0 comments on commit 1d7b3f7

Please sign in to comment.