forked from szilard/benchm-ml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path8a-Rborist.R
44 lines (29 loc) · 1.06 KB
/
8a-Rborist.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
library(data.table)
library(Rborist)
library(ROCR)
# Load the training and test sets as plain data.frames
# (fread returns a data.table unless data.table = FALSE).
dx_train <- fread("train-1m.csv", data.table = FALSE)
dx_test <- fread("test.csv", data.table = FALSE)

# Rborist 0-1.1 only accepts factor and numeric predictors or response,
# so every column is coerced explicitly below.
facCols <- c("UniqueCarrier", "Origin", "Dest", "Month", "DayofMonth", "DayOfWeek")
numCols <- c("DepTime", "Distance")
responseCol <- "dep_delayed_15min"

# Categorical predictors: build each factor from the union of the values seen
# in BOTH sets. Calling as.factor() on each set separately derives the levels
# from that set alone, so a category that appears in only one of them shifts
# the integer codes and the same label can end up encoded differently in
# train and test.
# NOTE(review): levels that occur only in the test set become unused levels in
# the training factor -- confirm the installed Rborist version accepts them.
for (k in facCols) {
  shared_levels <- sort(unique(c(dx_train[[k]], dx_test[[k]])))
  dx_train[[k]] <- factor(dx_train[[k]], levels = shared_levels)
  dx_test[[k]] <- factor(dx_test[[k]], levels = shared_levels)
}

# Numeric predictors: force double storage in both sets.
dx_train[numCols] <- lapply(dx_train[numCols], as.numeric)
dx_test[numCols] <- lapply(dx_test[numCols], as.numeric)

# Response: categorical, so it is converted to a factor as well.
dx_train[[responseCol]] <- as.factor(dx_train[[responseCol]])
dx_test[[responseCol]] <- as.factor(dx_test[[responseCol]])

# Predictor names = every column except the response, in file order.
Xnames <- names(dx_train)[names(dx_train) != responseCol]
# Fit the forest on the training predictors/response and print how long the
# fit took. The fitted model is kept in `md` for the prediction step.
# NOTE(review): nLevel = 20 presumably limits tree depth -- confirm against the
# documentation of the installed Rborist version.
system.time(
  md <- Rborist(
    dx_train[, Xnames],
    dx_train[[responseCol]],
    nTree = 100,
    nLevel = 20
  )
)
# Score the test set and print how long prediction took. With
# ctgCensus = "prob" the result's `prob` matrix is indexed by class label;
# only the "Y" column is kept as the predicted score.
system.time(
  phat <- predict(
    md,
    newdata = dx_test[, Xnames],
    ctgCensus = "prob"
  )$prob[, "Y"]
)

# Evaluate with ROCR: scores against a logical "is class Y" label, then AUC.
rocr_pred <- prediction(
  predictions = phat,
  labels = dx_test[[responseCol]] == "Y"
)
performance(rocr_pred, measure = "auc")