Model Construction

Show/Hide Code
#--------------#
#----boost-----#
#--------------#
# Resampling scheme handed to caret: 10-fold cross-validation.
train_control <- trainControl(method = "cv", number = 10)

# Seed immediately before train() so that every random step inside the call
# starts from a fixed RNG state. (A second, earlier set.seed() is unnecessary:
# this call resets the generator anyway.)
set.seed(1234)
boost_model <- train(good ~ .,
                     data = train,
                     method = "gbm",
                     trControl = train_control)

# Persist the fitted model. file.path() builds the path with a separator that
# works on every platform instead of hard-coding Windows backslashes.
save(boost_model,
     file = file.path("dataset", "model", "boost.model_kfoldCV.Rdata"))

K-fold CV

Show/Hide Code
# Data Import
# file.path() keeps the paths portable (no hard-coded Windows backslashes).
load(file.path("dataset", "wine.data_cleaned.Rdata"))
load(file.path("dataset", "train.Rdata"))
load(file.path("dataset", "test.Rdata"))

# Function Import
load(file.path("dataset", "function", "accu.kappa.plot.Rdata"))

# Model import
load(file.path("dataset", "model", "boost.model_kfoldCV.Rdata"))

# Predicted class labels for the hold-out set.
boost.predictions <- predict(boost_model, newdata = test)

# Cross-tabulate predictions against the observed labels. No `positive`
# argument is given, so the statistics are reported with class "0" as the
# positive class (see "'Positive' Class" in the printed output).
confusionMatrix(boost.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 891 136
         1  58 103
                                          
               Accuracy : 0.8367          
                 95% CI : (0.8144, 0.8573)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.0004918       
                                          
                  Kappa : 0.4213          
                                          
 Mcnemar's Test P-Value : 3.234e-08       
                                          
            Sensitivity : 0.9389          
            Specificity : 0.4310          
         Pos Pred Value : 0.8676          
         Neg Pred Value : 0.6398          
             Prevalence : 0.7988          
         Detection Rate : 0.7500          
   Detection Prevalence : 0.8645          
      Balanced Accuracy : 0.6849          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
# Numeric recoding of the predicted class labels (factor codes), kept so any
# later code that expects a numeric `boost.predictions` still works.
boost.predictions <- as.numeric(boost.predictions)

# Score each test observation with the predicted probability of class "1".
# Feeding hard class labels to ROCR yields a ROC curve with a single operating
# point, whose area is just the balanced accuracy; probabilities give the full
# curve and the actual AUC.
# NOTE(review): the AUC printed by this chunk will therefore differ from the
# value obtained with hard labels; re-render the downstream summary.
boost.probabilities <- predict(boost_model,
                               newdata = test,
                               type = "prob")[, "1"]

pred_obj <- prediction(boost.probabilities, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.6849227
Show/Hide Code
# ROC curve (TPR against FPR) derived from the ROCR prediction object.
roc_obj <- performance(pred_obj, "tpr", "fpr")

# Semi-transparent blue used for both shaded regions below.
auc_fill <- rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3)

plot(
  roc_obj,
  colorize = TRUE,
  lwd = 2,
  xlab = "False Positive Rate",
  ylab = "True Positive Rate",
  main = "Boosting (10-fold CV)"
)

# Chance-level reference diagonal.
abline(a = 0, b = 1)

# Flatten the curve coordinates into plain numeric vectors.
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))

# Shade the region enclosed by the ROC curve (polygon() closes the path back
# to its first point) ...
polygon(x = x_values, y = y_values, col = auc_fill, border = NA)

# ... and the triangle underneath the diagonal.
polygon(x = c(0, 1, 1), y = c(0, 0, 1), col = auc_fill, border = NA)

# Annotate the plot with the AUC rounded to four decimals.
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
# Snapshot the current graphics device so the plot can be replayed later.
# Must run right after the plotting calls, before anything else draws.
boost.kfoldCV.ROC.plot <- recordPlot()

# Render the `results` element of the fitted caret model as a formatted table.
pander::pander(boost_model$results)
Table continues below
  shrinkage interaction.depth n.minobsinnode n.trees Accuracy
1 0.1 1 10 50 0.81
4 0.1 2 10 50 0.8136
7 0.1 3 10 50 0.8103
2 0.1 1 10 100 0.8143
5 0.1 2 10 100 0.8107
8 0.1 3 10 100 0.8125
3 0.1 1 10 150 0.814
6 0.1 2 10 150 0.8147
9 0.1 3 10 150 0.8114
  Kappa AccuracySD KappaSD
1 0.2759 0.01404 0.05789
4 0.3286 0.01312 0.03983
7 0.3374 0.02073 0.07382
2 0.3408 0.01961 0.0701
5 0.3427 0.01489 0.04562
8 0.3534 0.01818 0.06687
3 0.3457 0.02366 0.08334
6 0.3694 0.01629 0.06235
9 0.3559 0.02313 0.07864

Summary

Show/Hide Code
# Redraw the recorded ROC plot via cowplot.
cowplot::plot_grid(boost.kfoldCV.ROC.plot)

Model Error Rate Sensitivity Specificity AUC
Boosting 0.1633 0.9389 0.4310 0.6849227