eXtreme Gradient Boosting (XGBoost)

Model Construction

Show/Hide Code
#----------------#
#----XGBoost-----#
#----------------#
# 10-fold cross-validation; the seed fixes the fold assignment so the
# resampling results are reproducible.
set.seed(1234)
train_control <- trainControl(method = "cv", number = 10)

# Fit a gradient-boosted tree classifier (xgboost via caret) on `train`.
# tuneGrid pins a single hyper-parameter combination, so caret performs
# no grid search -- it simply cross-validates this one configuration.
set.seed(1234)
xgboost_model <- train(good ~ ., 
                       data = train, 
                       method = "xgbTree",
                       trControl = train_control,
                       tuneGrid = expand.grid(nrounds = 100,           # boosting iterations
                                              max_depth = 5,           # max tree depth
                                              eta = 0.05,              # learning rate
                                              gamma = 0,               # min loss reduction to split
                                              colsample_bytree = 0.5,  # feature subsample per tree
                                              min_child_weight = 1,
                                              subsample = 0.5),        # row subsample per tree
                       verbose = FALSE,
                       metric = "Accuracy")

# Persist the fitted model for the evaluation section below.
# Forward slashes are portable: they work on Windows as well as
# macOS/Linux, unlike escaped backslashes.
save(xgboost_model, file = "dataset/model/xgboost.model_kfoldCV.Rdata")

K-fold CV

Show/Hide Code
# Data Import -- forward-slash paths are portable across Windows/macOS/Linux
load("dataset/wine.data_cleaned.Rdata")
load("dataset/train.Rdata")
load("dataset/test.Rdata")

# Function Import
load("dataset/function/accu.kappa.plot.Rdata")

# Model import
load("dataset/model/xgboost.model_kfoldCV.Rdata")

# Score the held-out test set. The model's predicted labels use the
# "X1" coding for the positive class, so map them back to the 0/1
# factor coding used by test$good before building the confusion matrix.
xgboost.predictions <- predict(xgboost_model, newdata = test)
xgboost.predictions <- factor(ifelse(xgboost.predictions == "X1", 1, 0),
                              levels = c(0, 1))
confusionMatrix(xgboost.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 910 120
         1  39 119
                                         
               Accuracy : 0.8662         
                 95% CI : (0.8455, 0.885)
    No Information Rate : 0.7988         
    P-Value [Acc > NIR] : 8.141e-10      
                                         
                  Kappa : 0.5231         
                                         
 Mcnemar's Test P-Value : 2.233e-10      
                                         
            Sensitivity : 0.9589         
            Specificity : 0.4979         
         Pos Pred Value : 0.8835         
         Neg Pred Value : 0.7532         
             Prevalence : 0.7988         
         Detection Rate : 0.7660         
   Detection Prevalence : 0.8670         
      Balanced Accuracy : 0.7284         
                                         
       'Positive' Class : 0              
                                         
Show/Hide Code
# ROCR needs a numeric score vector. NOTE: as.numeric() applied directly
# to a factor returns the underlying level codes (1/2), NOT the labels
# (0/1); converting through as.character() first recovers the intended
# 0/1 values. The AUC is rank-based, so the reported value is unchanged,
# but the score vector itself is now correct.
xgboost.predictions <- as.numeric(as.character(xgboost.predictions))
pred_obj <- prediction(xgboost.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.728406
Show/Hide Code
# Draw the ROC curve for the 10-fold-CV XGBoost model, shade the area
# under it, and annotate the plot with the AUC value computed above.
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate",
     ylab = "True Positive Rate",
     main = "XGBoost (10-fold CV)")
abline(a = 0, b = 1)

# Shade under the curve in two pieces: the region traced by the curve
# itself, plus the lower-right triangle below the chance diagonal.
fill_col <- rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3)
fpr <- as.numeric(unlist(roc_obj@x.values))
tpr <- as.numeric(unlist(roc_obj@y.values))
polygon(x = fpr, y = tpr, col = fill_col, border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), col = fill_col, border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
# Capture the ROC plot currently on the graphics device so it can be
# replayed later (it is re-drawn in the Summary section's cowplot grid).
xgboost.kfoldCV.ROC.plot <- recordPlot()

# Render the cross-validated resampling results (a single row, since
# only one hyper-parameter combination was tuned) as a formatted table.
pander::pander(xgboost_model$results)
Table continues below
nrounds max_depth eta gamma colsample_bytree min_child_weight
100 5 0.05 0 0.5 1
subsample Accuracy Kappa AccuracySD KappaSD
0.5 0.8147 0.3502 0.01803 0.06482

Summary

Show/Hide Code
# Replay the recorded ROC plot inside a cowplot grid (a single panel
# here; plot_grid presumably kept for layout parity with other sections).
cowplot::plot_grid(xgboost.kfoldCV.ROC.plot)

Model Error Rate Sensitivity Specificity AUC
XGBoost 0.1338 0.9589 0.4979 0.7284060