Random Forest (Classification)

Model Construction

Show/Hide Code
# ---- Random forest: cross-validated fit ----

# Resampling scheme handed to caret: 10-fold cross-validation.
set.seed(1234)
train_control <- trainControl(
  method = "cv",
  number = 10
)

# The seed is reset right before fitting, so the fit does not depend on any
# random draws made earlier in the session.
set.seed(1234)
rf_model <- train(
  good ~ .,
  data = train,
  method = "rf",
  trControl = train_control
)

# Persist the fitted object under the name `rf_model`.
save(rf_model, file = "dataset\\model\\rf.model_kfoldCV.Rdata")

K-fold CV

Show/Hide Code
# ---- Inputs ----

# Saved data objects (cleaned data, training split, test split).
load("dataset\\wine.data_cleaned.Rdata")
load("dataset\\train.Rdata")
load("dataset\\test.Rdata")

# Saved helper function.
load("dataset\\function\\accu.kappa.plot.Rdata")

# Fitted random-forest model (`rf_model`).
load("dataset\\model\\rf.model_kfoldCV.Rdata")

# ---- Test-set evaluation ----

# Predicted class for every row of the test set.
rf.predictions <- predict(rf_model, test)

# Cross-tabulate predictions against the observed `good` labels and print
# the associated statistics.
confusionMatrix(data = rf.predictions, reference = test[["good"]])
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 929  36
         1  20 203
                                          
               Accuracy : 0.9529          
                 95% CI : (0.9392, 0.9642)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : < 2e-16         
                                          
                  Kappa : 0.8496          
                                          
 Mcnemar's Test P-Value : 0.04502         
                                          
            Sensitivity : 0.9789          
            Specificity : 0.8494          
         Pos Pred Value : 0.9627          
         Neg Pred Value : 0.9103          
             Prevalence : 0.7988          
         Detection Rate : 0.7820          
   Detection Prevalence : 0.8123          
      Balanced Accuracy : 0.9141          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
# Area under the ROC curve on the test set.
#
# An ROC curve needs a continuous score. Scoring with the hard class labels
# gives a single operating point, so the resulting "AUC" collapses to
# (sensitivity + specificity) / 2 instead of measuring how well the model
# ranks the observations. Score each row with the predicted probability of
# the positive class instead.
rf.probabilities <- predict(rf_model, newdata = test, type = "prob")

# ROCR takes the higher factor level of the labels as the positive class,
# so use the probability column for the second level of test$good.
# NOTE(review): assumes test$good is a two-level factor -- confirm.
positive_level <- levels(test$good)[2]

# Kept from the original code: rf.predictions becomes the numeric level
# codes of the predicted class, in case later code relies on that.
rf.predictions <- as.numeric(rf.predictions)

pred_obj <- prediction(rf.probabilities[[positive_level]], test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.9141488
Show/Hide Code
# ROC curve built from pred_obj: true positive rate against false positive
# rate.
roc_obj <- performance(pred_obj, "tpr", "fpr")

plot(
  roc_obj,
  colorize = TRUE,
  lwd = 2,
  xlab = "False Positive Rate",
  ylab = "True Positive Rate",
  main = "Random Forest (10-fold CV)"
)

# Diagonal reference line: intercept 0, slope 1.
abline(0, 1)

# Curve coordinates flattened to plain numeric vectors.
x_values <- as.numeric(unlist(slot(roc_obj, "x.values")))
y_values <- as.numeric(unlist(slot(roc_obj, "y.values")))

# Shade the region enclosed by the curve points ...
polygon(
  x_values,
  y_values,
  col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
  border = NA
)

# ... and the triangle below the diagonal, in the same translucent colour.
polygon(
  c(0, 1, 1),
  c(0, 0, 1),
  col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
  border = NA
)

# Annotate the plot with the AUC rounded to four decimals.
text(x = 0.6, y = 0.4, labels = paste("AUC =", round(auc_val, 4)))
Show/Hide Code
# Snapshot the plot currently on the graphics device so it can be redrawn
# later.
rf.kfoldCV.ROC.plot <- recordPlot()

# Print the resampling results stored on the fitted model as a table.
pander::pander(rf_model[["results"]])
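| mtry | Accuracy | Kappa  | AccuracySD | KappaSD |
|------|----------|--------|------------|---------|
| 2    | 0.8251   | 0.3767 | 0.01759    | 0.063   |
| 6    | 0.8168   | 0.3629 | 0.01911    | 0.07142 |
| 11   | 0.8139   | 0.3615 | 0.02738    | 0.09509 |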

Summary

Show/Hide Code
# Redraw the recorded ROC plot through cowplot's grid layout.
cowplot::plot_grid(plotlist = list(rf.kfoldCV.ROC.plot))

| Model         | Error Rate | Sensitivity | Specificity | AUC       |
|---------------|------------|-------------|-------------|-----------|
| Random Forest | 0.0471     | 0.9789      | 0.8494      | 0.9141488 |