Quadratic Discriminant Analysis

K-fold CV (caret)

Show/Hide Code
#---------------------------#
#----Model Construction-----#
#---------------------------#
# Fit a QDA classifier with caret using 10-fold cross-validation on the
# `train` data. The seed is set (twice, before trainControl() and before
# train()) so the random fold assignments are reproducible.
set.seed(1234)
train_control <- trainControl(method = "cv", number = 10)

set.seed(1234)
qda_model <- train(good ~ ., 
                   data = train, 
                   method = "qda", 
                   trControl = train_control)

# Persist the fitted model so later chunks can load it instead of refitting.
# NOTE(review): this saves to "dataset\\qda.model_kfoldCV.Rdata" but the
# import chunk below loads from "dataset\\model\\qda.model_kfoldCV.Rdata" --
# confirm the file is moved, or align the two paths.
save(qda_model, file = "dataset\\qda.model_kfoldCV.Rdata")
Show/Hide Code
# Data Import: cleaned wine data plus the train/test split used throughout
load("dataset\\wine.data_cleaned.Rdata")
load("dataset\\train.Rdata")
load("dataset\\test.Rdata")

# Function Import: project plotting helper accu.kappa.plot()
load("dataset\\function\\accu.kappa.plot.Rdata")

# Model import
# NOTE(review): loaded from "dataset\\model\\..." although the construction
# chunk saved to "dataset\\..." -- verify these paths refer to the same file.
load("dataset\\model\\qda.model_kfoldCV.Rdata")

# Hard class predictions (factor levels 0/1) on the held-out test set
qda.predictions <- predict(qda_model, newdata = test)

# Confusion matrix with test$good as the reference labels
confusionMatrix(qda.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 704  59
         1 245 180
                                          
               Accuracy : 0.7441          
                 95% CI : (0.7183, 0.7687)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : 0.3834          
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 0.7418          
            Specificity : 0.7531          
         Pos Pred Value : 0.9227          
         Neg Pred Value : 0.4235          
             Prevalence : 0.7988          
         Detection Rate : 0.5926          
   Detection Prevalence : 0.6423          
      Balanced Accuracy : 0.7475          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
# Number of folds (reused below when tabulating per-fold metrics)
k <- 10
# ROCR needs numeric scores: as.numeric() on the factor predictions yields
# the 1/2 level codes, which preserve the class ordering for the ROC curve.
# NOTE(review): these are hard class labels, not posterior probabilities, so
# the ROC curve has a single interior operating point; consider
# predict(qda_model, newdata = test, type = "prob") for a full curve.
qda.predictions <- as.numeric(qda.predictions)
pred_obj <- prediction(qda.predictions, test$good)

# Compute AUC value
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.7474858
Show/Hide Code
# ROC curve for the caret QDA model, with the area under the curve shaded
qda.perf <- performance(pred_obj, "tpr", "fpr")
plot(qda.perf, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "caret::qda ROC Curves")
abline(a = 0, b = 1)  # chance diagonal for reference

# Shade the region between the curve and the x-axis (two polygons: the
# area under the curve plus the lower-right triangle)
fill_col <- rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3)
roc_fpr <- as.numeric(unlist(qda.perf@x.values))
roc_tpr <- as.numeric(unlist(qda.perf@y.values))
polygon(x = roc_fpr, y = roc_tpr, col = fill_col, border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), col = fill_col, border = NA)

# Annotate with the AUC value computed above
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
# Record the caret ROC plot for the side-by-side summary at the bottom
qda.kfoldCV_caret.ROC.plot <- recordPlot()

# Per-fold resample metrics. qda has no tuning parameters, so
# qda_model$results holds a single aggregated row; the previous code
# recycled that one value across all k rows, plotting the same number
# 10 times. qda_model$resample holds one Accuracy/Kappa row per fold;
# order by fold name for a deterministic 1..k mapping.
resamp <- qda_model$resample[order(qda_model$resample$Resample), ]
qda_df <- data.frame(k = 1:k,
                     Accuracy = resamp$Accuracy,
                     Kappa = resamp$Kappa)

K-fold CV (MASS)

Show/Hide Code
# Number of CV folds
k <- 10

# Randomly assign each row of the data to one of the k folds
set.seed(1234) # for reproducibility
fold_indices <- sample(rep(1:k, length.out = nrow(wine.data_cleaned)))

# Row indices belonging to each fold, one unnamed list element per fold
folds <- lapply(seq_len(k), function(fold) which(fold_indices == fold))

# Per-fold performance containers
error_rate <- numeric(k)
confusion_matrices <- vector("list", k)
kappa <- numeric(k)


# Loop through each fold: train on the other k-1 folds, evaluate on the
# held-out fold. NOTE(review): this overwrites the global `train`, `test`,
# and `qda_model` objects loaded earlier in the document.
for (i in seq_len(k)) {  # was hard-coded 1:10; use k so the fold count has one source of truth
  # Extract the i-th fold as the testing set
  test_indices <- folds[[i]]
  
  test <- wine.data_cleaned[test_indices, ]
  train <- wine.data_cleaned[-test_indices, ]
  
  # Fit the model on the training set.
  # (MASS::qda has no `family` argument; the `family = binomial` previously
  # passed here was silently swallowed by `...`, so it is removed.)
  qda_model <- qda(good ~ ., data = train)
  
  # Posterior probability of class "1", classified with a 0.7 cutoff.
  # (predict.qda has no `type` argument; the old `type = "response"` was
  # ignored.) NOTE(review): 0.7 is an unusually high threshold vs the
  # conventional 0.5 -- confirm it is intentional.
  qda.pred <- predict(qda_model, newdata = test)
  predicted_classes <- ifelse(qda.pred$posterior[, 2] > 0.7, 1, 0)
  
  # Overall error rate: fraction of misclassified observations.
  # (Assumes `good` is coded 0/1 in wine.data_cleaned; as.numeric() on a
  # factor would yield 1/2 codes and break this comparison.)
  error_rate[i] <- mean(predicted_classes != as.numeric(test$good))
  
  # Confusion matrix on factor labels with a fixed 0/1 level order
  test$good <- as.factor(test$good)
  predicted_classes <- factor(predicted_classes, levels = c(0, 1))
  confusion_matrices[[i]] <- caret::confusionMatrix(predicted_classes, test$good)
  
  # Cohen's kappa (named lookup instead of the magic index [[2]])
  kappa[i] <- confusion_matrices[[i]]$overall[["Kappa"]]
  
  # Print the error rate for this fold
  cat(paste0("Fold ", i, ": ", "OER:", error_rate[i], "\n"))
}
Fold 1: OER:0.19647355163728
Fold 2: OER:0.171717171717172
Fold 3: OER:0.232323232323232
Fold 4: OER:0.212121212121212
Fold 5: OER:0.20959595959596
Fold 6: OER:0.222222222222222
Fold 7: OER:0.194444444444444
Fold 8: OER:0.207070707070707
Fold 9: OER:0.174242424242424
Fold 10: OER:0.169191919191919
Show/Hide Code
# Index of the fold with the lowest overall error rate
best_confmat_index <- which.min(error_rate)
best_confmat_index
[1] 10
Show/Hide Code
# Confusion matrix of the best-performing fold (single-bracket indexing
# keeps the list wrapper, hence the [[1]] in the printed output)
confusion_matrices[best_confmat_index]
[[1]]
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 285  33
         1  34  44
                                          
               Accuracy : 0.8308          
                 95% CI : (0.7902, 0.8664)
    No Information Rate : 0.8056          
    P-Value [Acc > NIR] : 0.1126          
                                          
                  Kappa : 0.4626          
                                          
 Mcnemar's Test P-Value : 1.0000          
                                          
            Sensitivity : 0.8934          
            Specificity : 0.5714          
         Pos Pred Value : 0.8962          
         Neg Pred Value : 0.5641          
             Prevalence : 0.8056          
         Detection Rate : 0.7197          
   Detection Prevalence : 0.8030          
      Balanced Accuracy : 0.7324          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
#AUC and Performance Plot
# NOTE(review): `predicted_classes` and `test` are leftovers from the FINAL
# CV iteration (fold k), not the best fold selected above -- the two agree
# here only because fold 10 happened to have the lowest error rate. To make
# this robust, re-derive predictions for folds[[best_confmat_index]].
predicted_classes <- as.numeric(predicted_classes)
pred_obj <- prediction(predicted_classes, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
qda.perf <- performance(pred_obj,"tpr","fpr")
auc_val
[1] 0.7324227
Show/Hide Code
# ROC curve for the MASS QDA model, with the area under the curve shaded
plot(qda.perf, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "MASS::qda ROC Curves")
abline(a = 0, b = 1)  # chance diagonal for reference

# Shade the region between the curve and the x-axis (area under the curve
# plus the lower-right triangle)
fill_col <- rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3)
roc_fpr <- as.numeric(unlist(qda.perf@x.values))
roc_tpr <- as.numeric(unlist(qda.perf@y.values))
polygon(x = roc_fpr, y = roc_tpr, col = fill_col, border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), col = fill_col, border = NA)

# Annotate with the AUC value computed above
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
# Record the MASS ROC plot for the side-by-side summary below
qda.kfoldCV_MASS.ROC.plot <- recordPlot()

# Per-fold accuracy (1 - error rate) and kappa from the MASS CV loop
qda_df <- data.frame(k = 1:k,
                     Accuracy = 1-error_rate, 
                     Kappa = kappa)

# accu.kappa.plot() is the project helper loaded earlier from
# dataset\function\accu.kappa.plot.Rdata; labels are offset above each point
qda.kfoldCV_MASS.plot <- accu.kappa.plot(qda_df) + 
  geom_text(aes(x = k, y = Accuracy, label = round(Accuracy, 3)), vjust = -1) +
  geom_text(aes(x = k, y = Kappa, label = round(Kappa, 3)), vjust = -1) +
  ggtitle("MASS::qda Model Performance (10-Fold CV)")
Show/Hide Code
# Pairwise partition plots of the QDA decision regions, written to a PNG.
# NOTE(review): `predictor_vars` is not defined in this chunk -- it must be
# created earlier in the document; confirm it exists before this runs.
png("dataset\\plot\\qda.png", width = 2000, height = 2000, res = 150)
klaR::partimat(as.factor(good) ~ ., data = train, method = "qda", 
         control = list(adip = 0.7, col.unmatched = "grey", 
                        col.matched = c("#E69F00", "#56B4E9")), 
         pch = 19, cex = 1.5, varNames = predictor_vars)
# Close the PNG graphics device so the file is flushed to disk and
# subsequent plots go back to the default device (was missing).
dev.off()

Summary

Show/Hide Code
# Side-by-side comparison of the two recorded ROC plots.
# NOTE(review): plot_grid() accepts recordedplot objects, but `align` only
# applies to ggplot objects -- it is effectively ignored here; verify the
# rendered layout is as intended.
cowplot::plot_grid(qda.kfoldCV_caret.ROC.plot,
                   qda.kfoldCV_MASS.ROC.plot,
                   ncol = 2, align = "hv", scale = 0.8)

Model Error Rate Sensitivity Specificity AUC
QDA (caret) 0.2559 0.7418 0.7531 0.7474858
QDA (MASS) 0.1692 0.8934 0.5714 0.7324227