K Nearest Neighbor Classifier

Model Construction

Show/Hide Code
#--------------------#
#-----K-fold CV------#
#--------------------#

set.seed(1234)
# Define the training control object for 10-fold cross-validation
train_control <- trainControl(method = "cv", number = 10)

# Train the KNN model using 10-fold cross-validation
# tuneLength argument to specify the range of values of K to be considered for tuning
set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "knn", 
                   trControl = train_control,
                   tuneGrid = data.frame(k = 1:10))

# Save the model into .Rdata for future import 
save(knn_model, file = "dataset\\knn.model_kfoldCV.Rdata")


#--------------------------#
#-----K-fold CV (Mod)------#
#--------------------------#

set.seed(1234)
train_control <- trainControl(method = "cv", number = 10)

set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "knn", 
                   trControl = train_control, 
                   tuneGrid = data.frame(k = 1:30))

# Save the model into .Rdata for future import 
save(knn_model, file = "dataset\\knn.model_kfoldCV_mod.Rdata")


#--------------------#
#----Hold-out CV-----#
#--------------------#

set.seed(1234)
train_control <- trainControl(method = "none",)

set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "knn",
                   tuneGrid = data.frame(k = 1:10))

save(knn_model, file = "dataset\\knn.model_holdoutCV.Rdata")


#--------------------------#
#----Hold-out CV (Mod)-----#
#--------------------------#

set.seed(1234)
train_control <- trainControl(method = "none",)

set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "knn",
                   tuneGrid = expand.grid(k=1:30))

save(knn_model, file = "dataset\\knn.model_holdoutCV_mod.Rdata")


#--------------------#
#-------LOOCV--------#
#--------------------#

set.seed(1234)
train_control <- trainControl(method = "LOOCV")

set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "knn", 
                   trControl = train_control,
                   tuneGrid = data.frame(k = 1:10))

save(knn_model, file = "dataset\\knn.model_looCV.Rdata")


#--------------------------#
#-------LOOCV (Mod)--------#
#--------------------------#

set.seed(1234)
train_control <- trainControl(method = "LOOCV")

set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "knn", 
                   trControl = train_control,
                   tuneLength = 10,
                   tuneGrid = expand.grid(k = 1:20))

save(knn_model, file = "dataset\\knn.model_looCV_mod.Rdata")


#--------------------#
#----Repeated CV-----#
#--------------------#

set.seed(1234)
train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 5)

set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "knn", 
                   trControl = train_control)

save(knn_model, file = "dataset\\knn.model_repeatedCV.Rdata")


#--------------------------#
#----Repeated CV (Mod)-----#
#--------------------------#

set.seed(1234)
train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 5)

kknn.grid <- expand.grid(kmax = c(3, 5, 7 ,9, 11), distance = c(1, 2, 3),
                         kernel = c("rectangular", "gaussian", "cos"))

set.seed(1234)
knn_model <- train(good ~ ., 
                   data = train, 
                   method = "kknn",
                   trControl = train_control, 
                   tuneGrid = kknn.grid,
                   preProcess = c("center", "scale"))

save(knn_model, file = "dataset\\knn.model_repeatedCV_mod.Rdata")

K-fold CV

Show/Hide Code
# Data Import
load("dataset\\train.Rdata")
load("dataset\\test.Rdata")

# Model Import
load("dataset\\model\\knn.model_kfoldCV.Rdata")

# Make predictions on the test data using the trained model and calculate the test error rate
knn.predictions <- predict(knn_model, newdata = test)

confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 908 168
         1  41  71
                                          
               Accuracy : 0.8241          
                 95% CI : (0.8012, 0.8453)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.01529         
                                          
                  Kappa : 0.3169          
                                          
 Mcnemar's Test P-Value : < 2e-16         
                                          
            Sensitivity : 0.9568          
            Specificity : 0.2971          
         Pos Pred Value : 0.8439          
         Neg Pred Value : 0.6339          
             Prevalence : 0.7988          
         Detection Rate : 0.7643          
   Detection Prevalence : 0.9057          
      Balanced Accuracy : 0.6269          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
# Convert predictions to a numeric vector
knn.predictions <- as.numeric(knn.predictions)

# Calculate the AUC using the performance() and auc() functions:
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.6269339
Show/Hide Code
# Performance plot for TP and FP
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "KNN ROC Curves with 10-fold CV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.kfoldCV.ROC.plot<- recordPlot()

knn_df <- data.frame(k = knn_model$results$k, 
                     Accuracy = knn_model$results$Accuracy,
                     Kappa = knn_model$results$Kappa)

# Accuracy and Kappa value plot
accu.kappa.plot <- function(model_df) {
  p <- ggplot(data = model_df) +
    geom_point(aes(x = k, y = Accuracy, color = "Accuracy")) +
    geom_point(aes(x = k, y = Kappa, color = "Kappa")) +
    geom_line(aes(x = k, y = Accuracy, linetype = "Accuracy", color = "Accuracy")) +
    geom_line(aes(x = k, y = Kappa, linetype = "Kappa", color = "Kappa")) +
    scale_color_manual(values = c("#98c379", "#e06c75"),
                       guide = guide_legend(override.aes = list(linetype = c(1, 0)) )) +
    scale_linetype_manual(values=c("solid", "dotted"),
                          guide = guide_legend(override.aes = list(color = c("#98c379", "#e06c75")))) +
    labs(x = "K value", 
         y = "Accuracy / Kappa") +
    ylim(0, 1) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5)) +
    guides(color = guide_legend(title = "Metric"),
           linetype = guide_legend(title = "Metric"))
  return(p)
}

knn.kfoldCV.plot <- accu.kappa.plot(knn_df) + 
  geom_text(aes(x = k, y = Accuracy, label = round(Accuracy, 3)), vjust = -1) +
  geom_text(aes(x = k, y = Kappa, label = round(Kappa, 3)), vjust = -1) +
  ggtitle("KNN Model Performance (10-Fold CV)")

Tuned

Show/Hide Code
load("dataset\\model\\knn.model_kfoldCV_mod.Rdata")

knn.predictions <- predict(knn_model, newdata = test)

confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 926 210
         1  23  29
                                          
               Accuracy : 0.8039          
                 95% CI : (0.7801, 0.8261)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.3475          
                                          
                  Kappa : 0.1373          
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 0.9758          
            Specificity : 0.1213          
         Pos Pred Value : 0.8151          
         Neg Pred Value : 0.5577          
             Prevalence : 0.7988          
         Detection Rate : 0.7795          
   Detection Prevalence : 0.9562          
      Balanced Accuracy : 0.5486          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
knn.predictions <- as.numeric(knn.predictions)
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.5485514
Show/Hide Code
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "KNN ROC Curves with 10-fold CV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.kfoldCV_mod.ROC.plot <- recordPlot()

knn_df <- data.frame(k = knn_model$results$k, 
                     Accuracy = knn_model$results$Accuracy,
                     Kappa = knn_model$results$Kappa)

knn.kfoldCV_mod.plot <- accu.kappa.plot(knn_df) +
  geom_text(aes(x = k, y = Accuracy, label = round(Accuracy, 3)),  hjust = -0.3, angle=90)  +
  geom_text(aes(x = k, y = Kappa, label = round(Kappa, 3)),  hjust = -0.3, angle=90) +
  ggtitle("KNN Model Performance (Tuned 10-Fold CV)")

Hold-out CV (Validation Set Approach)

Show/Hide Code
load("dataset\\model\\knn.model_holdoutCV.Rdata")


knn.predictions <- predict(knn_model, newdata = test)

confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 908 168
         1  41  71
                                          
               Accuracy : 0.8241          
                 95% CI : (0.8012, 0.8453)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.01529         
                                          
                  Kappa : 0.3169          
                                          
 Mcnemar's Test P-Value : < 2e-16         
                                          
            Sensitivity : 0.9568          
            Specificity : 0.2971          
         Pos Pred Value : 0.8439          
         Neg Pred Value : 0.6339          
             Prevalence : 0.7988          
         Detection Rate : 0.7643          
   Detection Prevalence : 0.9057          
      Balanced Accuracy : 0.6269          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
knn.predictions <- as.numeric(knn.predictions)
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.6269339
Show/Hide Code
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "KNN ROC Curves with Hold-out CV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.holdoutCV.ROC.plot <- recordPlot()

knn_df <- data.frame(k = knn_model$results$k, 
                     Accuracy = knn_model$results$Accuracy,
                     Kappa = knn_model$results$Kappa)

knn.holdoutCV.plot <- accu.kappa.plot(knn_df) +
  geom_text(aes(x = k, y = Accuracy, label = round(Accuracy, 3)), vjust = -1) +
  geom_text(aes(x = k, y = Kappa, label = round(Kappa, 3)), vjust = -1) +
  ggtitle("KNN Model Performance (Hold-out CV)")

Tuned

Show/Hide Code
load("dataset\\model\\knn.model_holdoutCV_mod.Rdata")

knn.predictions <- predict(knn_model, newdata = test)

confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 944 231
         1   5   8
                                          
               Accuracy : 0.8013          
                 95% CI : (0.7775, 0.8237)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.431           
                                          
                  Kappa : 0.0436          
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 0.99473         
            Specificity : 0.03347         
         Pos Pred Value : 0.80340         
         Neg Pred Value : 0.61538         
             Prevalence : 0.79882         
         Detection Rate : 0.79461         
   Detection Prevalence : 0.98906         
      Balanced Accuracy : 0.51410         
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
knn.predictions <- as.numeric(knn.predictions)
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.514102
Show/Hide Code
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "KNN ROC Curves with Tuned Hold-out CV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.holdoutCV_mod.ROC.plot <- recordPlot()

knn_df <- data.frame(k = knn_model$results$k, 
                     Accuracy = knn_model$results$Accuracy,
                     Kappa = knn_model$results$Kappa)

knn.holdoutCV_mod.plot <- accu.kappa.plot(knn_df) + 
  geom_text(aes(x = k, y = Accuracy, label = round(Accuracy, 3)), hjust = -0.3, angle=90) +
  geom_text(aes(x = k, y = Kappa, label = round(Kappa, 3)), hjust=-0.3, angle=90) +
  ggtitle("KNN Model Performance (Tuned Hold-out CV)")

LOOCV

Show/Hide Code
load("dataset\\model\\knn.model_looCV.Rdata")

knn.predictions <- predict(knn_model, newdata = test)
confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 913 168
         1  36  71
                                          
               Accuracy : 0.8283          
                 95% CI : (0.8056, 0.8493)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.00558         
                                          
                  Kappa : 0.3266          
                                          
 Mcnemar's Test P-Value : < 2e-16         
                                          
            Sensitivity : 0.9621          
            Specificity : 0.2971          
         Pos Pred Value : 0.8446          
         Neg Pred Value : 0.6636          
             Prevalence : 0.7988          
         Detection Rate : 0.7685          
   Detection Prevalence : 0.9099          
      Balanced Accuracy : 0.6296          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
knn.predictions <- as.numeric(knn.predictions)
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.6295682
Show/Hide Code
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "KNN ROC Curves with LOOCV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.looCV.ROC.plot <- recordPlot()

knn_df <- data.frame(k = knn_model$results$k, 
                     Accuracy = knn_model$results$Accuracy,
                     Kappa = knn_model$results$Kappa)

knn.looCV.plot <- accu.kappa.plot(knn_df) + 
  geom_text(aes(x = k, y = Accuracy, label = round(Accuracy, 3)), vjust = -1) +
  geom_text(aes(x = k, y = Kappa, label = round(Kappa, 3)), vjust = -1) +
  ggtitle("KNN Model Performance (LOOCV)")

Tuned

Show/Hide Code
load("dataset\\model\\knn.model_looCV_mod.Rdata")

knn.predictions <- predict(knn_model, newdata = test)
confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 927 215
         1  22  24
                                          
               Accuracy : 0.8005          
                 95% CI : (0.7766, 0.8229)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.4596          
                                          
                  Kappa : 0.1107          
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 0.9768          
            Specificity : 0.1004          
         Pos Pred Value : 0.8117          
         Neg Pred Value : 0.5217          
             Prevalence : 0.7988          
         Detection Rate : 0.7803          
   Detection Prevalence : 0.9613          
      Balanced Accuracy : 0.5386          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
knn.predictions <- as.numeric(knn.predictions)
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.5386181
Show/Hide Code
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "Knn ROC Curves Tuned LOOCV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.looCV_mod.ROC.plot <- recordPlot()

knn_df <- data.frame(k = knn_model$results$k, 
                     Accuracy = knn_model$results$Accuracy,
                     Kappa = knn_model$results$Kappa)

knn.looCV_mod.plot <- accu.kappa.plot(knn_df) + 
  geom_text(aes(x = k, y = Accuracy, label = round(Accuracy, 3)), hjust = -0.3, angle=90) +
  geom_text(aes(x = k, y = Kappa, label = round(Kappa, 3)), hjust = -0.3, angle=90) +
  ggtitle("KNN Model Performance (Tuned LOOCV)")

Repeated CV

Show/Hide Code
load("dataset\\model\\knn.model_repeatedCV.Rdata")

knn.predictions <- predict(knn_model, newdata = test)

confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 864 126
         1  85 113
                                          
               Accuracy : 0.8224          
                 95% CI : (0.7994, 0.8437)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : 0.022056        
                                          
                  Kappa : 0.4095          
                                          
 Mcnemar's Test P-Value : 0.005892        
                                          
            Sensitivity : 0.9104          
            Specificity : 0.4728          
         Pos Pred Value : 0.8727          
         Neg Pred Value : 0.5707          
             Prevalence : 0.7988          
         Detection Rate : 0.7273          
   Detection Prevalence : 0.8333          
      Balanced Accuracy : 0.6916          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
knn.predictions <- as.numeric(knn.predictions)
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.6916177
Show/Hide Code
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "KNN ROC Curves with Repeated CV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.repeatedCV.ROC.plot <- recordPlot()

knn_df <- knn_model$results
knn.repeatedCV.plot <- ggplot(data=knn_df, aes(x = kmax, y = Accuracy)) +
  geom_point(aes(color = "Accuracy")) +
  geom_point(aes(color = "Kappa")) +
  geom_line(aes(linetype = "Accuracy", color = "Accuracy")) +
  geom_line(aes(y = Kappa, linetype = "Kappa", color = "Kappa")) +
  geom_text(aes(label = round(Accuracy, 3)), vjust = -1) +
  geom_text(aes(y = Kappa, label = round(Kappa, 3)), vjust = -1) +
  scale_color_manual(values = c("#98c379", "#e06c75"),
                     guide = guide_legend(override.aes = list(linetype = c(1, 0)) )) +
  scale_linetype_manual(values=c("solid", "dotted"),
                        guide = guide_legend(override.aes = list(color = c("#98c379", "#e06c75")))) +
  labs(x = "K value", 
       y = "Accuracy / Kappa",
       title = "KNN Model Performance (Repeated CV)") +
  ylim(0, 1) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  guides(color = guide_legend(title = "Metric"),
         linetype = guide_legend(title = "Metric"))

Tuned

Show/Hide Code
load("dataset\\model\\knn.model_repeatedCV_mod.Rdata")

knn.predictions <- predict(knn_model, newdata = test)

confusionMatrix(knn.predictions, test$good)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 906  90
         1  43 149
                                          
               Accuracy : 0.888           
                 95% CI : (0.8687, 0.9054)
    No Information Rate : 0.7988          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.624           
                                          
 Mcnemar's Test P-Value : 6.643e-05       
                                          
            Sensitivity : 0.9547          
            Specificity : 0.6234          
         Pos Pred Value : 0.9096          
         Neg Pred Value : 0.7760          
             Prevalence : 0.7988          
         Detection Rate : 0.7626          
   Detection Prevalence : 0.8384          
      Balanced Accuracy : 0.7891          
                                          
       'Positive' Class : 0               
                                          
Show/Hide Code
knn.predictions <- as.numeric(knn.predictions)
pred_obj <- prediction(knn.predictions, test$good)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
auc_val
[1] 0.7890601
Show/Hide Code
roc_obj <- performance(pred_obj, "tpr", "fpr")
plot(roc_obj, colorize = TRUE, lwd = 2,
     xlab = "False Positive Rate", 
     ylab = "True Positive Rate",
     main = "KNN ROC Curves with Tuned Repeated CV")
abline(a = 0, b = 1)
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(x = x_values, y = y_values, 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
polygon(x = c(0, 1, 1), y = c(0, 0, 1), 
        col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
        border = NA)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))
Show/Hide Code
knn.repeatedCV_mod.ROC.plot <- recordPlot()

knn.repeatedCV_mod.plot <- ggplot(knn_model) +
  labs(x = "K value", 
       y = "Accuracy", 
       title = "KNN Model Performance (Tuned Repeated CV)") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) 

Summary

Show/Hide Code
ggarrange(knn.kfoldCV.plot,
          knn.kfoldCV_mod.plot,
          knn.holdoutCV.plot,
          knn.holdoutCV_mod.plot,
          knn.looCV.plot,
          knn.looCV_mod.plot,
          knn.repeatedCV.plot,
          knn.repeatedCV_mod.plot,
          ncol = 2, nrow = 4)

Show/Hide Code
cowplot::plot_grid(knn.kfoldCV.ROC.plot, knn.kfoldCV_mod.ROC.plot,
                   ncol = 2, align = "hv", scale = 0.8)

Show/Hide Code
cowplot::plot_grid(knn.holdoutCV.ROC.plot, knn.holdoutCV_mod.ROC.plot,
                   ncol = 2, align = "hv", scale = 0.8)

Show/Hide Code
cowplot::plot_grid(knn.looCV.ROC.plot, knn.looCV_mod.ROC.plot,
                   ncol = 2, align = "hv", scale = 0.8)

Show/Hide Code
cowplot::plot_grid(knn.repeatedCV.ROC.plot, knn.repeatedCV_mod.ROC.plot,
                   ncol = 2, align = "hv", scale = 0.8)

Model (Resampling Method) Error Rate Sensitivity Specificity AUC
KNN 10-Fold CV, k=1:10 0.1759 0.9568 0.2971 0.6269339
KNN 10-Fold CV, k=1:30 0.1961 0.9758 0.1213 0.5485514
KNN Hold-out CV, k=1:10 0.1759 0.9568 0.2971 0.6269339
KNN Hold-out CV, k=1:30 0.1987 0.0053 0.0335 0.5141020
KNN LOOCV 0.1717 0.9621 0.2971 0.6295682
KNN LOOCV (Tuned) 0.1995 0.9768 0.1004 0.5386181
Repeated CV 0.1776 0.9104 0.4728 0.6916177
Repeated CV (Tuned) 0.1120 0.9547 0.6234 0.7890601