#---------------------------#
#----Model Construction-----#
#---------------------------#

# Fit a linear discriminant analysis classifier for `good` through caret,
# resampled with 10-fold cross-validation, and persist the fitted object.
# `train` is expected to be the training data frame already present in the
# workspace; `good` is wrapped in as.factor() so caret treats the task as
# classification.

# Resampling scheme: 10-fold cross-validation.
train_control <- trainControl(method = "cv", number = 10)

# Seed right before train() so the resampling is reproducible. A single seed
# is enough: anything set earlier would be overwritten by this call anyway.
set.seed(1234)
lda_model <- train(
  as.factor(good) ~ .,
  data = train,
  method = "lda",
  trControl = train_control
)

# Save into dataset/model/, which is where the evaluation step loads
# "lda.model_kfoldCV.Rdata" from. file.path() keeps the path portable instead
# of hard-coding Windows backslashes.
lda_model_path <- file.path("dataset", "model", "lda.model_kfoldCV.Rdata")
dir.create(dirname(lda_model_path), recursive = TRUE, showWarnings = FALSE)
save(lda_model, file = lda_model_path)
Show/Hide Code
# Evaluate the cross-validated LDA model on the hold-out test set.
# Paths are built with file.path() so they resolve on any OS, not only on
# Windows where "\\" is a valid separator.

# Data import (the .Rdata files are expected to restore `wine.data_cleaned`,
# `train` and `test` into the workspace).
load(file.path("dataset", "wine.data_cleaned.Rdata"))
load(file.path("dataset", "train.Rdata"))
load(file.path("dataset", "test.Rdata"))

# Function import
load(file.path("dataset", "function", "accu.kappa.plot.Rdata"))

# Model import (restores `lda_model`)
load(file.path("dataset", "model", "lda.model_kfoldCV.Rdata"))

# Class predictions for the test set.
lda.predictions <- predict(lda_model, newdata = test)

# confusionMatrix() requires both arguments to be factors with the same
# levels. The model was trained on as.factor(good), so the reference is
# coerced here with the prediction's levels instead of relying on `test$good`
# already being a factor.
confusionMatrix(
  lda.predictions,
  factor(test$good, levels = levels(lda.predictions))
)
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 881 160
         1  68  79
Accuracy : 0.8081
95% CI : (0.7845, 0.8301)
No Information Rate : 0.7988
P-Value [Acc > NIR] : 0.2246
Kappa : 0.3024
Mcnemar's Test P-Value : 1.674e-09
Sensitivity : 0.9283
Specificity : 0.3305
Pos Pred Value : 0.8463
Neg Pred Value : 0.5374
Prevalence : 0.7988
Detection Rate : 0.7416
Detection Prevalence : 0.8763
Balanced Accuracy : 0.6294
'Positive' Class : 0
# Manual k-fold cross-validation of an LDA classifier on the full cleaned data
# set, classifying an observation as 1 only when its posterior probability
# exceeds a custom cut-off. Collects, per fold, the overall error rate (OER),
# Cohen's kappa and the full caret confusion matrix.
#
# NOTE(review): the loop reuses the workspace names `train`, `test` and
# `lda_model`, so after it finishes they hold the objects of the LAST fold.
# The names are kept unchanged in case later code relies on them -- confirm.

# Number of folds, posterior cut-off for predicting class 1, and class labels.
k <- 10
threshold <- 0.7
class_levels <- c(0, 1)

# Randomly assign each row in the data to a fold.
set.seed(1234) # for reproducibility
fold_indices <- sample(rep(seq_len(k), length.out = nrow(wine.data_cleaned)))

# folds[[i]] holds the row indices belonging to fold i.
folds <- lapply(seq_len(k), function(i) which(fold_indices == i))

# Per-fold results.
error_rate <- numeric(k)
kappa <- numeric(k)
confusion_matrices <- vector("list", k)

# Loop through each fold (driven by k rather than a hard-coded 10).
for (i in seq_len(k)) {
  # Fold i is the testing set; all remaining rows form the training set.
  test_indices <- folds[[i]]
  test <- wine.data_cleaned[test_indices, ]
  train <- wine.data_cleaned[-test_indices, ]

  # Fit the model on the training set. lda() has no `family` argument, so the
  # stray `family = binomial` has been dropped.
  lda_model <- lda(good ~ ., data = train)

  # Posterior class probabilities for the testing set. predict() on an lda fit
  # takes no `type` argument, so it has been dropped as well.
  lda.pred <- predict(lda_model, newdata = test)

  # Second posterior column is taken as P(good = 1) -- assumes the classes
  # are ordered 0, 1; verify against the data.
  predicted_classes <- factor(
    ifelse(lda.pred$posterior[, 2] > threshold, 1, 0),
    levels = class_levels
  )

  # Put the reference on the same two levels as the predictions so the
  # comparison and the confusion matrix are label-based and do not depend on
  # whether `good` is stored as numeric, character or factor.
  test$good <- factor(test$good, levels = class_levels)

  # Compute OER: share of test rows whose predicted label differs from truth.
  error_rate[i] <- mean(predicted_classes != test$good)

  # Compute confusion matrix
  confusion_matrices[[i]] <- caret::confusionMatrix(
    predicted_classes,
    test$good
  )

  # Compute Kappa value (looked up by name rather than by position).
  kappa[i] <- confusion_matrices[[i]]$overall[["Kappa"]]

  # Print the error rate for this fold
  cat(paste0("Fold ", i, ": ", "OER:", error_rate[i], "\n"))
}