---
title: "Naive Bayes"
---
```{r setup, include=FALSE}
# Global chunk options ----
# `fig.align` accepts "default", "left", "right" or "center"; the previous
# logical `TRUE` is not one of the documented values.
knitr::opts_chunk$set(fig.align = "center")

# Packages ----
# Attach order is unchanged: packages attached later mask same-named
# functions from packages attached earlier.
library(tidyverse)
# ggplot2, for data visualization.
# dplyr, for data manipulation.
# tidyr, for data tidying.
# purrr, for functional programming.
# tibble, for tibbles, a modern re-imagining of data frames.
# stringr, for strings.
# forcats, for factors.
# lubridate, for date/times.
# readr, for reading .csv, .tsv, and .fwf files.
# readxl, for reading .xls, and .xlsx files.
# feather, for sharing with Python and other languages.
# haven, for SPSS, SAS and Stata files.
# httr, for web apis.
# jsonlite for JSON.
# rvest, for web scraping.
# xml2, for XML.
# modelr, for modelling within a pipeline
# broom, for turning models into tidy data
# hms, for times.
library(magrittr)
library(lobstr)
library(pander)
library(ggforce)
library(ggpubr)
# Arrange and annotate multiple plots on the same page.
# Change graphical parameters such as colors and labels.
library(sf)
library(kableExtra)
library(cowplot)
library(gridExtra)
library(animation)
library(latex2exp)
library(ellipse)
library(plotly)
library(olsrr)
library(leaps)
library(pls)
library(MASS)
library(e1071)
library(class)
library(ROCR)
library(boot)
library(caret)
library(corrgram)
library(corrplot)

# Reproducibility ----
set.seed(1234) # make random results reproducible

# Working directory ----
# The former `setwd(getwd())` round-trip was removed: it re-set the working
# directory to its current value (a no-op), and its `if` block was never
# closed, which prevented the chunk from parsing.
```

## Model Construction
```{r nb.model_savings, eval=FALSE}
# ---- Naive Bayes: fit and persist ----
# Resampling scheme: 10-fold cross-validation.
set.seed(1234)
train_control <- trainControl(method = "cv", number = 10)

# Fit `good` against every other column of `train` with caret's
# "naive_bayes" method; the seed is re-set immediately before fitting.
set.seed(1234)
nb_model <- train(
  good ~ .,
  data = train,
  method = "naive_bayes",
  trControl = train_control
)

# Store the fitted model on disk.
# NOTE(review): the path uses Windows-style "\\" separators with embedded
# spaces -- confirm it matches the on-disk layout.
save(nb_model, file = "dataset \\ model \\ nb.model_kfoldCV.Rdata")
```

## K-fold CV
```{r nb.kfoldCV, fig.show='hide'}
# ---- Inputs ----
# NOTE(review): every path below uses Windows-style "\\" separators with
# embedded spaces -- confirm they match the on-disk layout.
# Data import (`test` is expected to come from one of these files).
load("dataset \\ wine.data_cleaned.Rdata")
load("dataset \\ train.Rdata")
load("dataset \\ test.Rdata")
# Function import
load("dataset \\ function \\ accu.kappa.plot.Rdata")
# Model import (provides `nb_model`)
load("dataset \\ model \\ nb.model_kfoldCV.Rdata")

# ---- Confusion matrix on the test set ----
# Hard class predictions are what the confusion matrix needs.
nb.predictions <- predict(nb_model, newdata = test)
confusionMatrix(nb.predictions, test$good)

# ---- ROC curve and AUC ----
# Score the test rows with class probabilities instead of hard labels.
# Ranking by a two-valued label yields a single-threshold "curve" whose area
# is just (sensitivity + specificity) / 2, not a real AUC.
# NOTE: this changes the AUC relative to the value hard-coded in the Summary
# table; refresh the table after re-knitting.
# Assumes the probability columns follow the level order of `test$good`, so
# column 2 belongs to the second level -- TODO confirm.
nb.probabilities <- predict(nb_model, newdata = test, type = "prob")
pred_obj <- prediction(
  nb.probabilities[[2]],
  test$good,
  # Pin (negative, positive) explicitly so the positive class is the level
  # whose probability is used as the score.
  label.ordering = levels(test$good)
)
auc_val <- performance(pred_obj, "auc")@y.values[[1]]
roc_obj <- performance(pred_obj, "tpr", "fpr")

# The original chunk left `nb.predictions` as numeric level codes; keep that
# end state for any later code that relies on it.
nb.predictions <- as.numeric(nb.predictions)

plot(
  roc_obj,
  colorize = TRUE,
  lwd = 2,
  xlab = "False Positive Rate",
  ylab = "True Positive Rate",
  main = "Naive Bayes (10-fold CV)"
)
abline(a = 0, b = 1) # chance diagonal

# Shade the area under the curve in two pieces: the region between the ROC
# curve and the diagonal, then the triangle below the diagonal.
x_values <- as.numeric(unlist(roc_obj@x.values))
y_values <- as.numeric(unlist(roc_obj@y.values))
polygon(
  x = x_values, y = y_values,
  col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
  border = NA
)
polygon(
  x = c(0, 1, 1), y = c(0, 0, 1),
  col = rgb(0.3803922, 0.6862745, 0.9372549, alpha = 0.3),
  border = NA
)
text(0.6, 0.4, paste("AUC =", round(auc_val, 4)))

# Capture the finished base-graphics plot so it can be redrawn later.
nb.kfoldCV.ROC.plot <- recordPlot()

# Cross-validation results stored on the fitted model.
pander::pander(nb_model$results)
```

## Summary
```{r fig.width=5, fig.height=5}
# Redraw the recorded Naive Bayes ROC plot through cowplot's grid layout.
cowplot::plot_grid(nb.kfoldCV.ROC.plot)
```

| Model              | Error Rate | Sensitivity | Specificity | AUC       |
| ------------------ | ---------- | ----------- | ----------- | --------- |
| Naive Bayes        | 0.2466     | 0.7829      | 0.6360      | 0.7094563 |

```{r, echo=FALSE}
# Persist the recorded ROC plot object to disk.
# NOTE(review): Windows-style "\\" separators with embedded spaces -- confirm
# the path matches the on-disk layout.
save(
  nb.kfoldCV.ROC.plot,
  file = "dataset \\ plot \\ nb.kfoldCV.ROC.plot.Rdata"
)