---
title: "Summary"
---
```{r setup, include=FALSE}
# Global chunk defaults ----
# `fig.align` takes one of "default", "left", "right" or "center" (it is not a
# logical flag), so centre every figure explicitly.
knitr::opts_chunk$set(fig.align = "center")

# Packages ----
library(tidyverse) # Attaches the core tidyverse packages:
# ggplot2, for data visualization.
# dplyr, for data manipulation.
# tidyr, for data tidying.
# purrr, for functional programming.
# tibble, for tibbles, a modern re-imagining of data frames.
# stringr, for strings.
# forcats, for factors.
# readr, for reading .csv, .tsv, and .fwf files.
# lubridate, for date/times (attached as core from tidyverse 2.0.0 onwards).
#
# Related packages that library(tidyverse) does NOT attach; load them with
# their own library() call (or use pkg::fun()) when they are needed:
# readxl, for reading .xls and .xlsx files.
# feather, for sharing with Python and other languages.
# haven, for SPSS, SAS and Stata files.
# httr, for web apis.
# jsonlite, for JSON.
# rvest, for web scraping.
# xml2, for XML.
# modelr, for modelling within a pipeline.
# broom, for turning models into tidy data.
# hms, for times.
library(magrittr) # Pipeline operator
library(lobstr) # Visualizing abstract syntax trees, stack trees, and object sizes
library(pander) # Exporting/converting complex pandoc documents, EX: df to Pandoc table
library(ggforce) # More plot functions on top of ggplot2
library(ggpubr) # Automatically add p-values and significance levels to plots.
# Arrange and annotate multiple plots on the same page.
# Change graphical parameters such as colors and labels.
library(sf) # Geo-spatial vector manipulation: points, lines, polygons
library(kableExtra) # Generate 90 % of complex/advanced/self-customized/beautiful tables
library(cowplot) # Multiple plots arrangement
library(gridExtra) # Multiple plots arrangement
library(animation) # Animated figure container
library(latex2exp) # Latex axis titles in ggplot2
library(ellipse) # Simultaneous confidence region to check C.I. of 2 slope parameters (regressors)
library(plotly) # User interactive plots
library(olsrr) # Model selections
library(leaps) # Regression subsetting
library(pls) # Partial Least squares
# NOTE: MASS is attached after the tidyverse, so MASS::select() masks
# dplyr::select(); write dplyr::select() explicitly wherever it is needed.
library(MASS) # LDA, QDA, OLS, Ridge Regression, Box-Cox, stepAIC, etc.
library(e1071) # Naive Bayesian Classifier, SVM, GKNN, ICA, LCA
library(class) # KNN, SOM, LVQ
library(ROCR) # Precision/Recall/Sensitivity/Specificity performance plot
library(boot) # LOOCV, Bootstrap
library(caret) # Classification/Regression Training, run ?caret::trainControl
library(corrgram) # for correlation matrix
library(corrplot) # for graphical display of correlation matrix

# Reproducibility ----
set.seed(1234) # make random results reproducible

# No setwd() call: the relative data paths used by the chunks below are
# resolved against the working directory knitr uses while rendering (by
# default the directory that contains this document).
```
## Data Modeling
```{r, echo=FALSE}
# Restore the saved objects used by the figure chunks below. Each .Rdata file
# is expected to hold the object of the same name (minus the extension) --
# TODO confirm against the script that saved them.
#
# file.path() joins the path components with "/", which works on Windows as
# well as on Linux/macOS; a hard-coded "\\" separator only resolves on Windows.
# `envir = environment()` is evaluated at chunk level, so the objects land in
# the same environment a plain top-level load() call would use.
# invisible() keeps the list of loaded object names out of the rendered output.
invisible(lapply(
  file.path(
    "dataset", "plot",
    c(
      "knn.kfoldCV.plot.Rdata",
      "logit.kfoldCV_MASS.plot.Rdata",
      "lda.kfoldCV_MASS.plot.Rdata",
      "qda.kfoldCV_MASS.plot.Rdata",
      "knn.kfoldCV.ROC.plot.Rdata",
      "logit.kfoldCV_MASS.ROC.plot.Rdata",
      "lda.kfoldCV_MASS.ROC.plot.Rdata",
      "qda.kfoldCV_MASS.ROC.plot.Rdata"
    )
  ),
  load,
  envir = environment()
))
```
```{r fig.width=20, fig.height=15, echo=FALSE}
# Arrange the four k-fold CV plot objects (knn, logit, lda, qda) on a single
# page using a 2 x 2 layout.
ggpubr::ggarrange(
  plotlist = list(
    knn.kfoldCV.plot,
    logit.kfoldCV_MASS.plot,
    lda.kfoldCV_MASS.plot,
    qda.kfoldCV_MASS.plot
  ),
  nrow = 2,
  ncol = 2
)
```
```{r fig.width=15, fig.height=15, echo=FALSE}
# Arrange the four ROC plot objects (knn, logit, lda, qda) in two columns,
# aligned horizontally and vertically, each drawn at 80 % scale.
cowplot::plot_grid(
  plotlist = list(
    knn.kfoldCV.ROC.plot,
    logit.kfoldCV_MASS.ROC.plot,
    lda.kfoldCV_MASS.ROC.plot,
    qda.kfoldCV_MASS.ROC.plot
  ),
  align = "hv",
  ncol = 2,
  scale = 0.8
)
```
| Model (Resampling Method) | Error Rate | Sensitivity | Specificity | AUC |
| ------------------------------------------------ | ---------- | ----------- | ----------- | --------- |
| KNN 10-Fold CV, k=1:10 | 0.1759 | 0.9568 | 0.2971 | 0.6269339 |
| KNN 10-Fold CV, k=1:30 | 0.1961 | 0.9758 | 0.1213 | 0.5485514 |
| KNN Hold-out CV, k=1:10 | 0.1759 | 0.9568 | 0.2971 | 0.6269339 |
| KNN Hold-out CV, k=1:30 | 0.1987 | 0.0053 | 0.0335 | 0.5141020 |
| KNN LOOCV | 0.1717 | 0.9621 | 0.2971 | 0.6295682 |
| KNN LOOCV (Tuned) | 0.1995 | 0.9768 | 0.1004 | 0.5386181 |
| KNN Repeated CV | 0.1776 | 0.9104 | 0.4728 | 0.6916177 |
| KNN Repeated CV (Tuned) | 0.1120 | 0.9547 | 0.6234 | 0.7890601 |
| | | | | |
| Logistic Regression (`caret` 10-fold CV) | 0.1902 | 0.9336 | 0.3180 | 0.6258030 |
| Logistic Regression (`caret` tuned with stepAIC) | 0.1919 | 0.9895 | 0.0879 | 0.5386644 |
| Logistic Regression (`MASS` 10-fold CV) | 0.1616 | 1.0000 | 0.0857 | 0.5438871 |
| Logistic Regression (`MASS` Hold-out CV) | 0.1894 | 0.9884 | 0.1046 | 0.5465057 |
| | | | | |
| LDA (`caret` 10-fold CV) | 0.1919 | 0.9283 | 0.3305 | 0.6294448 |
| LDA (`MASS` 10-fold CV) | 0.1591 | 0.9969 | 0.1143 | 0.5488133 |
| | | | | |
| QDA (`caret` 10-fold CV) | 0.2559 | 0.7418 | 0.7531 | 0.7474858 |
| QDA (`MASS` 10-fold CV) | 0.1692 | 0.8934 | 0.5714 | 0.7324227 |
## Further Modeling
```{r, echo=FALSE}
# Restore the saved ROC plot objects for the additional models. Each .Rdata
# file is expected to hold the object of the same name (minus the extension)
# -- TODO confirm against the script that saved them.
#
# file.path() joins the path components with "/", which works on Windows as
# well as on Linux/macOS; a hard-coded "\\" separator only resolves on Windows.
# `envir = environment()` is evaluated at chunk level, so the objects land in
# the same environment a plain top-level load() call would use.
# invisible() keeps the list of loaded object names out of the rendered output.
invisible(lapply(
  file.path(
    "dataset", "plot",
    c(
      "nb.kfoldCV.ROC.plot.Rdata",
      "dc.kfoldCV.ROC.plot.Rdata",
      "dc.kfoldCV_mod.ROC.plot.Rdata",
      "rf.kfoldCV.ROC.plot.Rdata",
      "bag.kfoldCV.ROC.plot.Rdata",
      "boost.kfoldCV.ROC.plot.Rdata",
      "xgboost.kfoldCV.ROC.plot.Rdata",
      "svm.kfoldCV.ROC.plot.Rdata",
      "nnet.kfoldCV.ROC.plot.Rdata"
    )
  ),
  load,
  envir = environment()
))
```
```{r, fig.width=20, fig.height=10, echo=FALSE}
# Four figures, each placing two ROC plot objects side by side (two columns,
# aligned horizontally and vertically, drawn at 80 % scale).

# Figure 1: nb and dc
cowplot::plot_grid(
  plotlist = list(nb.kfoldCV.ROC.plot, dc.kfoldCV.ROC.plot),
  align = "hv",
  ncol = 2,
  scale = 0.8
)

# Figure 2: rf and bag
cowplot::plot_grid(
  plotlist = list(rf.kfoldCV.ROC.plot, bag.kfoldCV.ROC.plot),
  align = "hv",
  ncol = 2,
  scale = 0.8
)

# Figure 3: boost and svm
cowplot::plot_grid(
  plotlist = list(boost.kfoldCV.ROC.plot, svm.kfoldCV.ROC.plot),
  align = "hv",
  ncol = 2,
  scale = 0.8
)

# Figure 4: xgboost and nnet
cowplot::plot_grid(
  plotlist = list(xgboost.kfoldCV.ROC.plot, nnet.kfoldCV.ROC.plot),
  align = "hv",
  ncol = 2,
  scale = 0.8
)
```
| Model | Error Rate | Sensitivity | Specificity | AUC |
| ----------------- | ---------- | ----------- | ----------- | --------- |
| Naive Bayes | 0.2466 | 0.7829 | 0.6360 | 0.7094563 |
| CART | 0.1827 | 0.9062 | 0.4644 | 0.6853261 |
| Random Forest | 0.0471 | 0.9789 | 0.8494 | 0.9141488 |
| Bagging | 0.0497 | 0.9694 | 0.8745 | 0.9219593 |
| Boosting | 0.1633 | 0.9389 | 0.4310 | 0.6849227 |
| XGBoost | 0.1338 | 0.9589 | 0.4979 | 0.7284060 |
| SVM | 0.1641 | 0.9726 | 0.2929 | 0.6327449 |
| Neural Network | 0.1818 | 0.9378 | 0.3431 | 0.6404628 |