---
title: "SVM model"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{SVM model}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r}
library(rvfl)
```
```{r}
glmGamma <- function(formula, ...) {
e1071::svm(formula = formula, ...)
}
```
# Example 1: MPG Prediction (mtcars dataset)
## Load and prepare data
```{r}
data(mtcars)
set.seed(1243)
train_idx <- sample(nrow(mtcars), size = floor(0.8 * nrow(mtcars)))
train_data <- mtcars[train_idx, ]
test_data <- mtcars[-train_idx, -1]
```
## Fit models
```{r}
# Fit regular linear model
start <- proc.time()[3]
lm_model <- lm(mpg ~ ., data = train_data)
print(proc.time()[3] - start)
print(summary(lm_model))
print(confint(lm_model))
# Fit calibrated model
start <- proc.time()[3]
svm_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-1]), y = train_data$mpg, engine = glmGamma)
print(proc.time()[3] - start)
print(summary(svm_model$model))
#print(confint(svm_model$model))
#print(simulate(svm_model, newdata = test_data))
```
## Make predictions
```{r eval=TRUE}
lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction")
svm_pred <- predict(svm_model, newdata = as.matrix(test_data), method="bootstrap")
```
## Compare predictions
```{r eval=TRUE, fig.width=7.5}
results <- data.frame(
Actual = mtcars[-train_idx, ]$mpg,
LM_Pred = lm_pred[,"fit"],
LM_Lower = lm_pred[,"lwr"],
LM_Upper = lm_pred[,"upr"],
svm_Pred = svm_pred[,"fit"],
svm_Lower = svm_pred[,"lwr"],
svm_Upper = svm_pred[,"upr"]
)
# Print results
print("Prediction Intervals Comparison:")
print(head(results))
# Calculate coverage and Winkler scores
lm_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$LM_Lower &
mtcars[-train_idx, ]$mpg <= results$LM_Upper)
svm_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$svm_Lower &
mtcars[-train_idx, ]$mpg <= results$svm_Upper)
lm_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$LM_Lower, results$LM_Upper)
svm_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$svm_Lower, results$svm_Upper)
print(sprintf("\nPrediction interval metrics:"))
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score",
100 * lm_coverage, mean(lm_winkler)))
print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score",
100 * svm_coverage, mean(svm_winkler)))
```
```{r eval=TRUE, fig.width=7.5}
sims <- simulate(svm_model, newdata = as.matrix(test_data), nsim = 500, method="bootstrap")
# Plot simulations
matplot(sims, type = "l",
col = rgb(0, 0, 1, 0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated MPG",
main = "Ridge Model Simulations")
lines(mtcars[-train_idx, ]$mpg, col = "red")
```
# Example 2: Boston Housing Price Prediction
## Load and prepare data
```{r}
library(MASS)
data(Boston)
set.seed(1243)
train_idx <- sample(nrow(Boston), size = floor(0.8 * nrow(Boston)))
train_data <- Boston[train_idx, ]
test_data <- Boston[-train_idx, -14] # -14 removes 'medv' (target variable)
```
## Fit models
```{r}
# Fit regular linear model
start <- proc.time()[3]
lm_model <- lm(medv ~ ., data = train_data)
print(proc.time()[3] - start)
print(summary(lm_model))
print(confint(lm_model))
# Fit calibrated model
start <- proc.time()[3]
svm_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-14]), y = train_data$medv, engine = glmGamma)
print(proc.time()[3] - start)
print(summary(svm_model$model))
#print(confint(svm_model$model))
#print(simulate(svm_model, newdata = test_data))
lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction")
svm_pred <- predict(svm_model, newdata = as.matrix(test_data), method="bootstrap")
```
## Make predictions and compare
```{r eval=TRUE, fig.width=7.5}
results <- data.frame(
Actual = Boston[-train_idx, ]$medv,
LM_Pred = lm_pred[,"fit"],
LM_Lower = lm_pred[,"lwr"],
LM_Upper = lm_pred[,"upr"],
svm_Pred = svm_pred[,"fit"],
svm_Lower = svm_pred[,"lwr"],
svm_Upper = svm_pred[,"upr"]
)
# Print results
print("Prediction Intervals Comparison:")
print(head(results))
# Calculate coverage and Winkler scores
lm_coverage <- mean(Boston[-train_idx, ]$medv >= results$LM_Lower &
Boston[-train_idx, ]$medv <= results$LM_Upper)
svm_coverage <- mean(Boston[-train_idx, ]$medv >= results$svm_Lower &
Boston[-train_idx, ]$medv <= results$svm_Upper)
lm_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$LM_Lower, results$LM_Upper)
svm_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$svm_Lower, results$svm_Upper)
print(sprintf("\nPrediction interval metrics:"))
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score",
100 * lm_coverage, mean(lm_winkler)))
print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score",
100 * svm_coverage, mean(svm_winkler)))
```
```{r eval=TRUE, fig.width=7.5}
sims <- simulate(svm_model, newdata = as.matrix(test_data), nsim = 500, method="bootstrap")
# Plot simulations
matplot(sims, type = "l",
col = rgb(0, 0, 1, 0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated MPG",
main = "Ridge Model Simulations")
lines(Boston[-train_idx, ]$medv, col = "red")
```
# Example 3: Car Price Analysis (Cars93 dataset)
## Load and prepare data
```{r}
data(Cars93, package = "MASS")
# Remove rows with missing values
Cars93 <- na.omit(Cars93)
# Select numeric predictors and price as response
predictors <- c("MPG.city", "MPG.highway", "EngineSize", "Horsepower",
"RPM", "Rev.per.mile", "Fuel.tank.capacity", "Length",
"Wheelbase", "Width", "Turn.circle", "Weight")
car_data <- Cars93[, c(predictors, "Price")]
set.seed(1243)
train_idx <- sample(nrow(car_data), size = floor(0.8 * nrow(car_data)))
train_data <- car_data[train_idx, ]
test_data <- car_data[-train_idx, -which(names(car_data) == "Price")]
```
## Fit models
```{r}
# Fit regular linear model
start <- proc.time()[3]
lm_model <- lm(Price ~ ., data = train_data)
print(proc.time()[3] - start)
print(summary(lm_model))
print(confint(lm_model))
# Fit calibrated model
start <- proc.time()[3]
svm_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-which(names(train_data) == "Price")]),
y = train_data$Price, engine = glmGamma)
print(proc.time()[3] - start)
print(summary(svm_model$model))
#print(confint(svm_model$model))
#print(simulate(svm_model, newdata = as.matrix(test_data)))
lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction")
svm_pred <- predict(svm_model, newdata = as.matrix(test_data), method="bootstrap")
```
## Make predictions and compare
```{r eval=TRUE, fig.width=7.5}
results <- data.frame(
Actual = car_data[-train_idx, "Price"],
LM_Pred = lm_pred[,"fit"],
LM_Lower = lm_pred[,"lwr"],
LM_Upper = lm_pred[,"upr"],
svm_Pred = svm_pred[,"fit"],
svm_Lower = svm_pred[,"lwr"],
svm_Upper = svm_pred[,"upr"]
)
# Print results
print("Prediction Intervals Comparison:")
print(results)
# Calculate coverage and Winkler scores
lm_coverage <- mean(car_data[-train_idx, "Price"] >= results$LM_Lower &
car_data[-train_idx, "Price"] <= results$LM_Upper)
svm_coverage <- mean(car_data[-train_idx, "Price"] >= results$svm_Lower &
car_data[-train_idx, "Price"] <= results$svm_Upper)
lm_winkler <- misc::winkler_score(car_data[-train_idx, "Price"], results$LM_Lower, results$LM_Upper)
svm_winkler <- misc::winkler_score(car_data[-train_idx, "Price"], results$svm_Lower, results$svm_Upper)
print(sprintf("\nPrediction interval metrics:"))
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score",
100 * lm_coverage, mean(lm_winkler)))
print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score",
100 * svm_coverage, mean(svm_winkler)))
```
```{r eval=TRUE, fig.width=7.5}
sims <- simulate(svm_model, newdata = as.matrix(test_data), nsim = 500, method="bootstrap")
# Plot simulations
matplot(sims, type = "l",
col = rgb(0, 0, 1, 0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated Price",
main = "Ridge Model Simulations")
lines(car_data[-train_idx, "Price"], col = "red")
```