--- title: "SVM model" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{SVM model} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r} library(rvfl) ``` ```{r} glmGamma <- function(formula, ...) { e1071::svm(formula = formula, ...) } ``` # Example 1: MPG Prediction (mtcars dataset) ## Load and prepare data ```{r} data(mtcars) set.seed(1243) train_idx <- sample(nrow(mtcars), size = floor(0.8 * nrow(mtcars))) train_data <- mtcars[train_idx, ] test_data <- mtcars[-train_idx, -1] ``` ## Fit models ```{r} # Fit regular linear model start <- proc.time()[3] lm_model <- lm(mpg ~ ., data = train_data) print(proc.time()[3] - start) print(summary(lm_model)) print(confint(lm_model)) # Fit calibrated model start <- proc.time()[3] svm_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-1]), y = train_data$mpg, engine = glmGamma) print(proc.time()[3] - start) print(summary(svm_model$model)) #print(confint(svm_model$model)) #print(simulate(svm_model, newdata = test_data)) ``` ## Make predictions ```{r eval=TRUE} lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction") svm_pred <- predict(svm_model, newdata = as.matrix(test_data), method="bootstrap") ``` ## Compare predictions ```{r eval=TRUE, fig.width=7.5} results <- data.frame( Actual = mtcars[-train_idx, ]$mpg, LM_Pred = lm_pred[,"fit"], LM_Lower = lm_pred[,"lwr"], LM_Upper = lm_pred[,"upr"], svm_Pred = svm_pred[,"fit"], svm_Lower = svm_pred[,"lwr"], svm_Upper = svm_pred[,"upr"] ) # Print results print("Prediction Intervals Comparison:") print(head(results)) # Calculate coverage and Winkler scores lm_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$LM_Lower & mtcars[-train_idx, ]$mpg <= results$LM_Upper) svm_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$svm_Lower & mtcars[-train_idx, ]$mpg <= results$svm_Upper) lm_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$LM_Lower, results$LM_Upper) svm_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$svm_Lower, results$svm_Upper) print(sprintf("\nPrediction interval metrics:")) print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score", 100 * lm_coverage, mean(lm_winkler))) print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score", 100 * svm_coverage, mean(svm_winkler))) ``` ```{r eval=TRUE, fig.width=7.5} sims <- simulate(svm_model, newdata = as.matrix(test_data), nsim = 500, method="bootstrap") # Plot simulations matplot(sims, type = "l", col = rgb(0, 0, 1, 0.1), lty = 1, xlab = "obs. #", ylab = "Simulated MPG", main = "Ridge Model Simulations") lines(mtcars[-train_idx, ]$mpg, col = "red") ``` # Example 2: Boston Housing Price Prediction ## Load and prepare data ```{r} library(MASS) data(Boston) set.seed(1243) train_idx <- sample(nrow(Boston), size = floor(0.8 * nrow(Boston))) train_data <- Boston[train_idx, ] test_data <- Boston[-train_idx, -14] # -14 removes 'medv' (target variable) ``` ## Fit models ```{r} # Fit regular linear model start <- proc.time()[3] lm_model <- lm(medv ~ ., data = train_data) print(proc.time()[3] - start) print(summary(lm_model)) print(confint(lm_model)) # Fit calibrated model start <- proc.time()[3] svm_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-14]), y = train_data$medv, engine = glmGamma) print(proc.time()[3] - start) print(summary(svm_model$model)) #print(confint(svm_model$model)) #print(simulate(svm_model, newdata = test_data)) lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction") svm_pred <- predict(svm_model, newdata = as.matrix(test_data), method="bootstrap") ``` ## Make predictions and compare ```{r eval=TRUE, fig.width=7.5} results <- data.frame( Actual = Boston[-train_idx, ]$medv, LM_Pred = lm_pred[,"fit"], LM_Lower = lm_pred[,"lwr"], LM_Upper = lm_pred[,"upr"], svm_Pred = svm_pred[,"fit"], svm_Lower = svm_pred[,"lwr"], svm_Upper = svm_pred[,"upr"] ) # Print results print("Prediction Intervals Comparison:") print(head(results)) # Calculate coverage and Winkler scores lm_coverage <- mean(Boston[-train_idx, ]$medv >= results$LM_Lower & Boston[-train_idx, ]$medv <= results$LM_Upper) svm_coverage <- mean(Boston[-train_idx, ]$medv >= results$svm_Lower & Boston[-train_idx, ]$medv <= results$svm_Upper) lm_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$LM_Lower, results$LM_Upper) svm_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$svm_Lower, results$svm_Upper) print(sprintf("\nPrediction interval metrics:")) print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score", 100 * lm_coverage, mean(lm_winkler))) print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score", 100 * svm_coverage, mean(svm_winkler))) ``` ```{r eval=TRUE, fig.width=7.5} sims <- simulate(svm_model, newdata = as.matrix(test_data), nsim = 500, method="bootstrap") # Plot simulations matplot(sims, type = "l", col = rgb(0, 0, 1, 0.1), lty = 1, xlab = "obs. #", ylab = "Simulated MPG", main = "Ridge Model Simulations") lines(Boston[-train_idx, ]$medv, col = "red") ``` # Example 3: Car Price Analysis (Cars93 dataset) ## Load and prepare data ```{r} data(Cars93, package = "MASS") # Remove rows with missing values Cars93 <- na.omit(Cars93) # Select numeric predictors and price as response predictors <- c("MPG.city", "MPG.highway", "EngineSize", "Horsepower", "RPM", "Rev.per.mile", "Fuel.tank.capacity", "Length", "Wheelbase", "Width", "Turn.circle", "Weight") car_data <- Cars93[, c(predictors, "Price")] set.seed(1243) train_idx <- sample(nrow(car_data), size = floor(0.8 * nrow(car_data))) train_data <- car_data[train_idx, ] test_data <- car_data[-train_idx, -which(names(car_data) == "Price")] ``` ## Fit models ```{r} # Fit regular linear model start <- proc.time()[3] lm_model <- lm(Price ~ ., data = train_data) print(proc.time()[3] - start) print(summary(lm_model)) print(confint(lm_model)) # Fit calibrated model start <- proc.time()[3] svm_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-which(names(train_data) == "Price")]), y = train_data$Price, engine = glmGamma) print(proc.time()[3] - start) print(summary(svm_model$model)) #print(confint(svm_model$model)) #print(simulate(svm_model, newdata = as.matrix(test_data))) lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction") svm_pred <- predict(svm_model, newdata = as.matrix(test_data), method="bootstrap") ``` ## Make predictions and compare ```{r eval=TRUE, fig.width=7.5} results <- data.frame( Actual = car_data[-train_idx, "Price"], LM_Pred = lm_pred[,"fit"], LM_Lower = lm_pred[,"lwr"], LM_Upper = lm_pred[,"upr"], svm_Pred = svm_pred[,"fit"], svm_Lower = svm_pred[,"lwr"], svm_Upper = svm_pred[,"upr"] ) # Print results print("Prediction Intervals Comparison:") print(results) # Calculate coverage and Winkler scores lm_coverage <- mean(car_data[-train_idx, "Price"] >= results$LM_Lower & car_data[-train_idx, "Price"] <= results$LM_Upper) svm_coverage <- mean(car_data[-train_idx, "Price"] >= results$svm_Lower & car_data[-train_idx, "Price"] <= results$svm_Upper) lm_winkler <- misc::winkler_score(car_data[-train_idx, "Price"], results$LM_Lower, results$LM_Upper) svm_winkler <- misc::winkler_score(car_data[-train_idx, "Price"], results$svm_Lower, results$svm_Upper) print(sprintf("\nPrediction interval metrics:")) print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score", 100 * lm_coverage, mean(lm_winkler))) print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score", 100 * svm_coverage, mean(svm_winkler))) ``` ```{r eval=TRUE, fig.width=7.5} sims <- simulate(svm_model, newdata = as.matrix(test_data), nsim = 500, method="bootstrap") # Plot simulations matplot(sims, type = "l", col = rgb(0, 0, 1, 0.1), lty = 1, xlab = "obs. #", ylab = "Simulated Price", main = "Ridge Model Simulations") lines(car_data[-train_idx, "Price"], col = "red") ```