---
title: "RVFL model"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{RVFL model}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r}
library(rvfl)
```
# Example 1: MPG Prediction (mtcars dataset)
## Load and prepare data
```{r}
data(mtcars)
set.seed(1243)
train_idx <- sample(nrow(mtcars), size = floor(0.8 * nrow(mtcars)))
train_data <- mtcars[train_idx, ]
test_data <- mtcars[-train_idx, -1]
```
## Fit models
```{r}
# Fit regular linear model
start <- proc.time()[3]
lm_model <- lm(mpg ~ ., data = train_data)
print(proc.time()[3] - start)
print(summary(lm_model))
print(confint(lm_model))
# Fit calibrated model
start <- proc.time()[3]
ridge_model <- rvfl::rvfl(x = as.matrix(train_data[,-1]), y = train_data$mpg)
print(proc.time()[3] - start)
print(summary(ridge_model$model))
print(confint(ridge_model$model))
#print(simulate(ridge_model, newdata = test_data))
```
## Make predictions
```{r eval=TRUE}
lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction")
ridge_pred <- predict(ridge_model, newdata = as.matrix(test_data), method = "gaussian")
```
## Compare predictions
```{r eval=TRUE, fig.width=7.5}
results <- data.frame(
Actual = mtcars[-train_idx, ]$mpg,
LM_Pred = lm_pred[,"fit"],
LM_Lower = lm_pred[,"lwr"],
LM_Upper = lm_pred[,"upr"],
Ridge_Pred = ridge_pred[,"fit"],
Ridge_Lower = ridge_pred[,"lwr"],
Ridge_Upper = ridge_pred[,"upr"]
)
# Print results
print("Prediction Intervals Comparison:")
print(head(results))
# Calculate coverage and Winkler scores
lm_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$LM_Lower &
mtcars[-train_idx, ]$mpg <= results$LM_Upper)
ridge_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$Ridge_Lower &
mtcars[-train_idx, ]$mpg <= results$Ridge_Upper)
lm_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$LM_Lower, results$LM_Upper)
ridge_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$Ridge_Lower, results$Ridge_Upper)
print(sprintf("\nPrediction interval metrics:"))
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score",
100 * lm_coverage, mean(lm_winkler)))
print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score",
100 * ridge_coverage, mean(ridge_winkler)))
# Set common y-axis limits for both plots
y_limits <- range(c(results$LM_Lower, results$LM_Upper,
results$Ridge_Lower, results$Ridge_Upper))
# Plot prediction intervals
par(mfrow=c(1,2))
# Linear Model Plot
plot(results$Actual, results$LM_Pred,
main="Linear Model Predictions",
xlab="Actual MPG", ylab="Predicted MPG",
ylim=y_limits)
# Add shaded prediction intervals
x_ordered <- order(results$Actual)
polygon(c(results$Actual[x_ordered], rev(results$Actual[x_ordered])),
c(results$LM_Lower[x_ordered], rev(results$LM_Upper[x_ordered])),
col=rgb(0, 0, 1, 0.2), border=NA)
points(results$Actual, results$LM_Pred) # Replot points over shading
abline(0, 1, col="red", lty=2) # Add diagonal line
# Ridge Model Plot
plot(results$Actual, results$Ridge_Pred,
main="Ridge Model Predictions",
xlab="Actual MPG", ylab="Predicted MPG",
ylim=y_limits)
# Add shaded prediction intervals
polygon(c(results$Actual[x_ordered], rev(results$Actual[x_ordered])),
c(results$Ridge_Lower[x_ordered], rev(results$Ridge_Upper[x_ordered])),
col=rgb(0, 0, 1, 0.2), border=NA)
points(results$Actual, results$Ridge_Pred) # Replot points over shading
abline(0, 1, col="red", lty=2) # Add diagonal line
```
```{r, fig.width=7.5, eval=FALSE}
# Add simulation plot
par(mfrow=c(1,1))
# Generate 100 simulations
sims <- simulate(ridge_model, newdata = as.matrix(test_data), nsim = 500)
# Plot simulations
matplot(sims, type = "l",
col = rgb(0, 0, 1, 0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated MPG",
main = "Ridge Model Simulations")
lines(mtcars[-train_idx, ]$mpg, col = "red")
```
# Example 2: Boston Housing Price Prediction
## Load and prepare data
```{r}
library(MASS)
data(Boston)
set.seed(1243)
train_idx <- sample(nrow(Boston), size = floor(0.8 * nrow(Boston)))
train_data <- Boston[train_idx, ]
test_data <- Boston[-train_idx, -14] # -14 removes 'medv' (target variable)
```
## Fit models
```{r}
# Fit regular linear model
start <- proc.time()[3]
lm_model <- rvfl::rvfl(x = as.matrix(train_data[,-14]), y = train_data$medv, activ = "linear")#lm(medv ~ ., data = train_data)
print(proc.time()[3] - start)
print(summary(lm_model$model))
print(confint(lm_model$model))
# Fit calibrated model
start <- proc.time()[3]
ridge_model <- rvfl::rvfl(x = as.matrix(train_data[,-14]), y = train_data$medv)
print(proc.time()[3] - start)
print(summary(ridge_model$model))
print(confint(ridge_model$model))
#print(simulate(ridge_model, newdata = test_data))
```
## Make predictions and compare
```{r eval=TRUE, fig.width=7.5}
lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction")
ridge_pred <- predict(ridge_model, newdata = as.matrix(test_data), method = "gaussian")
results <- data.frame(
Actual = Boston[-train_idx, ]$medv,
LM_Pred = lm_pred[,"fit"],
LM_Lower = lm_pred[,"lwr"],
LM_Upper = lm_pred[,"upr"],
Ridge_Pred = ridge_pred[,"fit"],
Ridge_Lower = ridge_pred[,"lwr"],
Ridge_Upper = ridge_pred[,"upr"]
)
# Print results
print("Prediction Intervals Comparison:")
print(head(results))
# Calculate coverage and Winkler scores
lm_coverage <- mean(Boston[-train_idx, ]$medv >= results$LM_Lower &
Boston[-train_idx, ]$medv <= results$LM_Upper)
ridge_coverage <- mean(Boston[-train_idx, ]$medv >= results$Ridge_Lower &
Boston[-train_idx, ]$medv <= results$Ridge_Upper)
lm_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$LM_Lower, results$LM_Upper)
ridge_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$Ridge_Lower, results$Ridge_Upper)
print(sprintf("\nPrediction interval metrics:"))
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score",
100 * lm_coverage, mean(lm_winkler)))
print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score",
100 * ridge_coverage, mean(ridge_winkler)))
# Visualization
# Set common y-axis limits for both plots
y_limits <- range(c(results$LM_Lower, results$LM_Upper,
results$Ridge_Lower, results$Ridge_Upper))
par(mfrow=c(1,2))
# Linear Model Plot
plot(results$Actual, results$LM_Pred,
main="Linear Model Predictions",
xlab="Actual Median Value", ylab="Predicted Median Value",
ylim=y_limits)
x_ordered <- order(results$Actual)
polygon(c(results$Actual[x_ordered], rev(results$Actual[x_ordered])),
c(results$LM_Lower[x_ordered], rev(results$LM_Upper[x_ordered])),
col=rgb(0, 0, 1, 0.2), border=NA)
points(results$Actual, results$LM_Pred)
abline(0, 1, col="red", lty=2)
# Ridge Model Plot
plot(results$Actual, results$Ridge_Pred,
main="Ridge Model Predictions",
xlab="Actual Median Value", ylab="Predicted Median Value",
ylim=y_limits)
polygon(c(results$Actual[x_ordered], rev(results$Actual[x_ordered])),
c(results$Ridge_Lower[x_ordered], rev(results$Ridge_Upper[x_ordered])),
col=rgb(0, 0, 1, 0.2), border=NA)
points(results$Actual, results$Ridge_Pred)
abline(0, 1, col="red", lty=2)
```
```{r, fig.width=7.5, eval=FALSE}
# Add simulation plot
par(mfrow=c(1,1))
sims <- simulate(ridge_model, newdata = as.matrix(test_data), nsim = 500)
matplot(sims, type = "l",
col = rgb(0, 0, 1, 0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated Median Value",
main = "Ridge Model Simulations")
lines(Boston[-train_idx, ]$medv, col = "red")
```
# Compare different activation functions
```{r eval=TRUE, fig.width=10, fig.height=8}
# Adjust margins to fit plots
par(mfrow=c(2,2), mar=c(4,4,2,1)) # Bottom, left, top, right margins
# Fit models with different activation functions
ridge_relu <- rvfl::rvfl(x = as.matrix(train_data[,-14]), y = train_data$medv, activ = "relu")
ridge_sigmoid <- rvfl::rvfl(x = as.matrix(train_data[,-14]), y = train_data$medv, activ = "sigmoid")
ridge_tanh <- rvfl::rvfl(x = as.matrix(train_data[,-14]), y = train_data$medv, activ = "tanh")
# Make predictions
relu_pred <- predict(ridge_relu, newdata = as.matrix(test_data), method = "gaussian")
sigmoid_pred <- predict(ridge_sigmoid, newdata = as.matrix(test_data), method = "gaussian")
tanh_pred <- predict(ridge_tanh, newdata = as.matrix(test_data), method = "gaussian")
# Combine results
results_all <- data.frame(
Actual = Boston[-train_idx, ]$medv,
LM_Pred = lm_pred[,"fit"],
LM_Lower = lm_pred[,"lwr"],
LM_Upper = lm_pred[,"upr"],
ReLU_Pred = relu_pred[,"fit"],
ReLU_Lower = relu_pred[,"lwr"],
ReLU_Upper = relu_pred[,"upr"],
Sigmoid_Pred = sigmoid_pred[,"fit"],
Sigmoid_Lower = sigmoid_pred[,"lwr"],
Sigmoid_Upper = sigmoid_pred[,"upr"],
Tanh_Pred = tanh_pred[,"fit"],
Tanh_Lower = tanh_pred[,"lwr"],
Tanh_Upper = tanh_pred[,"upr"]
)
# Calculate coverage and Winkler scores for each model
lm_coverage <- mean(Boston[-train_idx, ]$medv >= results_all$LM_Lower &
Boston[-train_idx, ]$medv <= results_all$LM_Upper)
relu_coverage <- mean(Boston[-train_idx, ]$medv >= results_all$ReLU_Lower &
Boston[-train_idx, ]$medv <= results_all$ReLU_Upper)
sigmoid_coverage <- mean(Boston[-train_idx, ]$medv >= results_all$Sigmoid_Lower &
Boston[-train_idx, ]$medv <= results_all$Sigmoid_Upper)
tanh_coverage <- mean(Boston[-train_idx, ]$medv >= results_all$Tanh_Lower &
Boston[-train_idx, ]$medv <= results_all$Tanh_Upper)
lm_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv,
results_all$LM_Lower, results_all$LM_Upper)
relu_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv,
results_all$ReLU_Lower, results_all$ReLU_Upper)
sigmoid_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv,
results_all$Sigmoid_Lower, results_all$Sigmoid_Upper)
tanh_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv,
results_all$Tanh_Lower, results_all$Tanh_Upper)
print(sprintf("\nPrediction interval metrics:"))
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score",
100 * lm_coverage, mean(lm_winkler)))
print(sprintf("ReLU Model: %.1f%% coverage, %.3f Winkler score",
100 * relu_coverage, mean(relu_winkler)))
print(sprintf("Sigmoid Model: %.1f%% coverage, %.3f Winkler score",
100 * sigmoid_coverage, mean(sigmoid_winkler)))
print(sprintf("Tanh Model: %.1f%% coverage, %.3f Winkler score",
100 * tanh_coverage, mean(tanh_winkler)))
# Visualization
par(mfrow=c(2,2))
# Linear Model Plot
plot(results_all$Actual, results_all$LM_Pred,
main="Linear Model",
xlab="Actual", ylab="Predicted",
ylim=range(results_all[,c("LM_Lower","LM_Upper")]))
x_ordered <- order(results_all$Actual)
polygon(c(results_all$Actual[x_ordered], rev(results_all$Actual[x_ordered])),
c(results_all$LM_Lower[x_ordered], rev(results_all$LM_Upper[x_ordered])),
col=rgb(0,0,1,0.2), border=NA)
points(results_all$Actual, results_all$LM_Pred)
abline(0,1, col="red", lty=2)
# ReLU Model Plot
plot(results_all$Actual, results_all$ReLU_Pred,
main="ReLU Model",
xlab="Actual", ylab="Predicted",
ylim=range(results_all[,c("ReLU_Lower","ReLU_Upper")]))
polygon(c(results_all$Actual[x_ordered], rev(results_all$Actual[x_ordered])),
c(results_all$ReLU_Lower[x_ordered], rev(results_all$ReLU_Upper[x_ordered])),
col=rgb(0,0,1,0.2), border=NA)
points(results_all$Actual, results_all$ReLU_Pred)
abline(0,1, col="red", lty=2)
# Sigmoid Model Plot
plot(results_all$Actual, results_all$Sigmoid_Pred,
main="Sigmoid Model",
xlab="Actual", ylab="Predicted",
ylim=range(results_all[,c("Sigmoid_Lower","Sigmoid_Upper")]))
polygon(c(results_all$Actual[x_ordered], rev(results_all$Actual[x_ordered])),
c(results_all$Sigmoid_Lower[x_ordered], rev(results_all$Sigmoid_Upper[x_ordered])),
col=rgb(0,0,1,0.2), border=NA)
points(results_all$Actual, results_all$Sigmoid_Pred)
abline(0,1, col="red", lty=2)
# Tanh Model Plot
plot(results_all$Actual, results_all$Tanh_Pred,
main="Tanh Model",
xlab="Actual", ylab="Predicted",
ylim=range(results_all[,c("Tanh_Lower","Tanh_Upper")]))
polygon(c(results_all$Actual[x_ordered], rev(results_all$Actual[x_ordered])),
c(results_all$Tanh_Lower[x_ordered], rev(results_all$Tanh_Upper[x_ordered])),
col=rgb(0,0,1,0.2), border=NA)
points(results_all$Actual, results_all$Tanh_Pred)
abline(0,1, col="red", lty=2)
```
# Simulation plots
```{r eval=TRUE, fig.width=10, fig.height=8}
# Adjust margins to fit plots
par(mfrow=c(2,2), mar=c(4,4,2,1)) # Bottom, left, top, right margins
# Linear Model simulations
sims_lm <- simulate(lm_model, newdata = test_data, nsim = 500)
matplot(sims_lm, type = "l",
col = rgb(0,0,1,0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated Value",
main = "Linear Model Simulations")
lines(Boston[-train_idx, ]$medv, col = "red")
# ReLU Model simulations
sims_relu <- simulate(ridge_relu, newdata = as.matrix(test_data), nsim = 500)
matplot(sims_relu, type = "l",
col = rgb(0,0,1,0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated Value",
main = "ReLU Model Simulations")
lines(Boston[-train_idx, ]$medv, col = "red")
# Sigmoid Model simulations
sims_sigmoid <- simulate(ridge_sigmoid, newdata = as.matrix(test_data), nsim = 500)
matplot(sims_sigmoid, type = "l",
col = rgb(0,0,1,0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated Value",
main = "Sigmoid Model Simulations")
lines(Boston[-train_idx, ]$medv, col = "red")
# Tanh Model simulations
sims_tanh <- simulate(ridge_tanh, newdata = as.matrix(test_data), nsim = 500)
matplot(sims_tanh, type = "l",
col = rgb(0,0,1,0.1), lty = 1,
xlab = "obs. #", ylab = "Simulated Value",
main = "Tanh Model Simulations")
lines(Boston[-train_idx, ]$medv, col = "red")
```