Ridge NNET model

library(rvfl)
nnetModel <- function(formula, ...) {
  nnet::nnet(formula = formula, 
             linout = TRUE,
             size = 10,
             maxit = 100,
             ...)
}

Example 1: MPG Prediction (mtcars dataset)

Load and prepare data

data(mtcars)

set.seed(1243)
train_idx <- sample(nrow(mtcars), size = floor(0.8 * nrow(mtcars)))
train_data <- mtcars[train_idx, ]
test_data <- mtcars[-train_idx, -1]

Fit models

# Fit regular linear model
start <- proc.time()[3]
lm_model <- lm(mpg ~ ., data = train_data)
print(proc.time()[3] - start)
## elapsed 
##   0.003
print(summary(lm_model))
## 
## Call:
## lm(formula = mpg ~ ., data = train_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5211 -0.9792 -0.0324  1.1808  4.9814 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -5.054416  25.456900  -0.199   0.8455  
## cyl          0.695392   1.396506   0.498   0.6262  
## disp         0.005254   0.017342   0.303   0.7664  
## hp          -0.007610   0.027723  -0.274   0.7877  
## drat         4.128157   2.724353   1.515   0.1520  
## wt          -1.621396   2.139071  -0.758   0.4610  
## qsec         0.064356   0.932144   0.069   0.9459  
## vs           0.138716   3.421183   0.041   0.9682  
## am          -0.498476   2.956568  -0.169   0.8685  
## gear         4.402648   2.287816   1.924   0.0749 .
## carb        -1.999389   1.299580  -1.538   0.1462  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.464 on 14 degrees of freedom
## Multiple R-squared:  0.8938, Adjusted R-squared:  0.818 
## F-statistic: 11.79 on 10 and 14 DF,  p-value: 3.4e-05
#print(confint(lm_model))

# Fit calibrated model 
start <- proc.time()[3]
ridge_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-1]), y = train_data$mpg, engine = nnetModel)
## # weights:  121
## initial  value 473.213215 
## iter  10 value 43.483667
## iter  20 value 14.433163
## iter  30 value 2.574259
## iter  40 value 0.066823
## final  value 0.000063 
## converged
print(proc.time()[3] - start)
## elapsed 
##   0.012
print(summary(ridge_model$model))
## a 10-10-1 network with 121 weights
## options were - linear output units 
##   b->h1  i1->h1  i2->h1  i3->h1  i4->h1  i5->h1  i6->h1  i7->h1  i8->h1  i9->h1 
##    0.85    0.91   -0.21   -3.23    0.20    2.49   -6.12    2.16   -0.83    0.64 
## i10->h1 
##   -0.53 
##   b->h2  i1->h2  i2->h2  i3->h2  i4->h2  i5->h2  i6->h2  i7->h2  i8->h2  i9->h2 
##    2.08    0.02    2.32    2.75    0.94   -1.56    0.02    0.20   -2.14   -2.14 
## i10->h2 
##   -3.40 
##   b->h3  i1->h3  i2->h3  i3->h3  i4->h3  i5->h3  i6->h3  i7->h3  i8->h3  i9->h3 
##   -6.13   -1.82   -0.67   -2.07   -0.20   -1.27   -0.04   -0.89   -0.85    0.02 
## i10->h3 
##   -4.78 
##   b->h4  i1->h4  i2->h4  i3->h4  i4->h4  i5->h4  i6->h4  i7->h4  i8->h4  i9->h4 
##    1.88    4.51   -1.02   -0.34   -0.80   -4.02    3.06    0.49   -1.96   -3.93 
## i10->h4 
##   -6.29 
##   b->h5  i1->h5  i2->h5  i3->h5  i4->h5  i5->h5  i6->h5  i7->h5  i8->h5  i9->h5 
##   -4.25   -0.41    2.44   -0.29    5.04    4.19    1.10    0.49    1.51    2.79 
## i10->h5 
##   -0.49 
##   b->h6  i1->h6  i2->h6  i3->h6  i4->h6  i5->h6  i6->h6  i7->h6  i8->h6  i9->h6 
##   -1.13   -0.84   -1.86   -1.68    0.01   -1.40   -1.68    0.53   -0.78   -0.72 
## i10->h6 
##   -0.24 
##   b->h7  i1->h7  i2->h7  i3->h7  i4->h7  i5->h7  i6->h7  i7->h7  i8->h7  i9->h7 
##   -3.29    1.06    1.44    4.01    2.83   -1.92   -1.72    1.31   -1.44    0.34 
## i10->h7 
##   -1.14 
##   b->h8  i1->h8  i2->h8  i3->h8  i4->h8  i5->h8  i6->h8  i7->h8  i8->h8  i9->h8 
##   -4.25    1.98   -3.70   -1.44    2.12    5.11    1.61   -0.72    4.36    1.31 
## i10->h8 
##    2.18 
##   b->h9  i1->h9  i2->h9  i3->h9  i4->h9  i5->h9  i6->h9  i7->h9  i8->h9  i9->h9 
##    0.55    2.19    1.36    1.28    8.44    2.56   -1.75    1.23    2.11    0.17 
## i10->h9 
##   -0.83 
##   b->h10  i1->h10  i2->h10  i3->h10  i4->h10  i5->h10  i6->h10  i7->h10 
##    -0.22     1.02    -1.88     0.41     2.04    -0.21     3.07     0.24 
##  i8->h10  i9->h10 i10->h10 
##     1.27    -1.60    -3.29 
##   b->o  h1->o  h2->o  h3->o  h4->o  h5->o  h6->o  h7->o  h8->o  h9->o h10->o 
##  -3.75  -4.89  -4.58   4.07   8.07   3.22   3.06   4.75   4.56   2.44  -5.35
##print(confint(ridge_model))
#print(simulate(ridge_model, newdata = test_data))

Make predictions

lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction")
ridge_pred <- predict(ridge_model, newdata = as.matrix(test_data), method="surrogate")

Compare predictions

results <- data.frame(
  Actual = mtcars[-train_idx, ]$mpg,
  LM_Pred = lm_pred[,"fit"],
  LM_Lower = lm_pred[,"lwr"],
  LM_Upper = lm_pred[,"upr"],
  Ridge_Pred = ridge_pred[,"fit"],
  Ridge_Lower = ridge_pred[,"lwr"], 
  Ridge_Upper = ridge_pred[,"upr"]
)

# Print results
print("Prediction Intervals Comparison:")
## [1] "Prediction Intervals Comparison:"
print(head(results))
##                Actual  LM_Pred  LM_Lower LM_Upper Ridge_Pred Ridge_Lower
## Valiant          18.1 17.93324 10.149847 25.71663   18.55016    14.67114
## Merc 280C        17.8 20.63530 13.636618 27.63398   19.80226    15.92324
## Toyota Corolla   33.9 28.58373 22.379666 34.78779   32.77860    28.89958
## Camaro Z28       13.3 15.85710  8.140858 23.57335   14.57393    10.69491
## Porsche 914-2    26.0 31.07535 18.988702 43.16201   25.05885    21.17983
## Ford Pantera L   15.8 27.07516 14.930150 39.22016   22.82828    18.94926
##                Ridge_Upper
## Valiant           22.83238
## Merc 280C         24.08449
## Toyota Corolla    37.06082
## Camaro Z28        18.85615
## Porsche 914-2     29.34107
## Ford Pantera L    27.11050
# Calculate coverage and Winkler scores
lm_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$LM_Lower & 
                   mtcars[-train_idx, ]$mpg <= results$LM_Upper)
ridge_coverage <- mean(mtcars[-train_idx, ]$mpg >= results$Ridge_Lower & 
                      mtcars[-train_idx, ]$mpg <= results$Ridge_Upper)

lm_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$LM_Lower, results$LM_Upper)
ridge_winkler <- misc::winkler_score(mtcars[-train_idx, ]$mpg, results$Ridge_Lower, results$Ridge_Upper)

print(sprintf("\nPrediction interval metrics:"))
## [1] "\nPrediction interval metrics:"
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score", 
              100 * lm_coverage, mean(lm_winkler)))
## [1] "Linear Model: 100.0% coverage, 18.226 Winkler score"
print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score", 
              100 * ridge_coverage, mean(ridge_winkler)))
## [1] "Calibrated Model: 71.4% coverage, 72.362 Winkler score"
sims <- simulate(ridge_model, newdata = as.matrix(test_data), nsim = 500, method="surrogate")
# Plot simulations
matplot(sims, type = "l", 
        col = rgb(0, 0, 1, 0.1), lty = 1,
        xlab = "obs. #", ylab = "Simulated MPG",
        main = "Ridge Model Simulations")
lines(mtcars[-train_idx, ]$mpg, col = "red")        

Example 2: Boston Housing Price Prediction

Load and prepare data

library(MASS)
data(Boston)

set.seed(1243)
train_idx <- sample(nrow(Boston), size = floor(0.8 * nrow(Boston)))
train_data <- Boston[train_idx, ]
test_data <- Boston[-train_idx, -14]  # -14 removes 'medv' (target variable)

Fit models

# Fit regular linear model
start <- proc.time()[3]
lm_model <- lm(medv ~ ., data = train_data)
print(proc.time()[3] - start)
## elapsed 
##   0.002
print(summary(lm_model$model))
##       medv            crim                zn             indus       
##  Min.   : 5.00   Min.   : 0.00906   Min.   :  0.00   Min.   : 1.210  
##  1st Qu.:16.77   1st Qu.: 0.08260   1st Qu.:  0.00   1st Qu.: 5.287  
##  Median :20.90   Median : 0.26888   Median :  0.00   Median : 9.690  
##  Mean   :22.45   Mean   : 3.83730   Mean   : 11.54   Mean   :11.227  
##  3rd Qu.:25.00   3rd Qu.: 3.79366   3rd Qu.: 12.50   3rd Qu.:18.100  
##  Max.   :50.00   Max.   :88.97620   Max.   :100.00   Max.   :27.740  
##       chas              nox               rm             age        
##  Min.   :0.00000   Min.   :0.3850   Min.   :3.863   Min.   :  2.90  
##  1st Qu.:0.00000   1st Qu.:0.4490   1st Qu.:5.884   1st Qu.: 44.23  
##  Median :0.00000   Median :0.5380   Median :6.185   Median : 78.60  
##  Mean   :0.07673   Mean   :0.5555   Mean   :6.287   Mean   : 68.82  
##  3rd Qu.:0.00000   3rd Qu.:0.6240   3rd Qu.:6.626   3rd Qu.: 94.33  
##  Max.   :1.00000   Max.   :0.8710   Max.   :8.780   Max.   :100.00  
##       dis              rad              tax           ptratio     
##  Min.   : 1.130   Min.   : 1.000   Min.   :187.0   Min.   :12.60  
##  1st Qu.: 2.070   1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40  
##  Median : 3.216   Median : 5.000   Median :330.0   Median :19.10  
##  Mean   : 3.797   Mean   : 9.668   Mean   :409.8   Mean   :18.47  
##  3rd Qu.: 5.234   3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20  
##  Max.   :12.127   Max.   :24.000   Max.   :711.0   Max.   :22.00  
##      black            lstat       
##  Min.   :  0.32   Min.   : 1.730  
##  1st Qu.:374.53   1st Qu.: 7.107  
##  Median :391.56   Median :11.430  
##  Mean   :354.65   Mean   :12.792  
##  3rd Qu.:396.35   3rd Qu.:17.102  
##  Max.   :396.90   Max.   :37.970
#print(confint(lm_model$model))

# Fit calibrated model 
start <- proc.time()[3]
ridge_model <- rvfl::calibmodel(lambda=10**seq(-10, 10, length.out=100), x = as.matrix(train_data[,-14]), y = train_data$medv, engine = nnetModel)
## # weights:  151
## initial  value 16880.769624 
## iter  10 value 6192.445129
## iter  20 value 4515.206735
## iter  30 value 3683.356749
## iter  40 value 2784.082812
## iter  50 value 2144.441727
## iter  60 value 1759.937537
## iter  70 value 1523.434885
## iter  80 value 1336.862343
## iter  90 value 1228.464772
## iter 100 value 1138.038049
## final  value 1138.038049 
## stopped after 100 iterations
print(proc.time()[3] - start)
## elapsed 
##   0.054
print(summary(ridge_model$model))
## a 13-10-1 network with 151 weights
## options were - linear output units 
##   b->h1  i1->h1  i2->h1  i3->h1  i4->h1  i5->h1  i6->h1  i7->h1  i8->h1  i9->h1 
##  -14.68   -2.84    0.75   17.23   -2.64   -8.64   -6.68    5.54   -3.16    5.69 
## i10->h1 i11->h1 i12->h1 i13->h1 
##    0.74   -9.70   -7.21    8.77 
##   b->h2  i1->h2  i2->h2  i3->h2  i4->h2  i5->h2  i6->h2  i7->h2  i8->h2  i9->h2 
##  -11.87   -5.63   -5.71   -4.56    4.49    3.29  -14.81    5.26   13.30  -14.71 
## i10->h2 i11->h2 i12->h2 i13->h2 
##    5.07   -4.14   25.89   -3.89 
##   b->h3  i1->h3  i2->h3  i3->h3  i4->h3  i5->h3  i6->h3  i7->h3  i8->h3  i9->h3 
##   10.31    8.00   11.83    7.47   -7.18    4.03  -13.33    8.70   -7.05    3.16 
## i10->h3 i11->h3 i12->h3 i13->h3 
##   -3.85   15.04   -1.48    6.09 
##   b->h4  i1->h4  i2->h4  i3->h4  i4->h4  i5->h4  i6->h4  i7->h4  i8->h4  i9->h4 
##   24.96    0.02   -2.93   -2.37    0.09   -9.93   -9.05    6.49    3.26   -2.17 
## i10->h4 i11->h4 i12->h4 i13->h4 
##  -10.44    5.76   -8.65   19.54 
##   b->h5  i1->h5  i2->h5  i3->h5  i4->h5  i5->h5  i6->h5  i7->h5  i8->h5  i9->h5 
##   -8.61   -1.51   -5.05    5.88   -3.88    0.04    4.15    0.60    8.33    9.20 
## i10->h5 i11->h5 i12->h5 i13->h5 
##   -3.64    3.76   -2.67   12.35 
##   b->h6  i1->h6  i2->h6  i3->h6  i4->h6  i5->h6  i6->h6  i7->h6  i8->h6  i9->h6 
##  -16.79    3.93   -5.75   15.75   -7.72    9.68  -14.77    7.73   -7.88    2.89 
## i10->h6 i11->h6 i12->h6 i13->h6 
##   -0.25    2.95  -15.17   -5.79 
##   b->h7  i1->h7  i2->h7  i3->h7  i4->h7  i5->h7  i6->h7  i7->h7  i8->h7  i9->h7 
##  -11.43   -7.41   -6.03   15.47    1.23    6.03   17.90    4.57   12.18   24.00 
## i10->h7 i11->h7 i12->h7 i13->h7 
##   -5.63   -4.19   11.88  -15.27 
##   b->h8  i1->h8  i2->h8  i3->h8  i4->h8  i5->h8  i6->h8  i7->h8  i8->h8  i9->h8 
##   -5.86    0.50   -1.35   -0.11    7.61   -4.76    9.65   -3.20    6.81   -1.05 
## i10->h8 i11->h8 i12->h8 i13->h8 
##    0.15  -10.39    2.27  -15.74 
##   b->h9  i1->h9  i2->h9  i3->h9  i4->h9  i5->h9  i6->h9  i7->h9  i8->h9  i9->h9 
##  -12.43    0.49   -0.39    5.18   -0.21  -11.15    5.00    8.89    6.40   12.42 
## i10->h9 i11->h9 i12->h9 i13->h9 
##   -3.29    6.12   -5.67  -21.89 
##   b->h10  i1->h10  i2->h10  i3->h10  i4->h10  i5->h10  i6->h10  i7->h10 
##   -12.21     6.28     6.33    -3.99    -4.73     4.75     7.94    -7.45 
##  i8->h10  i9->h10 i10->h10 i11->h10 i12->h10 i13->h10 
##    -0.31    21.34    23.03     1.12    -3.91   -12.75 
##   b->o  h1->o  h2->o  h3->o  h4->o  h5->o  h6->o  h7->o  h8->o  h9->o h10->o 
##  22.17   8.42  -5.88  -7.74 -12.04 -10.55  -8.67   5.99  -5.89   4.00  -3.35
##print(confint(ridge_model))
#print(simulate(ridge_model, newdata = test_data))

lm_pred <- predict(lm_model, newdata = test_data, interval = "prediction")
ridge_pred <- predict(ridge_model, newdata = as.matrix(test_data), method="surrogate")

Make predictions and compare

results <- data.frame(
  Actual = Boston[-train_idx, ]$medv,
  LM_Pred = lm_pred[,"fit"],
  LM_Lower = lm_pred[,"lwr"],
  LM_Upper = lm_pred[,"upr"],
  Ridge_Pred = ridge_pred[,"fit"],
  Ridge_Lower = ridge_pred[,"lwr"], 
  Ridge_Upper = ridge_pred[,"upr"]
)

# Print results
print("Prediction Intervals Comparison:")
## [1] "Prediction Intervals Comparison:"
print(head(results))
##    Actual  LM_Pred  LM_Lower LM_Upper Ridge_Pred Ridge_Lower Ridge_Upper
## 1    24.0 30.57209 20.958399 40.18579  24.613519   13.409561    32.60415
## 3    34.7 30.68339 21.107377 40.25940  30.536193   19.564175    38.10443
## 4    33.4 28.70511 19.107688 38.30253  30.404423   19.793354    43.33836
## 18   17.5 17.06191  7.487523 26.63630  18.505387    7.148120    26.46307
## 21   13.6 12.85420  3.239688 22.46872   8.816302   -2.132452    21.41255
## 24   14.5 14.14956  4.535627 23.76348   8.389287   -1.837779    16.01995
# Calculate coverage and Winkler scores
lm_coverage <- mean(Boston[-train_idx, ]$medv >= results$LM_Lower & 
                   Boston[-train_idx, ]$medv <= results$LM_Upper)
ridge_coverage <- mean(Boston[-train_idx, ]$medv >= results$Ridge_Lower & 
                      Boston[-train_idx, ]$medv <= results$Ridge_Upper)

lm_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$LM_Lower, results$LM_Upper)
ridge_winkler <- misc::winkler_score(Boston[-train_idx, ]$medv, results$Ridge_Lower, results$Ridge_Upper)

print(sprintf("\nPrediction interval metrics:"))
## [1] "\nPrediction interval metrics:"
print(sprintf("Linear Model: %.1f%% coverage, %.3f Winkler score", 
              100 * lm_coverage, mean(lm_winkler)))
## [1] "Linear Model: 95.1% coverage, 26.711 Winkler score"
print(sprintf("Calibrated Model: %.1f%% coverage, %.3f Winkler score", 
              100 * ridge_coverage, mean(ridge_winkler)))
## [1] "Calibrated Model: 94.1% coverage, 28.723 Winkler score"
sims <- simulate(ridge_model, newdata = as.matrix(test_data), nsim = 500, method="surrogate")
# Plot simulations
matplot(sims, type = "l", 
        col = rgb(0, 0, 1, 0.1), lty = 1,
        xlab = "obs. #", ylab = "Simulated MPG",
        main = "Ridge Model Simulations")
lines(Boston[-train_idx, ]$medv, col = "red")