Getting started

# packages used in the examples below
library(learningmachine)
library(caret)
library(mlbench)
library(palmerpenguins)
# mtcars: predict mpg from the 10 remaining variables
X <- as.matrix(mtcars[,-1])
y <- mtcars$mpg
# reproducible 80/20 train/test split
set.seed(123)
(index_train <- base::sample.int(n = nrow(X),
                                 size = floor(0.8*nrow(X)),
                                 replace = FALSE))
##  [1] 31 15 19 14  3 10 18 22 11  5 20 29 23 30  9 28  8 27  7 32 26 17  4  1 24
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
dim(X_train)
## [1] 25 10
dim(X_test)
## [1]  7 10
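
caret is loaded above; a similar 80/20 split could also be drawn with caret::createDataPartition (shown only as an alternative, and note that it stratifies on the response; the rest of this section keeps base::sample.int):

# hypothetical alternative split using caret (not used below)
set.seed(123)
index_train_alt <- caret::createDataPartition(y, p = 0.8, list = FALSE)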

1 lm regression

obj <- learningmachine::Regressor$new(method = "lm")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
obj$get_method()
## [1] "lm"
t0 <- proc.time()[3]
# fit, requesting split conformal prediction intervals
obj$fit(X_train, y_train, pi_method = "splitconformal")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.003 s
# out-of-sample RMSE
print(sqrt(mean((obj$predict(X_test) - y_test)^2)))
## [1] 3.548852
res <- obj$predict(X = X_test, level = 95)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
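
The plotting block above is reused verbatim for every model below; a small helper (hypothetical name plot_pi, not part of learningmachine) would avoid the repetition:

# plot point predictions and prediction bands after the training series
plot_pi <- function(res, y_train, y_test, y) {
  plot(c(y_train, res$preds), type = 'l', main = "", ylab = "",
       ylim = range(c(res$upper, res$lower, y)))
  lines(c(y_train, res$upper), col = "gray60")  # upper bound
  lines(c(y_train, res$lower), col = "gray60")  # lower bound
  lines(c(y_train, res$preds), col = "red")     # point predictions
  lines(c(y_train, y_test), col = "blue")       # observed test values
}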

# empirical coverage of the 95% prediction intervals on the test set
mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
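
For intuition, here is a minimal sketch of the split conformal idea (illustration only; learningmachine's internal split and finite-sample quantile correction may differ): fit on one half of the training set, compute absolute residuals on the held-out half, and use their 95% quantile as a symmetric band around the test predictions.

set.seed(123)
idx_cal <- sample.int(nrow(X_train), floor(nrow(X_train) / 2))
fit_half <- lm(y ~ ., data = data.frame(X_train[-idx_cal, , drop = FALSE],
                                        y = y_train[-idx_cal]))
# absolute residuals on the calibration half
abs_resid <- abs(y_train[idx_cal] -
                   predict(fit_half, newdata = data.frame(X_train[idx_cal, , drop = FALSE])))
# symmetric band: 95% quantile of the calibration residuals
q_hat <- quantile(abs_resid, probs = 0.95)
preds_sc <- predict(fit_half, newdata = data.frame(X_test))
cbind(lower = preds_sc - q_hat, preds = preds_sc, upper = preds_sc + q_hat)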
t0 <- proc.time()[3]
# jackknife+ prediction intervals (n leave-one-out refits)
obj$fit(X_train, y_train, 
        pi_method = "jackknifeplus")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.038 s
obj$set_level(95L)

res <- obj$predict(X = X_test)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
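
jackknife+ (Barber et al.) refits the model n times, leaving one training observation out each time, which explains the longer running time above. A minimal sketch with lm, using plain quantiles instead of the exact (n+1)-corrected order statistics (learningmachine's implementation details may differ):

n <- nrow(X_train)
loo_resid <- numeric(n)
loo_preds <- matrix(0, nrow = n, ncol = nrow(X_test))
for (i in seq_len(n)) {
  # refit without observation i
  fit_i <- lm(y ~ ., data = data.frame(X_train[-i, , drop = FALSE],
                                       y = y_train[-i]))
  loo_resid[i] <- abs(y_train[i] -
                        predict(fit_i, newdata = data.frame(X_train[i, , drop = FALSE])))
  loo_preds[i, ] <- predict(fit_i, newdata = data.frame(X_test))
}
alpha <- 0.05
# jackknife+ bounds: quantiles of LOO predictions shifted by LOO residuals
lower <- apply(sweep(loo_preds, 1, loo_resid, "-"), 2, quantile, probs = alpha)
upper <- apply(sweep(loo_preds, 1, loo_resid, "+"), 2, quantile, probs = 1 - alpha)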

2 ranger regression

obj <- learningmachine::Regressor$new(method = "ranger")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.016 s
print(sqrt(mean((obj$predict(X_test) - y_test)^2)))
## [1] 2.302783
t0 <- proc.time()[3]
obj$fit(X_train, y_train,  
        pi_method = "splitconformal")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.016 s
obj$set_level(95)

res <- obj$predict(X = X_test)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
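
Extra arguments to fit() are forwarded to the underlying engine (lambda for "krr" and nrounds for "xgboost" below); assuming the same holds for "ranger", its hyperparameters could be passed as, e.g.:

# assumed pass-through of ranger hyperparameters (not verified here)
obj$fit(X_train, y_train, num.trees = 500L, mtry = 3L,
        pi_method = "splitconformal")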

3 KRR & ranger regression

# Boston housing dataset (note: this dataset has known ethical problems)
library(MASS)
data("Boston")

set.seed(13)
train_idx <- sample(nrow(Boston), 0.8 * nrow(Boston))
X_train <- as.matrix(Boston[train_idx, -ncol(Boston)])
X_test <- as.matrix(Boston[-train_idx, -ncol(Boston)])
y_train <- Boston$medv[train_idx]
y_test <- Boston$medv[-train_idx]

KRR

obj <- learningmachine::Regressor$new(method = "krr")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
obj$get_method()
## [1] "krr"
t0 <- proc.time()[3]
obj$fit(X_train, y_train, lambda = 0.1)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.065 s
print(sqrt(mean((obj$predict(X_test, level = 95)$preds - y_test)^2)))
## [1] 4.072951
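
For reference, kernel ridge regression solves (K + lambda I) alpha = y on the training kernel matrix K and predicts with k(x_new, X_train) alpha. A minimal sketch with a Gaussian kernel (an assumption: the kernel and defaults of the "krr" engine may differ, and in practice the inputs would be scaled first):

rbf_kernel <- function(A, B, sigma = 1) {
  # squared Euclidean distances via ||a||^2 + ||b||^2 - 2 * a.b
  D2 <- outer(rowSums(A^2), rowSums(B^2), "+") - 2 * tcrossprod(A, B)
  exp(-D2 / (2 * sigma^2))
}
lambda <- 0.1
K <- rbf_kernel(X_train, X_train)
coefs <- solve(K + lambda * diag(nrow(K)), y_train)  # (K + lambda I)^{-1} y
preds_krr <- rbf_kernel(X_test, X_train) %*% coefs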
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] 0.8148312
## 
## $R_squared_adj
## [1] 0.7874767
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -7.7026 -1.4233 -0.3691  0.5978  1.4476 20.7134 
## 
## $Coverage_rate
## [1] 100
## 
## $ttests
##              estimate         lower        upper      p-value signif
## crim      -0.75274784 -2.333133e+00    0.8276368 3.469830e-01       
## zn        -0.07757719 -5.595593e-01    0.4044049 7.501657e-01       
## indus     -3.10664765 -5.254146e+00   -0.9591496 5.003978e-03     **
## chas     572.74709319  4.993069e+02  646.1872537 2.136485e-28    ***
## nox     -610.56024448 -7.369184e+02 -484.2020600 7.354758e-16    ***
## rm       532.95644265  4.664325e+02  599.4803740 3.089503e-29    ***
## age       -6.41906094 -7.435801e+00   -5.4023213 2.805561e-22    ***
## dis      -72.22208238 -8.561047e+01  -58.8336944 2.572441e-18    ***
## rad       13.54949329  1.074985e+01   16.3491337 6.803325e-16    ***
## tax       -1.13482377 -1.257141e+00   -1.0125065 4.917300e-34    ***
## ptratio  -56.75576750 -6.635240e+01  -47.1591315 1.436394e-20    ***
## black      0.12291776 -9.999774e-03    0.2558353 6.952517e-02      .
## lstat    -21.11686762 -2.377286e+01  -18.4608753 5.360275e-29    ***
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             102    
## Number of columns          13     
## _______________________           
## Column type frequency:            
##   numeric                  13     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd       p0       p25       p50      p75
##  1 crim            -0.753    8.05    -31.4     -3.16     2.44      4.61 
##  2 zn              -0.0776   2.45     -5.80    -1.51     0.0312    0.489
##  3 indus           -3.11    10.9     -35.8     -9.29    -4.59     -0.510
##  4 chas           573.     374.    -1824.     491.     601.      710.   
##  5 nox           -611.     643.    -2499.   -1006.    -616.     -211.   
##  6 rm             533.     339.     -104.     242.     453.      787.   
##  7 age             -6.42     5.18    -19.3     -9.28    -6.21     -3.24 
##  8 dis            -72.2     68.2    -276.    -129.     -68.6     -14.2  
##  9 rad             13.5     14.3     -19.4      6.05    16.3      22.9  
## 10 tax             -1.13     0.623    -3.57    -1.38    -1.04     -0.713
## 11 ptratio        -56.8     48.9    -211.     -73.4    -49.9     -35.5  
## 12 black            0.123    0.677    -1.54    -0.204    0.0551    0.376
## 13 lstat          -21.1     13.5     -53.8    -30.1    -22.3     -10.5  
##        p100 hist 
##  1    6.58  ▁▁▁▂▇
##  2    8.02  ▁▅▇▁▁
##  3   29.4   ▁▂▇▁▁
##  4 1371.    ▁▁▁▇▂
##  5 1165.    ▁▃▇▅▁
##  6 1295.    ▃▇▅▅▂
##  7    7.88  ▁▃▇▃▁
##  8   42.3   ▁▃▇▇▇
##  9   41.0   ▃▂▇▇▃
## 10    0.113 ▁▁▂▇▂
## 11  107.    ▁▂▇▂▁
## 12    2.19  ▂▆▇▂▁
## 13    9.00  ▁▅▇▅▂

ranger

obj <- learningmachine::Regressor$new(method = "ranger")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.103 s
print(sqrt(mean((obj$predict(X_test, level=95)$preds - y_test)^2)))
## [1] 3.74535
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] 0.8434207
## 
## $R_squared_adj
## [1] 0.8202897
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -8.8563 -1.5963 -0.1491  0.4260  1.9961 12.6714 
## 
## $Coverage_rate
## [1] 100
## 
## $ttests
##             estimate         lower        upper      p-value signif
## crim     -49.6344261 -232.91685299  133.6480007 5.923034e-01       
## zn         0.0784486   -0.06027044    0.2171676 2.645894e-01       
## indus    -18.8096072  -34.04595819   -3.5732562 1.604939e-02      *
## chas       0.0000000           NaN          NaN          NaN       
## nox     -467.8840363 -627.07556952 -308.6925031 6.664467e-08    ***
## rm       276.5215702  225.72209883  327.3210416 1.573542e-18    ***
## age       -2.0367081   -2.86343009   -1.2099861 3.857020e-06    ***
## dis       22.5199587   -1.94139968   46.9813170 7.076063e-02      .
## rad        1.5745877   -0.25222578    3.4014011 9.036604e-02      .
## tax       -0.6832444   -0.89986179   -0.4666270 9.564055e-09    ***
## ptratio  -31.9041989  -38.38822071  -25.4201770 3.019931e-16    ***
## black     -0.3827134   -0.63975500   -0.1256717 3.907316e-03     **
## lstat    -44.9609199  -57.58730968  -32.3345300 2.116676e-10    ***
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             102    
## Number of columns          13     
## _______________________           
## Column type frequency:            
##   numeric                  13     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd       p0      p25      p50     p75     p100
##  1 crim           -49.6    933.    -6399.    -70.6    -10.7    53.3   2113.   
##  2 zn               0.0784   0.706    -1.09    0        0       0        5.76 
##  3 indus          -18.8     77.6    -451.     -7.13     2.68    7.68    49.6  
##  4 chas             0        0         0       0        0       0        0    
##  5 nox           -468.     810.    -3221.   -897.    -224.     53.8    769.   
##  6 rm             277.     259.      -17.7    73.5    191.    361.     855.   
##  7 age             -2.04     4.21    -34.1    -2.99    -1.10    0        7.16 
##  8 dis             22.5    125.     -167.    -25.8     -3.58    4.74   644.   
##  9 rad              1.57     9.30     -4.32    0        0       0.242   88.1  
## 10 tax             -0.683    1.10     -4.90   -0.606   -0.305  -0.121    0.333
## 11 ptratio        -31.9     33.0    -150.    -42.1    -21.2    -8.87     7.14 
## 12 black           -0.383    1.31     -4.96   -0.884    0       0.424    2.11 
## 13 lstat          -45.0     64.3    -332.    -53.9    -24.2    -6.86    24.7  
##    hist 
##  1 ▁▁▁▇▁
##  2 ▇▁▁▁▁
##  3 ▁▁▁▁▇
##  4 ▁▁▇▁▁
##  5 ▁▁▅▇▆
##  6 ▇▃▂▁▂
##  7 ▁▁▁▇▇
##  8 ▇▇▁▁▁
##  9 ▇▁▁▁▁
## 10 ▁▁▁▁▇
## 11 ▁▁▂▅▇
## 12 ▁▁▂▇▂
## 13 ▁▁▁▂▇

4 KRR regression

X <- as.matrix(mtcars[,-1])
y <- mtcars$mpg

# reproducible 70/30 train/test split
set.seed(123)
(index_train <- base::sample.int(n = nrow(X),
                                 size = floor(0.7*nrow(X)),
                                 replace = FALSE))
##  [1] 31 15 19 14  3 10 18 22 11  5 20 29 23 30  9 28  8 27  7 32 26 17
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
dim(X_train)
## [1] 22 10
dim(X_test)
## [1] 10 10
obj <- learningmachine::Regressor$new(method = "krr")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
t0 <- proc.time()[3]
obj$fit(X_train, y_train, lambda = 0.1)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.004 s
print(obj$predict(X_test, level = 95))
## $preds
##  [1] 22.151349 21.802194 12.541365 10.124759 13.408181 14.155816  7.421184
##  [8] 16.879536 13.615153 12.749565
## 
## $lower
##  [1] 12.1513495 11.8021941  2.5413650  0.1247588  3.4081805  4.1558157
##  [7] -2.5788160  6.8795365  3.6151533  2.7495651
## 
## $upper
##  [1] 32.15135 31.80219 22.54137 20.12476 23.40818 24.15582 17.42118 26.87954
##  [9] 23.61515 22.74957
t0 <- proc.time()[3]
obj$fit(X_train, y_train, lambda = 0.1,  
        pi_method = "splitconformal")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.003 s
obj$set_level(95)
res <- obj$predict(X = X_test)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
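
The empirical-coverage check recurs throughout; a small helper (hypothetical name coverage_rate, not part of learningmachine) makes the intent explicit:

coverage_rate <- function(res, y) {
  mean(y >= as.numeric(res$lower) & y <= as.numeric(res$upper))
}
coverage_rate(res, y_test)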
# refit; this time the confidence level is passed directly to predict()
t0 <- proc.time()[3]
obj$fit(X_train, y_train, lambda = 0.1,  
        pi_method = "splitconformal")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.003 s
res <- obj$predict(X = X_test, level= 95)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
# note: with 10 test observations and 10 predictors, n - p - 1 < 0, so the
# adjusted R-squared reported below is not meaningful
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] -0.8614864
## 
## $R_squared_adj
## [1] 17.75338
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.1513  0.5083  3.0680  3.4751  5.9929  8.8586 
## 
## $Coverage_rate
## [1] 100
## 
## $ttests
##           estimate        lower        upper      p-value signif
## cyl   -23.98943109  -46.4975109   -1.4813513 3.918219e-02      *
## disp   -0.61133395   -0.9655770   -0.2570909 3.597927e-03     **
## hp     -0.07828878   -0.3785573    0.2219797 5.698268e-01       
## drat  310.94399534  160.4146969  461.4732937 1.163859e-03     **
## wt   -197.39979731 -240.1776661 -154.6219286 2.500030e-06    ***
## qsec  -19.50660485  -54.1139966   15.1007869 2.342132e-01       
## vs     69.84795566  -85.8899529  225.5858643 3.368080e-01       
## am    137.97019623   -0.2148915  276.1552839 5.028830e-02      .
## gear  191.57905165  134.3446800  248.8134233 3.424783e-05    ***
## carb    3.39227959  -22.2875140   29.0720732 7.718555e-01       
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd      p0      p25       p50      p75      p100
##  1 cyl            -24.0     31.5    -64.0   -40.9    -34.1      -0.849   37.6   
##  2 disp            -0.611    0.495   -1.66   -0.934   -0.429    -0.307   -0.0817
##  3 hp              -0.0783   0.420   -1.00   -0.218   -0.0402    0.235    0.359 
##  4 drat           311.     210.    -159.    195.     369.      464.     534.    
##  5 wt            -197.      59.8   -280.   -252.    -196.     -144.    -124.    
##  6 qsec           -19.5     48.4    -73.4   -60.0    -29.3      12.8     60.3   
##  7 vs              69.8    218.    -218.   -104.      86.1     103.     421.    
##  8 am             138.     193.    -161.     99.8    162.      201.     516.    
##  9 gear           192.      80.0     74.7   142.     178.      224.     367.    
## 10 carb             3.39    35.9    -56.3    -6.54     3.71     36.0     41.3   
##    hist 
##  1 ▃▇▂▃▂
##  2 ▂▂▂▆▇
##  3 ▂▁▆▃▇
##  4 ▂▁▆▃▇
##  5 ▇▁▇▂▇
##  6 ▇▇▂▂▅
##  7 ▆▂▇▁▃
##  8 ▂▁▇▁▁
##  9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
t0 <- proc.time()[3]
obj$fit(X_train, y_train, lambda = 0.1,  
        pi_method = "kdejackknifeplus")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.012 s
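
The "kde" prefix suggests that the interval bounds come from simulating a kernel density estimate of the calibration residuals rather than from their empirical quantiles directly (a reading of the method name; the package's exact scheme may differ). A sketch of such a residual sampler:

simulate_kde <- function(resid, n_sim = 250L) {
  bw <- stats::bw.nrd0(resid)  # rule-of-thumb bandwidth
  # Gaussian-KDE sampling: resample residuals, then jitter by the kernel
  resid[sample.int(length(resid), n_sim, replace = TRUE)] +
    rnorm(n_sim, sd = bw)
}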
res <- obj$predict(X = X_test, level= 95)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] -6.211227
## 
## $R_squared_adj
## [1] 65.90104
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.782   4.220   9.255   8.492  12.321  14.733 
## 
## $Coverage_rate
## [1] 100
## 
## $ttests
##           estimate        lower        upper      p-value signif
## cyl   -23.98943109  -46.4975109   -1.4813513 3.918219e-02      *
## disp   -0.61133395   -0.9655770   -0.2570909 3.597927e-03     **
## hp     -0.07828878   -0.3785573    0.2219797 5.698268e-01       
## drat  310.94399534  160.4146969  461.4732937 1.163859e-03     **
## wt   -197.39979731 -240.1776661 -154.6219286 2.500030e-06    ***
## qsec  -19.50660485  -54.1139966   15.1007869 2.342132e-01       
## vs     69.84795566  -85.8899529  225.5858643 3.368080e-01       
## am    137.97019623   -0.2148915  276.1552839 5.028830e-02      .
## gear  191.57905165  134.3446800  248.8134233 3.424783e-05    ***
## carb    3.39227959  -22.2875140   29.0720732 7.718555e-01       
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd      p0      p25       p50      p75      p100
##  1 cyl            -24.0     31.5    -64.0   -40.9    -34.1      -0.849   37.6   
##  2 disp            -0.611    0.495   -1.66   -0.934   -0.429    -0.307   -0.0817
##  3 hp              -0.0783   0.420   -1.00   -0.218   -0.0402    0.235    0.359 
##  4 drat           311.     210.    -159.    195.     369.      464.     534.    
##  5 wt            -197.      59.8   -280.   -252.    -196.     -144.    -124.    
##  6 qsec           -19.5     48.4    -73.4   -60.0    -29.3      12.8     60.3   
##  7 vs              69.8    218.    -218.   -104.      86.1     103.     421.    
##  8 am             138.     193.    -161.     99.8    162.      201.     516.    
##  9 gear           192.      80.0     74.7   142.     178.      224.     367.    
## 10 carb             3.39    35.9    -56.3    -6.54     3.71     36.0     41.3   
##    hist 
##  1 ▃▇▂▃▂
##  2 ▂▂▂▆▇
##  3 ▂▁▆▃▇
##  4 ▂▁▆▃▇
##  5 ▇▁▇▂▇
##  6 ▇▇▂▂▅
##  7 ▆▂▇▁▃
##  8 ▂▁▇▁▁
##  9 ▂▇▃▂▂
## 10 ▃▁▆▂▇

xgboost

obj <- learningmachine::Regressor$new(method = "xgboost")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds=10, verbose=FALSE)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.03 s
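
nrounds and verbose are passed through to xgboost; assuming other xgboost parameters are forwarded the same way (an assumption, not verified against the package documentation), one could write:

# hypothetical pass-through of further xgboost parameters
obj$fit(X_train, y_train, nrounds = 50L, verbose = FALSE,
        params = list(max_depth = 3L, eta = 0.1))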
print(obj$predict(X_test, level = 95))
## $preds
##  [1] 18.13500 18.13500 17.13105 17.13105 14.64118 14.64118 14.03685 21.29947
##  [9] 15.33300 14.03685
## 
## $lower
##  [1] 12.135002 12.135002 11.131052 11.131052  8.641179  8.641179  8.036854
##  [8] 15.299475  9.333004  8.036854
## 
## $upper
##  [1] 24.13500 24.13500 23.13105 23.13105 20.64118 20.64118 20.03685 27.29947
##  [9] 21.33300 20.03685
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] 0.2881145
## 
## $R_squared_adj
## [1] 7.406969
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.6369  0.3926  2.2088  1.5079  2.8650  5.1631 
## 
## $Coverage_rate
## [1] 100
## 
## $ttests
##        estimate       lower      upper   p-value signif
## cyl   0.0000000         NaN        NaN       NaN       
## disp -0.1859971  -0.6067516  0.2347575 0.3434364       
## hp    0.0000000         NaN        NaN       NaN       
## drat 28.9866074 -18.4823056 76.4555203 0.2004909       
## wt    0.0000000         NaN        NaN       NaN       
## qsec -1.7295559  -5.6420830  2.1829713 0.3434364       
## vs    0.0000000         NaN        NaN       NaN       
## am    0.0000000         NaN        NaN       NaN       
## gear  0.0000000         NaN        NaN       NaN       
## carb  0.0000000         NaN        NaN       NaN       
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable   mean     sd     p0 p25 p50 p75 p100 hist 
##  1 cyl            0      0       0      0   0   0   0  ▁▁▇▁▁
##  2 disp          -0.186  0.588  -1.86   0   0   0   0  ▁▁▁▁▇
##  3 hp             0      0       0      0   0   0   0  ▁▁▇▁▁
##  4 drat          29.0   66.4     0      0   0   0 200. ▇▁▁▁▁
##  5 wt             0      0       0      0   0   0   0  ▁▁▇▁▁
##  6 qsec          -1.73   5.47  -17.3    0   0   0   0  ▁▁▁▁▇
##  7 vs             0      0       0      0   0   0   0  ▁▁▇▁▁
##  8 am             0      0       0      0   0   0   0  ▁▁▇▁▁
##  9 gear           0      0       0      0   0   0   0  ▁▁▇▁▁
## 10 carb           0      0       0      0   0   0   0  ▁▁▇▁▁
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds=10, verbose=FALSE,  
        pi_method = "splitconformal")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.009 s
obj$set_level(95)
res <- obj$predict(X = X_test)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds=10, verbose=FALSE,  
        pi_method = "kdesplitconformal")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.009 s
obj$set_level(95)
res <- obj$predict(X = X_test)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds=10, verbose=FALSE,  
        pi_method = "bootjackknifeplus")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.002 s
obj$set_level(95)
res <- obj$predict(X = X_test)

plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
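
The name "bootjackknifeplus" suggests replacing the n leave-one-out refits of jackknife+ with bootstrap resamples. For intuition only (not necessarily the package's exact scheme), a generic bootstrap band looks like:

B <- 100L
boot_preds <- sapply(seq_len(B), function(b) {
  # refit on a bootstrap resample of the training set
  idx <- sample.int(nrow(X_train), replace = TRUE)
  df_b <- data.frame(X_train[idx, , drop = FALSE], y = y_train[idx],
                     row.names = NULL)
  predict(lm(y ~ ., data = df_b), newdata = data.frame(X_test))
})
# pointwise 95% band from the bootstrap predictions
lower <- apply(boot_preds, 1, quantile, probs = 0.025)
upper <- apply(boot_preds, 1, quantile, probs = 0.975)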