Quasi-Randomized Neural Networks

# Load modeling libraries: learningmachine provides the Regressor/Classifier
# R6 objects used below; caret, mlbench and palmerpenguins supply datasets
# and utilities for the examples.
library(learningmachine)
library(caret)
library(mlbench)
library(palmerpenguins)
# Regression data: predict mpg from the remaining mtcars columns.
X <- as.matrix(mtcars[,-1])
y <- mtcars$mpg
# Reproducible 80/20 train/test split; the outer parentheses print the
# sampled indices (shown in the output line below).
set.seed(123)
(index_train <- base::sample.int(n = nrow(X),
                                 size = floor(0.8*nrow(X)),
                                 replace = FALSE))
##  [1] 31 15 19 14  3 10 18 22 11  5 20 29 23 30  9 28  8 27  7 32 26 17  4  1 24
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
dim(X_train)
## [1] 25 10
dim(X_test)
## [1]  7 10

1 - Using Regressor object (ranger regression)

# Quasi-randomized network regressor: a ranger (random forest) model on the
# original features augmented with 5 hidden nodes; prediction intervals are
# obtained by split conformal prediction.
obj <- learningmachine::Regressor$new(method = "ranger", nb_hidden=5L,  
        pi_method = "splitconformal")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Time the fit (elapsed wall-clock seconds, third element of proc.time()).
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.012 s
# Out-of-sample RMSE on the held-out test set.
print(sqrt(mean((obj$predict(X_test)$preds - y_test)^2)))
## [1] 2.214384
# Refit the same model to show timing of a second run.
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.011 s
# Request 95% prediction intervals for subsequent predict() calls.
obj$set_level(95)

res <- obj$predict(X = X_test)

# Plot the training series followed by test-set predictions: conformal
# bounds in gray, point predictions in red, observed test values in blue.
plot(c(y_train, res$preds), type='l',
     main="",
     ylab="",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col="gray60")
lines(c(y_train, res$lower), col="gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage of the 95% interval on the test set (here: 100%).
mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1

2 - Using Classifier object

# Classification data: iris features as a matrix, species labels as the
# (factor) response; reproducible 80/20 train/test split.
set.seed(43)
X <- as.matrix(iris[, 1:4])
# y <- factor(as.numeric(iris$Species))
y <- iris$Species

index_train <- base::sample.int(n = nrow(X),
                                 size = floor(0.8*nrow(X)),
                                 replace = FALSE)
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
dim(X_train)
## [1] 120   4
dim(X_test)
## [1] 30  4
# Quasi-randomized network classifier: ranger with 5 hidden nodes;
# KDE-based split conformal prediction sets built from class scores.
obj <- learningmachine::Classifier$new(method = "ranger", nb_hidden=5L, pi_method="kdesplitconformal", type_prediction_set="score")
obj$get_type()
## [1] "classification"
obj$get_name()
## [1] "Classifier"
# 10 simulation replicates; 95% confidence level for the prediction sets.
obj$set_B(10)
obj$set_level(95)

# Time the classifier fit.
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.074 s
# Simulated probabilities of class "setosa" for the first 3 test
# individuals: melt the (replicate x individual) simulation matrix to long
# form, one row per (individual, simulated probability) pair.
probs <- obj$predict_proba(X_test)
df <- reshape2::melt(probs$sims$setosa[1:3, ])
df$Var2 <- NULL 
colnames(df) <- c("individual", "prob_setosa")
df$individual <- as.factor(df$individual)
# Fully qualify every ggplot2 function: this script never calls
# library(ggplot2), so bare aes()/geom_*()/coord_flip() only resolve if
# another package happens to attach ggplot2 — qualify to make the code
# self-contained.
ggplot2::ggplot(df, ggplot2::aes(x = individual, y = prob_setosa)) +
  ggplot2::geom_boxplot() +
  ggplot2::coord_flip()

# Density of the simulated setosa probabilities, one curve per individual.
ggplot2::ggplot(df, ggplot2::aes(x = prob_setosa, fill = individual)) +
  ggplot2::geom_density(alpha = .3)

# Summary for class "setosa" over the test set: coverage rate of the
# conformal prediction sets, per-feature tests on the estimated effects
# (citests), and a skimr-style numeric summary of the effects themselves.
obj$summary(X_test, y=y_test, 
            class_name = "setosa",
            show_progress=FALSE)
## $Coverage_rate
## [1] 100
## 
## $citests
##                 estimate       lower       upper     p-value signif
## Sepal.Length -0.12967127 -0.21325092 -0.04609162 0.003554026     **
## Sepal.Width   0.26340453  0.09394408  0.43286497 0.003500682     **
## Petal.Length -0.09228918 -0.16702011 -0.01755825 0.017263911      *
## Petal.Width   0.02116192 -0.13388895  0.17621279 0.782118080       
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             30     
## Number of columns          4      
## _______________________           
## Column type frequency:            
##   numeric                  4      
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##   skim_variable    mean    sd     p0     p25      p50      p75   p100 hist 
## 1 Sepal.Length  -0.130  0.224 -1.08  -0.153  -0.0923  -0.00407 0.0936 ▁▁▁▂▇
## 2 Sepal.Width    0.263  0.454 -0.228  0.0179  0.159    0.247   1.83   ▇▃▁▁▁
## 3 Petal.Length  -0.0923 0.200 -0.876 -0.159  -0.00498  0.00438 0.114  ▁▁▁▂▇
## 4 Petal.Width    0.0212 0.415 -0.815 -0.168   0        0.0254  1.59   ▁▇▁▁▁