set.seed(123)
(index_train <- base::sample.int(n = nrow(X),
                                 size = floor(0.8*nrow(X)),
                                 replace = FALSE))
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
dim(X_train)
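As a quick sanity check, the train/test proportions of the split can be verified (an illustrative check on the objects created above):

# proportion of observations in each split (should be roughly 80% / 20%)
c(train = length(index_train), test = nrow(X) - length(index_train)) / nrow(X)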
1 - ranger regression
obj <- learningmachine::Regressor$new(method = "ranger",
                                      nb_hidden = 5L,
                                      pi_method = "splitconformal")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.011 s
# out-of-sample RMSE on the test set
print(sqrt(mean((obj$predict(X_test)$preds - y_test)^2)))
## [1] 2.214384
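For context, this RMSE can be compared with a naive baseline that always predicts the training mean (a minimal illustrative check):

# RMSE of a constant, training-mean predictor
sqrt(mean((mean(y_train) - y_test)^2))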
obj$set_level(95)
res <- obj$predict(X = X_test)
# observed training series followed by point predictions on the test set
plot(c(y_train, res$preds), type = 'l',
     main = "",
     ylab = "",
     ylim = c(min(c(res$upper, res$lower, y)),
              max(c(res$upper, res$lower, y))))
lines(c(y_train, res$upper), col = "gray60")  # upper bound of the 95% interval
lines(c(y_train, res$lower), col = "gray60")  # lower bound of the 95% interval
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed test values
# empirical coverage of the 95% prediction interval on the test set
mean((y_test >= as.numeric(res$lower)) * (y_test <= as.numeric(res$upper)))
## [1] 1
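Coverage alone does not tell the whole story; a complementary diagnostic is the average width of the prediction intervals:

# average width of the 95% prediction intervals on the test set
mean(as.numeric(res$upper) - as.numeric(res$lower))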
2 - Using Classifier object
set.seed(43)
X <- as.matrix(iris[, 1:4])
# y <- factor(as.numeric(iris$Species))
y <- iris$Species
index_train <- base::sample.int(n = nrow(X),
                                size = floor(0.8*nrow(X)),
                                replace = FALSE)
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
dim(X_train)
## [1] 120 4
dim(X_test)
## [1] 30 4
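Since iris is a balanced three-class problem, it is also worth checking that the random split keeps the classes reasonably balanced:

# class frequencies in the training and test sets
table(y_train)
table(y_test)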
obj <- learningmachine::Classifier$new(method = "ranger",
                                       nb_hidden = 5L,
                                       pi_method = "kdesplitconformal",
                                       type_prediction_set = "score")
obj$get_type()
## [1] "classification"
obj$get_name()
## [1] "Classifier"
obj$set_B(10)      # number of simulations of the predictive distribution
obj$set_level(95)  # 95% prediction sets
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.071 s
probs <- obj$predict_proba(X_test)
# simulated probabilities of class 'setosa' for the first 3 test observations
df <- reshape2::melt(probs$sims$setosa[1:3, ])
df$Var2 <- NULL  # drop the simulation index
colnames(df) <- c("individual", "prob_setosa")
df$individual <- as.factor(df$individual)
library(ggplot2)  # needed for aes(), geom_boxplot(), geom_density()
ggplot(df, aes(x = individual, y = prob_setosa)) + geom_boxplot() + coord_flip()
ggplot(df, aes(x = prob_setosa, fill = individual)) + geom_density(alpha = .3)
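Point probabilities can also be recovered by averaging over the B simulations. The sketch below assumes that probs$sims is a list containing one n_test x B matrix per class, as suggested by the call above, and uses it for a quick test-set accuracy check:

# average the B simulated probabilities per class, then pick the most likely class
point_probs <- sapply(probs$sims, rowMeans)
pred_class <- colnames(point_probs)[max.col(point_probs)]
mean(pred_class == as.character(y_test))  # test-set accuracy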
obj$summary(X_test, y = y_test,
            class_name = "setosa",
            show_progress = FALSE)
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## Sepal.Length -0.12967127 -0.21325092 -0.04609162 0.003554026 **
## Sepal.Width 0.26340453 0.09394408 0.43286497 0.003500682 **
## Petal.Length -0.09228918 -0.16702011 -0.01755825 0.017263911 *
## Petal.Width 0.02116192 -0.13388895 0.17621279 0.782118080
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 30
## Number of columns 4
## _______________________
## Column type frequency:
## numeric 4
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100 hist
## 1 Sepal.Length -0.130 0.224 -1.08 -0.153 -0.0923 -0.00407 0.0936 ▁▁▁▂▇
## 2 Sepal.Width 0.263 0.454 -0.228 0.0179 0.159 0.247 1.83 ▇▃▁▁▁
## 3 Petal.Length -0.0923 0.200 -0.876 -0.159 -0.00498 0.00438 0.114 ▁▁▁▂▇
## 4 Petal.Width 0.0212 0.415 -0.815 -0.168 0 0.0254 1.59 ▁▇▁▁▁
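The same summary can be obtained for the other classes by changing class_name; for example, for the probability of class virginica:

obj$summary(X_test, y = y_test,
            class_name = "virginica",
            show_progress = FALSE)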