---
title: "quickstart"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{quickstart}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---
```{r}
library(glmnet)
library(xgboost)
library(Matrix)
library(randomForest)
library(forecast)
```
Examples for regression, time series, and classification.

## Regression

### Create a dataset
```{r}
# Simulated regression data: n observations of p standard-normal covariates
# and an independent standard-normal response. The seed is fixed so that the
# draws are reproducible.
set.seed(123)
n <- 100
p <- 5
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
y <- rnorm(n)
```
### Cross-validation for a few models
__Linear model__
```{r}
# Arguments used throughout this vignette:
#   x       : the explanatory variables
#   y       : the response
#   k       : the number of folds in k-fold cross-validation
#   repeats : the number of repeats of the k-fold cross-validation procedure

# linear model example -----
crossvalidation::crossval_ml(
  x = X,
  y = y,
  k = 5,
  repeats = 3,
  show_progress = FALSE
)

# linear model example, with validation set (p = 0.8)
crossvalidation::crossval_ml(
  x = X,
  y = y,
  k = 5,
  repeats = 3,
  p = 0.8,
  show_progress = FALSE
)
```
__glmnet__
```{r}
# glmnet example -----
# The fitting function, the prediction function and the packages they need
# are handed to crossval_ml; `fit_params` holds the arguments forwarded to
# the fitting function.

# fit glmnet, with alpha = 0.5 (elastic net mix), lambda = 0.1
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = glmnet::glmnet,
  predict_func = predict.glmnet,
  packages = c("glmnet", "Matrix"),
  fit_params = list(alpha = 0.5, lambda = 0.1)
)

# fit glmnet, with alpha = 0 (ridge penalty), lambda = 0.01
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = glmnet::glmnet,
  predict_func = predict.glmnet,
  packages = c("glmnet", "Matrix"),
  fit_params = list(alpha = 0, lambda = 0.01)
)

# fit glmnet, with alpha = 0 (ridge penalty), lambda = 0.01,
# with validation set (p = 0.8)
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 2, p = 0.8,
  show_progress = FALSE,
  fit_func = glmnet::glmnet,
  predict_func = predict.glmnet,
  packages = c("glmnet", "Matrix"),
  fit_params = list(alpha = 0, lambda = 0.01)
)
```
__Random Forest__
```{r}
# randomForest example -----
# `mtry` is forwarded to randomForest::randomForest through `fit_params`.

# fit randomForest with mtry = 2
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  fit_func = randomForest::randomForest,
  fit_params = list(mtry = 2),
  predict_func = predict,
  packages = "randomForest",
  show_progress = FALSE
)

# fit randomForest with mtry = 4
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  fit_func = randomForest::randomForest,
  fit_params = list(mtry = 4),
  predict_func = predict,
  packages = "randomForest",
  show_progress = FALSE
)

# fit randomForest with mtry = 4, with validation set (p = 0.8)
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 2, p = 0.8,
  fit_func = randomForest::randomForest,
  fit_params = list(mtry = 4),
  predict_func = predict,
  packages = "randomForest",
  show_progress = FALSE
)
```
__xgboost__
```{r}
# xgboost example -----

# Adapter around xgboost::xgboost(), which names its covariates and response
# `data` and `label`. The wrapper exposes them as `x` and `y` instead
# (presumably the names crossval_ml uses when calling `fit_func` -- confirm
# in ?crossval_ml) and forwards every other argument unchanged through `...`.
f_xgboost <- function(x, y, ...) {
  xgboost::xgboost(data = x, label = y, ...)
}
# `nrounds` and `verbose` are forwarded to the fitting function `f_xgboost`
# through `fit_params`.

# fit xgboost with nrounds = 5
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  fit_func = f_xgboost,
  fit_params = list(nrounds = 5, verbose = FALSE),
  predict_func = predict,
  packages = "xgboost",
  show_progress = FALSE
)

# fit xgboost with nrounds = 10
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  fit_func = f_xgboost,
  fit_params = list(nrounds = 10, verbose = FALSE),
  predict_func = predict,
  packages = "xgboost",
  show_progress = FALSE
)

# fit xgboost with nrounds = 10, with validation set (p = 0.8)
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 2, p = 0.8,
  fit_func = f_xgboost,
  fit_params = list(nrounds = 10, verbose = FALSE),
  predict_func = predict,
  packages = "xgboost",
  show_progress = FALSE
)
```
## Time series
__Theta method (time series)__
```{r}
# Time series cross-validation of the Theta method (`thetaf`, available
# because the forecast package is attached at the top of this vignette) on
# the AirPassengers series.
res <- crossvalidation::crossval_ts(
  y = AirPassengers,
  initial_window = 10,
  fcast_func = thetaf,
  show_progress = FALSE
)

# Column-wise averages of the cross-validation results
print(colMeans(res))
```
## Classification
```{r}
# Input data, taken from the built-in iris dataset

# Response: the species, recoded as numeric codes and turned into a factor
y <- factor(as.numeric(iris[["Species"]]))

# Explanatory variables: the four sepal/petal measurements, as a matrix
X <- as.matrix(
  iris[c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")]
)
```
```{r}
# Random forest classifier: 5-fold cross-validation, repeated 3 times.
# No `eval_metric` is given; when y is a factor the default error metric
# is the accuracy.
crossvalidation::crossval_ml(
  x = X, y = y,
  k = 5, repeats = 3,
  fit_func = randomForest::randomForest,
  fit_params = list(mtry = 2),
  predict_func = predict,
  packages = "randomForest",
  show_progress = FALSE
)
```
```{r}
# crossvalidation::crossval_ml accepts custom error metrics.
# This one is the error rate: one minus the proportion of predictions
# that are equal to the observed values.
#
# preds  : predicted values
# actual : observed values; must have the same length as `preds`
#
# Returns a length-one numeric vector named "error rate".
# Stops with an error when the two lengths differ.
eval_metric <- function(preds, actual) {
  stopifnot(length(preds) == length(actual))
  c("error rate" = 1 - mean(preds == actual))
}
# Same random forest cross-validation, this time passing the function above
# as the `eval_metric` argument, so that the error rate is measured instead
# of the (default) accuracy.
crossvalidation::crossval_ml(
  x = X, y = y,
  k = 5, repeats = 3,
  fit_func = randomForest::randomForest,
  fit_params = list(mtry = 2),
  predict_func = predict,
  packages = "randomForest",
  eval_metric = eval_metric,
  show_progress = FALSE
)
```