---
title: "quickstart"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{quickstart}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r}
library(glmnet)
library(xgboost)
library(Matrix)
library(randomForest)
library(forecast)
```

Examples for regression, time series, and classification.

## Regression

### Create a dataset

```{r}
# dataset: n observations of p standard-normal covariates, and a
# standard-normal response drawn independently of the covariates
set.seed(123)
n <- 100
p <- 5
X <- matrix(rnorm(n * p), n, p)
y <- rnorm(n)
```

### Cross-validation for a few models

__Linear model__

```{r}
# 'X' contains the explanatory variables
# 'y' is the response
# 'k' is the number of folds in k-fold cross-validation
# 'repeats' is the number of repeats of the k-fold cross-validation procedure

# linear model example -----
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE
)

# linear model example, with validation set
# NOTE: the named argument `p = 0.8` is unrelated to the variable `p`
# (number of covariates) defined above. It is presumably the proportion of
# observations used for cross-validation, the remainder being held out as
# the validation set -- confirm in ?crossvalidation::crossval_ml
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3, p = 0.8,
  show_progress = FALSE
)
```

__glmnet__

```{r}
# glmnet example -----

# fit glmnet, with alpha = 0.5, lambda = 0.1
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = glmnet::glmnet,
  predict_func = predict.glmnet,
  packages = c("glmnet", "Matrix"),
  fit_params = list(alpha = 0.5, lambda = 0.1)
)

# fit glmnet, with alpha = 0, lambda = 0.01
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = glmnet::glmnet,
  predict_func = predict.glmnet,
  packages = c("glmnet", "Matrix"),
  fit_params = list(alpha = 0, lambda = 0.01)
)

# fit glmnet, with alpha = 0, lambda = 0.01, with validation set
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 2, p = 0.8,
  show_progress = FALSE,
  fit_func = glmnet::glmnet,
  predict_func = predict.glmnet,
  packages = c("glmnet", "Matrix"),
  fit_params = list(alpha = 0, lambda = 0.01)
)
```

__Random Forest__

```{r}
# randomForest example -----

# fit randomForest with mtry = 2
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = randomForest::randomForest,
  predict_func = predict,
  packages = "randomForest",
  fit_params = list(mtry = 2)
)

# fit randomForest with mtry = 4
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = randomForest::randomForest,
  predict_func = predict,
  packages = "randomForest",
  fit_params = list(mtry = 4)
)

# fit randomForest with mtry = 4, with validation set
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 2, p = 0.8,
  show_progress = FALSE,
  fit_func = randomForest::randomForest,
  predict_func = predict,
  packages = "randomForest",
  fit_params = list(mtry = 4)
)
```

__xgboost__

```{r}
# xgboost example -----

# xgboost::xgboost() is called here with the covariates as 'data' and the
# response as 'label'. The wrapper below exposes them under the `x` / `y`
# argument names used in the other examples, and forwards everything else
# (e.g. nrounds, verbose) through `...`.
f_xgboost <- function(x, y, ...) xgboost::xgboost(data = x, label = y, ...)

# fit xgboost with nrounds = 5
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = f_xgboost,
  predict_func = predict,
  packages = "xgboost",
  fit_params = list(nrounds = 5, verbose = FALSE)
)

# fit xgboost with nrounds = 10
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  show_progress = FALSE,
  fit_func = f_xgboost,
  predict_func = predict,
  packages = "xgboost",
  fit_params = list(nrounds = 10, verbose = FALSE)
)

# fit xgboost with nrounds = 10, with validation set
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 2, p = 0.8,
  show_progress = FALSE,
  fit_func = f_xgboost,
  predict_func = predict,
  packages = "xgboost",
  fit_params = list(nrounds = 10, verbose = FALSE)
)
```

## Time series

__Theta method (time series)__

```{r}
# `thetaf` is found on the search path because `forecast` is attached in the
# first chunk of this vignette
res <- crossvalidation::crossval_ts(
  y = AirPassengers,
  initial_window = 10,
  fcast_func = thetaf,
  show_progress = FALSE
)

# column means of the cross-validation results
print(colMeans(res))
```

## Classification

```{r}
# Input data
# NOTE: `X` and `y` from the regression section are overwritten here

# Transforming model response into a factor
y <- as.factor(as.numeric(iris$Species))

# Explanatory variables
X <- as.matrix(iris[, c(
  "Sepal.Length", "Sepal.Width",
  "Petal.Length", "Petal.Width"
)])
```

```{r}
# 5-fold cross-validation repeated 3 times
# default error metric, when y is a factor: accuracy
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  fit_func = randomForest::randomForest,
  predict_func = predict,
  fit_params = list(mtry = 2),
  packages = "randomForest",
  show_progress = FALSE
)
```

```{r}
# We can specify custom error metrics for crossvalidation::crossval_ml
# here, the error rate: the share of predictions that differ from the
# observed values. Returns a length-one numeric named "error rate" and
# stops if `preds` and `actual` have different lengths.
eval_metric <- function(preds, actual) {
  stopifnot(length(preds) == length(actual))
  res <- 1 - mean(preds == actual)
  names(res) <- "error rate"
  res
}

# specify `eval_metric` argument for measuring the error rate
# instead of the (default) accuracy
crossvalidation::crossval_ml(
  x = X, y = y, k = 5, repeats = 3,
  fit_func = randomForest::randomForest,
  predict_func = predict,
  fit_params = list(mtry = 2),
  packages = "randomForest",
  eval_metric = eval_metric,
  show_progress = FALSE
)
```