Title: | Miscellaneous Statistical/Machine Learning stuff |
---|---|
Description: | Miscellaneous Statistical/Machine Learning stuff. |
Authors: | T. Moudiki |
Maintainer: | T. Moudiki <[email protected]> |
License: | BSD_3_clause Clear + file LICENSE |
Version: | 0.22.2 |
Built: | 2024-10-13 03:21:56 UTC |
Source: | https://github.com/Techtonique/mlsauce_r |
AdaOpt classifier
AdaOpt( n_iterations = 50L, learning_rate = 0.3, reg_lambda = 0.1, reg_alpha = 0.5, eta = 0.01, gamma = 0.01, k = 3L, tolerance = 0, n_clusters = 0, batch_size = 100L, row_sample = 1, type_dist = "euclidean-f", cache = TRUE, n_clusters_input = 0, clustering_method = "kmeans", cluster_scaling = "standard", seed = 123L )
n_iterations | number of iterations of the optimizer at training time |
learning_rate | controls the speed of the optimizer at training time |
reg_lambda | L2 regularization parameter for successive errors in the optimizer (at training time) |
reg_alpha | L1 regularization parameter for successive errors in the optimizer (at training time) |
eta | controls the slope in gradient descent (at training time) |
gamma | controls the step size in gradient descent (at training time) |
k | number of nearest neighbors selected at test time for classification |
tolerance | controls early stopping in gradient descent (at training time) |
n_clusters | number of clusters, if MiniBatch k-means is used at test time (for faster prediction) |
batch_size | size of the batch, if MiniBatch k-means is used at test time (for faster prediction) |
row_sample | percentage of rows chosen from the training set (by stratified subsampling, for faster prediction) |
type_dist | distance used for finding the nearest neighbors; currently "euclidean-f" (the default shown in the usage) |
cache | whether the nearest neighbors are cached, for faster retrieval in subsequent calls |
n_clusters_input | number of clusters a priori on input data |
clustering_method | either "kmeans" or "gmm" (Gaussian mixture) |
cluster_scaling | either 'standard', 'minmax' or 'robust' |
seed | reproducibility seed for initial weak learner and clustering |
An object of class AdaOpt
## Not run: 
library(datasets)
X <- as.matrix(iris[, 1:4])
y <- as.integer(iris[, 5]) - 1L
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(iris[train_index, 1:4])
y_train <- as.integer(iris[train_index, 5]) - 1L
X_test <- as.matrix(iris[test_index, 1:4])
y_test <- as.integer(iris[test_index, 5]) - 1L
obj <- mlsauce::AdaOpt()
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
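The example reports the held-out accuracy via score(). Assuming the fitted object also exposes a scikit-learn-style predict() method (an assumption; only get_params(), fit() and score() appear above), class labels for the test rows could be obtained directly, as in this sketch:

# Hypothetical continuation: assumes obj (fitted above) exposes a
# scikit-learn-style predict() method returning class labels 0, 1, 2.
preds <- obj$predict(X_test)
print(table(predicted = preds, observed = y_test))  # confusion table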
Download datasets
download( pkgname = "MASS", dataset = "Boston", source = "https://cran.r-universe.dev/" )
pkgname | a string; R package name |
dataset | a string; dataset name |
source | a string; package location (address) |
A data frame
df <- mlsauce::download(pkgname = "MASS", dataset = "Boston",
                        source = "https://cran.r-universe.dev/")
print(df)
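The returned data frame can feed the regressors documented below. A minimal sketch, assuming the Boston data keeps its usual layout with medv (median home value) as the response column:

df <- mlsauce::download(pkgname = "MASS", dataset = "Boston",
                        source = "https://cran.r-universe.dev/")
X <- as.matrix(df[, setdiff(colnames(df), "medv")])  # medv assumed to be the response
y <- as.double(df$medv)
obj <- mlsauce::LSBoostRegressor()
obj$fit(X, y)
print(obj$score(X, y))  # in-sample score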
GenericBoosting classifier
GenericBoostingClassifier( base_model, n_estimators = 100L, learning_rate = 0.1, n_hidden_features = 5L, reg_lambda = 0.1, row_sample = 1, col_sample = 1, dropout = 0, tolerance = 1e-04, direct_link = 1L, verbose = 1L, seed = 123L, activation = "relu", n_clusters = 0, clustering_method = "kmeans", cluster_scaling = "standard", degree = 0, weights_distr = "uniform" )
base_model: | object, base learner to be boosted (a scikit-learn-style regressor, as in the example below). |
n_estimators: | int, number of boosting iterations. |
learning_rate: | float, controls the learning speed at training time. |
n_hidden_features: | int, number of nodes in successive hidden layers. |
reg_lambda: | float, L2 regularization parameter for successive errors in the optimizer (at training time). |
row_sample: | float, percentage of rows chosen from the training set. |
col_sample: | float, percentage of columns chosen from the training set. |
dropout: | float, percentage of nodes dropped from the training set. |
tolerance: | float, controls early stopping in gradient descent (at training time). |
direct_link: | bool, indicates whether the original features are included (True) in the model's fitting or not (False). |
verbose: | int, whether to display a progress bar (1) or not (0). |
seed: | int, reproducibility seed for nodes_sim == 'uniform', clustering and dropout. |
activation: | str, activation function: currently 'relu', 'relu6', 'sigmoid' or 'tanh'. |
n_clusters: | int, number of clusters for clustering. |
clustering_method: | str, clustering method: currently 'kmeans' or 'gmm' (Gaussian Mixture Model). |
cluster_scaling: | str, scaling method for clustering: currently 'standard', 'minmax' or 'robust'. |
degree: | int, degree of polynomial interaction features. |
weights_distr: | str, distribution of weights for the hidden layer: currently 'uniform' or 'gaussian'. |
An object of class GenericBoostingClassifier
library(datasets)
X <- as.matrix(iris[, 1:4])
y <- as.integer(iris[, 5]) - 1L
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.integer(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.integer(y[test_index])
## Not run: 
regr <- sklearn$linear_model$Ridge()
obj <- mlsauce::GenericBoostingClassifier(regr)
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
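Any scikit-learn-style regressor can, in principle, be supplied as base_model. This illustrative sketch (not one of the package's documented examples) swaps the Ridge base learner for a shallow decision tree, reusing the data split above:

# Reuses X_train, y_train, X_test, y_test from the example above; the sklearn
# handle is documented further below.
regr_tree <- sklearn$tree$DecisionTreeRegressor(max_depth = 2L)
obj_tree <- mlsauce::GenericBoostingClassifier(regr_tree, n_estimators = 50L)
obj_tree$fit(X_train, y_train)
print(obj_tree$score(X_test, y_test))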
GenericBoosting Regressor
GenericBoostingRegressor( base_model, n_estimators = 100L, learning_rate = 0.1, n_hidden_features = 5L, reg_lambda = 0.1, row_sample = 1, col_sample = 1, dropout = 0, tolerance = 1e-04, direct_link = 1L, verbose = 1L, seed = 123L, activation = "relu", n_clusters = 0, clustering_method = "kmeans", cluster_scaling = "standard", degree = 0, weights_distr = "uniform" )
base_model: | object, base learner to be boosted (a scikit-learn-style regressor, as in the example below). |
n_estimators: | int, number of boosting iterations. |
learning_rate: | float, controls the learning speed at training time. |
n_hidden_features: | int, number of nodes in successive hidden layers. |
reg_lambda: | float, L2 regularization parameter for successive errors in the optimizer (at training time). |
row_sample: | float, percentage of rows chosen from the training set. |
col_sample: | float, percentage of columns chosen from the training set. |
dropout: | float, percentage of nodes dropped from the training set. |
tolerance: | float, controls early stopping in gradient descent (at training time). |
direct_link: | bool, indicates whether the original features are included (True) in the model's fitting or not (False). |
verbose: | int, whether to display a progress bar (1) or not (0). |
seed: | int, reproducibility seed for nodes_sim == 'uniform', clustering and dropout. |
activation: | str, activation function: currently 'relu', 'relu6', 'sigmoid' or 'tanh'. |
n_clusters: | int, number of clusters for clustering. |
clustering_method: | str, clustering method: currently 'kmeans' or 'gmm' (Gaussian Mixture Model). |
cluster_scaling: | str, scaling method for clustering: currently 'standard', 'minmax' or 'robust'. |
degree: | int, degree of polynomial interaction features. |
weights_distr: | str, distribution of weights for the hidden layer: currently 'uniform' or 'gaussian'. |
An object of class GenericBoostingRegressor
## Not run: 
library(datasets)
X <- as.matrix(datasets::mtcars[, -1])
y <- as.integer(datasets::mtcars[, 1])
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.double(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.double(y[test_index])
regr <- sklearn$linear_model$Ridge()
obj <- mlsauce::GenericBoostingRegressor(regr)
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
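The example reports the held-out score. Assuming the fitted object also exposes a scikit-learn-style predict() method (an assumption; only get_params(), fit() and score() appear above), pointwise predictions and an RMSE could be computed as in this sketch:

# Hypothetical continuation: assumes obj (fitted above) exposes a
# scikit-learn-style predict() method.
preds <- obj$predict(X_test)
print(sqrt(mean((y_test - preds)^2)))  # root mean squared error on the test set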
Lasso regressor
LassoRegressor(reg_lambda = 0.1, max_iter = 10L, tol = 0.001)
reg_lambda | L1 regularization parameter |
max_iter | number of iterations of the lasso shooting algorithm |
tol | tolerance for convergence of the lasso shooting algorithm |
An object of class Lasso
## Not run: 
library(datasets)
X <- as.matrix(datasets::mtcars[, -1])
y <- as.integer(datasets::mtcars[, 1])
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.double(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.double(y[test_index])
obj <- mlsauce::LassoRegressor()
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
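Since reg_lambda is the only regularization knob, a quick way to gauge its effect is to refit over a small grid and compare held-out scores. A minimal sketch, reusing the split and the documented methods from the example above:

# Reuses X_train, y_train, X_test, y_test from the example above.
for (lam in c(0.01, 0.1, 1)) {
  obj <- mlsauce::LassoRegressor(reg_lambda = lam)
  obj$fit(X_train, y_train)
  cat("reg_lambda =", lam, "score =", obj$score(X_test, y_test), "\n")
}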
LSBoost classifier
LSBoostClassifier( n_estimators = 100L, learning_rate = 0.1, n_hidden_features = 5L, reg_lambda = 0.1, row_sample = 1, col_sample = 1, dropout = 0, tolerance = 1e-04, direct_link = 1L, verbose = 1L, seed = 123L, solver = c("ridge", "lasso"), activation = "relu", n_clusters = 0, clustering_method = "kmeans", cluster_scaling = "standard", degree = 0, weights_distr = "uniform" )
n_estimators: | int, number of boosting iterations. |
learning_rate: | float, controls the learning speed at training time. |
n_hidden_features: | int, number of nodes in successive hidden layers. |
reg_lambda: | float, L2 regularization parameter for successive errors in the optimizer (at training time). |
row_sample: | float, percentage of rows chosen from the training set. |
col_sample: | float, percentage of columns chosen from the training set. |
dropout: | float, percentage of nodes dropped from the training set. |
tolerance: | float, controls early stopping in gradient descent (at training time). |
direct_link: | bool, indicates whether the original features are included (True) in the model's fitting or not (False). |
verbose: | int, whether to display a progress bar (1) or not (0). |
seed: | int, reproducibility seed for nodes_sim == 'uniform', clustering and dropout. |
solver: | str, type of 'weak' learner; currently 'ridge' or 'lasso'. |
activation: | str, activation function: currently 'relu', 'relu6', 'sigmoid' or 'tanh'. |
n_clusters: | int, number of clusters for clustering. |
clustering_method: | str, clustering method: currently 'kmeans' or 'gmm' (Gaussian Mixture Model). |
cluster_scaling: | str, scaling method for clustering: currently 'standard', 'minmax' or 'robust'. |
degree: | int, degree of polynomial interaction features. |
weights_distr: | str, distribution of weights for the hidden layer: currently 'uniform' or 'gaussian'. |
An object of class LSBoostClassifier
library(datasets)
X <- as.matrix(iris[, 1:4])
y <- as.integer(iris[, 5]) - 1L
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.integer(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.integer(y[test_index])
## Not run: 
obj <- mlsauce::LSBoostClassifier()
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
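The solver argument switches the weak learner between ridge and lasso fits. As a quick illustrative variation, reusing the data split from the example above, both can be fitted and compared:

# Reuses X_train, y_train, X_test, y_test from the example above.
for (slv in c("ridge", "lasso")) {
  obj <- mlsauce::LSBoostClassifier(solver = slv, n_estimators = 50L)
  obj$fit(X_train, y_train)
  cat("solver =", slv, "score =", obj$score(X_test, y_test), "\n")
}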
LSBoost Regressor
LSBoostRegressor( n_estimators = 100L, learning_rate = 0.1, n_hidden_features = 5L, reg_lambda = 0.1, row_sample = 1, col_sample = 1, dropout = 0, tolerance = 1e-04, direct_link = 1L, verbose = 1L, seed = 123L, solver = c("ridge", "lasso"), activation = "relu", n_clusters = 0, clustering_method = "kmeans", cluster_scaling = "standard", degree = 0, weights_distr = "uniform" )
n_estimators: | int, number of boosting iterations. |
learning_rate: | float, controls the learning speed at training time. |
n_hidden_features: | int, number of nodes in successive hidden layers. |
reg_lambda: | float, L2 regularization parameter for successive errors in the optimizer (at training time). |
row_sample: | float, percentage of rows chosen from the training set. |
col_sample: | float, percentage of columns chosen from the training set. |
dropout: | float, percentage of nodes dropped from the training set. |
tolerance: | float, controls early stopping in gradient descent (at training time). |
direct_link: | bool, indicates whether the original features are included (True) in the model's fitting or not (False). |
verbose: | int, whether to display a progress bar (1) or not (0). |
seed: | int, reproducibility seed for nodes_sim == 'uniform', clustering and dropout. |
solver: | str, type of 'weak' learner; currently 'ridge' or 'lasso'. |
activation: | str, activation function: currently 'relu', 'relu6', 'sigmoid' or 'tanh'. |
n_clusters: | int, number of clusters for clustering. |
clustering_method: | str, clustering method: currently 'kmeans' or 'gmm' (Gaussian Mixture Model). |
cluster_scaling: | str, scaling method for clustering: currently 'standard', 'minmax' or 'robust'. |
degree: | int, degree of polynomial interaction features. |
weights_distr: | str, distribution of weights for the hidden layer: currently 'uniform' or 'gaussian'. |
An object of class LSBoostRegressor
## Not run: 
library(datasets)
X <- as.matrix(datasets::mtcars[, -1])
y <- as.integer(datasets::mtcars[, 1])
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.double(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.double(y[test_index])
obj <- mlsauce::LSBoostRegressor()
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
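n_estimators and learning_rate jointly control how aggressively successive residuals are fitted. A small illustrative sweep, reusing the split and only the documented methods from the example above:

# Reuses X_train, y_train, X_test, y_test from the example above.
for (lr in c(0.01, 0.1, 0.3)) {
  obj <- mlsauce::LSBoostRegressor(n_estimators = 200L, learning_rate = lr)
  obj$fit(X_train, y_train)
  cat("learning_rate =", lr, "score =", obj$score(X_test, y_test), "\n")
}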
This is the Python mlsauce module imported using reticulate.
ms
An object of class python.builtin.module (inherits from python.builtin.object) of length 0.
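Because ms is a reticulate handle on the underlying Python package, its contents can be inspected from R. A minimal sketch, assuming the ms object is exported by the package:

library(reticulate)
# List the attributes (classes, functions) of the underlying Python mlsauce module.
print(py_list_attributes(mlsauce::ms))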
This is the Python numpy module imported using reticulate.
numpy
An object of class python.builtin.module (inherits from python.builtin.object) of length 0.
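The handle exposes NumPy functions to R in the usual reticulate way. A minimal sketch, assuming the numpy object is exported by the package:

np <- mlsauce::numpy            # assumed exported, as documented above
x <- np$linspace(0, 1, 5L)      # evenly spaced grid, converted to an R vector
print(np$mean(x))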
This is the Python pandas module imported using reticulate.
pandas
An object of class python.builtin.module (inherits from python.builtin.object) of length 0.
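Likewise, pandas objects can be created through this handle. A minimal sketch, assuming the pandas object is exported by the package:

pd <- mlsauce::pandas                                           # assumed exported
df <- pd$DataFrame(list(x = c(1, 2, 3), y = c("a", "b", "c")))  # named list becomes a dict
print(df$head())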
Ridge regressor
RidgeRegressor(reg_lambda = 0.1)
reg_lambda | L2 regularization parameter |
An object of class Ridge
## Not run: 
library(datasets)
X <- as.matrix(datasets::mtcars[, -1])
y <- as.integer(datasets::mtcars[, 1])
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.double(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.double(y[test_index])
obj <- mlsauce::RidgeRegressor()
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
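As with the lasso, the only tuning knob is reg_lambda. A small sketch comparing a few values, reusing the split and the documented fit()/score() methods from the example above:

# Reuses X_train, y_train, X_test, y_test from the example above.
for (lam in c(0.01, 0.1, 1)) {
  obj <- mlsauce::RidgeRegressor(reg_lambda = lam)
  obj$fit(X_train, y_train)
  cat("reg_lambda =", lam, "score =", obj$score(X_test, y_test), "\n")
}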
This is the Python sklearn module imported using reticulate.
sklearn
An object of class python.builtin.module (inherits from python.builtin.object) of length 0.
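This is the handle used by the boosting examples above to construct a scikit-learn base learner, for instance:

regr <- sklearn$linear_model$Ridge()   # as in the GenericBoosting examples above
obj <- mlsauce::GenericBoostingRegressor(regr)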
Stump classifier
StumpClassifier(bins = "auto")
bins: | int or "auto", number of histogram bins. |
An object of class StumpClassifier
## Not run: 
library(datasets)
X <- as.matrix(iris[, 1:4])
y <- as.integer(iris[, 5]) - 1L
n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(iris[train_index, 1:4])
y_train <- as.integer(iris[train_index, 5]) - 1L
X_test <- as.matrix(iris[test_index, 1:4])
y_test <- as.integer(iris[test_index, 5]) - 1L
obj <- mlsauce::StumpClassifier()
print(obj$get_params())
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))
## End(Not run)
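The default bins = "auto" lets the classifier choose the number of histogram bins; an explicit integer can also be passed, as in this sketch reusing the split from the example above:

# Reuses X_train, y_train, X_test, y_test from the example above.
obj <- mlsauce::StumpClassifier(bins = 10L)   # illustrative value, not a recommendation
obj$fit(X_train, y_train)
print(obj$score(X_test, y_test))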