--- title: "Linear SHAP" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Linear SHAP} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Example usage ```{r, fig.width=7.5} library(misc) # Prepare test data data(mtcars) set.seed(42) train_idx <- sample(nrow(mtcars), 20) X_train <- mtcars[train_idx, c("wt", "hp", "qsec")] y_train <- mtcars[train_idx, "mpg"] X_test <- mtcars[-train_idx, c("wt", "hp", "qsec")] y_test <- mtcars[-train_idx, "mpg"] # Let's manually verify with a simple example simple_X <- X_test[1:2, ] result_debug <- fit_predict_shap(X_train, y_train, simple_X, model_type = "lm", center_response = FALSE) cat("Simple example (no response centering):\n") cat("Coefficients:", round(coef(result_debug$model), 4), "\n") cat("Feature means:", round(result_debug$feature_means, 4), "\n") cat("Expected prediction at means:", round(result_debug$base_value, 4), "\n") cat("Test data:\n") print(simple_X) cat("Predictions:", round(result_debug$predictions, 4), "\n") cat("Base value:", round(result_debug$base_value, 4), "\n") cat("SHAP values:\n") print(round(result_debug$shap_values, 4)) cat("Base + SHAP sum:", round(result_debug$base_value + rowSums(result_debug$shap_values), 4), "\n") cat("Residuals:", round(result_debug$validation$residuals, 4), "\n") # Manual calculation coefs <- coef(result_debug$model) intercept <- coefs["(Intercept)"] slopes <- coefs[names(coefs) != "(Intercept)"] manual_pred <- intercept + as.matrix(simple_X) %*% slopes manual_base <- intercept + sum(slopes * result_debug$feature_means) cat("Manual prediction:", round(as.numeric(manual_pred), 4), "\n") cat("Manual base value:", round(as.numeric(manual_base), 4), "\n") # ============================================================================= # ORIGINAL EXAMPLE USAGE AND TESTING # ============================================================================= cat("=== Testing Different Model Types ===\n\n") # Test 1: Standard linear model cat("1. Linear Model (lm):\n") result_lm <- fit_predict_shap(X_train, y_train, X_test, model_type = "lm", center_response = TRUE) print_shap_summary(result_lm) # Debug: Let's manually check the decomposition for lm cat("\nManual verification for lm:\n") manual_pred <- predict(result_lm$model, newdata = X_test) cat("Model predictions:", round(head(manual_pred + result_lm$response_mean), 4), "\n") cat("Our predictions:", round(head(result_lm$predictions), 4), "\n") cat("Base + sum(SHAP):", round(head(result_lm$base_value + rowSums(result_lm$shap_values)), 4), "\n") cat("\n") # Test 2: Robust linear model with correlations cat("2. Robust Linear Model (rlm) with correlations:\n") result_rlm <- fit_predict_shap(X_train, y_train, X_test, model_type = "rlm", account_correlations = TRUE, n_samples = 50) print_shap_summary(result_rlm) cat("\n") # Test 3: Quantile regression cat("3. Quantile Regression (rq):\n") result_rq <- fit_predict_shap(X_train, y_train, X_test, model_type = "rq", tau = 0.5) print_shap_summary(result_rq) cat("\n") # Test 4: Elastic net (glmnet) cat("4. Elastic Net (glmnet):\n") result_glmnet <- fit_predict_shap(X_train, y_train, X_test, model_type = "glmnet", lambda = 0.1, alpha = 0.5) print_shap_summary(result_glmnet) cat("\n") # Comparison of interventional vs correlation-aware SHAP cat("=== Comparison: Interventional vs Correlation-Aware SHAP ===\n") result_interventional <- fit_predict_shap(X_train, y_train, X_test[1:3, ], model_type = "lm", account_correlations = FALSE) result_corr_aware <- fit_predict_shap(X_train, y_train, X_test[1:3, ], model_type = "lm", account_correlations = TRUE, n_samples = 100) cat("\nInterventional SHAP (first 3 observations):\n") print(round(result_interventional$shap_values, 4)) cat("\nCorrelation-aware SHAP (first 3 observations):\n") print(round(result_corr_aware$shap_values, 4)) cat("\nDifference (Correlation-aware - Interventional):\n") print(round(result_corr_aware$shap_values - result_interventional$shap_values, 4)) ```