---
title: "Linear SHAP"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Linear SHAP}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Example usage
```{r, fig.width=7.5}
library(misc)
# Prepare test data
data(mtcars)
set.seed(42)
train_idx <- sample(nrow(mtcars), 20)
X_train <- mtcars[train_idx, c("wt", "hp", "qsec")]
y_train <- mtcars[train_idx, "mpg"]
X_test <- mtcars[-train_idx, c("wt", "hp", "qsec")]
y_test <- mtcars[-train_idx, "mpg"]
# Let's manually verify with a simple example
simple_X <- X_test[1:2, ]
result_debug <- fit_predict_shap(X_train, y_train, simple_X,
model_type = "lm", center_response = FALSE)
cat("Simple example (no response centering):\n")
cat("Coefficients:", round(coef(result_debug$model), 4), "\n")
cat("Feature means:", round(result_debug$feature_means, 4), "\n")
cat("Expected prediction at means:", round(result_debug$base_value, 4), "\n")
cat("Test data:\n")
print(simple_X)
cat("Predictions:", round(result_debug$predictions, 4), "\n")
cat("Base value:", round(result_debug$base_value, 4), "\n")
cat("SHAP values:\n")
print(round(result_debug$shap_values, 4))
cat("Base + SHAP sum:", round(result_debug$base_value + rowSums(result_debug$shap_values), 4), "\n")
cat("Residuals:", round(result_debug$validation$residuals, 4), "\n")
# Manual calculation
coefs <- coef(result_debug$model)
intercept <- coefs["(Intercept)"]
slopes <- coefs[names(coefs) != "(Intercept)"]
manual_pred <- intercept + as.matrix(simple_X) %*% slopes
manual_base <- intercept + sum(slopes * result_debug$feature_means)
cat("Manual prediction:", round(as.numeric(manual_pred), 4), "\n")
cat("Manual base value:", round(as.numeric(manual_base), 4), "\n")
# =============================================================================
# ORIGINAL EXAMPLE USAGE AND TESTING
# =============================================================================
cat("=== Testing Different Model Types ===\n\n")
# Test 1: Standard linear model
cat("1. Linear Model (lm):\n")
result_lm <- fit_predict_shap(X_train, y_train, X_test,
model_type = "lm", center_response = TRUE)
print_shap_summary(result_lm)
# Debug: Let's manually check the decomposition for lm
cat("\nManual verification for lm:\n")
manual_pred <- predict(result_lm$model, newdata = X_test)
cat("Model predictions:", round(head(manual_pred + result_lm$response_mean), 4), "\n")
cat("Our predictions:", round(head(result_lm$predictions), 4), "\n")
cat("Base + sum(SHAP):", round(head(result_lm$base_value + rowSums(result_lm$shap_values)), 4), "\n")
cat("\n")
# Test 2: Robust linear model with correlations
cat("2. Robust Linear Model (rlm) with correlations:\n")
result_rlm <- fit_predict_shap(X_train, y_train, X_test,
model_type = "rlm",
account_correlations = TRUE,
n_samples = 50)
print_shap_summary(result_rlm)
cat("\n")
# Test 3: Quantile regression
cat("3. Quantile Regression (rq):\n")
result_rq <- fit_predict_shap(X_train, y_train, X_test,
model_type = "rq", tau = 0.5)
print_shap_summary(result_rq)
cat("\n")
# Test 4: Elastic net (glmnet)
cat("4. Elastic Net (glmnet):\n")
result_glmnet <- fit_predict_shap(X_train, y_train, X_test,
model_type = "glmnet",
lambda = 0.1, alpha = 0.5)
print_shap_summary(result_glmnet)
cat("\n")
# Comparison of interventional vs correlation-aware SHAP
cat("=== Comparison: Interventional vs Correlation-Aware SHAP ===\n")
result_interventional <- fit_predict_shap(X_train, y_train, X_test[1:3, ],
model_type = "lm",
account_correlations = FALSE)
result_corr_aware <- fit_predict_shap(X_train, y_train, X_test[1:3, ],
model_type = "lm",
account_correlations = TRUE,
n_samples = 100)
cat("\nInterventional SHAP (first 3 observations):\n")
print(round(result_interventional$shap_values, 4))
cat("\nCorrelation-aware SHAP (first 3 observations):\n")
print(round(result_corr_aware$shap_values, 4))
cat("\nDifference (Correlation-aware - Interventional):\n")
print(round(result_corr_aware$shap_values - result_interventional$shap_values, 4))
```