Skip to content

Commit

Permalink
Add permutation_local and header for MDI+
Browse files Browse the repository at this point in the history
  • Loading branch information
zyliang2001 committed Jan 13, 2024
1 parent 85c21f1 commit af2e41c
Showing 1 changed file with 64 additions and 4 deletions.
68 changes: 64 additions & 4 deletions feature_importance/scripts/competing_methods_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import sklearn.base
from sklearn.base import RegressorMixin, ClassifierMixin
from sklearn.metrics import mean_squared_error
from functools import reduce

import shap
Expand All @@ -15,9 +16,7 @@ def tree_shap_local(X, y, fit):
:param X: design matrix
:param y: response
:param fit: fitted model of interest (tree-based)
:return: dataframe - [Var, Importance]
Var: variable name
Importance: average absolute shap value
:return: dataframe of shape: (n_samples, n_features)
"""
explainer = shap.TreeExplainer(fit)
shap_values = explainer.shap_values(X, check_additivity=False)
Expand All @@ -27,7 +26,7 @@ def add_abs(a, b):
results = reduce(add_abs, shap_values)
else:
results = abs(shap_values)
result_table = pd.DataFrame(results)
result_table = pd.DataFrame(results, columns=[f'Feature_{i}' for i in range(X.shape[1])])
# results = results.mean(axis=0)
# results = pd.DataFrame(data=results, columns=['importance'])
# # Use column names from dataframe if possible
Expand All @@ -36,4 +35,65 @@ def add_abs(a, b):
# results.index.name = 'var'
# results.reset_index(inplace=True)

return result_table

def permutation_local(X, y, fit, num_permutations=100):
    """
    Compute local permutation importance for each feature and sample.

    For every feature, that feature's column is shuffled ``num_permutations``
    times, the permuted data is fed through ``fit.predict`` and the squared
    error of each individual sample is recorded. The score of a
    (sample, feature) pair is that sample's squared error averaged over the
    permutations. No unpermuted baseline error is subtracted.

    :param X: design matrix of shape (n_samples, n_features), given as a
        numpy array or a pandas DataFrame; it is not modified
    :param y: response, one entry (or one row of outputs) per sample
    :param fit: fitted model of interest (tree-based); only ``fit.predict``
        is called
    :param num_permutations: Number of permutations for each feature (default is 100)
    :return: dataframe of shape: (n_samples, n_features)
    :raises ValueError: if ``num_permutations`` is smaller than 1
    """
    if num_permutations < 1:
        # Averaging over zero permutations would silently yield NaN scores.
        raise ValueError("num_permutations must be at least 1")

    # Get the number of samples and features
    num_samples, num_features = X.shape
    is_frame = isinstance(X, pd.DataFrame)

    # Keep one row per sample so that multi-output responses can be
    # averaged across outputs while staying separate across samples.
    y_true = np.asarray(y, dtype=float)
    if y_true.ndim == 1:
        y_true = y_true[:, np.newaxis]

    # Initialize array to store local permutation importance
    lpi = np.zeros((num_samples, num_features))

    for k in range(num_features):
        # Always shuffle the untouched original column, never a column that
        # has already been permuted.
        original_column = X.iloc[:, k].to_numpy() if is_frame else X[:, k]

        # One working copy per feature is enough: only column k is ever
        # overwritten, and it is overwritten again on every permutation.
        X_permuted = X.copy()

        for _ in range(num_permutations):
            shuffled = np.random.permutation(original_column)
            if is_frame:
                X_permuted.iloc[:, k] = shuffled
            else:
                X_permuted[:, k] = shuffled

            # Feed permuted data through the fitted model
            y_pred = np.asarray(fit.predict(X_permuted), dtype=float)
            if y_pred.ndim == 1:
                y_pred = y_pred[:, np.newaxis]

            # Squared error of every sample (mean over outputs only). A
            # dataset-wide MSE would be a single scalar and would give every
            # sample the same score.
            lpi[:, k] += np.mean((y_true - y_pred) ** 2, axis=1)

        # Average the per-sample errors across permutations
        lpi[:, k] /= num_permutations

    # Convert the array to a DataFrame
    result_table = pd.DataFrame(lpi, columns=[f'Feature_{i}' for i in range(num_features)])

    return result_table

def MDI_plus_local(X, y, fit):
    """
    Placeholder for the local MDI+ importance of each feature and sample.

    The MDI+ computation itself is not implemented yet, so the returned
    table currently has no rows; only the column layout is in place.

    :param X: design matrix (must be two-dimensional)
    :param y: response (currently unused)
    :param fit: fitted model of interest (tree-based) (currently unused)
    :return: dataframe with one ``Feature_<i>`` column per column of X;
        intended final shape is (n_samples, n_features)
    """
    # TODO(Zach): add the implementation of local MDI+ here.
    # Unpacking into two names requires X to be two-dimensional.
    _, feature_count = X.shape

    # No importance values exist yet, so the frame is built from None.
    importance_values = None
    column_labels = [f'Feature_{i}' for i in range(feature_count)]

    return pd.DataFrame(importance_values, columns=column_labels)

0 comments on commit af2e41c

Please sign in to comment.