diff --git a/.gitignore b/.gitignore
index a4f17075..534e380f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+**token
 .vscode/settings.json
 **.DS_Store
diff --git a/python/dalex/NEWS.md b/python/dalex/NEWS.md
index 49387658..23c03e31 100644
--- a/python/dalex/NEWS.md
+++ b/python/dalex/NEWS.md
@@ -1,5 +1,8 @@
 ## Changelog
 
+### development
+
+* added a way to pass `sample_weight` to loss functions in `model_parts()` (variable importance) using `weights` from `dx.Explainer` ([#563](https://github.com/ModelOriented/DALEX/issues/563))
 
 ### v1.7.0 (2024-02-28)
 
diff --git a/python/dalex/dalex/__init__.py b/python/dalex/dalex/__init__.py
index 6530be72..4dcb5772 100644
--- a/python/dalex/dalex/__init__.py
+++ b/python/dalex/dalex/__init__.py
@@ -9,7 +9,7 @@
 from .aspect import Aspect
 
 
-__version__ = '1.7.0'
+__version__ = '1.7.0.9000'
 
 __all__ = [
     "Arena",
diff --git a/python/dalex/dalex/_global_checks.py b/python/dalex/dalex/_global_checks.py
index eabec284..85cbf8e8 100644
--- a/python/dalex/dalex/_global_checks.py
+++ b/python/dalex/dalex/_global_checks.py
@@ -1,8 +1,6 @@
 import pkg_resources
 from importlib import import_module
 from re import search
-import numpy as np
-import pandas as pd
 
 # WARNING: below code is parsed by setup.py
 # WARNING: each dependency should be in new line
diff --git a/python/dalex/dalex/model_explanations/_variable_importance/utils.py b/python/dalex/dalex/model_explanations/_variable_importance/utils.py
index 9d08f9bd..a376815a 100644
--- a/python/dalex/dalex/model_explanations/_variable_importance/utils.py
+++ b/python/dalex/dalex/model_explanations/_variable_importance/utils.py
@@ -57,18 +57,18 @@ def loss_after_permutation(data, y, weights, model, predict, loss_function, vari
         sampled_rows = rng.choice(np.arange(data.shape[0]), N, replace=False)
         sampled_data = data.iloc[sampled_rows, :]
         observed = y[sampled_rows]
-        sample_weights = weights[sampled_rows] if weights is not None else None
+        sample_weight = weights[sampled_rows] if weights is not None else None
     else:
         sampled_data = data
         observed = y
-        sample_weights = weights
+        sample_weight = weights
 
     # loss on the full model or when outcomes are permuted
-    loss_full = calculate_loss(loss_function, observed, predict(model, sampled_data), sample_weights)
+    loss_full = calculate_loss(loss_function, observed, predict(model, sampled_data), sample_weight)
 
     sampled_rows2 = rng.choice(range(observed.shape[0]), observed.shape[0], replace=False)
-    sample_weights_rows2 = sample_weights[sampled_rows2] if sample_weights is not None else None
-    loss_baseline = calculate_loss(loss_function, observed[sampled_rows2], predict(model, sampled_data), sample_weights_rows2)
+    sample_weight_rows2 = sample_weight[sampled_rows2] if sample_weight is not None else None
+    loss_baseline = calculate_loss(loss_function, observed[sampled_rows2], predict(model, sampled_data), sample_weight_rows2)
 
     loss_features = {}
     for variables_set_key in variables:
@@ -79,7 +79,7 @@ def loss_after_permutation(data, y, weights, model, predict, loss_function, vari
 
         predicted = predict(model, ndf)
 
-        loss_features[variables_set_key] = calculate_loss(loss_function, observed, predicted, sample_weights)
+        loss_features[variables_set_key] = calculate_loss(loss_function, observed, predicted, sample_weight)
 
     loss_features['_full_model_'] = loss_full
     loss_features['_baseline_'] = loss_baseline
@@ -87,16 +87,14 @@
     return pd.DataFrame(loss_features, index=[0])
 
 
-def calculate_loss(loss_function, observed, predicted, sample_weights=None):
+def calculate_loss(loss_function, observed, predicted, sample_weight=None):
     # Determine if loss function accepts 'sample_weight'
     loss_args = inspect.signature(loss_function).parameters
     supports_weight = "sample_weight" in loss_args
 
     if supports_weight:
-        return loss_function(observed, predicted, sample_weight=sample_weights)
+        return loss_function(observed, predicted, sample_weight=sample_weight)
     else:
-        if sample_weights is not None:
-            warnings.warn(
-                f"Loss function `{loss_function.__name__}` does not have `sample_weight` argument. Calculating unweighted loss."
-            )
+        if sample_weight is not None:
+            raise UserWarning(f"Loss function `{loss_function.__name__}` does not have `sample_weight` argument. Calculating unweighted loss.")
         return loss_function(observed, predicted)
diff --git a/python/dalex/test/test_variable_importance.py b/python/dalex/test/test_variable_importance.py
index 3d9e7661..d9167faa 100644
--- a/python/dalex/test/test_variable_importance.py
+++ b/python/dalex/test/test_variable_importance.py
@@ -55,28 +55,35 @@ def test_loss_after_permutation(self):
         variables = {}
         for col in self.X.columns:
             variables[col] = col
-        lap = utils.loss_after_permutation(self.X, self.y, self.exp.model, self.exp.predict_function, rmse,
+        lap = utils.loss_after_permutation(self.X, self.y, None, self.exp.model, self.exp.predict_function, rmse,
                                            variables, 100, np.random)
         self.assertIsInstance(lap, pd.DataFrame)
         self.assertTrue(np.isin(np.array(['_full_model_', '_baseline_']),
                                 lap.columns).all(), np.random)
+
+        with self.assertRaises(UserWarning):
+            lap = utils.loss_after_permutation(self.X, self.y, self.y, self.exp.model, self.exp.predict_function, rmse,
+                                               variables, 100, np.random)
+            self.assertIsInstance(lap, pd.DataFrame)
+            self.assertTrue(np.isin(np.array(['_full_model_', '_baseline_']),
+                                    lap.columns).all(), np.random)
 
         variables = {'age': 'age', 'embarked': 'embarked'}
-        lap = utils.loss_after_permutation(self.X, self.y, self.exp.model, self.exp.predict_function, mad,
+        lap = utils.loss_after_permutation(self.X, self.y, None, self.exp.model, self.exp.predict_function, mad,
                                            variables, 10, np.random)
         self.assertIsInstance(lap, pd.DataFrame)
         self.assertTrue(np.isin(np.array(['_full_model_', '_baseline_']),
                                 lap.columns).all())
 
         variables = {'embarked': 'embarked'}
-        lap = utils.loss_after_permutation(self.X, self.y, self.exp.model, self.exp.predict_function, mae,
+        lap = utils.loss_after_permutation(self.X, self.y, None, self.exp.model, self.exp.predict_function, mae,
                                            variables, None, np.random)
         self.assertIsInstance(lap, pd.DataFrame)
         self.assertTrue(np.isin(np.array(['_full_model_', '_baseline_']),
                                 lap.columns).all())
 
         variables = {'age': 'age'}
-        lap = utils.loss_after_permutation(self.X, self.y, self.exp.model, self.exp.predict_function, rmse,
+        lap = utils.loss_after_permutation(self.X, self.y, None, self.exp.model, self.exp.predict_function, rmse,
                                            variables, None, np.random)
         self.assertIsInstance(lap, pd.DataFrame)
         self.assertTrue(np.isin(np.array(['_full_model_', '_baseline_']),
@@ -84,7 +91,7 @@ def test_loss_after_permutation(self):
 
         variables = {'personal': ['gender', 'age', 'sibsp', 'parch'],
                      'wealth': ['class', 'fare']}
-        lap = utils.loss_after_permutation(self.X, self.y, self.exp.model, self.exp.predict_function, mae,
+        lap = utils.loss_after_permutation(self.X, self.y, None, self.exp.model, self.exp.predict_function, mae,
                                            variables, None, np.random)
         self.assertIsInstance(lap, pd.DataFrame)
         self.assertTrue(np.isin(np.array(['_full_model_', '_baseline_']),
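
Example usage of the change above (an illustrative sketch, not part of the diff): the synthetic data, the `RandomForestRegressor`, and the choice of `sklearn.metrics.mean_squared_error` as the loss are assumptions for the example. The weighted behaviour assumes the development version introduced here (`1.7.0.9000`) and a callable loss that exposes a `sample_weight` argument; on released `1.7.0` the weights are not forwarded to the loss.

```python
# Illustrative sketch (assumed names/data, not part of the diff):
# pass observation weights through dx.Explainer and use a loss that
# accepts `sample_weight` so model_parts() computes weighted importance.
import numpy as np
import pandas as pd
import dalex as dx
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error  # declares a `sample_weight` argument

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(500, 3)), columns=["x1", "x2", "x3"])
y = 2 * X["x1"] + rng.normal(scale=0.1, size=500)
w = rng.uniform(0.1, 1.0, size=500)  # observation weights

model = RandomForestRegressor(n_estimators=50, random_state=0).fit(X, y)

# `weights` are stored on the explainer and, with this change,
# forwarded to the loss function via its `sample_weight` argument
exp = dx.Explainer(model, X, y, weights=w, label="rf", verbose=False)

vi = exp.model_parts(loss_function=mean_squared_error, random_state=0)
print(vi.result)
```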
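The behaviour exercised by the new `assertRaises(UserWarning)` test comes from `calculate_loss`: it inspects the loss callable's signature and forwards `sample_weight` only when the callable declares it; otherwise, if weights were supplied, it raises `UserWarning`. A standalone sketch of that dispatch follows; `rmse_unweighted` is a hypothetical loss used only to show the two paths.

```python
# Standalone sketch of the dispatch used by `calculate_loss` in the diff;
# `rmse_unweighted` is a hypothetical loss without a `sample_weight` parameter.
import inspect
import numpy as np
from sklearn.metrics import mean_absolute_error  # declares `sample_weight`


def rmse_unweighted(y_true, y_pred):
    # a loss that does not accept `sample_weight`
    return np.sqrt(np.mean((y_true - y_pred) ** 2))


def calculate_loss(loss_function, observed, predicted, sample_weight=None):
    # forward `sample_weight` only if the callable declares it
    supports_weight = "sample_weight" in inspect.signature(loss_function).parameters
    if supports_weight:
        return loss_function(observed, predicted, sample_weight=sample_weight)
    if sample_weight is not None:
        raise UserWarning(
            f"Loss function `{loss_function.__name__}` does not have "
            "`sample_weight` argument. Calculating unweighted loss."
        )
    return loss_function(observed, predicted)


y_true = np.array([1.0, 0.0, 1.0])
y_pred = np.array([0.8, 0.1, 0.6])
w = np.array([2.0, 1.0, 1.0])

print(calculate_loss(mean_absolute_error, y_true, y_pred, w))  # weighted MAE
print(calculate_loss(rmse_unweighted, y_true, y_pred))         # unweighted, no weights given
# calculate_loss(rmse_unweighted, y_true, y_pred, w)           # raises UserWarning
```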