diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cabe45..5b310bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ # Changes +# 2.2.0 + - [BUG] fix the collinearity feature elimination + - [BUG] fix the feature importance if fasttreeshap not installed + - [REFACTORING] refactor the association module for removing redundancy and faster computation # 2.1.3 - [BUG] fix the hardcoded threshold in collinearity elimination, closes #33 # 2.1.2 diff --git a/src/arfs/feature_selection/mrmr.py b/src/arfs/feature_selection/mrmr.py index 11b74d1..de4b087 100644 --- a/src/arfs/feature_selection/mrmr.py +++ b/src/arfs/feature_selection/mrmr.py @@ -110,7 +110,7 @@ def __init__( denominator_func=np.mean, only_same_domain=False, return_scores=False, - n_jobs=-1, + n_jobs=1, show_progress=True, ): self.n_features_to_select = n_features_to_select diff --git a/src/arfs/feature_selection/unsupervised.py b/src/arfs/feature_selection/unsupervised.py index 318842d..a5b85e4 100644 --- a/src/arfs/feature_selection/unsupervised.py +++ b/src/arfs/feature_selection/unsupervised.py @@ -27,7 +27,14 @@ # ARFS from .base import BaseThresholdSelector from ..utils import create_dtype_dict -from ..association import association_matrix, xy_to_matrix, plot_association_matrix +from ..association import ( + association_matrix, + xy_to_matrix, + plot_association_matrix, + weighted_theils_u, + weighted_corr, + correlation_ratio, +) from ..preprocessing import OrdinalEncoderPandas @@ -296,10 +303,10 @@ def __init__( self, threshold=0.80, method="association", - n_jobs=-1, - nom_nom_assoc="theil", - num_num_assoc="spearman", - nom_num_assoc="correlation_ratio", + n_jobs=1, + nom_nom_assoc=weighted_theils_u, + num_num_assoc=weighted_corr, + nom_num_assoc=correlation_ratio, ): self.threshold = threshold self.method = method @@ -444,7 +451,7 @@ def _recursive_collinear_elimination(association_matrix, threshold): while True: most_collinear_feature, to_drop = _most_collinear(dum, threshold) - + # Break if no more features to drop if not to_drop: break @@ -453,4 +460,4 @@ def _recursive_collinear_elimination(association_matrix, threshold): most_collinear_features.append(most_collinear_feature) dum = dum.drop(columns=most_collinear_feature, index=most_collinear_feature) - return most_collinear_features \ No newline at end of file + return most_collinear_features