diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 09499563d4..2472054108 100644 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -119,21 +119,18 @@ jobs: Python3.9_Sklearn1.0: PYTHON_VERSION: '3.9' SKLEARN_VERSION: '1.0' - Python3.9_Sklearn1.1: - PYTHON_VERSION: '3.9' - SKLEARN_VERSION: '1.1' - Python3.10_Sklearn1.2: + Python3.10_Sklearn1.3: PYTHON_VERSION: '3.10' - SKLEARN_VERSION: '1.2' - Python3.11_Sklearn1.3: - PYTHON_VERSION: '3.11' SKLEARN_VERSION: '1.3' - Python3.12_Sklearn1.4: - PYTHON_VERSION: '3.12' + Python3.11_Sklearn1.4: + PYTHON_VERSION: '3.11' SKLEARN_VERSION: '1.4' - Python3.13_Sklearn1.5: - PYTHON_VERSION: '3.13' + Python3.12_Sklearn1.5: + PYTHON_VERSION: '3.12' SKLEARN_VERSION: '1.5' + Python3.13_Sklearn1.6: + PYTHON_VERSION: '3.13' + SKLEARN_VERSION: '1.6' pool: vmImage: 'ubuntu-22.04' steps: @@ -146,21 +143,18 @@ jobs: Python3.9_Sklearn1.0: PYTHON_VERSION: '3.9' SKLEARN_VERSION: '1.0' - Python3.9_Sklearn1.1: - PYTHON_VERSION: '3.9' - SKLEARN_VERSION: '1.1' - Python3.10_Sklearn1.2: + Python3.10_Sklearn1.3: PYTHON_VERSION: '3.10' - SKLEARN_VERSION: '1.2' - Python3.11_Sklearn1.3: - PYTHON_VERSION: '3.11' SKLEARN_VERSION: '1.3' - Python3.12_Sklearn1.4: - PYTHON_VERSION: '3.12' + Python3.11_Sklearn1.4: + PYTHON_VERSION: '3.11' SKLEARN_VERSION: '1.4' - Python3.13_Sklearn1.5: - PYTHON_VERSION: '3.13' + Python3.12_Sklearn1.5: + PYTHON_VERSION: '3.12' SKLEARN_VERSION: '1.5' + Python3.13_Sklearn1.6: + PYTHON_VERSION: '3.13' + SKLEARN_VERSION: '1.6' pool: vmImage: 'windows-2022' steps: diff --git a/.ci/scripts/run_sklearn_tests.py b/.ci/scripts/run_sklearn_tests.py index 3521fea859..a7f5b04b7f 100644 --- a/.ci/scripts/run_sklearn_tests.py +++ b/.ci/scripts/run_sklearn_tests.py @@ -25,6 +25,8 @@ import pytest import sklearn +from daal4py.sklearn._utils import sklearn_check_version + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -43,6 +45,9 @@ if os.environ["SELECTED_TESTS"] == "all": os.environ["SELECTED_TESTS"] = "" + if sklearn_check_version("1.6"): + os.environ["SCIPY_ARRAY_API"] = "1" + pytest_args = ( "--verbose --durations=100 --durations-min=0.01 " f"--rootdir={sklearn_file_dir} " diff --git a/README.md b/README.md index 114a943a4c..6a045cdaaf 100755 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ [![PyPI Version](https://img.shields.io/pypi/v/scikit-learn-intelex)](https://pypi.org/project/scikit-learn-intelex/) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/scikit-learn-intelex)](https://anaconda.org/conda-forge/scikit-learn-intelex) [![python version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue) -[![scikit-learn supported versions](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5-blue)](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5-blue) +[![scikit-learn supported versions](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5%20%7C%201.6-blue)](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5%20%7C%201.6-blue) --- diff --git a/daal4py/sklearn/ensemble/AdaBoostClassifier.py b/daal4py/sklearn/ensemble/AdaBoostClassifier.py index 1cc9bad41d..9ccb592cca 100644 --- a/daal4py/sklearn/ensemble/AdaBoostClassifier.py +++ b/daal4py/sklearn/ensemble/AdaBoostClassifier.py @@ -25,13 +25,19 @@ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y import daal4py as d4p +from daal4py.sklearn._utils import sklearn_check_version from .._n_jobs_support import control_n_jobs from .._utils import getFPType +if sklearn_check_version("1.6"): + from sklearn.utils.validation import validate_data +else: + validate_data = BaseEstimator._validate_data + @control_n_jobs(decorated_methods=["fit", "predict"]) -class AdaBoostClassifier(BaseEstimator, ClassifierMixin): +class AdaBoostClassifier(ClassifierMixin, BaseEstimator): def __init__( self, split_criterion="gini", @@ -89,7 +95,7 @@ def fit(self, X, y): ) # Check that X and y have correct shape - X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double]) + X, y = check_X_y(X, y, y_numeric=False, dtype=[np.float64, np.float32]) check_classification_targets(y) @@ -151,9 +157,7 @@ def predict(self, X): check_is_fitted(self) # Input validation - X = check_array(X, dtype=[np.single, np.double]) - if X.shape[1] != self.n_features_in_: - raise ValueError("Shape of input is different from what was seen in `fit`") + X = validate_data(self, X, dtype=[np.float64, np.float32], reset=False) # Trivial case if self.n_classes_ == 1: diff --git a/daal4py/sklearn/ensemble/GBTDAAL.py b/daal4py/sklearn/ensemble/GBTDAAL.py index b4de6ba9e3..f8f7a48aaa 100644 --- a/daal4py/sklearn/ensemble/GBTDAAL.py +++ b/daal4py/sklearn/ensemble/GBTDAAL.py @@ -26,10 +26,16 @@ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y import daal4py as d4p +from daal4py.sklearn._utils import sklearn_check_version from .._n_jobs_support import control_n_jobs from .._utils import getFPType +if sklearn_check_version("1.6"): + from sklearn.utils.validation import validate_data +else: + validate_data = BaseEstimator._validate_data + class GBTDAALBase(BaseEstimator, d4p.mb.GBTDAALBaseModel): def __init__( @@ -128,15 +134,22 @@ def _check_params(self): def _more_tags(self): return {"allow_nan": self.allow_nan_} + if sklearn_check_version("1.6"): + + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = self.allow_nan_ + return tags + @control_n_jobs(decorated_methods=["fit", "predict"]) -class GBTDAALClassifier(GBTDAALBase, ClassifierMixin): +class GBTDAALClassifier(ClassifierMixin, GBTDAALBase): def fit(self, X, y): # Check the algorithm parameters self._check_params() # Check that X and y have correct shape - X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double]) + X, y = check_X_y(X, y, y_numeric=False, dtype=[np.float64, np.float32]) check_classification_targets(y) @@ -196,15 +209,18 @@ def fit(self, X, y): def _predict( self, X, resultsToEvaluate, pred_contribs=False, pred_interactions=False ): - # Input validation - if not self.allow_nan_: - X = check_array(X, dtype=[np.single, np.double]) - else: - X = check_array(X, dtype=[np.single, np.double], force_all_finite="allow-nan") - # Check is fit had been called check_is_fitted(self, ["n_features_in_", "n_classes_"]) + # Input validation + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + force_all_finite="allow-nan" if self.allow_nan_ else True, + reset=False, + ) + # Trivial case if self.n_classes_ == 1: return np.full(X.shape[0], self.classes_[0]) @@ -251,13 +267,13 @@ def convert_model(model): @control_n_jobs(decorated_methods=["fit", "predict"]) -class GBTDAALRegressor(GBTDAALBase, RegressorMixin): +class GBTDAALRegressor(RegressorMixin, GBTDAALBase): def fit(self, X, y): # Check the algorithm parameters self._check_params() # Check that X and y have correct shape - X, y = check_X_y(X, y, y_numeric=True, dtype=[np.single, np.double]) + X, y = check_X_y(X, y, y_numeric=True, dtype=[np.float64, np.float32]) # Convert to 2d array y_ = y.reshape((-1, 1)) @@ -297,15 +313,18 @@ def fit(self, X, y): return self def predict(self, X, pred_contribs=False, pred_interactions=False): - # Input validation - if not self.allow_nan_: - X = check_array(X, dtype=[np.single, np.double]) - else: - X = check_array(X, dtype=[np.single, np.double], force_all_finite="allow-nan") - # Check is fit had been called check_is_fitted(self, ["n_features_in_"]) + # Input validation + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + force_all_finite="allow-nan" if self.allow_nan_ else True, + reset=False, + ) + fptype = getFPType(X) return self._predict_regression(X, fptype, pred_contribs, pred_interactions) diff --git a/daal4py/sklearn/linear_model/tests/test_linear.py b/daal4py/sklearn/linear_model/tests/test_linear.py index 57a11c6cdb..29137b475a 100644 --- a/daal4py/sklearn/linear_model/tests/test_linear.py +++ b/daal4py/sklearn/linear_model/tests/test_linear.py @@ -14,6 +14,18 @@ # limitations under the License. # ============================================================================== + +from os import environ + +from daal4py.sklearn._utils import sklearn_check_version + +# sklearn requires manual enabling of Scipy array API support +# if `array-api-compat` package is present in environment +# TODO: create generic approach to handle this for all tests +if sklearn_check_version("1.6"): + environ["SCIPY_ARRAY_API"] = "1" + + import numpy as np import pytest from sklearn.datasets import make_regression diff --git a/daal4py/sklearn/metrics/_pairwise.py b/daal4py/sklearn/metrics/_pairwise.py index 432c0d60a1..dba150c307 100755 --- a/daal4py/sklearn/metrics/_pairwise.py +++ b/daal4py/sklearn/metrics/_pairwise.py @@ -48,7 +48,12 @@ def _precompute_metric_params(*args, **kwrds): from .._utils import PatchingConditionsChain, getFPType, sklearn_check_version if sklearn_check_version("1.3"): - from sklearn.utils._param_validation import Integral, StrOptions, validate_params + from sklearn.utils._param_validation import ( + Hidden, + Integral, + StrOptions, + validate_params, + ) def _daal4py_cosine_distance_dense(X): @@ -65,7 +70,7 @@ def _daal4py_correlation_distance_dense(X): return res.correlationDistance -def pairwise_distances( +def _pairwise_distances( X, Y=None, metric="euclidean", *, n_jobs=None, force_all_finite=True, **kwds ): if metric not in _VALID_METRICS and not callable(metric) and metric != "precomputed": @@ -140,16 +145,92 @@ def pairwise_distances( return _parallel_pairwise(X, Y, func, n_jobs, **kwds) +# logic to deprecate `force_all_finite` from sklearn: +# it was renamed to `ensure_all_finite` since 1.6 and will be removed in 1.8 if sklearn_check_version("1.3"): + pairwise_distances_parameters = { + "X": ["array-like", "sparse matrix"], + "Y": ["array-like", "sparse matrix", None], + "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable], + "n_jobs": [Integral, None], + "force_all_finite": [ + "boolean", + StrOptions({"allow-nan"}), + Hidden(StrOptions({"deprecated"})), + ], + "ensure_all_finite": [ + "boolean", + StrOptions({"allow-nan"}), + Hidden(None), + ], + } + if sklearn_check_version("1.6"): + if sklearn_check_version("1.8"): + del pairwise_distances_parameters["force_all_finite"] + + def pairwise_distances( + X, + Y=None, + metric="euclidean", + *, + n_jobs=None, + ensure_all_finite=None, + **kwds, + ): + return _pairwise_distances( + X, + Y, + metric, + n_jobs=n_jobs, + force_all_finite=ensure_all_finite, + **kwds, + ) + + else: + from sklearn.utils.deprecation import _deprecate_force_all_finite + + def pairwise_distances( + X, + Y=None, + metric="euclidean", + *, + n_jobs=None, + force_all_finite="deprecated", + ensure_all_finite=None, + **kwds, + ): + force_all_finite = _deprecate_force_all_finite( + force_all_finite, ensure_all_finite + ) + return _pairwise_distances( + X, Y, metric, n_jobs=n_jobs, force_all_finite=force_all_finite, **kwds + ) + + else: + del pairwise_distances_parameters["ensure_all_finite"] + + def pairwise_distances( + X, + Y=None, + metric="euclidean", + *, + n_jobs=None, + force_all_finite=True, + **kwds, + ): + return _pairwise_distances( + X, + Y, + metric, + n_jobs=n_jobs, + force_all_finite=force_all_finite, + **kwds, + ) + pairwise_distances = validate_params( - { - "X": ["array-like", "sparse matrix"], - "Y": ["array-like", "sparse matrix", None], - "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable], - "n_jobs": [Integral, None], - "force_all_finite": ["boolean", StrOptions({"allow-nan"})], - }, + pairwise_distances_parameters, prefer_skip_nested_validation=True, )(pairwise_distances) - +else: + pairwise_distances = _pairwise_distances pairwise_distances.__doc__ = pairwise_distances_original.__doc__ diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 57e36a9208..86e60136a5 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -25,6 +25,20 @@ # will exclude deselection in versions 0.18.1, and 0.18.2 only. deselected_tests: + # sklearn 1.6 unsupported features + - linear_model/tests/test_base.py::test_linear_regression_sample_weight_consistency[42-True-None-X_shape1] + - linear_model/tests/test_base.py::test_linear_regression_sample_weight_consistency[42-True-None-X_shape2] + - linear_model/tests/test_ridge.py::test_ridge_shapes_type + - linear_model/tests/test_ridge.py::test_ridge_cv_results_predictions[2-False-False] + - linear_model/tests/test_ridge.py::test_ridge_cv_results_predictions[2-False-True] + - neighbors/tests/test_neighbors.py::test_nan_euclidean_support[KNeighborsClassifier-params0] + - neighbors/tests/test_neighbors.py::test_nan_euclidean_support[KNeighborsRegressor-params1] + - neighbors/tests/test_neighbors.py::test_nan_euclidean_support[LocalOutlierFactor-params6] + - neighbors/tests/test_neighbors.py::test_neighbor_classifiers_loocv[ball_tree-nn_model0] + - neighbors/tests/test_neighbors.py::test_neighbor_classifiers_loocv[brute-nn_model0] + - neighbors/tests/test_neighbors.py::test_neighbor_classifiers_loocv[kd_tree-nn_model0] + - neighbors/tests/test_neighbors.py::test_neighbor_classifiers_loocv[auto-nn_model0] + # Array API support # sklearnex functional Array API support doesn't guaranty namespace consistency for the estimator's array attributes. - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_input_and_values-array_api_strict-None-None] diff --git a/examples/sklearnex/knn_bf_regression_spmd.py b/examples/sklearnex/knn_bf_regression_spmd.py index 28ce112290..06e70ca013 100644 --- a/examples/sklearnex/knn_bf_regression_spmd.py +++ b/examples/sklearnex/knn_bf_regression_spmd.py @@ -79,7 +79,8 @@ def generate_X_y(par, coef_seed, data_seed): ) ) print( - "RMSE for entire rank {}: {}\n".format( - rank, mean_squared_error(y_test, dpt.to_numpy(y_predict), squared=False) + "MSE for entire rank {}: {}\n".format( + rank, + mean_squared_error(y_test, dpt.to_numpy(y_predict)), ) ) diff --git a/onedal/svm/tests/test_svc.py b/onedal/svm/tests/test_svc.py index 9f7eaa4810..f81b60cb13 100644 --- a/onedal/svm/tests/test_svc.py +++ b/onedal/svm/tests/test_svc.py @@ -14,6 +14,14 @@ # limitations under the License. # ============================================================================== +from os import environ + +# sklearn requires manual enabling of Scipy array API support +# if `array-api-compat` package is present in environment +# TODO: create generic approach to handle this for all tests +environ["SCIPY_ARRAY_API"] = "1" + + import numpy as np import pytest import sklearn.utils.estimator_checks diff --git a/requirements-test.txt b/requirements-test.txt index e59fdf0606..7a39fc7267 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -6,7 +6,8 @@ numpy>=1.19.5 ; python_version <= '3.9' numpy>=1.21.6 ; python_version == '3.10' numpy>=1.23.5 ; python_version == '3.11' numpy>=2.0.0 ; python_version >= '3.12' -scikit-learn==1.5.2 +scikit-learn==1.5.2 ; python_version <= '3.9' +scikit-learn==1.6.0 ; python_version >= '3.10' pandas==2.1.3 ; python_version < '3.11' pandas==2.2.3 ; python_version >= '3.11' xgboost==2.1.3 diff --git a/sklearnex/_config.py b/sklearnex/_config.py index fafdde6e68..6589f77d85 100644 --- a/sklearnex/_config.py +++ b/sklearnex/_config.py @@ -15,10 +15,12 @@ # ============================================================================== from contextlib import contextmanager +from os import environ from sklearn import get_config as skl_get_config from sklearn import set_config as skl_set_config +from daal4py.sklearn._utils import sklearn_check_version from onedal._config import _get_config as onedal_get_config @@ -65,6 +67,11 @@ def set_config( config_context : Context manager for global configuration. get_config : Retrieve current values of the global configuration. """ + + array_api_dispatch = sklearn_configs.get("array_api_dispatch", False) + if array_api_dispatch and sklearn_check_version("1.6"): + environ["SCIPY_ARRAY_API"] = "1" + skl_set_config(**sklearn_configs) local_config = onedal_get_config(copy=False) diff --git a/sklearnex/covariance/tests/test_incremental_covariance.py b/sklearnex/covariance/tests/test_incremental_covariance.py index 68272ced9e..e42373cf84 100644 --- a/sklearnex/covariance/tests/test_incremental_covariance.py +++ b/sklearnex/covariance/tests/test_incremental_covariance.py @@ -14,6 +14,17 @@ # limitations under the License. # =============================================================================== +from os import environ + +from daal4py.sklearn._utils import sklearn_check_version + +# sklearn requires manual enabling of Scipy array API support +# if `array-api-compat` package is present in environment +# TODO: create generic approach to handle this for all tests +if sklearn_check_version("1.6"): + environ["SCIPY_ARRAY_API"] = "1" + + import numpy as np import pytest from numpy.linalg import slogdet diff --git a/tests/test_estimators.py b/tests/test_estimators.py index 15e1923bcd..df05c49639 100644 --- a/tests/test_estimators.py +++ b/tests/test_estimators.py @@ -14,6 +14,14 @@ # limitations under the License. # ============================================================================== +from os import environ + +# sklearn requires manual enabling of Scipy array API support +# if `array-api-compat` package is present in environment +# TODO: create generic approach to handle this for all tests +environ["SCIPY_ARRAY_API"] = "1" + + import unittest import sklearn.utils.estimator_checks