From 5b2bf5fd664a8e01aac900880a89845f0d592421 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 4 Dec 2024 22:26:19 +0100 Subject: [PATCH 1/6] Update svm.py --- onedal/svm/svm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index f4184a40ac..81a7aafe19 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -354,7 +354,7 @@ def __init__( ): super().__init__( C=C, - nu=0.5, + nu=0.0, epsilon=epsilon, kernel=kernel, degree=degree, @@ -406,8 +406,8 @@ def __init__( ): super().__init__( C=C, - nu=0.5, - epsilon=0.0, + nu=0.0, + epsilon=0.0, # unused in BaseSVC kernel=kernel, degree=degree, gamma=gamma, @@ -518,9 +518,9 @@ def __init__( **kwargs, ): super().__init__( - C=1.0, + C=0.0, nu=nu, - epsilon=0.0, + epsilon=0.0, # unused in libSVM kernel=kernel, degree=degree, gamma=gamma, From 7d5f680201e37028287eef0e0e9a57453a4c4fdd Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 4 Dec 2024 23:24:57 +0100 Subject: [PATCH 2/6] formatting --- onedal/svm/svm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index 81a7aafe19..3942c4094f 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -407,7 +407,7 @@ def __init__( super().__init__( C=C, nu=0.0, - epsilon=0.0, # unused in BaseSVC + epsilon=0.0, # unused in BaseSVC kernel=kernel, degree=degree, gamma=gamma, @@ -520,7 +520,7 @@ def __init__( super().__init__( C=0.0, nu=nu, - epsilon=0.0, # unused in libSVM + epsilon=0.0, # unused in libSVM kernel=kernel, degree=degree, gamma=gamma, From 1aad858db9c6fb08c462e055f4ed31e373ad8380 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Dec 2024 13:35:19 +0100 Subject: [PATCH 3/6] Update test_forest.py --- sklearnex/ensemble/tests/test_forest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearnex/ensemble/tests/test_forest.py b/sklearnex/ensemble/tests/test_forest.py index 16c47a8beb..0005e3fc16 100644 --- a/sklearnex/ensemble/tests/test_forest.py +++ b/sklearnex/ensemble/tests/test_forest.py @@ -39,7 +39,8 @@ @pytest.mark.parametrize("block, trees, rows, scale", hparam_values) def test_sklearnex_import_rf_classifier(dataframe, queue, block, trees, rows, scale): from sklearnex.ensemble import RandomForestClassifier - + from sklearnex.utils.validation import validate_data + X, y = make_classification( n_samples=1000, n_features=4, @@ -51,6 +52,8 @@ def test_sklearnex_import_rf_classifier(dataframe, queue, block, trees, rows, sc X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) rf = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y) + # Test to see if this changes validation coverage + validate_data(rf, X, reset=False) hparams = RandomForestClassifier.get_hyperparameters("infer") if hparams and block is not None: hparams.block_size = block From ddfc6929a45c4cac30d6ed9997cc2ac565b79ad3 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Dec 2024 14:32:05 +0100 Subject: [PATCH 4/6] Update __init__.py --- sklearnex/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/__init__.py b/sklearnex/utils/__init__.py index 686e089adf..765be14fda 100755 --- a/sklearnex/utils/__init__.py +++ b/sklearnex/utils/__init__.py @@ -14,6 +14,6 @@ # limitations under the License. 
# =============================================================================== -from .validation import assert_all_finite +from .validation import _check_sample_weight, assert_all_finite, validate_data __all__ = ["assert_all_finite"] From 7c89cfeb8e1370a5a52c3d6fb8813ca69ce0395b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Dec 2024 23:03:12 +0100 Subject: [PATCH 5/6] Delete sklearnex/tests/test_patching.py --- sklearnex/tests/test_patching.py | 377 ------------------------------- 1 file changed, 377 deletions(-) delete mode 100755 sklearnex/tests/test_patching.py diff --git a/sklearnex/tests/test_patching.py b/sklearnex/tests/test_patching.py deleted file mode 100755 index 036ebf6412..0000000000 --- a/sklearnex/tests/test_patching.py +++ /dev/null @@ -1,377 +0,0 @@ -# ============================================================================== -# Copyright 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -import importlib -import inspect -import logging -import os -import re -import sys -from inspect import signature - -import numpy as np -import numpy.random as nprnd -import pytest -from sklearn.base import BaseEstimator - -from daal4py.sklearn._utils import sklearn_check_version -from onedal.tests.utils._dataframes_support import ( - _convert_to_dataframe, - get_dataframes_and_queues, -) -from sklearnex import is_patched_instance -from sklearnex.dispatcher import _is_preview_enabled -from sklearnex.metrics import pairwise_distances, roc_auc_score -from sklearnex.tests.utils import ( - DTYPES, - PATCHED_FUNCTIONS, - PATCHED_MODELS, - SPECIAL_INSTANCES, - UNPATCHED_FUNCTIONS, - UNPATCHED_MODELS, - call_method, - gen_dataset, - gen_models_info, -) - - -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) -@pytest.mark.parametrize("metric", ["cosine", "correlation"]) -def test_pairwise_distances_patching(caplog, dataframe, queue, dtype, metric): - with caplog.at_level(logging.WARNING, logger="sklearnex"): - if dtype == np.float16 and queue and not queue.sycl_device.has_aspect_fp16: - pytest.skip("Hardware does not support fp16 SYCL testing") - elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64: - pytest.skip("Hardware does not support fp64 SYCL testing") - elif queue and queue.sycl_device.is_gpu: - pytest.skip("pairwise_distances does not support GPU queues") - - rng = nprnd.default_rng() - if dataframe == "pandas": - X = _convert_to_dataframe( - rng.random(size=1000).astype(dtype).reshape(1, -1), - target_df=dataframe, - ) - else: - X = _convert_to_dataframe( - rng.random(size=1000), sycl_queue=queue, target_df=dataframe, dtype=dtype - )[None, :] - - _ = pairwise_distances(X, metric=metric) - assert all( - [ - "running accelerated version" in i.message - or "fallback to original Scikit-learn" in i.message - for i in caplog.records - ] - ), f"sklearnex patching issue in 
pairwise_distances with log: \n{caplog.text}" - - -@pytest.mark.parametrize( - "dtype", [i for i in DTYPES if "32" in i.__name__ or "64" in i.__name__] -) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) -def test_roc_auc_score_patching(caplog, dataframe, queue, dtype): - if dtype in [np.uint32, np.uint64] and sys.platform == "win32": - pytest.skip("Windows issue with unsigned ints") - elif dtype == np.float64 and queue and not queue.sycl_device.has_aspect_fp64: - pytest.skip("Hardware does not support fp64 SYCL testing") - - with caplog.at_level(logging.WARNING, logger="sklearnex"): - rng = nprnd.default_rng() - X = rng.integers(2, size=1000) - y = rng.integers(2, size=1000) - - X = _convert_to_dataframe( - X, - sycl_queue=queue, - target_df=dataframe, - dtype=dtype, - ) - y = _convert_to_dataframe( - y, - sycl_queue=queue, - target_df=dataframe, - dtype=dtype, - ) - - _ = roc_auc_score(X, y) - assert all( - [ - "running accelerated version" in i.message - or "fallback to original Scikit-learn" in i.message - for i in caplog.records - ] - ), f"sklearnex patching issue in roc_auc_score with log: \n{caplog.text}" - - -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) -@pytest.mark.parametrize("estimator, method", gen_models_info(PATCHED_MODELS)) -def test_standard_estimator_patching(caplog, dataframe, queue, dtype, estimator, method): - with caplog.at_level(logging.WARNING, logger="sklearnex"): - est = PATCHED_MODELS[estimator]() - - if queue: - if dtype == np.float16 and not queue.sycl_device.has_aspect_fp16: - pytest.skip("Hardware does not support fp16 SYCL testing") - elif dtype == np.float64 and not queue.sycl_device.has_aspect_fp64: - pytest.skip("Hardware does not support fp64 SYCL testing") - elif queue.sycl_device.is_gpu and estimator in [ - "ElasticNet", - "Lasso", - ]: - pytest.skip(f"{estimator} does not support GPU queues") - - if "NearestNeighbors" in estimator and "radius" in method: - pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex") - - if estimator == "TSNE" and method == "fit_transform": - pytest.skip("TSNE.fit_transform is too slow for common testing") - elif estimator == "IncrementalLinearRegression" and np.issubdtype( - dtype, np.integer - ): - pytest.skip( - "IncrementalLinearRegression fails on oneDAL side with int types because dataset is filled by zeroes" - ) - elif method and not hasattr(est, method): - pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}") - - X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] - est.fit(X, y) - - if method: - call_method(est, method, X, y) - - assert all( - [ - "running accelerated version" in i.message - or "fallback to original Scikit-learn" in i.message - for i in caplog.records - ] - ), f"sklearnex patching issue in {estimator}.{method} with log: \n{caplog.text}" - - -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) -@pytest.mark.parametrize("estimator, method", gen_models_info(SPECIAL_INSTANCES)) -def test_special_estimator_patching(caplog, dataframe, queue, dtype, estimator, method): - # prepare logging - - with caplog.at_level(logging.WARNING, logger="sklearnex"): - est = SPECIAL_INSTANCES[estimator] - - if queue: - # Its not possible to get the dpnp/dpctl arrays to be in the proper dtype - if dtype == np.float16 and not queue.sycl_device.has_aspect_fp16: - pytest.skip("Hardware does not support fp16 
SYCL testing") - elif dtype == np.float64 and not queue.sycl_device.has_aspect_fp64: - pytest.skip("Hardware does not support fp64 SYCL testing") - - if "NearestNeighbors" in estimator and "radius" in method: - pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex") - - X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] - est.fit(X, y) - - if method and not hasattr(est, method): - pytest.skip(f"sklearn available_if prevents testing {estimator}.{method}") - - if method: - call_method(est, method, X, y) - - assert all( - [ - "running accelerated version" in i.message - or "fallback to original Scikit-learn" in i.message - for i in caplog.records - ] - ), f"sklearnex patching issue in {estimator}.{method} with log: \n{caplog.text}" - - -@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys()) -def test_standard_estimator_signatures(estimator): - est = PATCHED_MODELS[estimator]() - unpatched_est = UNPATCHED_MODELS[estimator]() - - # all public sklearn methods should have signature matches in sklearnex - - unpatched_est_methods = [ - i - for i in dir(unpatched_est) - if not i.startswith("_") and not i.endswith("_") and hasattr(unpatched_est, i) - ] - for method in unpatched_est_methods: - est_method = getattr(est, method) - unpatched_est_method = getattr(unpatched_est, method) - if callable(unpatched_est_method): - regex = rf"(?:sklearn|daal4py)\S*{estimator}" # needed due to differences in module structure - patched_sig = re.sub(regex, estimator, str(signature(est_method))) - unpatched_sig = re.sub(regex, estimator, str(signature(unpatched_est_method))) - assert ( - patched_sig == unpatched_sig - ), f"Signature of {estimator}.{method} does not match sklearn" - - -@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys()) -def test_standard_estimator_init_signatures(estimator): - # Several estimators have additional parameters that are user-accessible - # which are sklearnex-specific. They will fail and are removed from tests. 
- # remove n_jobs due to estimator patching for sklearnex (known deviation) - patched_sig = str(signature(PATCHED_MODELS[estimator].__init__)) - unpatched_sig = str(signature(UNPATCHED_MODELS[estimator].__init__)) - - # Sklearnex allows for positional kwargs and n_jobs, when sklearn doesn't - for kwarg in ["n_jobs=None", "*"]: - patched_sig = patched_sig.replace(", " + kwarg, "") - unpatched_sig = unpatched_sig.replace(", " + kwarg, "") - - # Special sklearnex-specific kwargs are removed from signatures here - if estimator in [ - "RandomForestRegressor", - "RandomForestClassifier", - "ExtraTreesRegressor", - "ExtraTreesClassifier", - ]: - for kwarg in ["min_bin_size=1", "max_bins=256"]: - patched_sig = patched_sig.replace(", " + kwarg, "") - - assert ( - patched_sig == unpatched_sig - ), f"Signature of {estimator}.__init__ does not match sklearn" - - -@pytest.mark.parametrize( - "function", - [ - i - for i in UNPATCHED_FUNCTIONS.keys() - if i not in ["train_test_split", "set_config", "config_context"] - ], -) -def test_patched_function_signatures(function): - # certain functions are dropped from the test - # as they add functionality to the underlying sklearn function - if not sklearn_check_version("1.1") and function == "_assert_all_finite": - pytest.skip("Sklearn versioning not added to _assert_all_finite") - func = PATCHED_FUNCTIONS[function] - unpatched_func = UNPATCHED_FUNCTIONS[function] - - if callable(unpatched_func): - assert str(signature(func)) == str( - signature(unpatched_func) - ), f"Signature of {func} does not match sklearn" - - -def test_patch_map_match(): - # This rule applies to functions and classes which are out of preview. - # Items listed in a matching submodule's __all__ attribute should be - # in get_patch_map. There should not be any missing or additional elements. - - def list_all_attr(string): - try: - modules = set(importlib.import_module(string).__all__) - except ModuleNotFoundError: - modules = set([None]) - return modules - - if _is_preview_enabled(): - pytest.skip("preview sklearnex has been activated") - patched = {**PATCHED_MODELS, **PATCHED_FUNCTIONS} - - sklearnex__all__ = list_all_attr("sklearnex") - sklearn__all__ = list_all_attr("sklearn") - - module_map = {i: i for i in sklearnex__all__.intersection(sklearn__all__)} - - # _assert_all_finite patches an internal sklearn function which isn't - # exposed via __all__ in sklearn. It is a special case where this rule - # is not applied (e.g. it is grandfathered in). - del patched["_assert_all_finite"] - - # remove all scikit-learn-intelex-only estimators - for i in patched.copy(): - if i not in UNPATCHED_MODELS and i not in UNPATCHED_FUNCTIONS: - del patched[i] - - for module in module_map: - sklearn_module__all__ = list_all_attr("sklearn." + module_map[module]) - sklearnex_module__all__ = list_all_attr("sklearnex." 
+ module) - intersect = sklearnex_module__all__.intersection(sklearn_module__all__) - - for i in intersect: - if i: - del patched[i] - else: - del patched[module] - assert patched == {}, f"{patched.keys()} were not properly patched" - - -@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys()) -def test_is_patched_instance(estimator): - patched = PATCHED_MODELS[estimator] - unpatched = UNPATCHED_MODELS[estimator] - assert is_patched_instance(patched), f"{patched} is a patched instance" - assert not is_patched_instance(unpatched), f"{unpatched} is an unpatched instance" - - -@pytest.mark.parametrize("estimator", PATCHED_MODELS.keys()) -def test_if_estimator_inherits_sklearn(estimator): - est = PATCHED_MODELS[estimator] - if estimator in UNPATCHED_MODELS: - assert issubclass( - est, UNPATCHED_MODELS[estimator] - ), f"{estimator} does not inherit from the patched sklearn estimator" - else: - assert issubclass(est, BaseEstimator) - - -@pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys()) -def test_docstring_patching_match(estimator): - patched = PATCHED_MODELS[estimator] - unpatched = UNPATCHED_MODELS[estimator] - patched_docstrings = { - i: getattr(patched, i).__doc__ - for i in dir(patched) - if not i.startswith("_") and not i.endswith("_") and hasattr(patched, i) - } - unpatched_docstrings = { - i: getattr(unpatched, i).__doc__ - for i in dir(unpatched) - if not i.startswith("_") and not i.endswith("_") and hasattr(unpatched, i) - } - - # check class docstring match if a docstring is available - - assert (patched.__doc__ is None) == (unpatched.__doc__ is None) - - # check class attribute docstrings - - for i in unpatched_docstrings: - assert (patched_docstrings[i] is None) == (unpatched_docstrings[i] is None) - - -@pytest.mark.parametrize("member", ["_onedal_cpu_supported", "_onedal_gpu_supported"]) -@pytest.mark.parametrize( - "name", - [i for i in PATCHED_MODELS.keys() if "sklearnex" in PATCHED_MODELS[i].__module__], -) -def test_onedal_supported_member(name, member): - patched = PATCHED_MODELS[name] - sig = str(inspect.signature(getattr(patched, member))) - assert "(self, method_name, *data)" == sig From 442b4da3518c29201dffb6f6b8751d07319331fa Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Dec 2024 23:03:34 +0100 Subject: [PATCH 6/6] Delete sklearnex/tests/test_common.py --- sklearnex/tests/test_common.py | 390 --------------------------------- 1 file changed, 390 deletions(-) delete mode 100644 sklearnex/tests/test_common.py diff --git a/sklearnex/tests/test_common.py b/sklearnex/tests/test_common.py deleted file mode 100644 index c6a6bd06c9..0000000000 --- a/sklearnex/tests/test_common.py +++ /dev/null @@ -1,390 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import importlib.util -import os -import pathlib -import pkgutil -import re -import sys -import trace - -import pytest -from sklearn.utils import all_estimators - -from daal4py.sklearn._utils import sklearn_check_version -from onedal.tests.test_common import _check_primitive_usage_ban -from sklearnex.tests.utils import ( - PATCHED_MODELS, - SPECIAL_INSTANCES, - call_method, - gen_dataset, - gen_models_info, -) - -TARGET_OFFLOAD_ALLOWED_LOCATIONS = [ - "_config.py", - "_device_offload.py", - "test", - "svc.py", - "svm" + os.sep + "_common.py", -] - -_DESIGN_RULE_VIOLATIONS = { - "PCA-fit_transform-call_validate_data": "calls both 'fit' and 'transform'", - "IncrementalEmpiricalCovariance-score-call_validate_data": "must call clone of itself", - "SVC(probability=True)-fit-call_validate_data": "SVC fit can use sklearn estimator", - "NuSVC(probability=True)-fit-call_validate_data": "NuSVC fit can use sklearn estimator", - "LogisticRegression-score-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression-fit-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression-predict-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression-predict_log_proba-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression-predict_proba-n_jobs_check": "uses daal4py for cpu in sklearnex", - "KNeighborsClassifier-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier-score-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier-predict-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier-predict_proba-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor-score-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor-predict-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors-radius_neighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors-radius_neighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "LocalOutlierFactor-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "LocalOutlierFactor-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "LocalOutlierFactor-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier(algorithm='brute')-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier(algorithm='brute')-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier(algorithm='brute')-score-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier(algorithm='brute')-predict-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsClassifier(algorithm='brute')-predict_proba-n_jobs_check": "uses daal4py for cpu in onedal", - 
"KNeighborsClassifier(algorithm='brute')-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor(algorithm='brute')-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor(algorithm='brute')-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor(algorithm='brute')-score-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor(algorithm='brute')-predict-n_jobs_check": "uses daal4py for cpu in onedal", - "KNeighborsRegressor(algorithm='brute')-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors(algorithm='brute')-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors(algorithm='brute')-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors(algorithm='brute')-radius_neighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors(algorithm='brute')-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "NearestNeighbors(algorithm='brute')-radius_neighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "LocalOutlierFactor(novelty=True)-fit-n_jobs_check": "uses daal4py for cpu in onedal", - "LocalOutlierFactor(novelty=True)-kneighbors-n_jobs_check": "uses daal4py for cpu in onedal", - "LocalOutlierFactor(novelty=True)-kneighbors_graph-n_jobs_check": "uses daal4py for cpu in onedal", - "LogisticRegression(solver='newton-cg')-score-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression(solver='newton-cg')-fit-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression(solver='newton-cg')-predict-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression(solver='newton-cg')-predict_log_proba-n_jobs_check": "uses daal4py for cpu in sklearnex", - "LogisticRegression(solver='newton-cg')-predict_proba-n_jobs_check": "uses daal4py for cpu in sklearnex", -} - - -def test_target_offload_ban(): - """This test blocks the use of target_offload in - in sklearnex files. Offloading computation to devices - via target_offload should only occur externally, and not - within the architecture of the sklearnex classes. This - is for clarity, traceability and maintainability. - """ - output = _check_primitive_usage_ban( - primitive_name="target_offload", - package="sklearnex", - allowed_locations=TARGET_OFFLOAD_ALLOWED_LOCATIONS, - ) - output = "\n".join(output) - assert output == "", f"target offloading is occuring in: \n{output}" - - -def _sklearnex_walk(func): - """this replaces checks on pkgutils to look through sklearnex - folders specifically""" - - def wrap(*args, **kwargs): - if "prefix" in kwargs and kwargs["prefix"] == "sklearn.": - kwargs["prefix"] = "sklearnex." 
- if "path" in kwargs: - # force root to sklearnex - kwargs["path"] = [str(pathlib.Path(__file__).parent.parent)] - for pkginfo in func(*args, **kwargs): - # Do not allow spmd to be yielded - if "spmd" not in pkginfo.name.split("."): - yield pkginfo - - return wrap - - -def test_class_trailing_underscore_ban(monkeypatch): - """Trailing underscores are defined for sklearn to be signatures of a fitted - estimator instance, sklearnex extends this to the classes as well""" - monkeypatch.setattr(pkgutil, "walk_packages", _sklearnex_walk(pkgutil.walk_packages)) - estimators = all_estimators() # list of tuples - for name, obj in estimators: - if "preview" not in obj.__module__ and "daal4py" not in obj.__module__: - # propeties also occur in sklearn, especially in deprecations and are expected - # to error if queried and the estimator is not fitted - assert all( - [ - isinstance(getattr(obj, attr), property) - or (attr.startswith("_") or not attr.endswith("_")) - for attr in dir(obj) - ] - ), f"{name} contains class attributes which have a trailing underscore but no leading one" - - -def test_all_estimators_covered(monkeypatch): - """Check that all estimators defined in sklearnex are available in either the - patch map or covered in special testing via SPECIAL_INSTANCES. The estimator - must inherit sklearn's BaseEstimator and must not have a leading underscore. - The sklearnex.spmd and sklearnex.preview packages are not tested. - """ - monkeypatch.setattr(pkgutil, "walk_packages", _sklearnex_walk(pkgutil.walk_packages)) - estimators = all_estimators() # list of tuples - uncovered_estimators = [] - for name, obj in estimators: - # do nothing if defined in preview - if "preview" not in obj.__module__ and not ( - any([issubclass(est, obj) for est in PATCHED_MODELS.values()]) - or any([issubclass(est.__class__, obj) for est in SPECIAL_INSTANCES.values()]) - ): - uncovered_estimators += [".".join([obj.__module__, name])] - - assert ( - uncovered_estimators == [] - ), f"{uncovered_estimators} are currently not included" - - -def _fullpath(path): - return os.path.realpath(os.path.expanduser(path)) - - -_TRACE_ALLOW_DICT = { - i: _fullpath(os.path.dirname(importlib.util.find_spec(i).origin)) - for i in ["sklearn", "sklearnex", "onedal", "daal4py"] -} - - -def _whitelist_to_blacklist(): - """block all standard library, built-in or site packages which are not - related to sklearn, daal4py, onedal or sklearnex""" - - def _commonpath(inp): - # ValueError generated by os.path.commonpath when it is on a separate drive - try: - return os.path.commonpath(inp) - except ValueError: - return "" - - blacklist = [] - for path in sys.path: - fpath = _fullpath(path) - try: - # if candidate path is a parent directory to any directory in the whitelist - if any( - [_commonpath([i, fpath]) == fpath for i in _TRACE_ALLOW_DICT.values()] - ): - # find all sub-paths which are not in the whitelist and block them - # they should not have a common path that is either the whitelist path - # or the sub-path (meaning one is a parent directory of the either) - for f in os.scandir(fpath): - temppath = _fullpath(f.path) - if all( - [ - _commonpath([i, temppath]) not in [i, temppath] - for i in _TRACE_ALLOW_DICT.values() - ] - ): - blacklist += [temppath] - # add path to blacklist if not a sub path of anything in the whitelist - elif all([_commonpath([i, fpath]) != i for i in _TRACE_ALLOW_DICT.values()]): - blacklist += [fpath] - except FileNotFoundError: - blacklist += [fpath] - return blacklist - - -_TRACE_BLOCK_LIST = 
_whitelist_to_blacklist() - - -@pytest.fixture -def estimator_trace(estimator, method, cache, capsys, monkeypatch): - """Generate a trace of all function calls in calling estimator.method with cache. - - Parameters - ---------- - estimator : str - name of estimator which is a key from PATCHED_MODELS or - - method : str - name of estimator method which is to be traced and stored - - cache: pytest.fixture (standard) - - capsys: pytest.fixture (standard) - - monkeypatch: pytest.fixture (standard) - - Returns - ------- - dict: [calledfuncs, tracetext, modules, callinglines] - Returns a list of important attributes of the trace. - calledfuncs is the list of called functions, tracetext is the - total text output of the trace as a string, modules are the - module locations of the called functions (must be from daal4py, - onedal, sklearn, or sklearnex), and callinglines is the line - which calls the function in calledfuncs - """ - key = "-".join((str(estimator), method)) - flag = cache.get("key", "") != key - if flag: - # get estimator - try: - est = PATCHED_MODELS[estimator]() - except KeyError: - est = SPECIAL_INSTANCES[estimator] - - # get dataset - X, y = gen_dataset(est)[0] - # fit dataset if method does not contain 'fit' - if "fit" not in method: - est.fit(X, y) - - # initialize tracer to have a more verbose module naming - # this impacts ignoremods, but it is not used. - monkeypatch.setattr(trace, "_modname", _fullpath) - tracer = trace.Trace( - count=0, - trace=1, - ignoredirs=_TRACE_BLOCK_LIST, - ) - # call trace on method with dataset - tracer.runfunc(call_method, est, method, X, y) - - # collect trace for analysis - text = capsys.readouterr().out - for modulename, file in _TRACE_ALLOW_DICT.items(): - text = text.replace(file, modulename) - regex_func = ( - r"(?<=funcname: )\S*(?=\n)" # needed due to differences in module structure - ) - regex_mod = r"(?<=--- modulename: )\S*(?=\.py)" # needed due to differences in module structure - - regex_callingline = r"(?<=\n)\S.*(?=\n --- modulename: )" - - cache.set("key", key) - cache.set( - "text", - { - "funcs": re.findall(regex_func, text), - "trace": text, - "modules": [i.replace(os.sep, ".") for i in re.findall(regex_mod, text)], - "callingline": [""] + re.findall(regex_callingline, text), - }, - ) - - return cache.get("text", None) - - -def call_validate_data(text, estimator, method): - """test that the sklearn function/attribute validate_data is - called once before offloading to oneDAL in sklearnex""" - try: - # get last to_table call showing end of oneDAL input portion of code - idx = len(text["funcs"]) - 1 - text["funcs"][::-1].index("to_table") - validfuncs = text["funcs"][:idx] - except ValueError: - pytest.skip("onedal backend not used in this function") - - validate_data = "validate_data" if sklearn_check_version("1.6") else "_validate_data" - - assert ( - validfuncs.count(validate_data) == 1 - ), f"sklearn's {validate_data} should be called" - assert ( - validfuncs.count("_check_feature_names") == 1 - ), "estimator should check feature names in validate_data" - - -def n_jobs_check(text, estimator, method): - """verify the n_jobs is being set if '_get_backend' or 'to_table' is called""" - # remove the _get_backend function from sklearnex from considered _get_backend - count = max( - text["funcs"].count("to_table"), - len( - [ - i - for i in range(len(text["funcs"])) - if text["funcs"][i] == "_get_backend" - and "sklearnex" not in text["modules"][i] - ] - ), - ) - n_jobs_count = text["funcs"].count("n_jobs_wrapper") - - assert 
bool(count) == bool( - n_jobs_count - ), f"verify if {method} should be in control_n_jobs' decorated_methods for {estimator}" - - -def runtime_property_check(text, estimator, method): - """use of Python's 'property' should not be used at runtime, only at class instantiation""" - assert ( - len(re.findall(r"property\(", text["trace"])) == 0 - ), f"{estimator}.{method} should only use 'property' at instantiation" - - -def fit_check_before_support_check(text, estimator, method): - if "fit" not in method: - if "dispatch" not in text["funcs"]: - pytest.skip(f"onedal dispatching not used in {estimator}.{method}") - idx = len(text["funcs"]) - 1 - text["funcs"][::-1].index("dispatch") - validfuncs = text["funcs"][:idx] - assert ( - "check_is_fitted" in validfuncs - ), f"sklearn's check_is_fitted must be called before checking oneDAL support" - - else: - pytest.skip(f"fitting occurs in {estimator}.{method}") - - -DESIGN_RULES = [n_jobs_check, runtime_property_check, fit_check_before_support_check] - - -if sklearn_check_version("1.0"): - DESIGN_RULES += [call_validate_data] - - -@pytest.mark.parametrize("design_pattern", DESIGN_RULES) -@pytest.mark.parametrize( - "estimator, method", - gen_models_info({**PATCHED_MODELS, **SPECIAL_INSTANCES}, fit=True, daal4py=False), -) -def test_estimator(estimator, method, design_pattern, estimator_trace): - # These tests only apply to sklearnex estimators - try: - design_pattern(estimator_trace, estimator, method) - except AssertionError: - key = "-".join([estimator, method, design_pattern.__name__]) - if key in _DESIGN_RULE_VIOLATIONS: - pytest.xfail(_DESIGN_RULE_VIOLATIONS[key]) - else: - raise
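
For reference, a minimal sketch of the validation pattern these patches exercise: PATCH 4 re-exports validate_data through sklearnex.utils, and PATCH 3 calls it on an already-fitted estimator with reset=False so input validation runs against the fitted state rather than re-deriving n_features_in_. The snippet below assumes scikit-learn-intelex with this series applied; the make_classification arguments are illustrative rather than copied from the test suite.

    from sklearn.datasets import make_classification

    from sklearnex.ensemble import RandomForestClassifier
    from sklearnex.utils.validation import validate_data  # re-exported by PATCH 4

    # Illustrative toy dataset; exact generation arguments are not taken from test_forest.py.
    X, y = make_classification(n_samples=1000, n_features=4, random_state=0)

    # Same estimator configuration as the modified test.
    rf = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y)

    # reset=False validates X against the fitted estimator instead of resetting
    # n_features_in_, mirroring the call added in test_forest.py by PATCH 3.
    validate_data(rf, X, reset=False)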