From eb5db41fb0bee44ed7a606e4e88c9ecc3d77f877 Mon Sep 17 00:00:00 2001 From: Milo Lurati Date: Thu, 4 Jul 2024 16:52:37 +0200 Subject: [PATCH] added documentation and related fixes --- doc/source/optimization.rst | 1 + kernel_tuner/interface.py | 1 + kernel_tuner/runners/parallel.py | 99 ++++++++++++++++++++++++-- kernel_tuner/runners/sequential.py | 6 ++ kernel_tuner/strategies/brute_force.py | 2 +- kernel_tuner/strategies/ensemble.py | 21 ++++-- 6 files changed, 119 insertions(+), 11 deletions(-) diff --git a/doc/source/optimization.rst b/doc/source/optimization.rst index 59219ad51..2b8dd8987 100644 --- a/doc/source/optimization.rst +++ b/doc/source/optimization.rst @@ -25,6 +25,7 @@ the ``strategy=`` optional argument of ``tune_kernel()``. Kernel Tuner currently * "pso" particle swarm optimization * "random_sample" takes a random sample of the search space * "simulated_annealing" simulated annealing strategy + * "ensemble" ensemble strategy Most strategies have some mechanism built in to detect when to stop tuning, which may be controlled through specific parameters that can be passed to the strategies using the ``strategy_options=`` optional argument of ``tune_kernel()``. You diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index e40304d08..0be907737 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -467,6 +467,7 @@ def __deepcopy__(self, _): ), ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")), ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")), + ("parallel_mode", ("Run the auto-tuning on multiple devices (brute-force execution)", "bool")), ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")), ] ) diff --git a/kernel_tuner/runners/parallel.py b/kernel_tuner/runners/parallel.py index 871b93228..a7f2d95fc 100644 --- a/kernel_tuner/runners/parallel.py +++ b/kernel_tuner/runners/parallel.py @@ -12,9 +12,41 @@ from kernel_tuner.strategies.common import create_actor_on_device, initialize_ray class ParallelRunner(Runner): + """ParallelRunner is used for tuning with multiple processes/threads using Ray for distributed computing.""" def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, num_gpus=None, cache_manager=None, actors=None, simulation_mode=False): + """Instantiate the ParallelRunner. + + :param kernel_source: The kernel source + :type kernel_source: kernel_tuner.core.KernelSource + + :param kernel_options: A dictionary with all options for the kernel. + :type kernel_options: kernel_tuner.interface.Options + + :param device_options: A dictionary with all options for the device + on which the kernel should be tuned. + :type device_options: kernel_tuner.interface.Options + + :param iterations: The number of iterations used for benchmarking + each kernel instance. + :type iterations: int + + :param observers: List of observers. + :type observers: list + + :param num_gpus: Number of GPUs to use. Defaults to None. + :type num_gpus: int, optional + + :param cache_manager: Cache manager instance. Defaults to None. + :type cache_manager: kernel_tuner.runners.ray.cache_manager.CacheManager, optional + + :param actors: List of pre-initialized actors. Defaults to None. + :type actors: list, optional + + :param simulation_mode: Flag to indicate simulation mode. Defaults to False. + :type simulation_mode: bool, optional + """ self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, **device_options) if not simulation_mode else None self.kernel_source = kernel_source self.simulation_mode = simulation_mode @@ -41,6 +73,26 @@ def get_environment(self, tuning_options): return self.dev.get_environment() def run(self, parameter_space=None, tuning_options=None, ensemble=None, searchspace=None, cache_manager=None): + """Run the tuning process with parallel execution. + + :param parameter_space: The parameter space to explore. + :type parameter_space: iterable + + :param tuning_options: Tuning options. Defaults to None. + :type tuning_options: dict, optional + + :param ensemble: List of strategies for ensemble. Defaults to None. + :type ensemble: list, optional + + :param searchspace: The search space to explore. Defaults to None. + :type searchspace: kernel_tuner.searchspace.Searchspace, optional + + :param cache_manager: Cache manager instance. Defaults to None. + :type cache_manager: kernel_tuner.runners.ray.cache_manager.CacheManager, optional + + :returns: Results of the tuning process. + :rtype: list of dict + """ if tuning_options is None: #HACK as tuning_options can't be the first argument and parameter_space needs to be a default argument raise ValueError("tuning_options cannot be None") @@ -84,9 +136,20 @@ def run(self, parameter_space=None, tuning_options=None, ensemble=None, searchsp return results def multi_strategy_parallel_execution(self, ensemble, tuning_options, searchspace): - """ - Runs strategies from the ensemble in parallel using distributed actors, + """Runs strategies from the ensemble in parallel using distributed actors, manages dynamic task allocation, and collects results. + + :param ensemble: List of strategies to execute. + :type ensemble: list + + :param tuning_options: Tuning options. + :type tuning_options: dict + + :param searchspace: Search space to explore. + :type searchspace: kernel_tuner.searchspace.Searchspace + + :returns: Processed results and tuning options list. + :rtype: tuple """ ensemble_queue = deque(ensemble) pending_tasks = {} @@ -137,6 +200,17 @@ def multi_strategy_parallel_execution(self, ensemble, tuning_options, searchspac def _setup_tuning_options(self, tuning_options, evaluations_per_strategy): + """Set up tuning options for each strategy in the ensemble. + + :param tuning_options: Original tuning options. + :type tuning_options: dict + + :param evaluations_per_strategy: Number of evaluations per strategy. + :type evaluations_per_strategy: list + + :returns: Modified tuning options. + :rtype: dict + """ new_tuning_options = copy.deepcopy(tuning_options) new_tuning_options.strategy_options["max_fevals"] = evaluations_per_strategy.pop(0) # the stop criterion uses the max feval in tuning options for some reason @@ -144,8 +218,13 @@ def _setup_tuning_options(self, tuning_options, evaluations_per_strategy): return new_tuning_options def _process_results_ensemble(self, all_results): - """ - Process the results from the ensemble execution. + """Process the results from the ensemble execution. + + :param all_results: List of results from all strategies. + :type all_results: list + + :returns: Processed results and tuning options list. + :rtype: tuple """ results = [] tuning_options_list = [] @@ -158,8 +237,16 @@ def _process_results_ensemble(self, all_results): def parallel_function_evaluation(self, tuning_options, parameter_space): - """ - Perform parallel function evaluation. + """Perform parallel function evaluation. + + :param tuning_options: Tuning options. + :type tuning_options: dict + + :param parameter_space: Parameter space to explore. + :type parameter_space: list + + :returns: Results and tuning options list. + :rtype: tuple """ # Create a pool of RemoteActor actors self.actor_pool = ActorPool(self.actors) diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index e19242549..46ba17e0a 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -28,6 +28,12 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob :param iterations: The number of iterations used for benchmarking each kernel instance. :type iterations: int + + :param observers: List of observers. + :type observers: list + + :param cache_manager: Cache manager instance. Defaults to None. + :type cache_manager: kernel_tuner.runners.ray.cache_manager.CacheManager, optional """ #detect language and create high-level device interface self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, **device_options) if dev is None else dev diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py index 1ba83a467..cf6ba521b 100644 --- a/kernel_tuner/strategies/brute_force.py +++ b/kernel_tuner/strategies/brute_force.py @@ -4,7 +4,7 @@ from kernel_tuner.runners.parallel import ParallelRunner from kernel_tuner.runners.ray.cache_manager import CacheManager -_options = {} +_options = dict(num_gpus=("Number of gpus to run parallel execution", None)) def tune(searchspace: Searchspace, runner, tuning_options): diff --git a/kernel_tuner/strategies/ensemble.py b/kernel_tuner/strategies/ensemble.py index 7e66f0360..2dab125f4 100644 --- a/kernel_tuner/strategies/ensemble.py +++ b/kernel_tuner/strategies/ensemble.py @@ -1,6 +1,11 @@ +""" +The ensemble strategy that optimizes the search through the parameter space using a combination of multiple strategies. +""" + import warnings from kernel_tuner.searchspace import Searchspace +from kernel_tuner.strategies import common from kernel_tuner.strategies.common import initialize_ray from kernel_tuner.runners.simulation import SimulationRunner from kernel_tuner.util import get_num_devices @@ -40,20 +45,26 @@ "bayes_opt": bayes_opt, } +_options = dict( + ensemble=("List of strategies to be used in the ensemble", ["random_sample", "random_sample"]), + max_fevals=("Maximum number of function evaluations", None), + num_gpus=("Number of gpus to run the parallel ensemble on", None) +) + def tune(searchspace: Searchspace, runner, tuning_options, cache_manager=None, actors=None): clean_up = True if actors is None and cache_manager is None else False options = tuning_options.strategy_options simulation_mode = True if isinstance(runner, SimulationRunner) else False initialize_ray() - num_devices = tuning_options['num_gpus'] if 'num_gpus' in tuning_options else get_num_devices(simulation_mode=simulation_mode) - - ensemble = options.get('ensemble', ["diff_evo", "diff_evo"]) + + ensemble, max_fevals, num_gpus =common.get_options(tuning_options.strategy_options, _options) + num_devices = num_gpus if num_gpus is not None else get_num_devices(simulation_mode=simulation_mode) ensemble_size = len(ensemble) # setup strategy options if 'bayes_opt' in ensemble: # All strategies start from a random sample except for BO tuning_options.strategy_options["samplingmethod"] = 'random' - tuning_options.strategy_options["max_fevals"] = options.get("max_fevals", 100 * ensemble_size) + tuning_options.strategy_options["max_fevals"] = 100 * ensemble_size if max_fevals is None else max_fevals tuning_options.strategy_options['check_and_retrieve'] = True # define number of ray actors needed @@ -73,3 +84,5 @@ def tune(searchspace: Searchspace, runner, tuning_options, cache_manager=None, a parallel_runner.clean_up_ray() return final_results + +tune.__doc__ = common.get_strategy_docstring("Ensemble", _options) \ No newline at end of file