diff --git a/kernel_tuner/runners/parallel.py b/kernel_tuner/runners/parallel.py
index e8134116..871b9322 100644
--- a/kernel_tuner/runners/parallel.py
+++ b/kernel_tuner/runners/parallel.py
@@ -144,6 +144,9 @@ def _setup_tuning_options(self, tuning_options, evaluations_per_strategy):
         return new_tuning_options
 
     def _process_results_ensemble(self, all_results):
+        """
+        Process the results from the ensemble execution.
+        """
         results = []
         tuning_options_list = []
 
@@ -155,6 +158,9 @@ def _process_results_ensemble(self, all_results):
 
 
     def parallel_function_evaluation(self, tuning_options, parameter_space):
+        """
+        Perform parallel function evaluation.
+        """
         # Create a pool of RemoteActor actors
         self.actor_pool = ActorPool(self.actors)
         # Distribute execution of the `execute` method across the actor pool with varying parameters and tuning options, collecting the results asynchronously.
@@ -164,6 +170,9 @@ def parallel_function_evaluation(self, tuning_options, parameter_space):
         return results, tuning_options_list
 
     def _process_results(self, all_results, searchspace):
+        """
+        Process the results and remove duplicates based on the searchspace.
+        """
         unique_configs = set()
         final_results = []
 
@@ -176,12 +185,18 @@ def _process_results(self, all_results, searchspace):
         return final_results
 
     def _calculate_simulated_time(self, tuning_options_list):
+        """
+        Calculate the maximum simulated time from the list of tuning options.
+        """
         simulated_times = []
         for tuning_options in tuning_options_list:
             simulated_times.append(tuning_options.simulated_time)
         return max(simulated_times)
 
     def _check_gpus_equals(self):
+        """
+        Check if all GPUs are of the same type.
+        """
         gpu_types = []
         env_refs = [actor.get_environment.remote() for actor in self.actors]
         environments = ray.get(env_refs)
@@ -194,6 +209,9 @@ def _check_gpus_equals(self):
         return False
 
     def clean_up_ray(self):
+        """
+        Clean up Ray actors and cache manager.
+        """
         if self.actors is not None:
             for actor in self.actors:
                 ray.kill(actor)
diff --git a/kernel_tuner/strategies/ensemble.py b/kernel_tuner/strategies/ensemble.py
index 9cdc0b90..7e66f036 100644
--- a/kernel_tuner/strategies/ensemble.py
+++ b/kernel_tuner/strategies/ensemble.py
@@ -50,19 +50,23 @@ def tune(searchspace: Searchspace, runner, tuning_options, cache_manager=None, a
     ensemble = options.get('ensemble', ["diff_evo", "diff_evo"])
     ensemble_size = len(ensemble)
 
+    # setup strategy options
     if 'bayes_opt' in ensemble: # All strategies start from a random sample except for BO
         tuning_options.strategy_options["samplingmethod"] = 'random'
     tuning_options.strategy_options["max_fevals"] = options.get("max_fevals", 100 * ensemble_size)
     tuning_options.strategy_options['check_and_retrieve'] = True
 
+    # define number of ray actors needed
     if num_devices < ensemble_size:
         warnings.warn("Number of devices is less than the number of strategies in the ensemble. Some strategies will wait until devices are available.", UserWarning)
     num_actors = num_devices if ensemble_size > num_devices else ensemble_size
     ensemble = [strategy_map[strategy] for strategy in ensemble]
+
     parallel_runner = ParallelRunner(runner.kernel_source, runner.kernel_options,
                                      runner.device_options, runner.iterations,
                                      runner.observers, num_gpus=num_actors, cache_manager=cache_manager,
                                      simulation_mode=simulation_mode, actors=actors)
+
     final_results = parallel_runner.run(tuning_options=tuning_options, ensemble=ensemble, searchspace=searchspace)
 
     if clean_up:
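
For reviewers unfamiliar with `ray.util.ActorPool`: a minimal, self-contained sketch of the `map_unordered` pattern that `parallel_function_evaluation` relies on. The `Worker` actor and its `execute` method are toy stand-ins, not the RemoteActor from this PR.

    import ray
    from ray.util import ActorPool

    @ray.remote
    class Worker:
        # Toy stand-in for the RemoteActor used in parallel.py
        def execute(self, config):
            return config * config

    ray.init()
    pool = ActorPool([Worker.remote() for _ in range(2)])
    # map_unordered yields results as actors finish, not in submission order,
    # which is why parallel.py pairs each result with its tuning options
    results = list(pool.map_unordered(lambda actor, v: actor.execute.remote(v), range(8)))
    ray.shutdown()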
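
Likewise, a sketch of how the ensemble strategy in ensemble.py might be driven through the public `tune_kernel` API. The vector_add kernel and the tuning parameters are illustrative only, and `strategy="ensemble"` assumes the strategy is registered under that name in kernel_tuner's strategy map; the `ensemble` and `max_fevals` keys follow the strategy_options read in this diff.

    import numpy as np
    import kernel_tuner

    kernel_string = """
    __global__ void vector_add(float *c, float *a, float *b, int n) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) {
            c[i] = a[i] + b[i];
        }
    }
    """

    size = 10000000
    a = np.random.randn(size).astype(np.float32)
    b = np.random.randn(size).astype(np.float32)
    c = np.zeros_like(a)
    n = np.int32(size)

    tune_params = {"block_size_x": [64, 128, 256, 512, 1024]}

    # Two differential evolution instances run in parallel, one Ray actor per GPU
    results, env = kernel_tuner.tune_kernel(
        "vector_add", kernel_string, size, [c, a, b, n], tune_params,
        strategy="ensemble",
        strategy_options={"ensemble": ["diff_evo", "diff_evo"], "max_fevals": 200},
    )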