diff --git a/.vscode/settings.json b/.vscode/settings.json index 25a1231ab..61ee9cde7 100755 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,7 +8,7 @@ "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnType": true, - "editor.formatOnSave": true, + "editor.formatOnSave": false, "editor.codeActionsOnSave": { "source.fixAll": true, "source.organizeImports": true, diff --git a/kernel_tuner/energy/energy.py b/kernel_tuner/energy/energy.py index 55306a09c..ab0582c52 100644 --- a/kernel_tuner/energy/energy.py +++ b/kernel_tuner/energy/energy.py @@ -1,13 +1,9 @@ -""" -This module contains a set of helper functions specifically for auto-tuning codes -for energy efficiency. -""" -from collections import OrderedDict - +"""This module contains a set of helper functions specifically for auto-tuning codes for energy efficiency.""" import numpy as np +from scipy import optimize + from kernel_tuner import tune_kernel, util from kernel_tuner.observers.nvml import NVMLObserver, get_nvml_gr_clocks -from scipy import optimize try: import pycuda.driver as drv @@ -42,8 +38,7 @@ """ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None): - """ Use NVML and PyCUDA with a synthetic kernel to obtain samples of frequency-power pairs """ - + """Use NVML and PyCUDA with a synthetic kernel to obtain samples of frequency-power pairs.""" # get some numbers about the device if not cache: if drv is None: @@ -70,14 +65,14 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback= arguments = [data] # setup tunable parameters - tune_params = OrderedDict() + tune_params = dict() tune_params["block_size_x"] = [max_block_dim_x] tune_params["nr_outer"] = [64] tune_params["nr_inner"] = [1024] tune_params.update(nvml_gr_clocks) # metrics - metrics = OrderedDict() + metrics = dict() metrics["f"] = lambda p: p["core_freq"] nvmlobserver = NVMLObserver( @@ -95,12 +90,12 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback= def estimated_voltage(clocks, clock_threshold, voltage_scale): - """ estimate voltage based on clock_threshold and voltage_scale """ + """Estimate voltage based on clock_threshold and voltage_scale.""" return [1 + ((clock > clock_threshold) * (1e-3 * voltage_scale * (clock-clock_threshold))) for clock in clocks] def estimated_power(clocks, clock_threshold, voltage_scale, clock_scale, power_max): - """ estimate power consumption based on clock threshold, clock_scale and max power """ + """Estimate power consumption based on clock threshold, clock_scale and max power.""" n = len(clocks) powers = np.zeros(n) @@ -116,7 +111,7 @@ def estimated_power(clocks, clock_threshold, voltage_scale, clock_scale, power_m def fit_power_frequency_model(freqs, nvml_power): - """ Fit the power-frequency model based on frequency and power measurements """ + """Fit the power-frequency model based on frequency and power measurements.""" nvml_gr_clocks = np.array(freqs) nvml_power = np.array(nvml_power) @@ -148,7 +143,7 @@ def fit_power_frequency_model(freqs, nvml_power): def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None): - """ Calculate the most energy-efficient clock frequency of device + """Calculate the most energy-efficient clock frequency of device. 
This function uses a performance model to fit the power-frequency curve using a synthethic benchmarking kernel. The method has been described in: @@ -202,8 +197,7 @@ def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_s def get_frequency_range_around_ridge(ridge_frequency, all_frequencies, freq_range, number_of_freqs, verbose=False): - """ Return number_of_freqs frequencies in a freq_range percentage around the ridge_frequency from among all_frequencies """ - + """Return number_of_freqs frequencies in a freq_range percentage around the ridge_frequency from among all_frequencies.""" min_freq = 1e-2 * (100 - int(freq_range)) * ridge_frequency max_freq = 1e-2 * (100 + int(freq_range)) * ridge_frequency frequency_selection = np.unique([all_frequencies[np.argmin(abs( diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py index 0d5024187..e5d3dcb90 100644 --- a/kernel_tuner/file_utils.py +++ b/kernel_tuner/file_utils.py @@ -1,13 +1,13 @@ -""" This module contains utility functions for operations on files, mostly JSON cache files """ +"""This module contains utility functions for operations on files, mostly JSON cache files.""" -import os import json +import os import subprocess -import xmltodict -from sys import platform +from importlib.metadata import PackageNotFoundError, requires, version from pathlib import Path +from sys import platform -from importlib.metadata import requires, version, PackageNotFoundError +import xmltodict from packaging.requirements import Requirement from kernel_tuner import util @@ -16,7 +16,7 @@ def output_file_schema(target): - """Get the requested JSON schema and the version number + """Get the requested JSON schema and the version number. :param target: Name of the T4 schema to return, should be any of ['output', 'metadata'] :type target: string @@ -33,7 +33,7 @@ def output_file_schema(target): def get_configuration_validity(objective) -> str: - """Convert internal Kernel Tuner error to string""" + """Convert internal Kernel Tuner error to string.""" errorstring: str if not isinstance(objective, util.ErrorConfig): errorstring = "correct" @@ -50,21 +50,21 @@ def get_configuration_validity(objective) -> str: def filename_ensure_json_extension(filename: str) -> str: - """Check if the filename has a .json extension, if not, add it""" + """Check if the filename has a .json extension, if not, add it.""" if filename[-5:] != ".json": filename += ".json" return filename def make_filenamepath(filenamepath: Path): - """Create the given path to a filename if the path does not yet exist""" + """Create the given path to a filename if the path does not yet exist.""" filepath = filenamepath.parents[0] if not filepath.exists(): filepath.mkdir() def store_output_file(output_filename: str, results, tune_params, objective="time"): - """Store the obtained auto-tuning results in a JSON output file + """Store the obtained auto-tuning results in a JSON output file. This function produces a JSON file that adheres to the T4 auto-tuning output JSON schema. @@ -75,7 +75,7 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim :type results: list of dicts :param tune_params: Tunable parameters as passed to tune_kernel - :type tune_params: OrderedDict + :type tune_params: dict :param objective: The objective used during auto-tuning, default is 'time'. 
:type objective: string @@ -140,7 +140,7 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim def get_dependencies(package="kernel_tuner"): - """Get the Python dependencies of Kernel Tuner currently installed and their version numbers""" + """Get the Python dependencies of Kernel Tuner currently installed and their version numbers.""" requirements = requires(package) deps = [Requirement(req).name for req in requirements] depends = [] @@ -155,7 +155,7 @@ def get_dependencies(package="kernel_tuner"): def get_device_query(target): - """Get the information about GPUs in the current system, target is any of ['nvidia', 'amd']""" + """Get the information about GPUs in the current system, target is any of ['nvidia', 'amd'].""" if target == "nvidia": nvidia_smi_out = subprocess.run(["nvidia-smi", "--query", "-x"], capture_output=True) nvidia_smi = xmltodict.parse(nvidia_smi_out.stdout) @@ -176,7 +176,7 @@ def get_device_query(target): def store_metadata_file(metadata_filename: str): - """Store the metadata about the current hardware and software environment in a JSON output file + """Store the metadata about the current hardware and software environment in a JSON output file. This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema. diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index 2d9750a66..c5a1119c3 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -24,7 +24,6 @@ limitations under the License. """ import logging -from collections import OrderedDict from datetime import datetime from time import perf_counter @@ -79,7 +78,7 @@ } -class Options(OrderedDict): +class Options(dict): """read-only class for passing options around.""" def __getattr__(self, name): @@ -462,7 +461,7 @@ def __deepcopy__(self, _): "string", ), ), - ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "OrderedDict")), + ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")), ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")), ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")), ] diff --git a/kernel_tuner/observers/nvml.py b/kernel_tuner/observers/nvml.py index 2a5abd3b0..17fa8b06b 100644 --- a/kernel_tuner/observers/nvml.py +++ b/kernel_tuner/observers/nvml.py @@ -1,8 +1,8 @@ +import re import subprocess import time -import re + import numpy as np -from collections import OrderedDict from kernel_tuner.observers.observer import BenchmarkObserver, ContinuousObserver @@ -13,13 +13,12 @@ class nvml: - """Class that gathers the NVML functionality for one device""" + """Class that gathers the NVML functionality for one device.""" def __init__( self, device_id=0, nvidia_smi_fallback="nvidia-smi", use_locked_clocks=False ): - """Create object to control device using NVML""" - + """Create object to control device using NVML.""" pynvml.nvmlInit() self.dev = pynvml.nvmlDeviceGetHandleByIndex(device_id) self.id = device_id @@ -94,12 +93,12 @@ def __del__(self): @property def pwr_state(self): - """Get the Device current Power State""" + """Get the Device current Power State.""" return pynvml.nvmlDeviceGetPowerState(self.dev) @property def pwr_limit(self): - """Control the power limit (may require permission), check pwr_constraints for the allowed range""" + """Control the power limit (may require permission), check pwr_constraints for the allowed range.""" return 
pynvml.nvmlDeviceGetPowerManagementLimit(self.dev) @pwr_limit.setter @@ -127,12 +126,12 @@ def pwr_limit(self, new_limit): @property def persistence_mode(self): - """Control persistence mode (may require permission), 0 for disabled, 1 for enabled""" + """Control persistence mode (may require permission), 0 for disabled, 1 for enabled.""" return self._persistence_mode @persistence_mode.setter def persistence_mode(self, new_mode): - if not new_mode in [0, 1]: + if new_mode not in [0, 1]: raise ValueError( "Illegal value for persistence mode, should be either 0 or 1" ) @@ -140,11 +139,11 @@ def persistence_mode(self, new_mode): self._persistence_mode = pynvml.nvmlDeviceGetPersistenceMode(self.dev) def set_clocks(self, mem_clock, gr_clock): - """Set the memory and graphics clock for this device (may require permission)""" + """Set the memory and graphics clock for this device (may require permission).""" self.modified_clocks = True - if not mem_clock in self.supported_mem_clocks: + if mem_clock not in self.supported_mem_clocks: raise ValueError("Illegal value for memory clock") - if not gr_clock in self.supported_gr_clocks[mem_clock]: + if gr_clock not in self.supported_gr_clocks[mem_clock]: raise ValueError("Graphics clock incompatible with memory clock") if self.use_locked_clocks: try: @@ -183,7 +182,7 @@ def set_clocks(self, mem_clock, gr_clock): subprocess.run(args, check=True) def reset_clocks(self): - """Reset the clocks to the default clock if the device uses a non default clock""" + """Reset the clocks to the default clock if the device uses a non default clock.""" if self.use_locked_clocks: try: pynvml.nvmlDeviceResetGpuLockedClocks(self.dev) @@ -222,7 +221,7 @@ def reset_clocks(self): @property def gr_clock(self): - """Control the graphics clock (may require permission), only values compatible with the memory clock can be set directly""" + """Control the graphics clock (may require permission), only values compatible with the memory clock can be set directly.""" return pynvml.nvmlDeviceGetClockInfo(self.dev, pynvml.NVML_CLOCK_GRAPHICS) @gr_clock.setter @@ -239,7 +238,7 @@ def gr_clock(self, new_clock): @property def mem_clock(self): - """Control the memory clock (may require permission), only values compatible with the graphics clock can be set directly""" + """Control the memory clock (may require permission), only values compatible with the graphics clock can be set directly.""" if self.use_locked_clocks: # nvmlDeviceGetClock returns slightly different values than nvmlDeviceGetSupportedMemoryClocks, # therefore set mem_clock to the closest supported value @@ -262,18 +261,18 @@ def mem_clock(self, new_clock): @property def temperature(self): - """Get the GPU temperature""" + """Get the GPU temperature.""" return pynvml.nvmlDeviceGetTemperature(self.dev, pynvml.NVML_TEMPERATURE_GPU) @property def auto_boost(self): - """Control the auto boost setting (may require permission), 0 for disable, 1 for enabled""" + """Control the auto boost setting (may require permission), 0 for disable, 1 for enabled.""" return self._auto_boost @auto_boost.setter def auto_boost(self, setting): # might need to use pynvml.NVML_FEATURE_DISABLED or pynvml.NVML_FEATURE_ENABLED instead of 0 or 1 - if not setting in [0, 1]: + if setting not in [0, 1]: raise ValueError( "Illegal value for auto boost enabled, should be either 0 or 1" ) @@ -281,11 +280,11 @@ def auto_boost(self, setting): self._auto_boost = pynvml.nvmlDeviceGetAutoBoostedClocksEnabled(self.dev)[0] def pwr_usage(self): - """Return current power 
usage in milliwatts""" + """Return current power usage in milliwatts.""" return pynvml.nvmlDeviceGetPowerUsage(self.dev) def gr_voltage(self): - """Return current graphics voltage in millivolts""" + """Return current graphics voltage in millivolts.""" args = ["nvidia-smi", "-i", str(self.id), "-q", "-d", "VOLTAGE"] try: result = subprocess.run(args, check=True, capture_output=True) @@ -296,7 +295,7 @@ def gr_voltage(self): class NVMLObserver(BenchmarkObserver): - """Observer that uses NVML to monitor power, energy, clock frequencies, voltages and temperature + """Observer that uses NVML to monitor power, energy, clock frequencies, voltages and temperature. The NVMLObserver can also be used to tune application-specific clock frequencies or power limits in combination with other parameters. @@ -338,12 +337,7 @@ def __init__( use_locked_clocks=False, continous_duration=1, ): - """ - - Create an NVMLObserver. - - - """ + """Create an NVMLObserver.""" if nvidia_smi_fallback: self.nvml = nvml( device, @@ -364,7 +358,7 @@ def __init__( "gr_voltage", ] for obs in observables: - if not obs in supported: + if obs not in supported: raise ValueError(f"Observable {obs} not in supported: {supported}") self.observables = observables @@ -461,7 +455,7 @@ def get_results(self): class NVMLPowerObserver(ContinuousObserver): - """Observer that measures power using NVML and continuous benchmarking""" + """Observer that measures power using NVML and continuous benchmarking.""" def __init__(self, observables, parent, nvml_instance, continous_duration=1): self.parent = parent @@ -534,8 +528,7 @@ def get_results(self): def get_nvml_pwr_limits(device, n=None, quiet=False): - """Get tunable parameter for NVML power limits, n is desired number of values""" - + """Get tunable parameter for NVML power limits, n is desired number of values.""" d = nvml(device) power_limits = d.pwr_constraints power_limit_min = power_limits[0] @@ -544,8 +537,8 @@ def get_nvml_pwr_limits(device, n=None, quiet=False): power_limit_min *= 1e-3 power_limit_max *= 1e-3 power_limit_round = 5 - tune_params = OrderedDict() - if n == None: + tune_params = dict() + if n is None: n = int((power_limit_max - power_limit_min) / power_limit_round) + 1 # Rounded power limit values @@ -561,8 +554,7 @@ def get_nvml_pwr_limits(device, n=None, quiet=False): def get_nvml_gr_clocks(device, n=None, quiet=False): - """Get tunable parameter for NVML graphics clock, n is desired number of values""" - + """Get tunable parameter for NVML graphics clock, n is desired number of values.""" d = nvml(device) mem_clock = max(d.supported_mem_clocks) gr_clocks = d.supported_gr_clocks[mem_clock] @@ -571,7 +563,7 @@ def get_nvml_gr_clocks(device, n=None, quiet=False): indices = np.array(np.ceil(np.linspace(0, len(gr_clocks) - 1, n)), dtype=int) gr_clocks = np.array(gr_clocks)[indices] - tune_params = OrderedDict() + tune_params = dict() tune_params["nvml_gr_clock"] = list(gr_clocks) if not quiet: @@ -580,15 +572,14 @@ def get_nvml_gr_clocks(device, n=None, quiet=False): def get_nvml_mem_clocks(device, n=None, quiet=False): - """Get tunable parameter for NVML memory clock, n is desired number of values""" - + """Get tunable parameter for NVML memory clock, n is desired number of values.""" d = nvml(device) mem_clocks = d.supported_mem_clocks if n and len(mem_clocks) > n: mem_clocks = mem_clocks[:: int(len(mem_clocks) / n)] - tune_params = OrderedDict() + tune_params = dict() tune_params["nvml_mem_clock"] = mem_clocks if not quiet: @@ -597,7 +588,7 @@ def 
get_nvml_mem_clocks(device, n=None, quiet=False): def get_idle_power(device, n=5, sleep_s=0.1): - """Use NVML to measure device idle power consumption""" + """Use NVML to measure device idle power consumption.""" d = nvml(device) readings = [] for _ in range(n): diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index 99f2ac972..7d4c0ea0d 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -1,20 +1,18 @@ -""" The default runner for sequentially tuning the parameter space """ +"""The default runner for sequentially tuning the parameter space.""" import logging -from collections import OrderedDict from datetime import datetime, timezone from time import perf_counter from kernel_tuner.core import DeviceInterface -from kernel_tuner.util import (ErrorConfig, print_config_output, - process_metrics, store_cache) from kernel_tuner.runners.runner import Runner +from kernel_tuner.util import ErrorConfig, print_config_output, process_metrics, store_cache class SequentialRunner(Runner): - """ SequentialRunner is used for tuning with a single process/thread """ + """SequentialRunner is used for tuning with a single process/thread.""" def __init__(self, kernel_source, kernel_options, device_options, iterations, observers): - """ Instantiate the SequentialRunner + """Instantiate the SequentialRunner. :param kernel_source: The kernel source :type kernel_source: kernel_tuner.core.KernelSource @@ -30,7 +28,6 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob each kernel instance. :type iterations: int """ - #detect language and create high-level device interface self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, **device_options) @@ -51,7 +48,7 @@ def get_environment(self, tuning_options): return self.dev.get_environment() def run(self, parameter_space, tuning_options): - """ Iterate through the entire parameter space using a single Python process + """Iterate through the entire parameter space using a single Python process. :param parameter_space: The parameter space as an iterable. 
:type parameter_space: iterable @@ -71,7 +68,7 @@ def run(self, parameter_space, tuning_options): # iterate over parameter space for element in parameter_space: - params = OrderedDict(zip(tuning_options.tune_params.keys(), element)) + params = dict(zip(tuning_options.tune_params.keys(), element)) result = None warmup_time = 0 diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py index 7c591b63a..20e800f6e 100644 --- a/kernel_tuner/strategies/basinhopping.py +++ b/kernel_tuner/strategies/basinhopping.py @@ -1,23 +1,17 @@ -""" The strategy that uses the basinhopping global optimization method """ -from collections import OrderedDict - +"""The strategy that uses the basinhopping global optimization method.""" import scipy.optimize + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common -from kernel_tuner.strategies.common import (CostFunc, - setup_method_arguments, - setup_method_options) +from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options supported_methods = ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"] -_options = OrderedDict(method=(f"Local optimization algorithm to use, choose any from {supported_methods}", "L-BFGS-B"), +_options = dict(method=(f"Local optimization algorithm to use, choose any from {supported_methods}", "L-BFGS-B"), T=("Temperature parameter for the accept or reject criterion", 1.0)) def tune(searchspace: Searchspace, runner, tuning_options): - - results = [] - method, T = common.get_options(tuning_options.strategy_options, _options) # scale variables in x to make 'eps' relevant for multiple variables diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index d6cf620a9..034fefd6f 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -1,9 +1,9 @@ import logging import sys -from collections import OrderedDict from time import perf_counter import numpy as np + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace @@ -29,12 +29,12 @@ def get_strategy_docstring(name, strategy_options): - """ Generate docstring for a 'tune' method of a strategy """ + """Generate docstring for a 'tune' method of a strategy.""" return _docstring_template.replace("$NAME$", name).replace("$STRAT_OPT$", make_strategy_options_doc(strategy_options)) def make_strategy_options_doc(strategy_options): - """ Generate documentation for the supported strategy options and their defaults """ + """Generate documentation for the supported strategy options and their defaults.""" doc = "" for opt, val in strategy_options.items(): doc += f" * {opt}: {val[0]}, default {str(val[1])}. 
\n" @@ -43,12 +43,12 @@ def make_strategy_options_doc(strategy_options): def get_options(strategy_options, options): - """ Get the strategy-specific options or their defaults from user-supplied strategy_options """ + """Get the strategy-specific options or their defaults from user-supplied strategy_options.""" accepted = list(options.keys()) + ["max_fevals", "time_limit"] for key in strategy_options: if key not in accepted: raise ValueError(f"Unrecognized option {key} in strategy_options") - assert isinstance(options, OrderedDict) + assert isinstance(options, dict) return [strategy_options.get(opt, default) for opt, (_, default) in options.items()] @@ -62,7 +62,7 @@ def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling= self.results = [] def __call__(self, x, check_restrictions=True): - """ Cost function used by almost all strategies """ + """Cost function used by almost all strategies.""" self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time) # error value to return for numeric optimizers that need a numerical value @@ -88,7 +88,7 @@ def __call__(self, x, check_restrictions=True): # else check if this is a legal (non-restricted) configuration if check_restrictions and self.searchspace.restrictions: - params_dict = OrderedDict(zip(self.searchspace.tune_params.keys(), params)) + params_dict = dict(zip(self.searchspace.tune_params.keys(), params)) legal = util.check_restrictions(self.searchspace.restrictions, params_dict, self.tuning_options.verbose) if not legal: result = params_dict @@ -115,7 +115,7 @@ def __call__(self, x, check_restrictions=True): return return_value def get_bounds_x0_eps(self): - """compute bounds, x0 (the initial guess), and eps""" + """Compute bounds, x0 (the initial guess), and eps.""" values = list(self.searchspace.tune_params.values()) if "x0" in self.tuning_options.strategy_options: @@ -154,7 +154,7 @@ def get_bounds_x0_eps(self): return bounds, x0, eps def get_bounds(self): - """ create a bounds array from the tunable parameters """ + """Create a bounds array from the tunable parameters.""" bounds = [] for values in self.searchspace.tune_params.values(): sorted_values = np.sort(values) @@ -163,7 +163,7 @@ def get_bounds(self): def setup_method_arguments(method, bounds): - """ prepare method specific arguments """ + """Prepare method specific arguments.""" kwargs = {} # pass bounds to methods that support it if method in ["L-BFGS-B", "TNC", "SLSQP"]: @@ -172,7 +172,7 @@ def setup_method_arguments(method, bounds): def setup_method_options(method, tuning_options): - """ prepare method specific options """ + """Prepare method specific options.""" kwargs = {} # Note that not all methods iterpret maxiter in the same manner @@ -200,7 +200,7 @@ def setup_method_options(method, tuning_options): def snap_to_nearest_config(x, tune_params): - """helper func that for each param selects the closest actual value""" + """Helper func that for each param selects the closest actual value.""" params = [] for i, k in enumerate(tune_params.keys()): values = np.array(tune_params[k]) @@ -210,7 +210,7 @@ def snap_to_nearest_config(x, tune_params): def unscale_and_snap_to_nearest(x, tune_params, eps): - """helper func that snaps a scaled variable to the nearest config""" + """Helper func that snaps a scaled variable to the nearest config.""" x_u = [i for i in x] for i, v in enumerate(tune_params.values()): # create an evenly spaced linear space to map [0,1]-interval @@ -232,7 +232,7 @@ def unscale_and_snap_to_nearest(x, 
tune_params, eps): def scale_from_params(params, tune_params, eps): - """helper func to do the inverse of the 'unscale' function""" + """Helper func to do the inverse of the 'unscale' function.""" x = np.zeros(len(params)) for i, v in enumerate(tune_params.values()): x[i] = 0.5 * eps + v.index(params[i])*eps diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py index ecb257199..5ad2b9474 100644 --- a/kernel_tuner/strategies/diff_evo.py +++ b/kernel_tuner/strategies/diff_evo.py @@ -1,22 +1,20 @@ -""" The differential evolution strategy that optimizes the search through the parameter space """ -from collections import OrderedDict +"""The differential evolution strategy that optimizes the search through the parameter space.""" +from scipy.optimize import differential_evolution from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -from scipy.optimize import differential_evolution supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"] -_options = OrderedDict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"), +_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"), popsize=("Population size", 20), maxiter=("Number of generations", 100)) def tune(searchspace: Searchspace, runner, tuning_options): - results = [] method, popsize, maxiter = common.get_options(tuning_options.strategy_options, _options) diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py index ebe095bde..0f44bd849 100644 --- a/kernel_tuner/strategies/dual_annealing.py +++ b/kernel_tuner/strategies/dual_annealing.py @@ -1,17 +1,14 @@ -""" The strategy that uses the dual annealing optimization method """ -from collections import OrderedDict - +"""The strategy that uses the dual annealing optimization method.""" import scipy.optimize + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common -from kernel_tuner.strategies.common import (CostFunc, - setup_method_arguments, - setup_method_options) +from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options supported_methods = ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'] -_options = OrderedDict(method=(f"Local optimization method to use, choose any from {supported_methods}", "Powell")) +_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "Powell")) def tune(searchspace: Searchspace, runner, tuning_options): diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py index 0c053ed9c..dc43aae6f 100644 --- a/kernel_tuner/strategies/firefly_algorithm.py +++ b/kernel_tuner/strategies/firefly_algorithm.py @@ -1,15 +1,15 @@ -""" The strategy that uses the firefly algorithm for optimization""" +"""The strategy that uses the firefly algorithm for optimization.""" import sys -from collections import OrderedDict import numpy as np + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common -from kernel_tuner.strategies.common import (CostFunc, scale_from_params) +from kernel_tuner.strategies.common import CostFunc, 
scale_from_params from kernel_tuner.strategies.pso import Particle -_options = OrderedDict(popsize=("Population size", 20), +_options = dict(popsize=("Population size", 20), maxiter=("Maximum number of iterations", 100), B0=("Maximum attractiveness", 1.0), gamma=("Light absorption coefficient", 1.0), @@ -88,20 +88,20 @@ def tune(searchspace: Searchspace, runner, tuning_options): tune.__doc__ = common.get_strategy_docstring("firefly algorithm", _options) class Firefly(Particle): - """Firefly object for use in the Firefly Algorithm""" + """Firefly object for use in the Firefly Algorithm.""" def __init__(self, bounds): - """Create Firefly at random position within bounds""" + """Create Firefly at random position within bounds.""" super().__init__(bounds) self.bounds = bounds self.intensity = 1 / self.score def distance_to(self, other): - """Return Euclidian distance between self and other Firefly""" + """Return Euclidian distance between self and other Firefly.""" return np.linalg.norm(self.position-other.position) def compute_intensity(self, fun): - """Evaluate cost function and compute intensity at this position""" + """Evaluate cost function and compute intensity at this position.""" self.evaluate(fun) if self.score == sys.float_info.max: self.intensity = -sys.float_info.max @@ -109,7 +109,7 @@ def compute_intensity(self, fun): self.intensity = 1 / self.score def move_towards(self, other, beta, alpha): - """Move firefly towards another given beta and alpha values""" + """Move firefly towards another given beta and alpha values.""" self.position += beta * (other.position - self.position) self.position += alpha * (np.random.uniform(-0.5, 0.5, len(self.position))) self.position = np.minimum(self.position, [b[1] for b in self.bounds]) diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py index 76fd84539..c29c150b5 100644 --- a/kernel_tuner/strategies/genetic_algorithm.py +++ b/kernel_tuner/strategies/genetic_algorithm.py @@ -1,14 +1,14 @@ -""" A simple genetic algorithm for parameter search """ +"""A simple genetic algorithm for parameter search.""" import random -from collections import OrderedDict import numpy as np + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -_options = OrderedDict( +_options = dict( popsize=("population size", 20), maxiter=("maximum number of generations", 100), method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"), @@ -77,7 +77,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): def weighted_choice(population, n): - """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected""" + """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected.""" def random_index_betavariate(pop_size): # has a higher probability of returning index of item at the head of the list @@ -86,7 +86,7 @@ def random_index_betavariate(pop_size): return int(random.betavariate(alpha, beta) * pop_size) def random_index_weighted(pop_size): - """use weights to increase probability of selection""" + """Use weights to increase probability of selection.""" weights = [w for _, w in population] # invert because lower is better inverted_weights = [1.0 / w for w in weights] @@ -109,8 +109,7 @@ def random_index_weighted(pop_size): def mutate(dna, 
mutation_chance, searchspace: Searchspace, cache=True): - """Mutate DNA with 1/mutation_chance chance""" - + """Mutate DNA with 1/mutation_chance chance.""" # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list if int(random.random() * mutation_chance) == 0: if cache: @@ -123,14 +122,14 @@ def mutate(dna, mutation_chance, searchspace: Searchspace, cache=True): def single_point_crossover(dna1, dna2): - """crossover dna1 and dna2 at a random index""" + """Crossover dna1 and dna2 at a random index.""" # check if you can do the crossovers using the neighbor index: check which valid parameter configuration is closest to the crossover, probably best to use "adjacent" as it is least strict? pos = int(random.random() * (len(dna1))) return (dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:]) def two_point_crossover(dna1, dna2): - """crossover dna1 and dna2 at 2 random indices""" + """Crossover dna1 and dna2 at 2 random indices.""" if len(dna1) < 5: start, end = 0, len(dna1) else: @@ -142,7 +141,7 @@ def two_point_crossover(dna1, dna2): def uniform_crossover(dna1, dna2): - """randomly crossover genes between dna1 and dna2""" + """Randomly crossover genes between dna1 and dna2.""" ind = np.random.random(len(dna1)) > 0.5 child1 = [dna1[i] if ind[i] else dna2[i] for i in range(len(ind))] child2 = [dna2[i] if ind[i] else dna1[i] for i in range(len(ind))] @@ -150,7 +149,7 @@ def uniform_crossover(dna1, dna2): def disruptive_uniform_crossover(dna1, dna2): - """disruptive uniform crossover + """Disruptive uniform crossover. uniformly crossover genes between dna1 and dna2, with children guaranteed to be different from parents, diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py index 1630c6c17..a4c521746 100644 --- a/kernel_tuner/strategies/greedy_ils.py +++ b/kernel_tuner/strategies/greedy_ils.py @@ -1,6 +1,4 @@ -""" A simple greedy iterative local search algorithm for parameter search """ -from collections import OrderedDict - +"""A simple greedy iterative local search algorithm for parameter search.""" from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common @@ -8,7 +6,7 @@ from kernel_tuner.strategies.genetic_algorithm import mutate from kernel_tuner.strategies.hillclimbers import base_hillclimb -_options = OrderedDict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), +_options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), restart=("controls greedyness, i.e. whether to restart from a position as soon as an improvement is found", True), no_improvement=("number of evaluations to exceed without improvement before restarting", 50), random_walk=("controls greedyness, i.e. 
whether to restart from a position as soon as an improvement is found", 0.3)) diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py index 3da456aa7..1b34da501 100644 --- a/kernel_tuner/strategies/greedy_mls.py +++ b/kernel_tuner/strategies/greedy_mls.py @@ -1,12 +1,10 @@ -""" A greedy multi-start local search algorithm for parameter search """ -from collections import OrderedDict - +"""A greedy multi-start local search algorithm for parameter search.""" from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.hillclimbers import base_hillclimb -_options = OrderedDict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), +_options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), restart=("controls greedyness, i.e. whether to restart from a position as soon as an improvement is found", True), order=("set a user-specified order to search among dimensions while hillclimbing", None), randomize=("use a random order to search among dimensions while hillclimbing", True)) diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py index 952d18d2c..80c1c6f82 100644 --- a/kernel_tuner/strategies/minimize.py +++ b/kernel_tuner/strategies/minimize.py @@ -1,22 +1,20 @@ -""" The strategy that uses a minimizer method for searching through the parameter space """ -import logging -import sys -from collections import OrderedDict -from time import perf_counter +"""The strategy that uses a minimizer method for searching through the parameter space.""" -import numpy as np import scipy.optimize + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace -from kernel_tuner.strategies.common import (CostFunc, - get_options, - get_strategy_docstring, - setup_method_arguments, - setup_method_options) +from kernel_tuner.strategies.common import ( + CostFunc, + get_options, + get_strategy_docstring, + setup_method_arguments, + setup_method_options, +) supported_methods = ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"] -_options = OrderedDict(method=(f"Local optimization algorithm to use, choose any from {supported_methods}", "L-BFGS-B")) +_options = dict(method=(f"Local optimization algorithm to use, choose any from {supported_methods}", "L-BFGS-B")) def tune(searchspace: Searchspace, runner, tuning_options): diff --git a/kernel_tuner/strategies/mls.py b/kernel_tuner/strategies/mls.py index f075424b4..b8ecf030c 100644 --- a/kernel_tuner/strategies/mls.py +++ b/kernel_tuner/strategies/mls.py @@ -1,11 +1,9 @@ -""" The strategy that uses multi-start local search """ -from collections import OrderedDict - +"""The strategy that uses multi-start local search.""" from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.greedy_mls import tune as mls_tune -_options = OrderedDict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), +_options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), restart=("controls greedyness, i.e. 
whether to restart from a position as soon as an improvement is found", False), order=("set a user-specified order to search among dimensions while hillclimbing", None), randomize=("use a random order to search among dimensions while hillclimbing", True)) diff --git a/kernel_tuner/strategies/ordered_greedy_mls.py b/kernel_tuner/strategies/ordered_greedy_mls.py index fd0f9030a..cd40ba778 100644 --- a/kernel_tuner/strategies/ordered_greedy_mls.py +++ b/kernel_tuner/strategies/ordered_greedy_mls.py @@ -1,11 +1,9 @@ -""" A greedy multi-start local search algorithm for parameter search that traverses variables in order.""" -from collections import OrderedDict - +"""A greedy multi-start local search algorithm for parameter search that traverses variables in order.""" from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.greedy_mls import tune as mls_tune -_options = OrderedDict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), +_options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), restart=("controls greedyness, i.e. whether to restart from a position as soon as an improvement is found", True), order=("set a user-specified order to search among dimensions while hillclimbing", None), randomize=("use a random order to search among dimensions while hillclimbing", False)) diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py index 37caedc7f..5b0df1429 100644 --- a/kernel_tuner/strategies/pso.py +++ b/kernel_tuner/strategies/pso.py @@ -1,16 +1,15 @@ -""" The strategy that uses particle swarm optimization""" +"""The strategy that uses particle swarm optimization.""" import random import sys -from collections import OrderedDict import numpy as np + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common -from kernel_tuner.strategies.common import (CostFunc, - scale_from_params) +from kernel_tuner.strategies.common import CostFunc, scale_from_params -_options = OrderedDict(popsize=("Population size", 20), +_options = dict(popsize=("Population size", 20), maxiter=("Maximum number of iterations", 100), w=("Inertia weight constant", 0.5), c1=("Cognitive constant", 2.0), diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py index 77e69505d..022eda534 100644 --- a/kernel_tuner/strategies/random_sample.py +++ b/kernel_tuner/strategies/random_sample.py @@ -1,13 +1,12 @@ -""" Iterate over a random sample of the parameter space """ -from collections import OrderedDict - +"""Iterate over a random sample of the parameter space.""" import numpy as np + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -_options = OrderedDict(fraction=("Fraction of the search space to cover value in [0, 1]", 0.1)) +_options = dict(fraction=("Fraction of the search space to cover value in [0, 1]", 0.1)) def tune(searchspace: Searchspace, runner, tuning_options): diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py index 883e6ff98..dce929b7b 100644 --- a/kernel_tuner/strategies/simulated_annealing.py +++ b/kernel_tuner/strategies/simulated_annealing.py @@ -1,15 +1,15 @@ -""" The strategy that uses particle swarm optimization""" +"""The strategy that uses 
simulated annealing.""" import random import sys -from collections import OrderedDict import numpy as np + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -_options = OrderedDict(T=("Starting temperature", 1.0), +_options = dict(T=("Starting temperature", 1.0), T_min=("End temperature", 0.001), alpha=("Alpha parameter", 0.995), maxiter=("Number of iterations within each annealing step", 1)) @@ -86,7 +86,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): tune.__doc__ = common.get_strategy_docstring("Simulated Annealing", _options) def acceptance_prob(old_cost, new_cost, T, tuning_options): - """annealing equation, with modifications to work towards a lower value""" + """Annealing equation, with modifications to work towards a lower value.""" error_val = sys.float_info.max if not tuning_options.objective_higher_is_better else -sys.float_info.max # if start pos is not valid, always move if old_cost == error_val: @@ -104,7 +104,7 @@ def acceptance_prob(old_cost, new_cost, T, tuning_options): def neighbor(pos, searchspace: Searchspace): - """return a random neighbor of pos""" + """Return a random neighbor of pos.""" # Note: this is not the same as the previous implementation, because it is possible that non-edge parameters remain the same, but suggested configurations will all be within restrictions neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method='Hamming') if random.random() < 0.2 else searchspace.get_neighbors(tuple(pos), neighbor_method='strictly-adjacent') if len(neighbors) > 0: diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index e33fefcd4..7ea9840d2 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -10,7 +10,6 @@ import tempfile import time import warnings -from collections import OrderedDict from inspect import signature from types import FunctionType from typing import Optional, Union @@ -293,7 +292,7 @@ def check_thread_block_dimensions(params, max_threads, block_size_names=None): def config_valid(config, tuning_options, max_threads): """Combines restrictions and a check on the max thread block dimension to check config validity.""" legal = True - params = OrderedDict(zip(tuning_options.tune_params.keys(), config)) + params = dict(zip(tuning_options.tune_params.keys(), config)) if tuning_options.restrictions: legal = check_restrictions(tuning_options.restrictions, params, False) if not legal: @@ -375,9 +374,7 @@ def get_dimension_divisor(divisor_list, default, params): def get_instance_string(params): - """Combine the parameters to a string mostly used for debug output - use of OrderedDict is advised. - """ + """Combine the parameters to a string mostly used for debug output; use of dict is advised.""" return "_".join([str(i) for i in params.values()]) @@ -528,14 +525,14 @@ def print_config_output(tune_params, params, quiet, metrics, units): def process_metrics(params, metrics): """Process user-defined metrics for derived benchmark results. - Metrics must be an OrderedDict to support composable metrics. The dictionary keys describe + Metrics must be a dictionary to support composable metrics. The dictionary keys describe the name given to this user-defined metric and will be used as the key in the results dictionaries return by Kernel Tuner.
The values describe how to calculate the user-defined metric, using either a string expression in which the tunable parameters and benchmark results can be used as variables, or as a function that accepts a dictionary as argument. Example: - metrics = OrderedDict() + metrics = dict() metrics["x"] = "10000 / time" metrics["x2"] = "x*x" @@ -543,21 +540,21 @@ def process_metrics(params, metrics): Example: - metrics = OrderedDict() + metrics = dict() metrics["GFLOP/s"] = lambda p : 10000 / p["time"] :param params: A dictionary with tunable parameters and benchmark results. :type params: dict - :param metrics: An OrderedDict with user-defined metrics that can be used to create derived benchmark results. - :type metrics: OrderedDict + :param metrics: A dictionary with user-defined metrics that can be used to create derived benchmark results. + :type metrics: dict :returns: An updated params dictionary with the derived metrics inserted along with the benchmark results. :rtype: dict """ - if not isinstance(metrics, OrderedDict): - raise ValueError("metrics should be an OrderedDict to preserve order and support composability") + if not isinstance(metrics, dict): + raise ValueError("metrics should be a dictionary to preserve order and support composability") for k, v in metrics.items(): if isinstance(v, str): value = eval(replace_param_occurrences(v, params)) @@ -645,7 +642,7 @@ def prepare_kernel_string(kernel_name, kernel_string, params, grid, threads, blo # * each tunable parameter # * kernel_tuner=1 if defines is None: - defines = OrderedDict() + defines = dict() grid_dim_names = ["grid_size_x", "grid_size_y", "grid_size_z"] for i, g in enumerate(grid): @@ -1014,16 +1011,16 @@ def process_cache(cache, kernel_options, tuning_options, runner): from an earlier (abruptly ended) tuning session. 
""" - # caching only works correctly if tunable_parameters are stored in a OrderedDict - if not isinstance(tuning_options.tune_params, OrderedDict): - raise ValueError("Caching only works correctly when tunable parameters are stored in a OrderedDict") + # caching only works correctly if tunable_parameters are stored in a dictionary + if not isinstance(tuning_options.tune_params, dict): + raise ValueError("Caching only works correctly when tunable parameters are stored in a dictionary") # if file does not exist, create new cache if not os.path.isfile(cache): if tuning_options.simulation_mode: raise ValueError(f"Simulation mode requires an existing cachefile: file {cache} does not exist") - c = OrderedDict() + c = dict() c["device_name"] = runner.dev.name c["kernel_name"] = kernel_options.kernel_name c["problem_size"] = kernel_options.problem_size if not callable(kernel_options.problem_size) else "callable" diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py index d7d7d5986..6081f034c 100644 --- a/test/strategies/test_bayesian_optimization.py +++ b/test/strategies/test_bayesian_optimization.py @@ -1,16 +1,16 @@ -import enum import itertools -from re import L +from collections import namedtuple from random import uniform as randfloat + import numpy as np -from collections import OrderedDict, namedtuple + from kernel_tuner.interface import Options from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import bayes_opt from kernel_tuner.strategies.bayes_opt import BayesianOptimization from kernel_tuner.strategies.common import CostFunc -tune_params = OrderedDict() +tune_params = dict() tune_params["x"] = [1, 2, 3] tune_params["y"] = [4, 5, 6] tune_params["z"] = [7] diff --git a/test/strategies/test_common.py b/test/strategies/test_common.py index 7bbd8f892..29ead8615 100644 --- a/test/strategies/test_common.py +++ b/test/strategies/test_common.py @@ -1,10 +1,9 @@ import sys -from collections import OrderedDict from time import perf_counter +from kernel_tuner.interface import Options from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common -from kernel_tuner.interface import Options from kernel_tuner.strategies.common import CostFunc try: @@ -23,7 +22,7 @@ def fake_runner(): return runner -tune_params = OrderedDict([("x", [1, 2, 3]), ("y", [4, 5, 6])]) +tune_params = dict([("x", [1, 2, 3]), ("y", [4, 5, 6])]) def test_cost_func(): @@ -32,13 +31,13 @@ def test_cost_func(): restrictions=None, strategy_options={}, cache={}, unique_results={}, objective="time", objective_higher_is_better=False, metrics=None) runner = fake_runner() - results = [] time = CostFunc(Searchspace(tune_params, None, 1024), tuning_options, runner)(x) assert time == 5 # check if restrictions are properly handled - restrictions = lambda _: False + def restrictions(_): + return False tuning_options = Options(scaling=False, snap=False, tune_params=tune_params, restrictions=restrictions, strategy_options={}, verbose=True, cache={}, unique_results={}, diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py index b41334242..cb07f8d7f 100644 --- a/test/strategies/test_genetic_algorithm.py +++ b/test/strategies/test_genetic_algorithm.py @@ -1,9 +1,7 @@ -from collections import OrderedDict -from kernel_tuner.strategies import genetic_algorithm as ga -from kernel_tuner.interface import Options from kernel_tuner.searchspace import Searchspace +from kernel_tuner.strategies import 
genetic_algorithm as ga -tune_params = OrderedDict() +tune_params = dict() tune_params["x"] = [1, 2, 3] tune_params["y"] = [4, 5, 6] diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py index c1b4c0936..395cf2bf9 100644 --- a/test/strategies/test_strategies.py +++ b/test/strategies/test_strategies.py @@ -1,12 +1,11 @@ -from collections import OrderedDict import os -import pytest import numpy as np +import pytest import kernel_tuner -from kernel_tuner.interface import strategy_map from kernel_tuner import util +from kernel_tuner.interface import strategy_map cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/../test_cache_file.json" @@ -28,7 +27,7 @@ def vector_add(): n = np.int32(size) args = [c, a, b, n] - tune_params = OrderedDict() + tune_params = dict() tune_params["block_size_x"] = [128 + 64 * i for i in range(15)] return ["vector_add", kernel_string, size, args, tune_params] diff --git a/test/test_common.py b/test/test_common.py index c9d4bfcc5..132068843 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -1,15 +1,14 @@ -from collections import OrderedDict - import random + import numpy as np -from kernel_tuner.interface import Options import kernel_tuner.strategies.common as common +from kernel_tuner.interface import Options from kernel_tuner.searchspace import Searchspace def test_get_bounds_x0_eps(): - tune_params = OrderedDict() + tune_params = dict() tune_params['x'] = [0, 1, 2, 3, 4] searchspace = Searchspace(tune_params, [], 1024) @@ -30,7 +29,7 @@ def test_get_bounds_x0_eps(): def test_get_bounds(): - tune_params = OrderedDict() + tune_params = dict() tune_params['x'] = [0, 1, 2, 3, 4] tune_params['y'] = [i for i in range(0, 10000, 100)] tune_params['z'] = [-11.2, 55.67, 123.27] @@ -47,7 +46,7 @@ def test_get_bounds(): def test_snap_to_nearest_config(): - tune_params = OrderedDict() + tune_params = dict() tune_params['x'] = [0, 1, 2, 3, 4, 5] tune_params['y'] = [0, 1, 2, 3, 4, 5] tune_params['z'] = [0, 1, 2, 3, 4, 5] @@ -61,7 +60,7 @@ def test_snap_to_nearest_config(): def test_unscale(): - params = OrderedDict() + params = dict() params['x'] = [2**i for i in range(4, 9)] eps = 1.0 / len(params['x']) diff --git a/test/test_cuda_functions.py b/test/test_cuda_functions.py index 0709eecb3..1dc68652d 100644 --- a/test/test_cuda_functions.py +++ b/test/test_cuda_functions.py @@ -1,13 +1,12 @@ import numpy as np - -import kernel_tuner -from .context import skip_if_no_cuda -from .test_runners import env - import pytest + from kernel_tuner import tune_kernel from kernel_tuner.backends import nvcuda -from kernel_tuner.core import KernelSource, KernelInstance +from kernel_tuner.core import KernelInstance, KernelSource + +from .context import skip_if_no_cuda +from .test_runners import env # noqa: F401 try: from cuda import cuda diff --git a/test/test_cupy_functions.py b/test/test_cupy_functions.py index a505b385c..4bb4d16f4 100644 --- a/test/test_cupy_functions.py +++ b/test/test_cupy_functions.py @@ -1,7 +1,9 @@ import kernel_tuner + from .context import skip_if_no_cupy -from .test_runners import env +from .test_runners import env # noqa: F401 + @skip_if_no_cupy def test_tune_kernel(env): diff --git a/test/test_file_utils.py b/test/test_file_utils.py index bc16939a2..e84e00da4 100644 --- a/test/test_file_utils.py +++ b/test/test_file_utils.py @@ -1,12 +1,13 @@ -from kernel_tuner.file_utils import store_output_file, store_metadata_file, output_file_schema -from kernel_tuner.util import delete_temp_file -from .test_integration 
 import fake_results
-from .test_runners import env, cache_filename, tune_kernel
+import json
 
 import pytest
-import json
 from jsonschema import validate
 
+from kernel_tuner.file_utils import output_file_schema, store_metadata_file, store_output_file
+from kernel_tuner.util import delete_temp_file
+
+from .test_runners import cache_filename, env, tune_kernel  # noqa: F401
+
 
 def test_store_output_file(env):
     # setup variables
diff --git a/test/test_hip_functions.py b/test/test_hip_functions.py
index ce3eb0642..df0893788 100644
--- a/test/test_hip_functions.py
+++ b/test/test_hip_functions.py
@@ -1,15 +1,15 @@
-import numpy as np
 import ctypes
-from .context import skip_if_no_pyhip
-from collections import OrderedDict
+import numpy as np
 import pytest
-import kernel_tuner
+
 from kernel_tuner import tune_kernel
 from kernel_tuner.backends import hip as kt_hip
-from kernel_tuner.core import KernelSource, KernelInstance
+from kernel_tuner.core import KernelInstance, KernelSource
 
-try:
+from .context import skip_if_no_pyhip
+
+try:
     from pyhip import hip, hiprtc
     hip_present = True
 except ImportError:
@@ -33,7 +33,7 @@ def env():
     n = np.int32(size)
     args = [c, a, b, n]
 
-    tune_params = OrderedDict()
+    tune_params = dict()
     tune_params["block_size_x"] = [128 + 64 * i for i in range(15)]
 
     return ["vector_add", kernel_string, size, args, tune_params]
@@ -64,7 +64,7 @@ def __getitem__(self, key):
                                  ctypes.c_int(a),
                                  b.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                                  ctypes.c_bool(c))
-    
+
     assert(gpu_args[1] == argListStructure[1])
     assert(gpu_args[3] == argListStructure[3])
@@ -141,7 +141,7 @@ def test_copy_constant_memory_args():
     output = np.full(100, 0).astype(np.float32)
 
     gpu_args = dev.ready_argument_list([output])
-    
+
     threads = (100, 1, 1)
     grid = (1, 1, 1)
     dev.run_kernel(kernel, gpu_args, threads, grid)
diff --git a/test/test_hyper.py b/test/test_hyper.py
index b6ac83f61..9d1dc55df 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -1,13 +1,11 @@
-from collections import OrderedDict
-
 from kernel_tuner.hyper import tune_hyper_params
 
-from .test_runners import env, cache_filename
+from .test_runners import cache_filename, env  # noqa: F401
 
 
 def test_hyper(env):
-    hyper_params = OrderedDict()
+    hyper_params = dict()
     hyper_params["popsize"] = [5]
     hyper_params["maxiter"] = [5, 10]
     hyper_params["method"] = ["uniform"]
 
diff --git a/test/test_observers.py b/test/test_observers.py
index b4b55041c..d881fed74 100644
--- a/test/test_observers.py
+++ b/test/test_observers.py
@@ -1,12 +1,11 @@
-import pytest
 import kernel_tuner
 from kernel_tuner.observers.nvml import NVMLObserver
 from kernel_tuner.observers.observer import BenchmarkObserver
 
 from .context import skip_if_no_pycuda, skip_if_no_pynvml
-from .test_runners import env
+from .test_runners import env  # noqa: F401
 
 
 @skip_if_no_pycuda
diff --git a/test/test_opencl_functions.py b/test/test_opencl_functions.py
index de370ae53..644c5dc08 100644
--- a/test/test_opencl_functions.py
+++ b/test/test_opencl_functions.py
@@ -1,11 +1,9 @@
-from collections import OrderedDict
-
-import pytest
 import numpy as np
+import pytest
 
 import kernel_tuner
 from kernel_tuner.backends import opencl
-from kernel_tuner.core import KernelSource, KernelInstance
+from kernel_tuner.core import KernelInstance, KernelSource
 
 from .context import skip_if_no_opencl
 
@@ -88,7 +86,7 @@ def env():
     n = np.int32(size)
     args = [c, a, b, n]
 
-    tune_params = OrderedDict()
+    tune_params = dict()
     tune_params["block_size_x"] = [32, 64, 128]
 
     return ["vector_add", kernel_string, size, args, tune_params]
diff --git a/test/test_runners.py b/test/test_runners.py
index cb0e03c7a..527c1d252 100644
--- a/test/test_runners.py
+++ b/test/test_runners.py
@@ -1,12 +1,11 @@
 import os
 import time
-from collections import OrderedDict
 
 import numpy as np
 import pytest
 
-from kernel_tuner import util, tune_kernel, core
-from kernel_tuner.interface import Options, _kernel_options, _device_options, _tuning_options
+from kernel_tuner import core, tune_kernel, util
+from kernel_tuner.interface import Options, _device_options, _kernel_options, _tuning_options
 from kernel_tuner.runners.sequential import SequentialRunner
 
 from .context import skip_if_no_pycuda
@@ -33,7 +32,7 @@ def env():
     n = np.int32(size)
     args = [c, a, b, n]
 
-    tune_params = OrderedDict()
+    tune_params = dict()
     tune_params["block_size_x"] = [128 + 64 * i for i in range(15)]
 
     return ["vector_add", kernel_string, size, args, tune_params]
@@ -262,7 +261,7 @@ def test_runner(env):
     iterations = 7
     verbose = False
     objective = "GFLOP/s"
-    metrics = OrderedDict({objective: lambda p: 1})
+    metrics = dict({objective: lambda p: 1})
 
     opts = locals()
     kernel_options = Options([(k, opts.get(k, None)) for k in _kernel_options.keys()])
diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 2a94a5059..0afe31aac 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -1,24 +1,24 @@
 from __future__ import print_function
+
-from collections import OrderedDict
-from random import randrange
 from math import ceil
+from random import randrange
 
 try:
     from mock import patch
 except ImportError:
     from unittest.mock import patch
 
+import numpy as np
+from constraint import ExactSumConstraint, FunctionConstraint
+
 from kernel_tuner.interface import Options
 from kernel_tuner.searchspace import Searchspace
 
-from constraint import ExactSumConstraint, FunctionConstraint
-import numpy as np
-
 max_threads = 1024
 value_error_expectation_message = "Expected a ValueError to be raised"
 
 # 9 combinations without restrictions
-simple_tune_params = OrderedDict()
+simple_tune_params = dict()
 simple_tune_params["x"] = [1, 1.5, 2, 3]
 simple_tune_params["y"] = [4, 5.5]
 simple_tune_params["z"] = ["string_1", "string_2"]
@@ -28,7 +28,7 @@
 
 # 3.1 million combinations, of which 10600 pass the restrictions
 num_layers = 42
-tune_params = OrderedDict()
+tune_params = dict()
 tune_params["gpu1"] = list(range(num_layers))
 tune_params["gpu2"] = list(range(num_layers))
 tune_params["gpu3"] = list(range(num_layers))
@@ -48,20 +48,20 @@ def min_func(gpu1, gpu2, gpu3, gpu4):
 searchspace = Searchspace(tune_params, restrict, max_threads)
 
 # 74088 combinations intended to test whether sorting works
-sort_tune_params = OrderedDict()
+sort_tune_params = dict()
 sort_tune_params["gpu1"] = list(range(num_layers))
 sort_tune_params["gpu2"] = list(range(num_layers))
 sort_tune_params["gpu3"] = list(range(num_layers))
 searchspace_sort = Searchspace(sort_tune_params, [], max_threads)
 
 
 def test_size():
-    """test that the searchspace after applying restrictions is the expected size"""
+    """Test that the searchspace after applying restrictions is the expected size."""
     assert simple_searchspace.size == 12
     assert searchspace.size == 10660
 
 
 def test_internal_representation():
-    """test that the list and dict representations match in size, type and elements"""
+    """Test that the list and dict representations match in size, type and elements."""
     assert searchspace.size == len(searchspace.list)
     assert searchspace.size == len(searchspace.get_list_dict().keys())
     assert isinstance(searchspace.list[0], tuple)
@@ -71,7 +71,7 @@ def test_internal_representation():
 
 
 def test_sort():
-    """test that the sort searchspace option works as expected"""
+    """Test that the sort searchspace option works as expected."""
     simple_searchspace_sort = Searchspace(
         simple_tuning_options.tune_params,
         simple_tuning_options.restrictions,
@@ -109,7 +109,7 @@ def test_sort():
 
 
 def test_sort_reversed():
-    """test that the sort searchspace option with the sort_last_param_first option enabled works as expected"""
+    """Test that the sort searchspace option with the sort_last_param_first option enabled works as expected."""
     simple_searchspace_sort_reversed = Searchspace(
         simple_tuning_options.tune_params,
         simple_tuning_options.restrictions,
@@ -147,7 +147,7 @@ def test_sort_reversed():
 
 
 def test_index_lookup():
-    """test that index lookups are consistent for ~1% of the searchspace"""
+    """Test that index lookups are consistent for ~1% of the searchspace."""
     size = searchspace.size
     for _ in range(ceil(size / 100)):
         random_index = randrange(0, size)
@@ -157,7 +157,7 @@ def test_index_lookup():
 
 
 def test_param_index_lookup():
-    """test the parameter index lookup for a parameter config is as expected"""
+    """Test the parameter index lookup for a parameter config is as expected."""
     first = tuple([1, 4, "string_1"])
     last = tuple([3, 5.5, "string_2"])
     assert simple_searchspace.get_param_indices(first) == (0, 0, 0)
@@ -165,7 +165,7 @@
 
 
 def test_random_sample():
-    """test whether the random sample indices exists and are unique, and if it throws an error for too many samples"""
+    """Test whether the random sample indices exists and are unique, and if it throws an error for too many samples."""
     random_sample_indices = searchspace.get_random_sample_indices(100)
     assert len(random_sample_indices) == 100
     for index in random_sample_indices:
@@ -222,7 +222,7 @@ def __test_neighbors(param_config: tuple, expected_neighbors: list, neighbor_met
 
 
 def test_neighbors_hamming():
-    """test whether the neighbors with Hamming distance are as expected"""
+    """Test whether the neighbors with Hamming distance are as expected."""
     test_config = tuple([1, 4, "string_1"])
     expected_neighbors = [
         (2, 4, "string_1"),
@@ -234,7 +234,7 @@
 
 
 def test_neighbors_strictlyadjacent():
-    """test whether the strictly adjacent neighbors are as expected"""
+    """Test whether the strictly adjacent neighbors are as expected."""
     test_config = tuple([1, 4, "string_1"])
     expected_neighbors = [
         (1, 5.5, "string_2"),
@@ -246,7 +246,7 @@
 
 
 def test_neighbors_adjacent():
-    """test whether the adjacent neighbors are as expected"""
+    """Test whether the adjacent neighbors are as expected."""
     test_config = tuple([1, 4, "string_1"])
     expected_neighbors = [
         (2, 5.5, "string_2"),
@@ -262,7 +262,7 @@
 
 
 def test_neighbors_fictious():
-    """test whether the neighbors are as expected for a fictious parameter configuration (i.e. not existing in the search space due to restrictions)"""
+    """Test whether the neighbors are as expected for a fictious parameter configuration (i.e. not existing in the search space due to restrictions)."""
     test_config = tuple([1.5, 4, "string_1"])
     expected_neighbors_hamming = [
         (1, 4, "string_1"),
@@ -288,7 +288,7 @@ def test_neighbors_fictious():
 
 
 def test_neighbors_cached():
-    """test whether retrieving a set of neighbors twice returns the cached version"""
+    """Test whether retrieving a set of neighbors twice returns the cached version."""
     simple_searchspace_duplicate = Searchspace(
         simple_tuning_options.tune_params,
         simple_tuning_options.restrictions,
@@ -306,7 +306,7 @@
 
 
 def test_param_neighbors():
-    """test whether for a given parameter configuration and index the correct neighboring parameters are returned"""
+    """Test whether for a given parameter configuration and index the correct neighboring parameters are returned."""
     test_config = tuple([1.5, 4, "string_1"])
     expected_neighbors = [[1, 2], [5.5], ["string_2"]]
 
@@ -320,7 +320,7 @@
 
 @patch("kernel_tuner.searchspace.choice", lambda x: x[0])
 def test_order_param_configs():
-    """test whether the ordering of parameter configurations according to parameter index happens as expected"""
+    """Test whether the ordering of parameter configurations according to parameter index happens as expected."""
     test_order = [1, 2, 0]
     test_config = tuple([1, 4, "string_1"])
     expected_order = [
diff --git a/test/test_util_functions.py b/test/test_util_functions.py
index 7547b0a97..2a48eb58e 100644
--- a/test/test_util_functions.py
+++ b/test/test_util_functions.py
@@ -3,7 +3,6 @@
 import json
 import os
 import warnings
-from collections import OrderedDict
 
 import numpy as np
 import pytest
@@ -161,7 +160,7 @@ def test_prepare_kernel_string():
     assert output == expected
 
     # Check custom defines
-    defines = OrderedDict(foo=1, bar="custom", baz=lambda config: config["is"] * 5)
+    defines = dict(foo=1, bar="custom", baz=lambda config: config["is"] * 5)
     _, output = prepare_kernel_string("this", kernel, params, grid, threads, block_size_names, "", defines)
 
     expected = "#define foo 1\n" "#define bar custom\n" "#define baz 40\n" "#line 1\n" "this is a weird kernel"
@@ -600,7 +599,7 @@ def assert_open_cachefile_is_correctly_parsed(cache):
 
 def test_process_metrics():
     params = {"x": 15, "b": 12}
-    metrics = OrderedDict()
+    metrics = dict()
     metrics["y"] = lambda p: p["x"]
 
     # test if lambda function is correctly evaluated
@@ -615,19 +614,19 @@
 
     # test if composability works correctly
     params = {"x": 15, "b": 12}
-    metrics = OrderedDict()
+    metrics = dict()
     metrics["y"] = "x"
     metrics["z"] = "y"
     params = process_metrics(params, metrics)
     assert params["z"] == params["x"]
 
-    # test ValueError is raised when metrics is not an OrderedDict
+    # test ValueError is raised when metrics is not a dictionary
     with pytest.raises(ValueError):
-        params = process_metrics(params, {})
+        params = process_metrics(params, list())
 
     # test ValueError is raised when b already exists in params
     params = {"x": 15, "b": 12}
-    metrics = OrderedDict()
+    metrics = dict()
     metrics["b"] = "x"
     with pytest.raises(ValueError):
         params = process_metrics(params, metrics)