Merge pull request #219 from KernelTuner/issue210
Minor set of fixes, addressing mainly Issue #210
isazi authored Oct 4, 2023
2 parents dd2787d + e620097 commit 9955ab3
Showing 1 changed file with 21 additions and 26 deletions.
47 changes: 21 additions & 26 deletions kernel_tuner/core.py
@@ -1,28 +1,27 @@
 """ Module for grouping the core functionality needed by most runners """
 
-import time
-from collections import namedtuple
 import logging
 import re
-import numpy as np
+import time
+from collections import namedtuple
+
+import numpy as np
 
 try:
     import cupy as cp
 except ImportError:
     cp = np
 
-import kernel_tuner.util as util
 from kernel_tuner.accuracy import Tunable
-from kernel_tuner.observers.nvml import NVMLObserver
-from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver
-from kernel_tuner.backends.c import CFunctions
 from kernel_tuner.backends.cupy import CupyFunctions
-from kernel_tuner.backends.pycuda import PyCudaFunctions
-from kernel_tuner.backends.hip import HipFunctions
 from kernel_tuner.backends.nvcuda import CudaFunctions
-from kernel_tuner.backends.opencl import OpenCLFunctions
+from kernel_tuner.backends.c import CFunctions
+from kernel_tuner.backends.opencl import OpenCLFunctions
+from kernel_tuner.backends.hip import HipFunctions
+import kernel_tuner.util as util
+from kernel_tuner.backends.pycuda import PyCudaFunctions
+from kernel_tuner.observers.nvml import NVMLObserver
+from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver
 
 try:
     import torch
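Worth noting in the unchanged context above: when CuPy is not installed, `cp` is bound to NumPy, so later `cp.*` calls still resolve. A minimal sketch of the same optional-dependency pattern, with illustrative values:

import numpy as np

try:
    import cupy as cp
except ImportError:
    # Degrade gracefully: the same name now resolves to NumPy.
    cp = np

# Downstream code sticks to the API subset the two libraries share,
# so it runs whether or not a GPU stack is available.
x = cp.asarray([1.0, 2.0, 3.0])
print(cp.sum(x))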
@@ -245,7 +244,7 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         elif lang.upper() == "HIP":
             dev = HipFunctions(device, compiler_options=compiler_options, iterations=iterations, observers=observers)
         else:
-            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
+            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
 
         #look for NVMLObserver in observers, if present, enable special tunable parameters through nvml
         self.use_nvml = False
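The corrected message now lists every language this dispatch chain actually handles. For orientation, the `lang` value comes from the user-facing `tune_kernel` call; a short sketch, not part of this commit and with an illustrative kernel, of how the `HipFunctions` branch above is reached:

import numpy as np
import kernel_tuner

kernel_src = """
__global__ void vector_add(float *c, const float *a, const float *b, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) c[i] = a[i] + b[i];
}
"""

n = 1 << 20
a = np.random.randn(n).astype(np.float32)
b = np.random.randn(n).astype(np.float32)
c = np.zeros_like(a)

# lang="HIP" makes DeviceInterface pick HipFunctions; "CUDA", "CUPY",
# "NVCUDA", "OpenCL", and "C" select the backends imported further up.
results, env = kernel_tuner.tune_kernel(
    "vector_add", kernel_src, n, [c, a, b, np.int32(n)],
    {"block_size_x": [64, 128, 256]},
    lang="HIP",
)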
@@ -529,26 +528,22 @@ def compile_kernel(self, instance, verbose):
             raise e
         return func
 
+    @staticmethod
+    def preprocess_gpu_arguments(old_arguments, params):
+        """ Get a flat list of arguments based on the configuration given by `params` """
+        return _preprocess_gpu_arguments(old_arguments, params)
+
     def copy_shared_memory_args(self, smem_args):
-        """adds shared memory arguments to the most recently compiled module, if using CUDA"""
-        if self.lang == "CUDA":
-            self.dev.copy_shared_memory_args(smem_args)
-        else:
-            raise RuntimeError("Error cannot copy shared memory arguments when language is not CUDA")
+        """adds shared memory arguments to the most recently compiled module"""
+        self.dev.copy_shared_memory_args(smem_args)
 
     def copy_constant_memory_args(self, cmem_args):
-        """adds constant memory arguments to the most recently compiled module, if using CUDA"""
-        if self.lang == "CUDA":
-            self.dev.copy_constant_memory_args(cmem_args)
-        else:
-            raise RuntimeError("Error cannot copy constant memory arguments when language is not CUDA")
+        """adds constant memory arguments to the most recently compiled module"""
+        self.dev.copy_constant_memory_args(cmem_args)
 
     def copy_texture_memory_args(self, texmem_args):
-        """adds texture memory arguments to the most recently compiled module, if using CUDA"""
-        if self.lang == "CUDA":
-            self.dev.copy_texture_memory_args(texmem_args)
-        else:
-            raise RuntimeError("Error cannot copy texture memory arguments when language is not CUDA")
+        """adds texture memory arguments to the most recently compiled module"""
+        self.dev.copy_texture_memory_args(texmem_args)
 
     def create_kernel_instance(self, kernel_source, kernel_options, params, verbose):
         """create kernel instance from kernel source, parameters, problem size, grid divisors, and so on"""
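The new `preprocess_gpu_arguments` static method publishes the module-level `_preprocess_gpu_arguments` helper, which resolves configuration-dependent arguments into concrete ones for a single point in the search space. A hedged sketch of what that flattening means, assuming the `Tunable(param_key, arrays)` mapping from `kernel_tuner.accuracy` and illustrative data:

import numpy as np
from kernel_tuner.accuracy import Tunable
from kernel_tuner.core import DeviceInterface

x = np.random.randn(1024)

# A Tunable defers the choice of argument until a configuration is known:
# the "precision" tunable parameter selects which array is actually passed.
arguments = [
    Tunable("precision", {"float": x.astype(np.float32),
                          "double": x.astype(np.float64)}),
    np.int32(x.size),
]

# For one configuration, the Tunable is replaced by the array registered
# under params["precision"]; plain arguments pass through unchanged.
flat_args = DeviceInterface.preprocess_gpu_arguments(arguments, {"precision": "float"})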

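With the CUDA-only guards gone, the three copy_*_memory_args methods forward unconditionally to the selected backend, which can raise its own error if it lacks the feature; that is what lets HIP, and any future backend, accept these arguments. A sketch of the user-facing side through the documented `cmem_args` option of `tune_kernel`, with an illustrative kernel:

import numpy as np
import kernel_tuner

kernel_src = """
__constant__ float coeff[4];

__global__ void scale(float *out, const float *in, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) out[i] = coeff[0] * in[i];
}
"""

n = 1 << 16
inp = np.random.randn(n).astype(np.float32)
out = np.zeros_like(inp)

# cmem_args maps the __constant__ symbol name to a host array; after this
# commit DeviceInterface.copy_constant_memory_args hands it to whichever
# backend was chosen instead of rejecting every language except CUDA.
results, env = kernel_tuner.tune_kernel(
    "scale", kernel_src, n, [out, inp, np.int32(n)],
    {"block_size_x": [128, 256]},
    cmem_args={"coeff": np.array([2.0, 0.0, 0.0, 0.0], dtype=np.float32)},
)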