Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable cl_khr_fp16 for the Environments unsupporting cl_khr_fp16 #266

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clpy/backend/compiler.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ cpdef function.Module compile_with_cache(
str source, tuple options=(), arch=None, cache_dir=None,
extra_source=None):
options += (' -cl-fp32-correctly-rounded-divide-sqrt', )
if clpy.backend.opencl.env.supports_cl_khr_fp16():
options += (' -D__CLPY_ENABLE_CL_KHR_FP16', )
optionStr = functools.reduce(operator.add, options)

device = clpy.backend.opencl.env.get_device()
Expand Down
2 changes: 2 additions & 0 deletions clpy/backend/opencl/env.pxd
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
include "common_decl.pxi"
from libcpp cimport bool

cdef cl_context get_context()
cdef cl_command_queue get_command_queue()
cpdef int get_device_id()
cpdef set_device_id(int id)
cdef cl_device_id* get_devices()
cdef cl_device_id get_device()
cpdef bool supports_cl_khr_fp16() except *
32 changes: 32 additions & 0 deletions clpy/backend/opencl/env.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ from clpy.backend.opencl cimport api
from cython.view cimport array as cython_array

from libc.stdlib cimport malloc
from libcpp cimport bool

cdef interpret_versionstr(versionstr):
version_detector = re.compile('''OpenCL (\d+)\.(\d+)''')
Expand Down Expand Up @@ -72,6 +73,33 @@ cdef void check_device_version(cl_device_id device, required_version) except *:
if not interpret_versionstr(versionstr) >= required_version:
raise RuntimeError("Device's OpenCL version must be >= 1.2")

cdef cl_bool check_device_extension(cl_device_id device,
str extension) except *:
cdef size_t param_value_size
api.GetDeviceInfo(
device,
CL_DEVICE_EXTENSIONS,
0,
NULL,
&param_value_size)

cdef cython_array extensions_buffer =\
cython_array(shape=(param_value_size,),
itemsize=sizeof(char),
format='b')
api.GetDeviceInfo(
device,
CL_DEVICE_EXTENSIONS,
param_value_size,
<void*>extensions_buffer.data,
&param_value_size)

extensions =\
extensions_buffer.data[:param_value_size]\
.decode(locale.getpreferredencoding())

return CL_TRUE if extension in extensions else CL_FALSE
LWisteria marked this conversation as resolved.
Show resolved Hide resolved


##########################################
# Initialization
Expand Down Expand Up @@ -164,6 +192,10 @@ cdef cl_device_id get_device():
global __current_device_id
return __devices[__current_device_id]

cpdef bool supports_cl_khr_fp16() except *:
global __current_device_id
return check_device_extension(get_device(), 'cl_khr_fp16') == CL_TRUE


def release():
"""Release command_queue and context automatically."""
Expand Down
4 changes: 3 additions & 1 deletion clpy/backend/ultima/compiler.pxd
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
cpdef str exec_ultima(str source, str _clpy_header_include=*)
cpdef str exec_ultima(str source,
str _clpy_header_include=*,
tuple _options=*)
12 changes: 9 additions & 3 deletions clpy/backend/ultima/compiler.pyx
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import functools
import operator
import os
import subprocess
import tempfile
Expand All @@ -21,7 +23,9 @@ class TempFile(object):
os.remove(self.fn)


cpdef str exec_ultima(str source, str _clpy_header_include=''):
cpdef str exec_ultima(str source,
str _clpy_header_include='',
tuple _options=('',)):
kernel_arg_size_t_code = 'typedef ' \
+ clpy.backend.opencl.utility.typeof_size() + ' __kernel_arg_size_t;\n'
source = kernel_arg_size_t_code + _clpy_header_include + '\n' \
Expand All @@ -35,15 +39,17 @@ cpdef str exec_ultima(str source, str _clpy_header_include=''):

with TempFile(filename, source) as tf:
root_dir = os.path.join(clpy.__path__[0], "..")
proc = subprocess.Popen('{} {} -- -I {}'
proc = subprocess.Popen('{} {} -- -I {} {}'
.format(os.path.join(root_dir,
"ultima",
"ultima"),
filename,
os.path.join(root_dir,
"clpy",
"core",
"include"))
"include"),
functools.reduce(operator.add,
_options))
.strip().split(" "),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
Expand Down
8 changes: 7 additions & 1 deletion clpy/core/carray.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,13 @@ cpdef function.Module compile_with_cache(
str source, tuple options=(), arch=None, cache_dir=None):
kernel_arg_size_t_code = 'typedef ' \
+ clpy.backend.opencl.utility.typeof_size() + ' __kernel_arg_size_t;\n'
source = clpy.backend.ultima.exec_ultima(source, _clpy_header)
if clpy.backend.opencl.env.supports_cl_khr_fp16():
ultima_options = (' -D__CLPY_ENABLE_CL_KHR_FP16', )
else:
ultima_options = ('', )
source = clpy.backend.ultima.exec_ultima(source,
_clpy_header,
ultima_options)

extra_source = _get_header_source()
options += ('-I%s' % _get_header_dir_path(),)
Expand Down
2 changes: 2 additions & 0 deletions clpy/core/include/cl_stub.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ typedef unsigned long ulong;
typedef unsigned long size_t;
typedef long ptrdiff_t;

#ifdef __CLPY_ENABLE_CL_KHR_FP16
typedef float half;
typedef half __clpy__half;
#endif
#define half __clpy__half

__attribute__((annotate("clpy_no_mangle"))) static unsigned int atomic_cmpxchg(volatile __global unsigned int*, unsigned int, unsigned int);
Expand Down
97 changes: 64 additions & 33 deletions clpy/core/include/clpy/carray.clh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once
#ifdef __CLPY_ENABLE_CL_KHR_FP16
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why didn't you just remove cl_khr_fp16 for all environments but disable it only if it doesn't support it?

Do you have any definite disadvantages or problems when you treat half as int16 on a cl_khr_fp16 machine? If yes, please leave comments on the code, otherwise (yet) please make the code simple.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that it's better in performance to use half implemented as hardware. If we will support all half operations, the performance given from native half in supported environments is more important.

Copy link
Member

@LWisteria LWisteria Feb 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we will support all half operations

We have not proven the ability neither decided to do that, must continue to discuss on #265 .

Then, until to decide it, the half performance is not important even if it has native half support.
Adding cl_khr_fp16 is now too superfluous to just disable it #264 .

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

must continue to discuss on #265 .

Sure.

Adding cl_khr_fp16 is now too superfluous to just disable it #264 .

So, you want to just remove cl_khr_fp16 whether the extension is available or not, right?

#pragma OPENCL EXTENSION cl_khr_fp16: enable
#endif

// TODO: Implement common functions in OpenCL C
#if 0
Expand Down Expand Up @@ -530,54 +532,83 @@ static void __clpy_end_print_out() __attribute__((annotate("clpy_end_print_out")


#ifdef __ULTIMA
__attribute__((annotate("clpy_no_mangle"))) static half convert_float_to_half(float x);
__attribute__((annotate("clpy_no_mangle"))) static ushort convert_float_to_half_ushort(float x);
__attribute__((annotate("clpy_no_mangle"))) static float convert_half_ushort_to_float(ushort x);
#else
#include "fp16.clh"
#endif

#ifdef __CLPY_ENABLE_CL_KHR_FP16

#ifndef __ULTIMA

typedef half __clpy__half;

#endif

#ifdef __ULTIMA
__attribute__((annotate("clpy_no_mangle"))) static half clpy_nextafter_fp16(half x1, half x2);
__attribute__((annotate("clpy_no_mangle"))) static __clpy__half clpy_nextafter_fp16(__clpy__half x1, __clpy__half x2){
return nextafter(x1, x2);
}

__attribute__((annotate("clpy_no_mangle"))) static __clpy__half convert_float_to_half(float x){
const ushort h = convert_float_to_half_ushort(x);
return *(const half*)&h;
}

#else
static int isnan_fp16(half x){
unsigned short const* x_raw = (unsigned short const*)&x;
return (*x_raw & 0x7c00u) == 0x7c00u && (*x_raw & 0x03ffu) != 0x0000u;
}
static int isfinite_fp16(half x){
unsigned short const* x_raw = (unsigned short const*)&x;
return (*x_raw & 0x7c00u) != 0x7c00u;
}
static int iszero_fp16(half x){
unsigned short const* x_raw = (unsigned short const*)&x;
return (*x_raw & 0x7fffu) == 0;
}
static int eq_nonan_fp16(half x1, half x2){
unsigned short const* x1_raw = (unsigned short const*)&x1;
unsigned short const* x2_raw = (unsigned short const*)&x2;
return (*x1_raw == *x2_raw || ((*x1_raw | *x2_raw) & 0x7fff) == 0);
}
static half clpy_nextafter_fp16(half x1, half x2){
unsigned short const* x1_raw = (unsigned short const*)&x1;
unsigned short const* x2_raw = (unsigned short const*)&x2;

#ifdef __ULTIMA
static void __clpy_begin_print_out() __attribute__((annotate("clpy_begin_print_out")));

typedef struct __attribute__((packed)) __attribute__((aligned(2))) __clpy__half_{
ushort v;
__clpy__half_() = default;
__clpy__half_(float f):v{convert_float_to_half_ushort(f)}{}
operator float()const{return convert_half_ushort_to_float(v);}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand Ultima can solve the problem.

But I don't understand why you need Ultima. As I told you in the some old issue, you tend to solve everything by using Ultima but we must avoid it unless there's no solution without Ultima. Minimize Ultima to minimize maintenance difficulties.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I need the changes in this PR to ultima only for passing ultima_tests .
If we decide to change the behavior of half identifier, we doesn't need it.
However, we will need it to support all half operations, so I proposed this changes.

Copy link
Member

@LWisteria LWisteria Feb 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we decide to change the behavior of half

As of #266 (comment), no half variable will be constructed by users. So you don't need Ultima.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As of #266 (comment), no half variable will be constructed by users.

Until to decide to support all half operations . OK.

Then, when you

decide to change the behavior of half identifier

? Again, we need to decide to change the behavior of half identifier from current ClPy to disable cl_khr_fp16 without Ultima. It means that we need to fix or disable some tests. If you have already decided it in your own, you should indicate your opinion here.

}__clpy__half;

static int isnan_fp16(__clpy__half x){
return (x.v & 0x7c00u) == 0x7c00u && (x.v & 0x03ffu) != 0x0000u;
}
static int isfinite_fp16(__clpy__half x){
return (x.v & 0x7c00u) != 0x7c00u;
}
static int iszero_fp16(__clpy__half x){
return (x.v & 0x7fffu) == 0;
}
static int eq_nonan_fp16(__clpy__half x1, __clpy__half x2){
return (x1.v == x2.v || ((x1.v | x2.v) & 0x7fff) == 0);
}
__attribute__((annotate("clpy_no_mangle"))) static __clpy__half clpy_nextafter_fp16(__clpy__half x1, __clpy__half x2){
unsigned short ret_raw_;

if (!isfinite_fp16(x1) || isnan_fp16(x2)){
ret_raw_ = 0x7e00u; // NaN in fp16
}else if(eq_nonan_fp16(x1, x2)){
ret_raw_ = *x1_raw;
ret_raw_ = x1.v;
}else if(iszero_fp16(x1)){
ret_raw_ = (*x2_raw & 0x8000u) + 1;
}else if(!(*x1_raw & 0x8000u)){
if (*(short const*)x1_raw > *(short const*)x2_raw){
ret_raw_ = *x1_raw - 1;
ret_raw_ = (x2.v & 0x8000u) + 1;
}else if(!(x1.v & 0x8000u)){
if (*(short const*)&x1.v > *(short const*)&x2.v){
ret_raw_ = x1.v - 1;
}else{
ret_raw_ = *x1_raw + 1;
ret_raw_ = x1.v + 1;
}
}else if(!(*x2_raw & 0x8000u) || (*x1_raw & 0x7fffu) > (*x2_raw & 0x7fffu)) {
ret_raw_ = *x1_raw - 1;
}else if(!(x2.v & 0x8000u) || (x1.v & 0x7fffu) > (x2.v & 0x7fffu)) {
ret_raw_ = x1.v - 1;
} else {
ret_raw_ = *x1_raw + 1;
ret_raw_ = x1.v + 1;
}
return *(half*)&ret_raw_;
__clpy__half ret;
ret.v = ret_raw_;
return ret;
}

__attribute__((annotate("clpy_no_mangle"))) static __clpy__half convert_float_to_half(float x){
__clpy__half ret(x);
return ret;
}

static void __clpy_end_print_out() __attribute__((annotate("clpy_end_print_out")));
#endif
#endif
25 changes: 23 additions & 2 deletions clpy/core/include/clpy/fp16.clh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

static half convert_float_to_half(float f) {
static ushort convert_float_to_half_ushort(float f) {
const float scale_to_inf = 0x1.0p+112f;
const float scale_to_zero = 0x1.0p-110f;
float base = (fabs(f) * scale_to_inf) * scale_to_zero;
Expand All @@ -32,5 +32,26 @@ static half convert_float_to_half(float f) {
const uint mantissa_bits = bits & (uint)(0x00000FFF);
const uint nonsign = exp_bits + mantissa_bits;
const ushort ret = ((sign >> 16) | (shl1_w > (uint)(0xFF000000) ? (ushort)(0x7E00) : nonsign));
return *(const half*)&ret;
return ret;
}

static inline float convert_half_ushort_to_float(ushort h) {
const uint w = (uint) h << 16;
const uint sign = w & (uint)0x80000000;
const uint two_w = w + w;

const uint exp_offset = (uint)0xE0 << 23;
const float exp_scale = 0x1.0p-112f;
const uint normalized_value_ = (two_w >> 4) + exp_offset;
const float normalized_value = *(const float*)&normalized_value_ * exp_scale;

const uint magic_mask = (uint)(126u) << 23;
const float magic_bias = .5f;
const uint denormalized_value_ = (two_w >> 17) | magic_mask;
const float denormalized_value = *(const float*)&denormalized_value_ - magic_bias;

const uint denormalized_cutoff = (uint)1u << 27;
const uint result = sign |
*(const uint*)(two_w < denormalized_cutoff ? &denormalized_value : &normalized_value);
return *(const float*)&result;
}
52 changes: 40 additions & 12 deletions tests/clpy_tests/opencl_tests/ultima_tests/test_half.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,35 @@
class TestUltimaHalfTrick(unittest.TestCase):

def test_type_half(self):
x = '''
supports_cl_khr_fp16 = clpy.backend.opencl.env.supports_cl_khr_fp16()
options = ('-D__CLPY_ENABLE_CL_KHR_FP16'
if supports_cl_khr_fp16
else '', )
x = clpy.backend.ultima.exec_ultima('',
'#include <cupy/carray.hpp>',
_options=options) + ('''
__clpy__half f()
{
__clpy__half a;
return (__clpy__half)(42.F);
__clpy__half a = 42.F;
return a;
}
'''
''' if supports_cl_khr_fp16 else '''
__clpy__half f()
{
__clpy__half a;constructor___clpy__half___left_paren____clpy__half_float__right_paren__(&a, 42.F);
return a;
}
''')[1:]
y = clpy.backend.ultima.exec_ultima(
'''
half f(){
half a;
return static_cast<half>(42.f);
half a = 42.f;
return a;
}
''')
self.assertEqual(x[1:], y)
''',
'#include <cupy/carray.hpp>',
_options=options)
self.assertEqual(x, y)

def test_variable_named_half(self):
x = '''
Expand Down Expand Up @@ -54,21 +68,35 @@ def test_argument_named_half(self):
self.assertEqual(x[1:], y)

def test_clpy_half(self):
x = '''
supports_cl_khr_fp16 = clpy.backend.opencl.env.supports_cl_khr_fp16()
options = ('-D__CLPY_ENABLE_CL_KHR_FP16'
if supports_cl_khr_fp16
else '', )
x = clpy.backend.ultima.exec_ultima('',
'#include <cupy/carray.hpp>',
_options=options) + ('''
void f()
{
__clpy__half half_ = 42.F;
int __clpy__half = half_;
}
'''
''' if supports_cl_khr_fp16 else '''
void f()
{
__clpy__half half_;constructor___clpy__half___left_paren____clpy__half_float__right_paren__(&half_, 42.F);
int __clpy__half = operatorfloat___clpy__half_(&half_);
}
''')[1:]
y = clpy.backend.ultima.exec_ultima(
'''
void f(){
__clpy__half half_ = 42.f;
int __clpy__half = half_;
}
''')
self.assertEqual(x[1:], y)
''',
'#include <cupy/carray.hpp>',
_options=options)
self.assertEqual(x, y)


if __name__ == "__main__":
Expand Down
Loading