From 396cc358f78a8a2c85f781414ddcc9a1b2aec811 Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Thu, 6 Feb 2020 02:41:02 +0900 Subject: [PATCH 1/8] Revert "disable member function" This reverts commit a1d49aa6042f06fc53835a1c81d9834971a27d9a. --- ultima/ultima.cpp | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/ultima/ultima.cpp b/ultima/ultima.cpp index 24041328861..144ad01ff3a 100644 --- a/ultima/ultima.cpp +++ b/ultima/ultima.cpp @@ -1466,7 +1466,41 @@ class stmt_visitor : public clang::StmtVisitor { } } } - throw std::runtime_error("current ultima doesn't support member function call."); + // If we have a conversion operator call only print the argument. + auto *MD = Node->getMethodDecl(); + if (MD && clang::isa(MD)) { + PrintExpr(Node->getImplicitObjectArgument()); + return; + } + auto Call = clang::cast(Node); + if(auto f = clang::dyn_cast(Call->getCalleeDecl())){ + auto it = func_name.find(f); + if(it != func_name.end()) + os << it->second; + else{ + PrintExpr(Call->getCallee()); + os << '_' << to_identifier(base_type->getAsCXXRecordDecl()->getName()); + if(auto list = f->getTemplateSpecializationArgs()){ + os << '_'; + print_template_arguments(list); + } + } + } + else + PrintExpr(Call->getCallee()); + os << '('; + os << '&'; + PrintExpr(base); + for (unsigned i = 0, e = Call->getNumArgs(); i != e; ++i) { + if (clang::isa(Call->getArg(i))) { + // Don't print any defaulted arguments + break; + } + + os << ", "; + PrintExpr(Call->getArg(i)); + } + os << ')'; } void VisitCXXNamedCastExpr(clang::CXXNamedCastExpr *Node) { From d02431899fab9658f60a86d704893a3fee2c9989 Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Thu, 6 Feb 2020 03:06:54 +0900 Subject: [PATCH 2/8] support user-defined conversion --- ultima/ultima.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ultima/ultima.cpp b/ultima/ultima.cpp index 144ad01ff3a..026acf84f99 100644 --- a/ultima/ultima.cpp +++ b/ultima/ultima.cpp @@ -1466,12 +1466,6 @@ class stmt_visitor : public clang::StmtVisitor { } } } - // If we have a conversion operator call only print the argument. - auto *MD = Node->getMethodDecl(); - if (MD && clang::isa(MD)) { - PrintExpr(Node->getImplicitObjectArgument()); - return; - } auto Call = clang::cast(Node); if(auto f = clang::dyn_cast(Call->getCalleeDecl())){ auto it = func_name.find(f); @@ -2738,7 +2732,7 @@ class decl_visitor : public clang::DeclVisitor{ parent_name = sv.to_identifier(parent_name); } - auto name = D->getNameInfo().getAsString(); + auto name = sv.to_identifier(D->getNameInfo().getAsString()); if(CDecl) name = "constructor"; From 324126bc9ef3a1fe68a1a2e7ac0643c166456424 Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Tue, 16 Apr 2019 15:46:22 +0900 Subject: [PATCH 3/8] implement clpy.backend.opencl.env.supports_cl_khr_fp16 --- clpy/backend/opencl/env.pxd | 2 ++ clpy/backend/opencl/env.pyx | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/clpy/backend/opencl/env.pxd b/clpy/backend/opencl/env.pxd index ee552729da1..b9080ab6d43 100644 --- a/clpy/backend/opencl/env.pxd +++ b/clpy/backend/opencl/env.pxd @@ -1,4 +1,5 @@ include "common_decl.pxi" +from libcpp cimport bool cdef cl_context get_context() cdef cl_command_queue get_command_queue() @@ -6,3 +7,4 @@ cpdef int get_device_id() cpdef set_device_id(int id) cdef cl_device_id* get_devices() cdef cl_device_id get_device() +cpdef bool supports_cl_khr_fp16() except * diff --git a/clpy/backend/opencl/env.pyx b/clpy/backend/opencl/env.pyx index fc3c57575b4..498c199a3b4 100644 --- a/clpy/backend/opencl/env.pyx +++ b/clpy/backend/opencl/env.pyx @@ -8,6 +8,7 @@ from clpy.backend.opencl cimport api from cython.view cimport array as cython_array from libc.stdlib cimport malloc +from libcpp cimport bool cdef interpret_versionstr(versionstr): version_detector = re.compile('''OpenCL (\d+)\.(\d+)''') @@ -72,6 +73,33 @@ cdef void check_device_version(cl_device_id device, required_version) except *: if not interpret_versionstr(versionstr) >= required_version: raise RuntimeError("Device's OpenCL version must be >= 1.2") +cdef cl_bool check_device_extension(cl_device_id device, + str extension) except *: + cdef size_t param_value_size + api.GetDeviceInfo( + device, + CL_DEVICE_EXTENSIONS, + 0, + NULL, + ¶m_value_size) + + cdef cython_array extensions_buffer =\ + cython_array(shape=(param_value_size,), + itemsize=sizeof(char), + format='b') + api.GetDeviceInfo( + device, + CL_DEVICE_EXTENSIONS, + param_value_size, + extensions_buffer.data, + ¶m_value_size) + + extensions =\ + extensions_buffer.data[:param_value_size]\ + .decode(locale.getpreferredencoding()) + + return CL_TRUE if extension in extensions else CL_FALSE + ########################################## # Initialization @@ -164,6 +192,10 @@ cdef cl_device_id get_device(): global __current_device_id return __devices[__current_device_id] +cpdef bool supports_cl_khr_fp16() except *: + global __current_device_id + return check_device_extension(get_device(), 'cl_khr_fp16') == CL_TRUE + def release(): """Release command_queue and context automatically.""" From 93b0e710d4740863844edacb3fc6db53d9d486b8 Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Wed, 5 Feb 2020 23:33:46 +0900 Subject: [PATCH 4/8] ultima supports options --- clpy/backend/ultima/compiler.pxd | 4 +++- clpy/backend/ultima/compiler.pyx | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/clpy/backend/ultima/compiler.pxd b/clpy/backend/ultima/compiler.pxd index 80d6f709bc7..011f92c6b83 100644 --- a/clpy/backend/ultima/compiler.pxd +++ b/clpy/backend/ultima/compiler.pxd @@ -1 +1,3 @@ -cpdef str exec_ultima(str source, str _clpy_header_include=*) +cpdef str exec_ultima(str source, + str _clpy_header_include=*, + tuple _options=*) diff --git a/clpy/backend/ultima/compiler.pyx b/clpy/backend/ultima/compiler.pyx index 00c2c419a78..4c05990823a 100644 --- a/clpy/backend/ultima/compiler.pyx +++ b/clpy/backend/ultima/compiler.pyx @@ -1,3 +1,5 @@ +import functools +import operator import os import subprocess import tempfile @@ -21,7 +23,9 @@ class TempFile(object): os.remove(self.fn) -cpdef str exec_ultima(str source, str _clpy_header_include=''): +cpdef str exec_ultima(str source, + str _clpy_header_include='', + tuple _options=('',)): kernel_arg_size_t_code = 'typedef ' \ + clpy.backend.opencl.utility.typeof_size() + ' __kernel_arg_size_t;\n' source = kernel_arg_size_t_code + _clpy_header_include + '\n' \ @@ -35,7 +39,7 @@ cpdef str exec_ultima(str source, str _clpy_header_include=''): with TempFile(filename, source) as tf: root_dir = os.path.join(clpy.__path__[0], "..") - proc = subprocess.Popen('{} {} -- -I {}' + proc = subprocess.Popen('{} {} -- -I {} {}' .format(os.path.join(root_dir, "ultima", "ultima"), @@ -43,7 +47,9 @@ cpdef str exec_ultima(str source, str _clpy_header_include=''): os.path.join(root_dir, "clpy", "core", - "include")) + "include"), + functools.reduce(operator.add, + _options)) .strip().split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE, From aa89256258f65a731b4d0fa73a55ac83568b7f70 Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Wed, 5 Feb 2020 22:47:20 +0900 Subject: [PATCH 5/8] define __CLPY_ENABLE_CL_KHR_FP16 when the device supports cl_khr_fp16 --- clpy/backend/compiler.pyx | 2 ++ clpy/core/carray.pxi | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clpy/backend/compiler.pyx b/clpy/backend/compiler.pyx index 35ef298657c..2fe01d1f377 100644 --- a/clpy/backend/compiler.pyx +++ b/clpy/backend/compiler.pyx @@ -9,6 +9,8 @@ cpdef function.Module compile_with_cache( str source, tuple options=(), arch=None, cache_dir=None, extra_source=None): options += (' -cl-fp32-correctly-rounded-divide-sqrt', ) + if clpy.backend.opencl.env.supports_cl_khr_fp16(): + options += (' -D__CLPY_ENABLE_CL_KHR_FP16', ) optionStr = functools.reduce(operator.add, options) device = clpy.backend.opencl.env.get_device() diff --git a/clpy/core/carray.pxi b/clpy/core/carray.pxi index 42f55625564..bb4742bcd3a 100644 --- a/clpy/core/carray.pxi +++ b/clpy/core/carray.pxi @@ -130,7 +130,13 @@ cpdef function.Module compile_with_cache( str source, tuple options=(), arch=None, cache_dir=None): kernel_arg_size_t_code = 'typedef ' \ + clpy.backend.opencl.utility.typeof_size() + ' __kernel_arg_size_t;\n' - source = clpy.backend.ultima.exec_ultima(source, _clpy_header) + if clpy.backend.opencl.env.supports_cl_khr_fp16(): + ultima_options = (' -D__CLPY_ENABLE_CL_KHR_FP16', ) + else: + ultima_options = ('', ) + source = clpy.backend.ultima.exec_ultima(source, + _clpy_header, + ultima_options) extra_source = _get_header_source() options += ('-I%s' % _get_header_dir_path(),) From 909ec19b6f8f49c10d099d2fff162907636ab27d Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Tue, 16 Apr 2019 15:51:06 +0900 Subject: [PATCH 6/8] disable cl_khr_fp16 when __CLPY_ENABLE_CL_KHR_FP16 is not defined --- clpy/core/include/cl_stub.hpp | 2 + clpy/core/include/clpy/carray.clh | 97 ++++++++++++++++++++----------- clpy/core/include/clpy/fp16.clh | 25 +++++++- 3 files changed, 89 insertions(+), 35 deletions(-) diff --git a/clpy/core/include/cl_stub.hpp b/clpy/core/include/cl_stub.hpp index 564d63cf8b8..a6f242f0b0c 100644 --- a/clpy/core/include/cl_stub.hpp +++ b/clpy/core/include/cl_stub.hpp @@ -11,8 +11,10 @@ typedef unsigned long ulong; typedef unsigned long size_t; typedef long ptrdiff_t; +#ifdef __CLPY_ENABLE_CL_KHR_FP16 typedef float half; typedef half __clpy__half; +#endif #define half __clpy__half __attribute__((annotate("clpy_no_mangle"))) static unsigned int atomic_cmpxchg(volatile __global unsigned int*, unsigned int, unsigned int); diff --git a/clpy/core/include/clpy/carray.clh b/clpy/core/include/clpy/carray.clh index 7efa290d0d4..438c5000ae6 100644 --- a/clpy/core/include/clpy/carray.clh +++ b/clpy/core/include/clpy/carray.clh @@ -1,5 +1,7 @@ #pragma once +#ifdef __CLPY_ENABLE_CL_KHR_FP16 #pragma OPENCL EXTENSION cl_khr_fp16: enable +#endif // TODO: Implement common functions in OpenCL C #if 0 @@ -530,54 +532,83 @@ static void __clpy_end_print_out() __attribute__((annotate("clpy_end_print_out") #ifdef __ULTIMA -__attribute__((annotate("clpy_no_mangle"))) static half convert_float_to_half(float x); +__attribute__((annotate("clpy_no_mangle"))) static ushort convert_float_to_half_ushort(float x); +__attribute__((annotate("clpy_no_mangle"))) static float convert_half_ushort_to_float(ushort x); #else #include "fp16.clh" +#endif + +#ifdef __CLPY_ENABLE_CL_KHR_FP16 + +#ifndef __ULTIMA + typedef half __clpy__half; + #endif -#ifdef __ULTIMA -__attribute__((annotate("clpy_no_mangle"))) static half clpy_nextafter_fp16(half x1, half x2); +__attribute__((annotate("clpy_no_mangle"))) static __clpy__half clpy_nextafter_fp16(__clpy__half x1, __clpy__half x2){ + return nextafter(x1, x2); +} + +__attribute__((annotate("clpy_no_mangle"))) static __clpy__half convert_float_to_half(float x){ + const ushort h = convert_float_to_half_ushort(x); + return *(const half*)&h; +} + #else -static int isnan_fp16(half x){ - unsigned short const* x_raw = (unsigned short const*)&x; - return (*x_raw & 0x7c00u) == 0x7c00u && (*x_raw & 0x03ffu) != 0x0000u; -} -static int isfinite_fp16(half x){ - unsigned short const* x_raw = (unsigned short const*)&x; - return (*x_raw & 0x7c00u) != 0x7c00u; -} -static int iszero_fp16(half x){ - unsigned short const* x_raw = (unsigned short const*)&x; - return (*x_raw & 0x7fffu) == 0; -} -static int eq_nonan_fp16(half x1, half x2){ - unsigned short const* x1_raw = (unsigned short const*)&x1; - unsigned short const* x2_raw = (unsigned short const*)&x2; - return (*x1_raw == *x2_raw || ((*x1_raw | *x2_raw) & 0x7fff) == 0); -} -static half clpy_nextafter_fp16(half x1, half x2){ - unsigned short const* x1_raw = (unsigned short const*)&x1; - unsigned short const* x2_raw = (unsigned short const*)&x2; + +#ifdef __ULTIMA +static void __clpy_begin_print_out() __attribute__((annotate("clpy_begin_print_out"))); + +typedef struct __attribute__((packed)) __attribute__((aligned(2))) __clpy__half_{ + ushort v; + __clpy__half_() = default; + __clpy__half_(float f):v{convert_float_to_half_ushort(f)}{} + operator float()const{return convert_half_ushort_to_float(v);} +}__clpy__half; + +static int isnan_fp16(__clpy__half x){ + return (x.v & 0x7c00u) == 0x7c00u && (x.v & 0x03ffu) != 0x0000u; +} +static int isfinite_fp16(__clpy__half x){ + return (x.v & 0x7c00u) != 0x7c00u; +} +static int iszero_fp16(__clpy__half x){ + return (x.v & 0x7fffu) == 0; +} +static int eq_nonan_fp16(__clpy__half x1, __clpy__half x2){ + return (x1.v == x2.v || ((x1.v | x2.v) & 0x7fff) == 0); +} +__attribute__((annotate("clpy_no_mangle"))) static __clpy__half clpy_nextafter_fp16(__clpy__half x1, __clpy__half x2){ unsigned short ret_raw_; if (!isfinite_fp16(x1) || isnan_fp16(x2)){ ret_raw_ = 0x7e00u; // NaN in fp16 }else if(eq_nonan_fp16(x1, x2)){ - ret_raw_ = *x1_raw; + ret_raw_ = x1.v; }else if(iszero_fp16(x1)){ - ret_raw_ = (*x2_raw & 0x8000u) + 1; - }else if(!(*x1_raw & 0x8000u)){ - if (*(short const*)x1_raw > *(short const*)x2_raw){ - ret_raw_ = *x1_raw - 1; + ret_raw_ = (x2.v & 0x8000u) + 1; + }else if(!(x1.v & 0x8000u)){ + if (*(short const*)&x1.v > *(short const*)&x2.v){ + ret_raw_ = x1.v - 1; }else{ - ret_raw_ = *x1_raw + 1; + ret_raw_ = x1.v + 1; } - }else if(!(*x2_raw & 0x8000u) || (*x1_raw & 0x7fffu) > (*x2_raw & 0x7fffu)) { - ret_raw_ = *x1_raw - 1; + }else if(!(x2.v & 0x8000u) || (x1.v & 0x7fffu) > (x2.v & 0x7fffu)) { + ret_raw_ = x1.v - 1; } else { - ret_raw_ = *x1_raw + 1; + ret_raw_ = x1.v + 1; } - return *(half*)&ret_raw_; + __clpy__half ret; + ret.v = ret_raw_; + return ret; +} + +__attribute__((annotate("clpy_no_mangle"))) static __clpy__half convert_float_to_half(float x){ + __clpy__half ret(x); + return ret; } + +static void __clpy_end_print_out() __attribute__((annotate("clpy_end_print_out"))); +#endif #endif diff --git a/clpy/core/include/clpy/fp16.clh b/clpy/core/include/clpy/fp16.clh index d1ee1685fa8..41fcc5c34c6 100644 --- a/clpy/core/include/clpy/fp16.clh +++ b/clpy/core/include/clpy/fp16.clh @@ -12,7 +12,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -static half convert_float_to_half(float f) { +static ushort convert_float_to_half_ushort(float f) { const float scale_to_inf = 0x1.0p+112f; const float scale_to_zero = 0x1.0p-110f; float base = (fabs(f) * scale_to_inf) * scale_to_zero; @@ -32,5 +32,26 @@ static half convert_float_to_half(float f) { const uint mantissa_bits = bits & (uint)(0x00000FFF); const uint nonsign = exp_bits + mantissa_bits; const ushort ret = ((sign >> 16) | (shl1_w > (uint)(0xFF000000) ? (ushort)(0x7E00) : nonsign)); - return *(const half*)&ret; + return ret; +} + +static inline float convert_half_ushort_to_float(ushort h) { + const uint w = (uint) h << 16; + const uint sign = w & (uint)0x80000000; + const uint two_w = w + w; + + const uint exp_offset = (uint)0xE0 << 23; + const float exp_scale = 0x1.0p-112f; + const uint normalized_value_ = (two_w >> 4) + exp_offset; + const float normalized_value = *(const float*)&normalized_value_ * exp_scale; + + const uint magic_mask = (uint)(126u) << 23; + const float magic_bias = .5f; + const uint denormalized_value_ = (two_w >> 17) | magic_mask; + const float denormalized_value = *(const float*)&denormalized_value_ - magic_bias; + + const uint denormalized_cutoff = (uint)1u << 27; + const uint result = sign | + *(const uint*)(two_w < denormalized_cutoff ? &denormalized_value : &normalized_value); + return *(const float*)&result; } From d5e48652c12b00ddbd975761fe18c314518ffb9b Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Thu, 6 Feb 2020 02:09:29 +0900 Subject: [PATCH 7/8] fix test cases --- .../opencl_tests/ultima_tests/test_half.py | 52 ++++++++++++++----- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/tests/clpy_tests/opencl_tests/ultima_tests/test_half.py b/tests/clpy_tests/opencl_tests/ultima_tests/test_half.py index 93f8a854c09..a7e860f91e6 100644 --- a/tests/clpy_tests/opencl_tests/ultima_tests/test_half.py +++ b/tests/clpy_tests/opencl_tests/ultima_tests/test_half.py @@ -10,21 +10,35 @@ class TestUltimaHalfTrick(unittest.TestCase): def test_type_half(self): - x = ''' + supports_cl_khr_fp16 = clpy.backend.opencl.env.supports_cl_khr_fp16() + options = ('-D__CLPY_ENABLE_CL_KHR_FP16' + if supports_cl_khr_fp16 + else '', ) + x = clpy.backend.ultima.exec_ultima('', + '#include ', + _options=options) + (''' __clpy__half f() { - __clpy__half a; - return (__clpy__half)(42.F); + __clpy__half a = 42.F; + return a; } -''' +''' if supports_cl_khr_fp16 else ''' +__clpy__half f() +{ + __clpy__half a;constructor___clpy__half___left_paren____clpy__half_float__right_paren__(&a, 42.F); + return a; +} +''')[1:] y = clpy.backend.ultima.exec_ultima( ''' half f(){ - half a; - return static_cast(42.f); + half a = 42.f; + return a; } - ''') - self.assertEqual(x[1:], y) + ''', + '#include ', + _options=options) + self.assertEqual(x, y) def test_variable_named_half(self): x = ''' @@ -54,21 +68,35 @@ def test_argument_named_half(self): self.assertEqual(x[1:], y) def test_clpy_half(self): - x = ''' + supports_cl_khr_fp16 = clpy.backend.opencl.env.supports_cl_khr_fp16() + options = ('-D__CLPY_ENABLE_CL_KHR_FP16' + if supports_cl_khr_fp16 + else '', ) + x = clpy.backend.ultima.exec_ultima('', + '#include ', + _options=options) + (''' void f() { __clpy__half half_ = 42.F; int __clpy__half = half_; } -''' +''' if supports_cl_khr_fp16 else ''' +void f() +{ + __clpy__half half_;constructor___clpy__half___left_paren____clpy__half_float__right_paren__(&half_, 42.F); + int __clpy__half = operatorfloat___clpy__half_(&half_); +} +''')[1:] y = clpy.backend.ultima.exec_ultima( ''' void f(){ __clpy__half half_ = 42.f; int __clpy__half = half_; } - ''') - self.assertEqual(x[1:], y) + ''', + '#include ', + _options=options) + self.assertEqual(x, y) if __name__ == "__main__": From aaf590cdd46890ea42ccaef6a6ee3ab26467e54b Mon Sep 17 00:00:00 2001 From: vorj <40021161+vorj@users.noreply.github.com> Date: Thu, 6 Feb 2020 14:18:35 +0900 Subject: [PATCH 8/8] fix partial matching on check_device_extension --- clpy/backend/opencl/env.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clpy/backend/opencl/env.pyx b/clpy/backend/opencl/env.pyx index 498c199a3b4..6ef69d983fb 100644 --- a/clpy/backend/opencl/env.pyx +++ b/clpy/backend/opencl/env.pyx @@ -98,7 +98,7 @@ cdef cl_bool check_device_extension(cl_device_id device, extensions_buffer.data[:param_value_size]\ .decode(locale.getpreferredencoding()) - return CL_TRUE if extension in extensions else CL_FALSE + return CL_TRUE if extension in extensions.split() else CL_FALSE ##########################################