diff --git a/generate_cuda.sh b/generate_cuda.sh index 6683960..5f3e515 100755 --- a/generate_cuda.sh +++ b/generate_cuda.sh @@ -1,3 +1,7 @@ #!/bin/bash -e clang2py /usr/include/cuda.h /usr/include/nvrtc.h -o gpuctypes/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so -l /usr/lib/x86_64-linux-gnu/libnvrtc.so +# Load by library name when the loader can find it; fall back to a known install path so a failed lookup raises OSError rather than CDLL(None). +sed -i "s|import ctypes|import ctypes, ctypes.util|g" gpuctypes/cuda.py +sed -i "s|ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so')|ctypes.CDLL(ctypes.util.find_library('cuda') or '/usr/lib/x86_64-linux-gnu/libcuda.so')|g" gpuctypes/cuda.py +sed -i "s|ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libnvrtc.so')|ctypes.CDLL(ctypes.util.find_library('nvrtc') or '/usr/local/cuda/targets/x86_64-linux/lib/libnvrtc.so')|g" gpuctypes/cuda.py grep FIXME_STUB gpuctypes/cuda.py || true diff --git a/generate_hip.sh b/generate_hip.sh index 9191e2b..dcdc1b1 100755 --- a/generate_hip.sh +++ b/generate_hip.sh @@ -1,3 +1,8 @@ #!/bin/bash -e clang2py /opt/rocm/include/hip/hiprtc.h /opt/rocm/include/hip/hip_runtime_api.h /opt/rocm/include/hip/driver_types.h --clang-args="-D__HIP_PLATFORM_AMD__ -I/opt/rocm/include" -o gpuctypes/hip.py -l /opt/rocm/lib/libhiprtc.so -l /opt/rocm/lib/libamdhip64.so grep FIXME_STUB gpuctypes/hip.py || true +# Load by library name when the loader can find it; fall back to the ROCm install path so a failed lookup raises OSError rather than CDLL(None). +sed -i "s|import ctypes|import ctypes, ctypes.util|g" gpuctypes/hip.py +sed -i "s|ctypes.CDLL('/opt/rocm/lib/libhiprtc.so')|ctypes.CDLL(ctypes.util.find_library('hiprtc') or '/opt/rocm/lib/libhiprtc.so')|g" gpuctypes/hip.py +sed -i "s|ctypes.CDLL('/opt/rocm/lib/libamdhip64.so')|ctypes.CDLL(ctypes.util.find_library('amdhip64') or '/opt/rocm/lib/libamdhip64.so')|g" gpuctypes/hip.py +python3 -c "import gpuctypes.hip" diff --git a/gpuctypes/cuda.py b/gpuctypes/cuda.py index 0090d20..acba81c 100644 --- a/gpuctypes/cuda.py +++ b/gpuctypes/cuda.py @@ -5,7 +5,7 @@ # POINTER_SIZE is: 8 # LONGDOUBLE_SIZE is: 16 # -import ctypes +import ctypes, ctypes.util class AsDictMixin: @@ -144,11 +144,8 @@ def char_pointer_cast(string, encoding='utf-8'): _libraries = {} -_libraries['libcuda.so'] = ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so') -try: - _libraries['libnvrtc.so'] = ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libnvrtc.so') 
-except OSError: - _libraries['libnvrtc.so'] = ctypes.CDLL('/usr/local/cuda/targets/x86_64-linux/lib/libnvrtc.so') +_libraries['libcuda.so'] = ctypes.CDLL(ctypes.util.find_library('cuda') or '/usr/lib/x86_64-linux-gnu/libcuda.so') +_libraries['libnvrtc.so'] = ctypes.CDLL(ctypes.util.find_library('nvrtc') or '/usr/local/cuda/targets/x86_64-linux/lib/libnvrtc.so') cuuint32_t = ctypes.c_uint32 @@ -401,13 +398,28 @@ class struct_CUipcMemHandle_st(Structure): class union_CUstreamBatchMemOpParams_union(Union): pass -class struct_CUstreamMemOpFlushRemoteWritesParams_st(Structure): +class struct_CUstreamMemOpWaitValueParams_st(Structure): pass -struct_CUstreamMemOpFlushRemoteWritesParams_st._pack_ = 1 # source:False -struct_CUstreamMemOpFlushRemoteWritesParams_st._fields_ = [ +class union_CUstreamMemOpWaitValueParams_st_0(Union): + pass + +union_CUstreamMemOpWaitValueParams_st_0._pack_ = 1 # source:False +union_CUstreamMemOpWaitValueParams_st_0._fields_ = [ + ('value', ctypes.c_uint32), + ('value64', ctypes.c_uint64), +] + +struct_CUstreamMemOpWaitValueParams_st._pack_ = 1 # source:False +struct_CUstreamMemOpWaitValueParams_st._anonymous_ = ('_0',) +struct_CUstreamMemOpWaitValueParams_st._fields_ = [ ('operation', CUstreamBatchMemOpType), + ('PADDING_0', ctypes.c_ubyte * 4), + ('address', ctypes.c_uint64), + ('_0', union_CUstreamMemOpWaitValueParams_st_0), ('flags', ctypes.c_uint32), + ('PADDING_1', ctypes.c_ubyte * 4), + ('alias', ctypes.c_uint64), ] class struct_CUstreamMemOpWriteValueParams_st(Structure): @@ -434,28 +446,13 @@ class union_CUstreamMemOpWriteValueParams_st_0(Union): ('alias', ctypes.c_uint64), ] -class struct_CUstreamMemOpWaitValueParams_st(Structure): - pass - -class union_CUstreamMemOpWaitValueParams_st_0(Union): +class struct_CUstreamMemOpFlushRemoteWritesParams_st(Structure): pass -union_CUstreamMemOpWaitValueParams_st_0._pack_ = 1 # source:False -union_CUstreamMemOpWaitValueParams_st_0._fields_ = [ - ('value', ctypes.c_uint32), - ('value64', ctypes.c_uint64), -] - -struct_CUstreamMemOpWaitValueParams_st._pack_ = 1 # source:False 
-struct_CUstreamMemOpWaitValueParams_st._anonymous_ = ('_0',) -struct_CUstreamMemOpWaitValueParams_st._fields_ = [ +struct_CUstreamMemOpFlushRemoteWritesParams_st._pack_ = 1 # source:False +struct_CUstreamMemOpFlushRemoteWritesParams_st._fields_ = [ ('operation', CUstreamBatchMemOpType), - ('PADDING_0', ctypes.c_ubyte * 4), - ('address', ctypes.c_uint64), - ('_0', union_CUstreamMemOpWaitValueParams_st_0), ('flags', ctypes.c_uint32), - ('PADDING_1', ctypes.c_ubyte * 4), - ('alias', ctypes.c_uint64), ] union_CUstreamBatchMemOpParams_union._pack_ = 1 # source:False @@ -1884,6 +1881,22 @@ class struct_CUDA_RESOURCE_DESC_st(Structure): class union_CUDA_RESOURCE_DESC_st_res(Union): pass +class struct_CUDA_RESOURCE_DESC_st_0_array(Structure): + pass + +struct_CUDA_RESOURCE_DESC_st_0_array._pack_ = 1 # source:False +struct_CUDA_RESOURCE_DESC_st_0_array._fields_ = [ + ('hArray', ctypes.POINTER(struct_CUarray_st)), +] + +class struct_CUDA_RESOURCE_DESC_st_0_mipmap(Structure): + pass + +struct_CUDA_RESOURCE_DESC_st_0_mipmap._pack_ = 1 # source:False +struct_CUDA_RESOURCE_DESC_st_0_mipmap._fields_ = [ + ('hMipmappedArray', ctypes.POINTER(struct_CUmipmappedArray_st)), +] + class struct_CUDA_RESOURCE_DESC_st_0_linear(Structure): pass @@ -1916,22 +1929,6 @@ class struct_CUDA_RESOURCE_DESC_st_0_reserved(Structure): ('reserved', ctypes.c_int32 * 32), ] -class struct_CUDA_RESOURCE_DESC_st_0_mipmap(Structure): - pass - -struct_CUDA_RESOURCE_DESC_st_0_mipmap._pack_ = 1 # source:False -struct_CUDA_RESOURCE_DESC_st_0_mipmap._fields_ = [ - ('hMipmappedArray', ctypes.POINTER(struct_CUmipmappedArray_st)), -] - -class struct_CUDA_RESOURCE_DESC_st_0_array(Structure): - pass - -struct_CUDA_RESOURCE_DESC_st_0_array._pack_ = 1 # source:False -struct_CUDA_RESOURCE_DESC_st_0_array._fields_ = [ - ('hArray', ctypes.POINTER(struct_CUarray_st)), -] - union_CUDA_RESOURCE_DESC_st_res._pack_ = 1 # source:False union_CUDA_RESOURCE_DESC_st_res._fields_ = [ ('array', struct_CUDA_RESOURCE_DESC_st_0_array), 
@@ -2267,6 +2264,14 @@ class struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st(Structure): class struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_params(Structure): pass +class struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_fence(Structure): + pass + +struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_fence._pack_ = 1 # source:False +struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_fence._fields_ = [ + ('value', ctypes.c_uint64), +] + class union_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_nvSciSync(Union): pass @@ -2276,14 +2281,6 @@ class union_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_nvSciSync(Union): ('reserved', ctypes.c_uint64), ] -class struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_fence(Structure): - pass - -struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_fence._pack_ = 1 # source:False -struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_fence._fields_ = [ - ('value', ctypes.c_uint64), -] - class struct_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st_0_keyedMutex(Structure): pass @@ -2324,6 +2321,15 @@ class struct_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_fence(Structure): ('value', ctypes.c_uint64), ] +class union_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_nvSciSync(Union): + pass + +union_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_nvSciSync._pack_ = 1 # source:False +union_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_nvSciSync._fields_ = [ + ('fence', ctypes.POINTER(None)), + ('reserved', ctypes.c_uint64), +] + class struct_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_keyedMutex(Structure): pass @@ -2334,15 +2340,6 @@ class struct_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_keyedMutex(Structure): ('PADDING_0', ctypes.c_ubyte * 4), ] -class union_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_nvSciSync(Union): - pass - -union_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_nvSciSync._pack_ = 1 # source:False -union_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_nvSciSync._fields_ = [ - ('fence', ctypes.POINTER(None)), - ('reserved', ctypes.c_uint64), -] - 
struct_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_params._pack_ = 1 # source:False struct_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_params._fields_ = [ ('fence', struct_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st_0_fence), @@ -2504,17 +2501,6 @@ class union_CUarrayMapInfo_st_resource(Union): class union_CUarrayMapInfo_st_subresource(Union): pass -class struct_CUarrayMapInfo_st_1_miptail(Structure): - pass - -struct_CUarrayMapInfo_st_1_miptail._pack_ = 1 # source:False -struct_CUarrayMapInfo_st_1_miptail._fields_ = [ - ('layer', ctypes.c_uint32), - ('PADDING_0', ctypes.c_ubyte * 4), - ('offset', ctypes.c_uint64), - ('size', ctypes.c_uint64), -] - class struct_CUarrayMapInfo_st_1_sparseLevel(Structure): pass @@ -2530,6 +2516,17 @@ class struct_CUarrayMapInfo_st_1_sparseLevel(Structure): ('extentDepth', ctypes.c_uint32), ] +class struct_CUarrayMapInfo_st_1_miptail(Structure): + pass + +struct_CUarrayMapInfo_st_1_miptail._pack_ = 1 # source:False +struct_CUarrayMapInfo_st_1_miptail._fields_ = [ + ('layer', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('offset', ctypes.c_uint64), + ('size', ctypes.c_uint64), +] + union_CUarrayMapInfo_st_subresource._pack_ = 1 # source:False union_CUarrayMapInfo_st_subresource._fields_ = [ ('sparseLevel', struct_CUarrayMapInfo_st_1_sparseLevel), diff --git a/gpuctypes/hip.py b/gpuctypes/hip.py index 3975fa8..51838ec 100644 --- a/gpuctypes/hip.py +++ b/gpuctypes/hip.py @@ -5,7 +5,7 @@ # POINTER_SIZE is: 8 # LONGDOUBLE_SIZE is: 16 # -import ctypes +import ctypes, ctypes.util class AsDictMixin: @@ -117,7 +117,7 @@ class Union(ctypes.Union, AsDictMixin): _libraries = {} -_libraries['libhiprtc.so'] = ctypes.CDLL('/opt/rocm/lib/libhiprtc.so') +_libraries['libhiprtc.so'] = ctypes.CDLL(ctypes.util.find_library('hiprtc') or '/opt/rocm/lib/libhiprtc.so') def string_cast(char_pointer, encoding='utf-8', errors='strict'): value = ctypes.cast(char_pointer, ctypes.c_char_p).value if value is not None and encoding is not None: @@ -155,7 +155,7 @@ def __getattr__(self, _): # You can 
either re-run clan2py with -l /path/to/library.so # Or manually fix this by comment the ctypes.CDLL loading _libraries['FIXME_STUB'] = FunctionFactoryStub() # ctypes.CDLL('FIXME_STUB') -_libraries['libamdhip64.so'] = ctypes.CDLL('/opt/rocm/lib/libamdhip64.so') +_libraries['libamdhip64.so'] = ctypes.CDLL(ctypes.util.find_library('amdhip64') or '/opt/rocm/lib/libamdhip64.so')