From 6e8d69c88b9bf2928aa7b7c6a3d36095a3620ee8 Mon Sep 17 00:00:00 2001 From: Gong-air <994321849@qq.com> Date: Mon, 9 Sep 2024 10:47:52 +0800 Subject: [PATCH] and file comment& add tang_shared_so load & clang format --- .../vendor/droplet/communicatorimpl.cpp | 4 +- .../csrc_dipu/vendor/droplet/pccl.cpp | 237 ++++++++++-------- .../csrc_dipu/vendor/droplet/pcclcommon.h | 18 +- 3 files changed, 154 insertions(+), 105 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp b/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp index f2282dbeb..d459d2958 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp @@ -2,6 +2,8 @@ #include #include +#include "pccl.h" + #include #include @@ -9,8 +11,8 @@ #include "csrc_dipu/runtime/devproxy/deviceproxy.h" #include #include + #include "vendorapi.h" -#include "pccl.h" namespace dipu { diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.cpp b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.cpp index d5289ff25..5f47aa87a 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.cpp +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.cpp @@ -1,45 +1,59 @@ +/** + * pccl.cpp + * + * Description: + * This file implements the dynamic loading and invocation of PCCL APIs required + * by DICL. If the pccllib.so library is not found, a log message will be + * printed, and a Fallback API will be executed. + * + * Notes: + * - We have copied the PCCL header file. If the PCCL header file are updated, + * please correspondingly update them here. + */ #include "pccl.h" + #include #include + #include "pcclcommon.h" + #include +#include #include "csrc_dipu/runtime/device/basedef.h" #include "csrc_dipu/runtime/devproxy/deviceproxy.h" -#include - #include #include - namespace { -template +template ReturnType callPcclImpl(Args... args) { - static const auto pcclFuncAddr = getCommPcclFuncAddr(PcclFuncName); - using dipuPcclFunc = ReturnType (*)(Args...); - static dipuPcclFunc pcclFunc = reinterpret_cast(pcclFuncAddr); - auto pcclCallReturn = pcclFunc(args...); - return pcclCallReturn; -} - -#define DIPU_PCCL_IMPL(NAME, RETURN, ...) \ - RETURN NAME(DIPU_TYPE_PARAM(__VA_ARGS__)) { \ - static constexpr const char fstr[] = #NAME; \ - return callPcclImpl(DIPU_PARAM(__VA_ARGS__)); \ - } \ + static const auto pcclFuncAddr = getCommPcclFuncAddr(PcclFuncName); + using dipuPcclFunc = ReturnType (*)(Args...); + static dipuPcclFunc pcclFunc = reinterpret_cast(pcclFuncAddr); + auto pcclCallReturn = pcclFunc(args...); + return pcclCallReturn; +} + +#define DIPU_PCCL_IMPL(NAME, RETURN, ...) \ + RETURN NAME(DIPU_TYPE_PARAM(__VA_ARGS__)) { \ + static constexpr const char fstr[] = #NAME; \ + return callPcclImpl(DIPU_PARAM(__VA_ARGS__)); \ + } \ static RETURN CONCAT(my__, NAME)(DIPU_TYPE_PARAM(__VA_ARGS__)); \ - static const int CONCAT(n_, NAME) = []() { \ - fn[#NAME] = reinterpret_cast(CONCAT(my__, NAME)); \ - return 0; \ - }(); \ + static const int CONCAT(n_, NAME) = []() { \ + fn[#NAME] = reinterpret_cast(CONCAT(my__, NAME)); \ + return 0; \ + }(); \ RETURN CONCAT(my__, NAME)(DIPU_TYPE_PARAM(__VA_ARGS__)) -#define DIPU_PCCL_COMM_IMPL(NAME, ...) DIPU_PCCL_IMPL(NAME, pcclResult_t, __VA_ARGS__) -#define DIPU_PCCL_ERROR_IMPL(NAME, ...) DIPU_PCCL_IMPL(NAME, const char*, __VA_ARGS__) +#define DIPU_PCCL_COMM_IMPL(NAME, ...) \ + DIPU_PCCL_IMPL(NAME, pcclResult_t, __VA_ARGS__) +#define DIPU_PCCL_ERROR_IMPL(NAME, ...) \ + DIPU_PCCL_IMPL(NAME, const char*, __VA_ARGS__) std::map fn; - static const std::map toScalarType = { {pcclInt8, at::kChar}, {pcclUint8, at::kByte}, @@ -54,17 +68,18 @@ static const std::map toScalarType = { }; at::ScalarType PcclDataTypeToScalarType(pcclDataType_t pccl_data_type) { - auto p = toScalarType.find(pccl_data_type); - if (p == toScalarType.end()) { - throw std::runtime_error("Not supported pcclDataType_t: " + std::to_string(pccl_data_type)); - } - return p->second; + auto p = toScalarType.find(pccl_data_type); + if (p == toScalarType.end()) { + throw std::runtime_error("Not supported pcclDataType_t: " + + std::to_string(pccl_data_type)); + } + return p->second; } static const pcclComm_t kMagicComm = reinterpret_cast(0x5043434C); void checkCommOrThrow(pcclComm_t comm) { - if (comm == nullptr || comm!=kMagicComm) { + if (comm == nullptr || comm != kMagicComm) { throw std::runtime_error("Invalid comm."); } } @@ -88,89 +103,111 @@ void checkRankOrThrow(int rank) { void singleDeviceMemcpy(dipu::deviceStream_t stream, void* dst, const void* src, size_t nbytes) { - if(dst != src){ - auto device = dipu::devproxy::current_device(); - dipu::devproxy::memCopyD2DAsync(stream, nbytes, device, dst, device, src); + if (dst != src) { + auto device = dipu::devproxy::current_device(); + dipu::devproxy::memCopyD2DAsync(stream, nbytes, device, dst, device, src); } } } // namespace +DIPU_PCCL_COMM_IMPL(pcclGetUniqueId, (pcclUniqueId*, uniqueId)) { + return pcclSuccess; +} +DIPU_PCCL_COMM_IMPL(pcclCommInitRank, (pcclComm_t*, comm), (int, ndev), + (pcclUniqueId, commIdI), (int, rank)) { + checkNrankOrThrow(ndev); + checkRankOrThrow(rank); + DIPU_LOGW( + "PCCL is not enabled. DIPU will simulate single GPU " + "communication using memcpy."); + *comm = kMagicComm; + return pcclSuccess; +} +DIPU_PCCL_COMM_IMPL(pcclCommDestroy, (pcclComm_t, comm)) { + checkCommOrThrow(comm); + // destroyDiclComm(comm); + return pcclSuccess; +} - DIPU_PCCL_COMM_IMPL(pcclGetUniqueId, (pcclUniqueId*, uniqueId)) { - return pcclSuccess; - } - - DIPU_PCCL_COMM_IMPL(pcclCommInitRank, (pcclComm_t*, comm), (int, ndev), (pcclUniqueId, commIdI), (int, rank)) { - checkNrankOrThrow(ndev); - checkRankOrThrow(rank); - DIPU_LOGW( - "PCCL is not enabled. DIPU will simulate single GPU " - "communication using memcpy."); - *comm = kMagicComm; - return pcclSuccess; - } - - DIPU_PCCL_COMM_IMPL(pcclCommDestroy, (pcclComm_t, comm)) { - checkCommOrThrow(comm); - // destroyDiclComm(comm); - return pcclSuccess; - } - - DIPU_PCCL_COMM_IMPL(pcclCommGetAsyncError, (pcclComm_t, comm), (pcclResult_t*, asyncError)) { - checkCommOrThrow(comm); - return pcclSuccess; - } - - DIPU_PCCL_ERROR_IMPL(pcclGetErrorString, (pcclResult_t, result)){ - throw std::runtime_error("Fallback pccl impl should not call pcclGetErrorString"); - } +DIPU_PCCL_COMM_IMPL(pcclCommGetAsyncError, (pcclComm_t, comm), + (pcclResult_t*, asyncError)) { + checkCommOrThrow(comm); + return pcclSuccess; +} - DIPU_PCCL_ERROR_IMPL(pcclGetLastError, (pcclComm_t, comm)){ - throw std::runtime_error("Fallback pccl impl should not call pcclGetLastError"); - } +DIPU_PCCL_ERROR_IMPL(pcclGetErrorString, (pcclResult_t, result)) { + throw std::runtime_error( + "Fallback pccl impl should not call pcclGetErrorString"); +} - DIPU_PCCL_COMM_IMPL(pcclReduce, (const void*, sendbuff), (void*, recvbuff), (size_t, count), (pcclDataType_t, datatype), (pcclRedOp_t, op), (int, root), (pcclComm_t, comm), (tangStream_t, stream)) { - checkCommOrThrow(comm); - checkRankOrThrow(root); - singleDeviceMemcpy(stream, recvbuff, sendbuff, - count * at::elementSize(PcclDataTypeToScalarType(datatype))); - return pcclSuccess; - } +DIPU_PCCL_ERROR_IMPL(pcclGetLastError, (pcclComm_t, comm)) { + throw std::runtime_error( + "Fallback pccl impl should not call pcclGetLastError"); +} - DIPU_PCCL_COMM_IMPL(pcclAllReduce, (const void*, sendbuff), (void*, recvbuff), (size_t, count), (pcclDataType_t, datatype), (pcclRedOp_t, op), (pcclComm_t, comm), (tangStream_t, stream)) { - checkCommOrThrow(comm); - singleDeviceMemcpy(stream, recvbuff, sendbuff, - count * at::elementSize(PcclDataTypeToScalarType(datatype))); +DIPU_PCCL_COMM_IMPL(pcclReduce, (const void*, sendbuff), (void*, recvbuff), + (size_t, count), (pcclDataType_t, datatype), + (pcclRedOp_t, op), (int, root), (pcclComm_t, comm), + (tangStream_t, stream)) { + checkCommOrThrow(comm); + checkRankOrThrow(root); + singleDeviceMemcpy( + stream, recvbuff, sendbuff, + count * at::elementSize(PcclDataTypeToScalarType(datatype))); return pcclSuccess; - } +} - DIPU_PCCL_COMM_IMPL(pcclReduceScatter, (const void*, sendbuff), (void*, recvbuff), (size_t, recvcount), (pcclDataType_t, datatype), (pcclRedOp_t, op), (pcclComm_t, comm), (tangStream_t, stream)) { - singleDeviceMemcpy(stream, recvbuff, sendbuff, - recvcount * at::elementSize(PcclDataTypeToScalarType(datatype))); - return pcclSuccess; - } +DIPU_PCCL_COMM_IMPL(pcclAllReduce, (const void*, sendbuff), (void*, recvbuff), + (size_t, count), (pcclDataType_t, datatype), + (pcclRedOp_t, op), (pcclComm_t, comm), + (tangStream_t, stream)) { + checkCommOrThrow(comm); + singleDeviceMemcpy( + stream, recvbuff, sendbuff, + count * at::elementSize(PcclDataTypeToScalarType(datatype))); + return pcclSuccess; +} - DIPU_PCCL_COMM_IMPL(pcclBroadcast, (const void *, sendbuff), (void*, recvbuff), (size_t, count), (pcclDataType_t, datatype), (int, root), (pcclComm_t, comm), (tangStream_t, stream)) { - checkCommOrThrow(comm); - singleDeviceMemcpy(stream, recvbuff, sendbuff, - count * at::elementSize(PcclDataTypeToScalarType(datatype))); - return pcclSuccess; - } - DIPU_PCCL_COMM_IMPL(pcclAllGather, (const void*, sendbuff), (void*, recvbuff), (size_t, count), (pcclDataType_t, datatype), (pcclComm_t, comm), (tangStream_t, stream)) { - checkCommOrThrow(comm); - singleDeviceMemcpy(stream, recvbuff, sendbuff, - count * at::elementSize(PcclDataTypeToScalarType(datatype))); - return pcclSuccess; - } - DIPU_PCCL_COMM_IMPL(pcclSend, (const void*, sendbuff), (size_t, count), (pcclDataType_t, datatype), (int, peer), (pcclComm_t, comm), (tangStream_t, stream)) { - throwNotSupportedError(); - return pcclInvalidUsage; - } - DIPU_PCCL_COMM_IMPL(pcclRecv, (void*, recvbuff), (size_t, count), (pcclDataType_t, datatype), (int, peer), (pcclComm_t, comm), (tangStream_t, stream)) { - throwNotSupportedError(); - return pcclInvalidUsage; - } +DIPU_PCCL_COMM_IMPL(pcclReduceScatter, (const void*, sendbuff), + (void*, recvbuff), (size_t, recvcount), + (pcclDataType_t, datatype), (pcclRedOp_t, op), + (pcclComm_t, comm), (tangStream_t, stream)) { + singleDeviceMemcpy( + stream, recvbuff, sendbuff, + recvcount * at::elementSize(PcclDataTypeToScalarType(datatype))); + return pcclSuccess; +} +DIPU_PCCL_COMM_IMPL(pcclBroadcast, (const void*, sendbuff), (void*, recvbuff), + (size_t, count), (pcclDataType_t, datatype), (int, root), + (pcclComm_t, comm), (tangStream_t, stream)) { + checkCommOrThrow(comm); + singleDeviceMemcpy( + stream, recvbuff, sendbuff, + count * at::elementSize(PcclDataTypeToScalarType(datatype))); + return pcclSuccess; +} +DIPU_PCCL_COMM_IMPL(pcclAllGather, (const void*, sendbuff), (void*, recvbuff), + (size_t, count), (pcclDataType_t, datatype), + (pcclComm_t, comm), (tangStream_t, stream)) { + checkCommOrThrow(comm); + singleDeviceMemcpy( + stream, recvbuff, sendbuff, + count * at::elementSize(PcclDataTypeToScalarType(datatype))); + return pcclSuccess; +} +DIPU_PCCL_COMM_IMPL(pcclSend, (const void*, sendbuff), (size_t, count), + (pcclDataType_t, datatype), (int, peer), (pcclComm_t, comm), + (tangStream_t, stream)) { + throwNotSupportedError(); + return pcclInvalidUsage; +} +DIPU_PCCL_COMM_IMPL(pcclRecv, (void*, recvbuff), (size_t, count), + (pcclDataType_t, datatype), (int, peer), (pcclComm_t, comm), + (tangStream_t, stream)) { + throwNotSupportedError(); + return pcclInvalidUsage; +} diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h index bc3525e2a..c00bd5e76 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h @@ -23,12 +23,22 @@ inline void* getCommPcclLibHandler(const char* libName) { } inline void* getCommPcclFuncAddr(const char* apiName) { - constexpr const char kOpApiLibName[] = "libpccl.so"; - static void* opApiHandler = getCommPcclLibHandler(kOpApiLibName); - if (opApiHandler == nullptr) { + constexpr const char pcclLibName[] = "libpccl.so"; + constexpr const char pcclLibDependName[] = "libtangrt_shared.so"; + static void* pcclDependHandler = dlopen(pcclLibDependName, RTLD_LAZY | RTLD_GLOBAL); + if(pcclDependHandler == nullptr){ + throw std::runtime_error( + "Error: Failed to load libpccl.so. The required library 'libtangrt_shared.so' is missing.\n" + "Please ensure that 'libtangrt_shared.so' is installed and its path is included in the LD_LIBRARY_PATH environment variable.\n" + "Example: export LD_LIBRARY_PATH=/path/to/lib:$LD_LIBRARY_PATH" + ); + } + static void* pcclHandler = getCommPcclLibHandler(pcclLibName); + if (pcclHandler == nullptr) { + std::cerr << "Fallback " << apiName << " will be called" << std::endl; return nullptr; } - return getCommPcclFuncAddrInLib(opApiHandler, kOpApiLibName, apiName); + return getCommPcclFuncAddrInLib(pcclHandler, pcclLibName, apiName); } #define EXPAND(x) x