diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.h b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.h index 284b36fc5..9579532e7 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.h +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pccl.h @@ -5,30 +5,28 @@ extern "C" { #endif - #include "tang_rt/driver_types.h" - #define PCCL_UNIQUE_ID_BYTES 128 typedef struct { - char internal[PCCL_UNIQUE_ID_BYTES]; -}pcclUniqueId; + char internal[PCCL_UNIQUE_ID_BYTES]; +} pcclUniqueId; /* Opaque handle to communicator */ typedef struct pcclComm* pcclComm_t; /* Error enum */ typedef enum { - pcclSuccess = 0, - pcclUnhandledTangError = 1, - pcclSystemError = 2, - pcclInternalError = 3, - pcclInvalidArgument = 4, - pcclInvalidUsage = 5, - pcclRemoteError = 6, - pcclInProgress = 7, - pcclInvalidDeviceIndex = 8, - pccl_NUM_RESULTS + pcclSuccess = 0, + pcclUnhandledTangError = 1, + pcclSystemError = 2, + pcclInternalError = 3, + pcclInvalidArgument = 4, + pcclInvalidUsage = 5, + pcclRemoteError = 6, + pcclInProgress = 7, + pcclInvalidDeviceIndex = 8, + pccl_NUM_RESULTS } pcclResult_t; /* description : Generates a unique Id with each call @@ -44,19 +42,20 @@ pcclResult_t pcclGetUniqueId(pcclUniqueId* uniqueId); * : commId, unique Id for communicator * : rank, must be between 0 and ndev-1 * output : 0:pcclSuccess, other failure - * note : the func implicitly syncronizes with other ranks, so INIT OF EACH RANK MUST - * BE CALLED IN A SEPARATE HOST THREADS to avoid deadlock. + * note : the func implicitly syncronizes with other ranks, so INIT OF + * EACH RANK MUST BE CALLED IN A SEPARATE HOST THREADS to avoid deadlock. */ -pcclResult_t pcclCommInitRank(pcclComm_t* comm, int ndev, pcclUniqueId commId, int rank); +pcclResult_t pcclCommInitRank(pcclComm_t* comm, int ndev, pcclUniqueId commId, + int rank); /* description : Creates a clique of communicators - * input : comms, should be pre-allocated with size at least ndev*sizeof(pcclComm_t) - * : ndev, number of logical devices - * : devlist, the set of dev pointer, if NULL, first device to ndev used - * output : 0:pcclSuccess, other failure - * note : This is a convenience function to create a single-process communicator clique + * input : comms, should be pre-allocated with size at least + * ndev*sizeof(pcclComm_t) : ndev, number of logical devices : devlist, the set + * of dev pointer, if NULL, first device to ndev used output : + * 0:pcclSuccess, other failure note : This is a convenience function to + * create a single-process communicator clique */ -pcclResult_t pcclCommInitAll(pcclComm_t* comms, int ndev, const int* devlist); +pcclResult_t pcclCommInitAll(pcclComm_t* comms, int ndev, const int* devlist); /* description : Frees resources associated with communicator object * input : comm, the communicator @@ -77,14 +76,14 @@ pcclResult_t pcclCommAbort(pcclComm_t comm); * output : asyncError, the out value error * note : N/A */ -pcclResult_t pcclCommGetAsyncError(pcclComm_t comm, pcclResult_t *asyncError); +pcclResult_t pcclCommGetAsyncError(pcclComm_t comm, pcclResult_t* asyncError); /* description : Returns human error message * input : result, the result flag * output : readable error string * note : N/A */ -const char* pcclGetErrorString(pcclResult_t result); +const char* pcclGetErrorString(pcclResult_t result); const char* pcclGetLastError(pcclComm_t comm); @@ -94,7 +93,7 @@ const char* pcclGetLastError(pcclComm_t comm); * output : 0:pcclSuccess, other failure * note : N/A */ -pcclResult_t pcclCommCount(const pcclComm_t comm, int* count); +pcclResult_t pcclCommCount(const pcclComm_t comm, int* count); /* description : get tang device number associated with communicator * input : comm, the communicator @@ -110,42 +109,42 @@ pcclResult_t pcclCommCuDevice(const pcclComm_t comm, int* device); * output : 0:pcclSuccess, other failure * note : N/A */ -pcclResult_t pcclCommUserRank(const pcclComm_t comm, int* rank); +pcclResult_t pcclCommUserRank(const pcclComm_t comm, int* rank); /* description : get pccl lib version * input : version, the pointers * output : 0:pcclSuccess, other failure * note : N/A */ -pcclResult_t pcclGetVersion(int *version); +pcclResult_t pcclGetVersion(int* version); /* Reduction opperation selector */ typedef enum { - pcclSum = 0, - pcclProd = 1, - pcclMax = 2, - pcclMin = 3, - pcclAvg = 4, - pcclOpsNum = 5, - pcclNull = pcclOpsNum + pcclSum = 0, + pcclProd = 1, + pcclMax = 2, + pcclMin = 3, + pcclAvg = 4, + pcclOpsNum = 5, + pcclNull = pcclOpsNum } pcclRedOp_t; /* Data types unspported double */ typedef enum { - pcclChar = 0, - pcclInt8 = pcclChar, - pcclUint8 = 1, - pcclInt = 2, - pcclInt32 = pcclInt, - pcclUint32 = 3, - pcclInt64 = 4, - pcclUint64 = 5, - pcclHalf = 6, - pcclFloat16 = pcclHalf, - pcclFloat = 7, - pcclFloat32 = pcclFloat, - pcclBfloat16 = 8, - pcclTypesNum + pcclChar = 0, + pcclInt8 = pcclChar, + pcclUint8 = 1, + pcclInt = 2, + pcclInt32 = pcclInt, + pcclUint32 = 3, + pcclInt64 = 4, + pcclUint64 = 5, + pcclHalf = 6, + pcclFloat16 = pcclHalf, + pcclFloat = 7, + pcclFloat32 = pcclFloat, + pcclBfloat16 = 8, + pcclTypesNum } pcclDataType_t; /* description : Reduces @@ -161,8 +160,9 @@ typedef enum { * note : recvbuf may be NULL on all calls except for root device, * sendbuff and recvbuff are assumed to reside on root device */ -pcclResult_t pcclReduce(const void* sendbuff, void* recvbuf, size_t count, pcclDataType_t datatype, - pcclRedOp_t op, int root, pcclComm_t comm, tangStream_t stream); +pcclResult_t pcclReduce(const void* sendbuff, void* recvbuf, size_t count, + pcclDataType_t datatype, pcclRedOp_t op, int root, + pcclComm_t comm, tangStream_t stream); /* description : AllReduces * input : sendbuff, input data buffer @@ -175,8 +175,9 @@ pcclResult_t pcclReduce(const void* sendbuff, void* recvbuf, size_t count, pccl * output : 0:pcclSuccess, other failure * note : N/A */ -pcclResult_t pcclAllReduce(const void* sendbuff, void* recvbuff, size_t count, pcclDataType_t datatype, - pcclRedOp_t op, pcclComm_t comm, tangStream_t stream); +pcclResult_t pcclAllReduce(const void* sendbuff, void* recvbuff, size_t count, + pcclDataType_t datatype, pcclRedOp_t op, + pcclComm_t comm, tangStream_t stream); /* description : ReducesScatter * input : sendbuff, input data buffer @@ -187,11 +188,12 @@ pcclResult_t pcclAllReduce(const void* sendbuff, void* recvbuff, size_t count, * : comm, communicator * : stream, if null,used default * output : 0:pcclSuccess, other failure - * note : N/A + * note : N/A */ -pcclResult_t pcclReduceScatter(const void* sendbuff, void* recvbuff, - size_t recvcount, pcclDataType_t datatype, pcclRedOp_t op, pcclComm_t comm, - tangStream_t stream); +pcclResult_t pcclReduceScatter(const void* sendbuff, void* recvbuff, + size_t recvcount, pcclDataType_t datatype, + pcclRedOp_t op, pcclComm_t comm, + tangStream_t stream); /* description : Broadcast * input : buff, input data buffer @@ -201,10 +203,12 @@ pcclResult_t pcclReduceScatter(const void* sendbuff, void* recvbuff, * : comm, communicator * : stream, if null,used default * output : 0:pcclSuccess, other failure - * note : Must be called separately for each communicator in communicator clique + * note : Must be called separately for each communicator in + * communicator clique */ -pcclResult_t pcclBroadcast(const void *sendbuff, void* recvbuff, size_t count, pcclDataType_t datatype, int root, - pcclComm_t comm, tangStream_t stream); +pcclResult_t pcclBroadcast(const void* sendbuff, void* recvbuff, size_t count, + pcclDataType_t datatype, int root, pcclComm_t comm, + tangStream_t stream); /* description : AllGather * input : sendbuff, input data buffer @@ -214,10 +218,12 @@ pcclResult_t pcclBroadcast(const void *sendbuff, void* recvbuff, size_t count, * : comm, communicator * : stream, if null,used default * output : 0:pcclSuccess, other failure - * note : Must be called separately for each communicator in communicator clique + * note : Must be called separately for each communicator in + * communicator clique */ -pcclResult_t pcclAllGather(const void* sendbuff, void* recvbuff, size_t count, - pcclDataType_t datatype, pcclComm_t comm, tangStream_t stream); +pcclResult_t pcclAllGather(const void* sendbuff, void* recvbuff, size_t count, + pcclDataType_t datatype, pcclComm_t comm, + tangStream_t stream); /* description : P2P send * input : sendbuff, input data buffer @@ -229,8 +235,9 @@ pcclResult_t pcclAllGather(const void* sendbuff, void* recvbuff, size_t count, * output : 0:pcclSuccess, other failure * note : Must be called pcclRecv in group protect */ -pcclResult_t pcclSend(const void* sendbuff, size_t count, pcclDataType_t datatype, int peer, - pcclComm_t comm, tangStream_t stream); +pcclResult_t pcclSend(const void* sendbuff, size_t count, + pcclDataType_t datatype, int peer, pcclComm_t comm, + tangStream_t stream); /* description : P2P recv * : recvbuff, output data buffer @@ -242,8 +249,8 @@ pcclResult_t pcclSend(const void* sendbuff, size_t count, pcclDataType_t datatyp * output : 0:pcclSuccess, other failure * note : Must be called pcclSend in group protect */ -pcclResult_t pcclRecv(void* recvbuff, size_t count, pcclDataType_t datatype, int peer, - pcclComm_t comm, tangStream_t stream); +pcclResult_t pcclRecv(void* recvbuff, size_t count, pcclDataType_t datatype, + int peer, pcclComm_t comm, tangStream_t stream); pcclResult_t pcclGroupStart(void); pcclResult_t pcclGroupEnd(void); @@ -251,4 +258,4 @@ pcclResult_t pcclGroupEnd(void); #ifdef __cplusplus } #endif -#endif //end __PCCL_API_H__ +#endif // end __PCCL_API_H__ diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h index c00bd5e76..e1003046a 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/pcclcommon.h @@ -1,70 +1,100 @@ -#include // For dlsym, dlopen, dlerror -#include // For std::runtime_error -#include // For std::string +#include // For dlsym, dlopen, dlerror #include #include +#include // For std::runtime_error +#include // For std::string -inline void* getCommPcclFuncAddrInLib(void* handler, const char* libName, const char* apiName) { - void* funcAddr = dlsym(handler, apiName); - if (funcAddr == nullptr) { - std::cerr << "Warning: [" << __FILE__ << ":" << __LINE__ << "] " << __FUNCTION__ - << ": dlsym " << apiName << " from " << libName << " failed, error: " << dlerror() << std::endl; - } - return funcAddr; +inline void* getCommPcclFuncAddrInLib(void* handler, const char* libName, + const char* apiName) { + void* funcAddr = dlsym(handler, apiName); + if (funcAddr == nullptr) { + std::cerr << "Warning: [" << __FILE__ << ":" << __LINE__ << "] " + << __FUNCTION__ << ": dlsym " << apiName << " from " << libName + << " failed, error: " << dlerror() << std::endl; + } + return funcAddr; } inline void* getCommPcclLibHandler(const char* libName) { - auto handler = dlopen(libName, RTLD_LAZY); - if (handler == nullptr) { - std::cerr << "Warning: " << __FILE__ << ":" << __LINE__ << " " << __FUNCTION__ - << " dlopen " << libName << " failed, error:" << dlerror() << std::endl; - } - return handler; + auto handler = dlopen(libName, RTLD_LAZY); + if (handler == nullptr) { + std::cerr << "Warning: " << __FILE__ << ":" << __LINE__ << " " + << __FUNCTION__ << " dlopen " << libName + << " failed, error:" << dlerror() << std::endl; + } + return handler; } inline void* getCommPcclFuncAddr(const char* apiName) { - constexpr const char pcclLibName[] = "libpccl.so"; - constexpr const char pcclLibDependName[] = "libtangrt_shared.so"; - static void* pcclDependHandler = dlopen(pcclLibDependName, RTLD_LAZY | RTLD_GLOBAL); - if(pcclDependHandler == nullptr){ - throw std::runtime_error( - "Error: Failed to load libpccl.so. The required library 'libtangrt_shared.so' is missing.\n" - "Please ensure that 'libtangrt_shared.so' is installed and its path is included in the LD_LIBRARY_PATH environment variable.\n" - "Example: export LD_LIBRARY_PATH=/path/to/lib:$LD_LIBRARY_PATH" - ); + constexpr const char pcclLibName[] = "libpccl.so"; + constexpr const char pcclLibDependName[] = "libtangrt_shared.so"; + static void* pcclDependHandler = + dlopen(pcclLibDependName, RTLD_LAZY | RTLD_GLOBAL); + if (pcclDependHandler == nullptr) { + throw std::runtime_error( + "Error: Failed to load libpccl.so. The required library " + "'libtangrt_shared.so' is missing.\n" + "Please ensure that 'libtangrt_shared.so' is installed and its path is " + "included in the LD_LIBRARY_PATH environment variable.\n" + "Example: export LD_LIBRARY_PATH=/path/to/lib:$LD_LIBRARY_PATH"); + } + static void* pcclHandler = getCommPcclLibHandler(pcclLibName); + if (pcclHandler == nullptr) { + std::cerr << "Fallback " << apiName << " will be called" << std::endl; + return nullptr; } - static void* pcclHandler = getCommPcclLibHandler(pcclLibName); - if (pcclHandler == nullptr) { - std::cerr << "Fallback " << apiName << " will be called" << std::endl; - return nullptr; - } - return getCommPcclFuncAddrInLib(pcclHandler, pcclLibName, apiName); + return getCommPcclFuncAddrInLib(pcclHandler, pcclLibName, apiName); } #define EXPAND(x) x #define DIPU_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, NAME, ...) NAME -#define DIPU_TYPE_PARAM(...) EXPAND(DIPU_GET_MACRO(__VA_ARGS__, DIPU_TYPE_PARAM_9, DIPU_TYPE_PARAM_8, DIPU_TYPE_PARAM_7, DIPU_TYPE_PARAM_6, DIPU_TYPE_PARAM_5, DIPU_TYPE_PARAM_4, DIPU_TYPE_PARAM_3,DIPU_TYPE_PARAM_2, DIPU_TYPE_PARAM_1)(__VA_ARGS__)) +#define DIPU_TYPE_PARAM(...) \ + EXPAND(DIPU_GET_MACRO( \ + __VA_ARGS__, DIPU_TYPE_PARAM_9, DIPU_TYPE_PARAM_8, DIPU_TYPE_PARAM_7, \ + DIPU_TYPE_PARAM_6, DIPU_TYPE_PARAM_5, DIPU_TYPE_PARAM_4, \ + DIPU_TYPE_PARAM_3, DIPU_TYPE_PARAM_2, DIPU_TYPE_PARAM_1)(__VA_ARGS__)) #define DIPU_FORMAT_TYPE_PARAM(T, ...) T __VA_ARGS__ #define DIPU_TYPE_PARAM_1(TP1) DIPU_FORMAT_TYPE_PARAM TP1 -#define DIPU_TYPE_PARAM_2(TP1, TP2) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_1(TP2) -#define DIPU_TYPE_PARAM_3(TP1, TP2, TP3) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_2(TP2, TP3) -#define DIPU_TYPE_PARAM_4(TP1, TP2, TP3, TP4) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_3(TP2, TP3, TP4) -#define DIPU_TYPE_PARAM_5(TP1, TP2, TP3, TP4, TP5) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_4(TP2, TP3, TP4, TP5) -#define DIPU_TYPE_PARAM_6(TP1, TP2, TP3, TP4, TP5, TP6) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_5(TP2, TP3, TP4, TP5, TP6) -#define DIPU_TYPE_PARAM_7(TP1, TP2, TP3, TP4, TP5, TP6, TP7) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_6(TP2, TP3, TP4, TP5, TP6, TP7) -#define DIPU_TYPE_PARAM_8(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_7(TP2, TP3, TP4, TP5, TP6, TP7, TP8) -#define DIPU_TYPE_PARAM_9(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_8(TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) -#define DIPU_PARAM(...) EXPAND(DIPU_GET_MACRO(__VA_ARGS__, DIPU_PARAM_9, DIPU_PARAM_8, DIPU_PARAM_7, DIPU_PARAM_6, DIPU_PARAM_5, DIPU_PARAM_4, DIPU_PARAM_3, DIPU_PARAM_2, DIPU_PARAM_1)(__VA_ARGS__)) +#define DIPU_TYPE_PARAM_2(TP1, TP2) \ + DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_1(TP2) +#define DIPU_TYPE_PARAM_3(TP1, TP2, TP3) \ + DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_2(TP2, TP3) +#define DIPU_TYPE_PARAM_4(TP1, TP2, TP3, TP4) \ + DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_3(TP2, TP3, TP4) +#define DIPU_TYPE_PARAM_5(TP1, TP2, TP3, TP4, TP5) \ + DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_4(TP2, TP3, TP4, TP5) +#define DIPU_TYPE_PARAM_6(TP1, TP2, TP3, TP4, TP5, TP6) \ + DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_5(TP2, TP3, TP4, TP5, TP6) +#define DIPU_TYPE_PARAM_7(TP1, TP2, TP3, TP4, TP5, TP6, TP7) \ + DIPU_FORMAT_TYPE_PARAM TP1, DIPU_TYPE_PARAM_6(TP2, TP3, TP4, TP5, TP6, TP7) +#define DIPU_TYPE_PARAM_8(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8) \ + DIPU_FORMAT_TYPE_PARAM TP1, \ + DIPU_TYPE_PARAM_7(TP2, TP3, TP4, TP5, TP6, TP7, TP8) +#define DIPU_TYPE_PARAM_9(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) \ + DIPU_FORMAT_TYPE_PARAM TP1, \ + DIPU_TYPE_PARAM_8(TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) +#define DIPU_PARAM(...) \ + EXPAND(DIPU_GET_MACRO(__VA_ARGS__, DIPU_PARAM_9, DIPU_PARAM_8, DIPU_PARAM_7, \ + DIPU_PARAM_6, DIPU_PARAM_5, DIPU_PARAM_4, \ + DIPU_PARAM_3, DIPU_PARAM_2, \ + DIPU_PARAM_1)(__VA_ARGS__)) #define DIPU_FORMAT_PARAM(T, ...) __VA_ARGS__ #define DIPU_PARAM_1(TP1) DIPU_FORMAT_PARAM TP1 #define DIPU_PARAM_2(TP1, TP2) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_1(TP2) -#define DIPU_PARAM_3(TP1, TP2, TP3) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_2(TP2, TP3) -#define DIPU_PARAM_4(TP1, TP2, TP3, TP4) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_3(TP2, TP3, TP4) -#define DIPU_PARAM_5(TP1, TP2, TP3, TP4, TP5) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_4(TP2, TP3, TP4, TP5) -#define DIPU_PARAM_6(TP1, TP2, TP3, TP4, TP5, TP6) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_5(TP2, TP3, TP4, TP5, TP6) -#define DIPU_PARAM_7(TP1, TP2, TP3, TP4, TP5, TP6, TP7) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_6(TP2, TP3, TP4, TP5, TP6, TP7) -#define DIPU_PARAM_8(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_7(TP2, TP3, TP4, TP5, TP6, TP7, TP8) -#define DIPU_PARAM_9(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) DIPU_FORMAT_PARAM TP1, DIPU_PARAM_8(TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) +#define DIPU_PARAM_3(TP1, TP2, TP3) \ + DIPU_FORMAT_PARAM TP1, DIPU_PARAM_2(TP2, TP3) +#define DIPU_PARAM_4(TP1, TP2, TP3, TP4) \ + DIPU_FORMAT_PARAM TP1, DIPU_PARAM_3(TP2, TP3, TP4) +#define DIPU_PARAM_5(TP1, TP2, TP3, TP4, TP5) \ + DIPU_FORMAT_PARAM TP1, DIPU_PARAM_4(TP2, TP3, TP4, TP5) +#define DIPU_PARAM_6(TP1, TP2, TP3, TP4, TP5, TP6) \ + DIPU_FORMAT_PARAM TP1, DIPU_PARAM_5(TP2, TP3, TP4, TP5, TP6) +#define DIPU_PARAM_7(TP1, TP2, TP3, TP4, TP5, TP6, TP7) \ + DIPU_FORMAT_PARAM TP1, DIPU_PARAM_6(TP2, TP3, TP4, TP5, TP6, TP7) +#define DIPU_PARAM_8(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8) \ + DIPU_FORMAT_PARAM TP1, DIPU_PARAM_7(TP2, TP3, TP4, TP5, TP6, TP7, TP8) +#define DIPU_PARAM_9(TP1, TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) \ + DIPU_FORMAT_PARAM TP1, DIPU_PARAM_8(TP2, TP3, TP4, TP5, TP6, TP7, TP8, TP9) #define DIPU_CONCAT_IMPL(x, y) x##y #define CONCAT(x, y) DIPU_CONCAT_IMPL(x, y) diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h b/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h index 680c204a2..f8f6270e2 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h @@ -1,11 +1,12 @@ #pragma once #include -#include -#include "csrc_dipu/vendor/droplet/pccl.h" #include "pccl.h" #include +#include + +#include "csrc_dipu/vendor/droplet/pccl.h" #include namespace dipu {