Skip to content

Commit

Permalink
remove some sync (DeepLink-org#896)
Browse files Browse the repository at this point in the history
* remove sync
  • Loading branch information
yewentao256 authored Jan 30, 2024
1 parent dd6b34a commit d0d2376
Show file tree
Hide file tree
Showing 6 changed files with 2 additions and 25 deletions.
9 changes: 0 additions & 9 deletions impl/ascend/aclnn/aclnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,6 @@ int aclnnAddAdaptor(diopiContextHandle_t ctx, diopiConstTensorHandle_t self1, di
// 调用aclnnAdd第二段接口
ret = aclnnAdd(workspaceAddr, workspaceSize, executor, stream);
CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAdd failed. ERROR: %d\n", ret); return ret);
// 3.(固定写法)同步等待任务执行结束
ret = aclrtSynchronizeStream(stream);
CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret);

if (workspaceSize > 0) {
aclrtFree(workspaceAddr);
Expand Down Expand Up @@ -149,9 +146,6 @@ int aclnnSinAdaptor(diopiContextHandle_t ctx, diopiConstTensorHandle_t self1, di
// 调用aclnnSin第二段接口
ret = aclnnSin(workspaceAddr, workspaceSize, executor, stream);
CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSin failed. ERROR: %d\n", ret); return ret);
// 3.(固定写法)同步等待任务执行结束
ret = aclrtSynchronizeStream(stream);
CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret);

if (workspaceSize > 0) {
aclrtFree(workspaceAddr);
Expand Down Expand Up @@ -192,9 +186,6 @@ int aclnnCosAdaptor(diopiContextHandle_t ctx, diopiConstTensorHandle_t self1, di
// 调用aclnnCos第二段接口
ret = aclnnCos(workspaceAddr, workspaceSize, executor, stream);
CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnCos failed. ERROR: %d\n", ret); return ret);
// 3.(固定写法)同步等待任务执行结束
ret = aclrtSynchronizeStream(stream);
CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret);

if (workspaceSize > 0) {
aclrtFree(workspaceAddr);
Expand Down
1 change: 0 additions & 1 deletion impl/ascend/common/acloprunner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,6 @@ class AclOpRunner {
}
*syncTensorPtr = syncTensorReal;
}
CALL_ACLRT(aclrtSynchronizeStream(stream));
// Get environment variables once when run is called for the first time
if (isDebugAclOpRunnerOn()) {
info(__FILE__, __LINE__, __FUNCTION__, "%s", dumpRunnerInfo().c_str());
Expand Down
3 changes: 0 additions & 3 deletions impl/ascend/common/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ diopiError_t reshape(diopiContextHandle_t ctx, const AscendTensor& src, AscendTe
diopiStreamHandle_t stream;
diopiGetStream(ctx, &stream);
aclrtMemcpyAsync(destPtr, dst.getAclMemBufferSize(), sourcePtr, src.getAclMemBufferSize(), ACL_MEMCPY_DEVICE_TO_DEVICE, stream);
aclrtSynchronizeStream(stream);

return diopiSuccess;
}
Expand Down Expand Up @@ -317,7 +316,6 @@ diopiError_t makeTensorFromScalar(diopiContextHandle_t ctx, const diopiScalar_t*
diopiRequireTensor(ctx, out, &sSize, nullptr, dtype, diopi_device);
diopiGetTensorData(outCopyDev, &dst);
CALL_ACLRT(aclrtMemcpyAsync(dst, elemsize, src, elemsize, ACL_MEMCPY_HOST_TO_DEVICE, stream));
CALL_ACLRT(aclrtSynchronizeStream(stream));
diopiCastDtype(ctx, *out, outCopyDev);
} else {
error(__FILE__, __LINE__, __FUNCTION__, "device(%s) not supported", deviceType2Str(device));
Expand Down Expand Up @@ -732,7 +730,6 @@ diopiTensorHandle_t hostToDevice(diopiContextHandle_t ctx, diopiConstTensorHandl
diopiGetStream(ctx, &stream);
int64_t elemsize = getBaseBufferSize(src);
CALL_ACLRT(aclrtMemcpyAsync(dstPtr, elemsize, const_cast<void*>(srcPtr), elemsize, ACL_MEMCPY_HOST_TO_DEVICE, stream));
CALL_ACLRT(aclrtSynchronizeStream(stream));
return dst;
} else {
return const_cast<diopiTensorHandle_t>(src);
Expand Down
5 changes: 2 additions & 3 deletions impl/ascend/functions/loss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,10 @@ diopiError_t nllLossOutWithTotalWeight(diopiContextHandle_t ctx, diopiTensorHand
castTensor(ctx, weightAt, diopi_dtype_float32);
if (0 <= ignoreIndex && ignoreIndex < inputAt.shape(-1)) {
diopiStreamHandle_t stream;
void *ptr = reinterpret_cast<uint8_t *>(const_cast<void *>(weightAt.data())) + ignoreIndex * weightAt.elemsize();
void* ptr = reinterpret_cast<uint8_t*>(const_cast<void*>(weightAt.data())) + ignoreIndex * weightAt.elemsize();
float val = 0.0f;
diopiGetStream(ctx, &stream);
aclrtMemcpyAsync(ptr, sizeof(float), &val, sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE, stream);
aclrtSynchronizeStream(stream);
}

// ascend only supports input tensors with 2D dimension
Expand Down Expand Up @@ -254,7 +253,7 @@ diopiError_t diopiNLLLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t
runner.addInput(targetPtr, getBaseBufferSize(targetCopy), calTargetShapeVec, ACL_FORMAT_ND, diopi_dtype_int32).setAttr("ignore_index", ignoreIndex);

if (inputShape.len > 2) {
void *gradInputPtr;
void* gradInputPtr;
diopiGetTensorData(gradInputCopy, &gradInputPtr);
runner.addOutput(gradInputPtr, getBaseBufferSize(gradInputCopy), calShapeVec, ACL_FORMAT_ND, gradDtype);
} else {
Expand Down
1 change: 0 additions & 1 deletion impl/ascend_npu/torch_npu/csrc/CopyKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,6 @@ at::Tensor& NPUNativeFunctions::copy_(at::Tensor& self, const at::Tensor& src, b
if (!non_blocking) {
c10_npu::getCurrentNPUStream().synchronize();
}
c10_npu::getCurrentNPUStream().synchronize();
return self;
}

Expand Down
8 changes: 0 additions & 8 deletions impl/ascend_npu/torch_npu/csrc/DIOPIAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,11 @@ static std::map<const string, const aclDataType> STRING_SCALAR_TYPE_TO_ACL_TYPE_

/// Issues an asynchronous ACL memory copy on `stream` and then synchronizes
/// that stream before returning.
///
/// @param dst      Destination address forwarded to aclrtMemcpyAsync.
/// @param destMax  Destination capacity forwarded to aclrtMemcpyAsync.
/// @param src      Source address forwarded to aclrtMemcpyAsync.
/// @param count    Byte count forwarded to aclrtMemcpyAsync.
/// @param kind     Copy direction forwarded to aclrtMemcpyAsync.
/// @param stream   Stream the copy is enqueued on and that is synchronized afterwards.
/// @return The status reported by aclrtMemcpyAsync. The synchronization status
///         is only handed to NPU_CHECK_ERROR and is not returned.
aclError AclrtMemcpyAsyncParamCheck(void* dst, size_t destMax, const void* src, size_t count, aclrtMemcpyKind kind, aclrtStream stream) {
    // Keep the copy status: it is the value callers receive.
    const auto copyStatus = aclrtMemcpyAsync(dst, destMax, src, count, kind, stream);

    // Wait on the stream after the copy has been enqueued.
    NPU_CHECK_ERROR(aclrtSynchronizeStream(stream));

    return copyStatus;
}

/// Performs an aclrtMemcpy bracketed by synchronizations of the current NPU
/// stream: one before the copy and one after it.
///
/// @param dst      Destination address forwarded to aclrtMemcpy.
/// @param destMax  Destination capacity forwarded to aclrtMemcpy.
/// @param src      Source address forwarded to aclrtMemcpy.
/// @param count    Byte count forwarded to aclrtMemcpy.
/// @param kind     Copy direction forwarded to aclrtMemcpy.
/// @return The status reported by aclrtMemcpy.
aclError AclrtMemcpyParamCheck(void* dst, size_t destMax, const void* src, size_t count, aclrtMemcpyKind kind) {
    // Synchronize the current stream before the copy is issued.
    c10_npu::getCurrentNPUStream().synchronize();

    const auto copyStatus = aclrtMemcpy(dst, destMax, src, count, kind);

    // Synchronize the current stream again once the copy call has returned.
    c10_npu::getCurrentNPUStream().synchronize();

    return copyStatus;
}

Expand Down Expand Up @@ -889,7 +886,6 @@ void copy_d2d_by_memcpy(at::Tensor& dst, const at::Tensor& src, int64_t exceptSi
}
c10_npu::NPUStream stream = c10_npu::getCurrentNPUStream();
NPU_CHECK_ERROR(aclrtMemcpyAsync(dst.data_ptr(), dst.nbytes(), src.data_ptr(), src.nbytes(), ACL_MEMCPY_DEVICE_TO_DEVICE, stream));
NPU_CHECK_ERROR(aclrtSynchronizeStream(stream));
}

float CalcuOpUtil::GetScalarFloatValue(const c10::Scalar& scalar) {
Expand Down Expand Up @@ -1027,7 +1023,6 @@ NPUStatus CalcuOpUtil::AclrtMemcpyAsync(const std::pair<at::Tensor, int64_t>& ds
void* src_ptr = reinterpret_cast<uint8_t*>(src.first.data_ptr()) + src.second * src.first.itemsize();
c10_npu::NPUStream stream = c10_npu::getCurrentNPUStream();
NPU_CHECK_ERROR(aclrtMemcpyAsync(dst_ptr, dst_size, src_ptr, src_size, kind, stream));
NPU_CHECK_ERROR(aclrtSynchronizeStream(stream));
return "SUCCESS";
}

Expand All @@ -1051,7 +1046,6 @@ aclError CalcuOpUtil::AclrtMemcpyWithModeSwitch(void* dst, size_t dstMax, const
/// Copies the full storage of `src` into `dst` with aclrtMemcpyAsync on the
/// current NPU stream, then synchronizes that stream.
///
/// Fix: the function is declared to return `aclError` but previously had no
/// `return` statement, so control flowed off the end of a non-void function
/// (undefined behaviour) and callers read an indeterminate status. It now
/// returns the status reported by aclrtMemcpyAsync.
///
/// @param dst     Destination tensor; `dst.data_ptr()` / `dst.nbytes()` are used.
/// @param dstMax  Not used by this implementation (sizes come from the tensors).
/// @param src     Source tensor; `src.data_ptr()` / `src.nbytes()` are used.
/// @param count   Not used by this implementation (sizes come from the tensors).
/// @param kind    Copy direction forwarded to aclrtMemcpyAsync.
/// @return The status reported by aclrtMemcpyAsync.
aclError CalcuOpUtil::LaunchAsyncCopyTaskWithModeSwitch(const at::Tensor& dst, size_t dstMax, const at::Tensor& src, size_t count, aclrtMemcpyKind kind) {
    c10_npu::NPUStream stream = c10_npu::getCurrentNPUStream();

    // Capture the status so it can be both checked and returned to the caller.
    const auto ret = aclrtMemcpyAsync(dst.data_ptr(), dst.nbytes(), src.data_ptr(), src.nbytes(), kind, stream);
    NPU_CHECK_ERROR(ret);

    NPU_CHECK_ERROR(aclrtSynchronizeStream(stream));
    return ret;
}

void ContiguousTensorDesc::refresh_contiguous_using_size_and_stride() {
Expand Down Expand Up @@ -2743,9 +2737,7 @@ void NPUStream::synchronize() const {

/// Enqueues an asynchronous ACL memory copy on the current NPU stream,
/// synchronizing that stream both before and after the copy is issued.
///
/// @param dst     Destination address forwarded to aclrtMemcpyAsync.
/// @param dstLen  Destination length forwarded to aclrtMemcpyAsync.
/// @param src     Source address forwarded to aclrtMemcpyAsync.
/// @param srcLen  Source length forwarded to aclrtMemcpyAsync.
/// @param kind    Copy direction forwarded to aclrtMemcpyAsync.
/// @return The status reported by aclrtMemcpyAsync. The synchronization
///         statuses are only handed to NPU_CHECK_ERROR.
aclError queue::LaunchAsyncCopyTask(void* dst, size_t dstLen, void* src, size_t srcLen, aclrtMemcpyKind kind) {
    c10_npu::NPUStream currentStream = c10_npu::getCurrentNPUStream();

    // Synchronize the stream before the copy is enqueued.
    NPU_CHECK_ERROR(aclrtSynchronizeStream(currentStream));

    const auto copyStatus = aclrtMemcpyAsync(dst, dstLen, src, srcLen, kind, currentStream);

    // Synchronize again after the copy has been enqueued.
    NPU_CHECK_ERROR(aclrtSynchronizeStream(currentStream));

    return copyStatus;
}

Expand Down

0 comments on commit d0d2376

Please sign in to comment.