From fc417d93b6522d6da68005d7aa012aa76869f75e Mon Sep 17 00:00:00 2001
From: kchristin
Date: Mon, 21 Oct 2024 18:27:05 +0300
Subject: [PATCH] Fix suggestions and format

---
 include/clad/Differentiator/BuiltinDerivatives.h | 12 ++++++------
 lib/Differentiator/ReverseModeVisitor.cpp        |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/clad/Differentiator/BuiltinDerivatives.h b/include/clad/Differentiator/BuiltinDerivatives.h
index df5ebbac8..24fde4f8e 100644
--- a/include/clad/Differentiator/BuiltinDerivatives.h
+++ b/include/clad/Differentiator/BuiltinDerivatives.h
@@ -92,8 +92,8 @@ __global__ void atomicAdd_kernel(T* destPtr, T* srcPtr, size_t N) {
 
 template <typename T>
 void cudaMemcpy_pullback(T* destPtr, T* srcPtr, size_t count,
-                        cudaMemcpyKind kind, T* d_destPtr, T* d_srcPtr,
-                        size_t* d_count, cudaMemcpyKind* d_kind)
+                         cudaMemcpyKind kind, T* d_destPtr, T* d_srcPtr,
+                         size_t* d_count, cudaMemcpyKind* d_kind)
     __attribute__((host)) {
   T* aux_destPtr;
   if (kind == cudaMemcpyDeviceToHost) {
@@ -111,18 +111,18 @@ void cudaMemcpy_pullback(T* destPtr, T* srcPtr, size_t count,
     cudaGetDeviceProperties(&deviceProp, 0);
     size_t maxThreads = deviceProp.maxThreadsPerBlock;
     size_t maxBlocks = deviceProp.maxGridSize[0];
-    
+
     size_t numThreads = std::min(maxThreads, N);
     size_t numBlocks = std::min(maxBlocks, (N + numThreads - 1) / numThreads);
     custom_derivatives::atomicAdd_kernel<<<numBlocks, numThreads>>>(
         d_srcPtr, aux_destPtr, N);
-    cudaDeviceSynchronize();
+    cudaDeviceSynchronize(); // needed in case user uses another stream than the
+                             // default one
     cudaFree(aux_destPtr);
   } else if (kind == cudaMemcpyHostToDevice) {
     // d_kind is device to host, so d_srcPtr is a host pointer
-    for (size_t i = 0; i < N; ++i) {
+    for (size_t i = 0; i < N; ++i)
       d_srcPtr[i] += aux_destPtr[i];
-    }
     free(aux_destPtr);
   }
 }
diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp
index cef3ab7b5..038e341c6 100644
--- a/lib/Differentiator/ReverseModeVisitor.cpp
+++ b/lib/Differentiator/ReverseModeVisitor.cpp
@@ -614,7 +614,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
   // If the function is a global kernel, all its parameters reside in the
   // global memory of the GPU
   else if (m_DiffReq->hasAttr<clang::CUDAGlobalAttr>())
-    for (auto param : params)
+    for (auto* param : params)
       m_CUDAGlobalArgs.emplace(param);
 
   m_Derivative->setBody(nullptr);