diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index b90dacb99..1018a4436 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1456,6 +1456,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Clone(CE)); } + if (FD->getNameAsString() == "printf" || FD->getNameAsString() == "fprintf") + return StmtDiff(Clone(CE)); + Expr* CUDAExecConfig = nullptr; if (const auto* KCE = dyn_cast(CE)) CUDAExecConfig = Clone(KCE->getConfig()); diff --git a/test/CUDA/GradientKernels.cu b/test/CUDA/GradientKernels.cu index 328a1f50d..6afd6b633 100644 --- a/test/CUDA/GradientKernels.cu +++ b/test/CUDA/GradientKernels.cu @@ -504,6 +504,7 @@ double fn_memory(double *out, double *in) { //CHECK-NEXT:} void launch_add_kernel_4(int *out, int *in, const int N) { + printf("Launching add_kernel_4 for size: %d\n", N); int *in_dev = nullptr; cudaMalloc(&in_dev, N * sizeof(int)); cudaMemcpy(in_dev, in, N * sizeof(int), cudaMemcpyHostToDevice); @@ -520,6 +521,7 @@ void launch_add_kernel_4(int *out, int *in, const int N) { // CHECK: void launch_add_kernel_4_grad_0_1(int *out, int *in, const int N, int *_d_out, int *_d_in) { //CHECK-NEXT: int _d_N = 0; +//CHECK-NEXT: printf("Launching add_kernel_4 for size: %d\n", N); //CHECK-NEXT: int *_d_in_dev = nullptr; //CHECK-NEXT: int *in_dev = nullptr; //CHECK-NEXT: cudaMalloc(&_d_in_dev, N * sizeof(int));