diff --git a/include/clad/Differentiator/DiffPlanner.h b/include/clad/Differentiator/DiffPlanner.h index 663b24b47..a4b06a148 100644 --- a/include/clad/Differentiator/DiffPlanner.h +++ b/include/clad/Differentiator/DiffPlanner.h @@ -47,7 +47,7 @@ struct DiffRequest { /// Args provided to the call to clad::gradient/differentiate. const clang::Expr* Args = nullptr; /// Indexes of global GPU args of function as a subset of Args. - std::vector GlobalArgsIndexes; + std::vector CUDAGlobalArgsIndexes; /// Requested differentiation mode, forward or reverse. DiffMode Mode = DiffMode::unknown; /// If function appears in the call to clad::gradient/differentiate, diff --git a/include/clad/Differentiator/KokkosBuiltins.h b/include/clad/Differentiator/KokkosBuiltins.h index 1a6253027..51824a004 100644 --- a/include/clad/Differentiator/KokkosBuiltins.h +++ b/include/clad/Differentiator/KokkosBuiltins.h @@ -30,6 +30,37 @@ constructor_pushforward( Kokkos::View( "_diff_" + name, idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7)}; } +template +clad::ValueAndAdjoint<::Kokkos::View, + ::Kokkos::View> +constructor_reverse_forw( + clad::ConstructorReverseForwTag<::Kokkos::View>, + const ::std::string& name, const size_t& idx0, const size_t& idx1, + const size_t& idx2, const size_t& idx3, const size_t& idx4, + const size_t& idx5, const size_t& idx6, const size_t& idx7, + const ::std::string& /*d_name*/, const size_t& /*d_idx0*/, + const size_t& /*d_idx1*/, const size_t& /*d_idx2*/, + const size_t& /*d_idx3*/, const size_t& /*d_idx4*/, + const size_t& /*d_idx5*/, const size_t& /*d_idx6*/, + const size_t& /*d_idx7*/) { + return {::Kokkos::View(name, idx0, idx1, idx2, idx3, + idx4, idx5, idx6, idx7), + ::Kokkos::View( + "_diff_" + name, idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7)}; +} +template +void constructor_pullback(::Kokkos::View* v, + const ::std::string& name, const size_t& idx0, + const size_t& idx1, const size_t& idx2, + const size_t& idx3, const size_t& idx4, + const size_t& idx5, const size_t& idx6, + const size_t& idx7, + ::Kokkos::View* d_v, + const ::std::string* /*d_name*/, + const size_t& /*d_idx0*/, const size_t* /*d_idx1*/, + const size_t* /*d_idx2*/, const size_t* /*d_idx3*/, + const size_t* /*d_idx4*/, const size_t* /*d_idx5*/, + const size_t* /*d_idx6*/, const size_t* /*d_idx7*/) {} /// View indexing template @@ -107,6 +138,191 @@ operator_call_pushforward(const View* v, Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, return {(*v)(i0, i1, i2, i3, i4, i5, i6, i7), (*d_v)(i0, i1, i2, i3, i4, i5, i6, i7)}; } +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename ::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx i0, + const ::Kokkos::View* d_v, + Idx /*d_i0*/) { + return {(*v)(i0), (*d_v)(i0)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx i0, Diff d_y, + ::Kokkos::View* d_v, + dIdx* /*d_i0*/) { + (*d_v)(i0) += d_y; +} +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename ::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, + const ::Kokkos::View* d_v, + Idx0 /*d_i0*/, Idx1 /*d_i1*/) { + return {(*v)(i0, i1), (*d_v)(i0, i1)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Diff d_y, + ::Kokkos::View* d_v, + dIdx0* /*d_i0*/, dIdx1* /*d_i1*/) { + (*d_v)(i0, i1) += d_y; +} +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename 
::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, + const ::Kokkos::View* d_v, + Idx0 /*d_i0*/, Idx1 /*d_i1*/, Idx2 /*d_i2*/) { + return {(*v)(i0, i1, i2), (*d_v)(i0, i1, i2)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Diff d_y, + ::Kokkos::View* d_v, + dIdx0* /*d_i0*/, dIdx1* /*d_i1*/, dIdx2* /*d_i2*/) { + (*d_v)(i0, i1, i2) += d_y; +} +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename ::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, + const ::Kokkos::View* d_v, + Idx0 /*d_i0*/, Idx1 /*d_i1*/, Idx2 /*d_i2*/, + Idx3 /*d_i3*/) { + return {(*v)(i0, i1, i2, i3), (*d_v)(i0, i1, i2, i3)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Diff d_y, + ::Kokkos::View* d_v, + dIdx0* /*d_i0*/, dIdx1* /*d_i1*/, dIdx2* /*d_i2*/, + dIdx3* /*d_i3*/) { + (*d_v)(i0, i1, i2, i3) += d_y; +} +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename ::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, + const ::Kokkos::View* d_v, + Idx0 /*d_i0*/, Idx1 /*d_i1*/, Idx2 /*d_i2*/, + Idx3 /*d_i3*/, Idx4 /*d_i4*/) { + return {(*v)(i0, i1, i2, i3, i4), (*d_v)(i0, i1, i2, i3, i4)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, + Diff d_y, + ::Kokkos::View* d_v, + dIdx0* /*d_i0*/, dIdx1* /*d_i1*/, dIdx2* /*d_i2*/, + dIdx3* /*d_i3*/, dIdx4* /*d_i4*/) { + (*d_v)(i0, i1, i2, i3, i4) += d_y; +} +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename ::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, Idx5 i5, + const ::Kokkos::View* d_v, + Idx0 /*d_i0*/, Idx1 /*d_i1*/, Idx2 /*d_i2*/, + Idx3 /*d_i3*/, Idx4 /*d_i4*/, Idx5 /*d_i5*/) { + return {(*v)(i0, i1, i2, i3, i4, i5), (*d_v)(i0, i1, i2, i3, i4, i5)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, + Idx5 i5, Diff d_y, + ::Kokkos::View* d_v, + dIdx0* /*d_i0*/, dIdx1* /*d_i1*/, dIdx2* /*d_i2*/, + dIdx3* /*d_i3*/, dIdx4* /*d_i4*/, dIdx5* /*d_i5*/) { + (*d_v)(i0, i1, i2, i3, i4, i5) += d_y; +} +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename ::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, Idx5 i5, + Idx6 i6, + const ::Kokkos::View* d_v, + Idx0 /*d_i0*/, Idx1 /*d_i1*/, Idx2 /*d_i2*/, + Idx3 /*d_i3*/, Idx4 /*d_i4*/, Idx5 /*d_i5*/, + Idx6 /*d_i6*/) { + return {(*v)(i0, i1, i2, i3, i4, i5, i6), (*d_v)(i0, i1, i2, i3, i4, i5, i6)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, + Idx5 i5, Idx6 i6, Diff d_y, + ::Kokkos::View* d_v, + dIdx0* /*d_i0*/, dIdx1* /*d_i1*/, dIdx2* /*d_i2*/, + dIdx3* /*d_i3*/, dIdx4* /*d_i3*/, dIdx5* /*d_i3*/, + dIdx6* /*d_i3*/) { + (*d_v)(i0, i1, i2, i3, i4, i5, i6) += d_y; +} +template +clad::ValueAndAdjoint< + typename ::Kokkos::View::reference_type&, + typename ::Kokkos::View::reference_type&> +operator_call_reverse_forw(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, Idx5 i5, + Idx6 i6, Idx7 i7, + const 
::Kokkos::View* d_v, + Idx0 /*d_i0*/, Idx1 /*d_i1*/, Idx2 /*d_i2*/, + Idx3 /*d_i3*/, Idx4 /*d_i4*/, Idx5 /*d_i5*/, + Idx6 /*d_i6*/, Idx7 /*d_i7*/) { + return {(*v)(i0, i1, i2, i3, i4, i5, i6, i7), + (*d_v)(i0, i1, i2, i3, i4, i5, i6, i7)}; +} +template +void operator_call_pullback(const ::Kokkos::View* v, + Idx0 i0, Idx1 i1, Idx2 i2, Idx3 i3, Idx4 i4, + Idx5 i5, Idx6 i6, Idx7 i7, Diff d_y, + ::Kokkos::View* d_v, + dIdx0* /*d_i0*/, dIdx1* /*d_i1*/, dIdx2* /*d_i2*/, + dIdx3* /*d_i3*/, dIdx4* /*d_i3*/, dIdx5* /*d_i3*/, + dIdx6* /*d_i3*/, dIdx7* /*d_i3*/) { + (*d_v)(i0, i1, i2, i3, i4, i5, i6, i7) += d_y; +} } // namespace class_functions /// Kokkos functions (view utils) @@ -118,6 +334,122 @@ inline void deep_copy_pushforward(const View1& dst, const View2& src, T param, deep_copy(dst, src); deep_copy(d_dst, d_src); } +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) {} +}; +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) { + ::Kokkos::parallel_for("iterate_over_all_view_elements", v.extent(0), func); + } +}; +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) { + ::Kokkos::parallel_for("iterate_over_all_view_elements", + ::Kokkos::MDRangePolicy<::Kokkos::Rank<2>>( + {0, 0}, {v.extent(0), v.extent(1)}), + func); + } +}; +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) { + ::Kokkos::parallel_for( + "iterate_over_all_view_elements", + ::Kokkos::MDRangePolicy<::Kokkos::Rank<3>>( + {0, 0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), + func); + } +}; +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) { + ::Kokkos::parallel_for( + "iterate_over_all_view_elements", + ::Kokkos::MDRangePolicy<::Kokkos::Rank<4>>( + {0, 0, 0, 0}, {v.extent(0), v.extent(1), v.extent(2), v.extent(3)}), + func); + } +}; +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) { + ::Kokkos::parallel_for( + "iterate_over_all_view_elements", + ::Kokkos::MDRangePolicy<::Kokkos::Rank<5>>( + {0, 0, 0, 0, 0}, + {v.extent(0), v.extent(1), v.extent(2), v.extent(3), v.extent(4)}), + func); + } +}; +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) { + ::Kokkos::parallel_for( + "iterate_over_all_view_elements", + ::Kokkos::MDRangePolicy<::Kokkos::Rank<6>>( + {0, 0, 0, 0, 0, 0}, {v.extent(0), v.extent(1), v.extent(2), + v.extent(3), v.extent(4), v.extent(5)}), + func); + } +}; +template struct iterate_over_all_view_elements { + template static void run(const View& v, F func) { + ::Kokkos::parallel_for( + "iterate_over_all_view_elements", + ::Kokkos::MDRangePolicy<::Kokkos::Rank<7>>( + {0, 0, 0, 0, 0, 0, 0}, + {v.extent(0), v.extent(1), v.extent(2), v.extent(3), v.extent(4), + v.extent(5), v.extent(6)}), + func); + } +}; +template +void deep_copy_pullback( + const ::Kokkos::View& dst, + typename ::Kokkos::ViewTraits::const_value_type& /*value*/, + ::std::enable_if_t<::std::is_same< + typename ::Kokkos::ViewTraits::specialize, void>::value>*, + ::Kokkos::View* d_dst, + typename ::Kokkos::ViewTraits::value_type* d_value, + ::std::enable_if_t< + ::std::is_same::specialize, + void>::value>*) { + typename ::Kokkos::ViewTraits::value_type res = 0; + + iterate_over_all_view_elements< + ::Kokkos::View, + ::Kokkos::ViewTraits::rank>::run(dst, + [&res, + &d_dst](auto&&... 
args) { + res += (*d_dst)(args...); + (*d_dst)(args...) = 0; + }); + + (*d_value) += res; +} +template +inline void deep_copy_pullback( + const ::Kokkos::View& dst, + const ::Kokkos::View& /*src*/, + ::std::enable_if_t< + (::std::is_void< + typename ::Kokkos::ViewTraits::specialize>::value && + ::std::is_void< + typename ::Kokkos::ViewTraits::specialize>::value && + ((unsigned int)(::Kokkos::ViewTraits::rank) != 0 || + (unsigned int)(::Kokkos::ViewTraits::rank) != 0))>*, + ::Kokkos::View* d_dst, ::Kokkos::View* d_src, + ::std::enable_if_t< + (::std::is_void< + typename ::Kokkos::ViewTraits::specialize>::value && + ::std::is_void< + typename ::Kokkos::ViewTraits::specialize>::value && + ((unsigned int)(::Kokkos::ViewTraits::rank) != 0 || + (unsigned int)(::Kokkos::ViewTraits::rank) != 0))>*) { + iterate_over_all_view_elements<::Kokkos::View, + ::Kokkos::ViewTraits::rank>:: + run(dst, [&d_src, &d_dst](auto&&... args) { + (*d_src)(args...) += (*d_dst)(args...); + (*d_dst)(args...) = 0; + }); +} + template diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index 83087f438..6003d50fc 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -57,7 +57,7 @@ namespace clad { /// block. Stmts m_Globals; /// Global GPU args of the function. - std::unordered_set m_GlobalArgs; + std::unordered_set m_CUDAGlobalArgs; //// A reference to the output parameter of the gradient function. clang::Expr* m_Result; /// A flag indicating if the Stmt we are currently visiting is inside loop. diff --git a/include/clad/Differentiator/VisitorBase.h b/include/clad/Differentiator/VisitorBase.h index 1cea9028c..a7206b58b 100644 --- a/include/clad/Differentiator/VisitorBase.h +++ b/include/clad/Differentiator/VisitorBase.h @@ -362,8 +362,17 @@ namespace clad { /// \param[in] D The declaration to build a DeclRefExpr for. /// \param[in] SS The scope specifier for the declaration. /// \returns the DeclRefExpr for the given declaration. - clang::DeclRefExpr* BuildDeclRef(clang::DeclaratorDecl* D, - const clang::CXXScopeSpec* SS = nullptr); + clang::DeclRefExpr* + BuildDeclRef(clang::DeclaratorDecl* D, + const clang::CXXScopeSpec* SS = nullptr, + clang::ExprValueKind VK = clang::VK_LValue); + /// Builds a DeclRefExpr to a given Decl, adding proper nested name + /// qualifiers. + /// \param[in] D The declaration to build a DeclRefExpr for. + /// \param[in] NNS The nested name specifier to use. + clang::DeclRefExpr* + BuildDeclRef(clang::DeclaratorDecl* D, clang::NestedNameSpecifier* NNS, + clang::ExprValueKind VK = clang::VK_LValue); /// Stores the result of an expression in a temporary variable (of the same /// type as is the result of the expression) and returns a reference to it. diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index fe14227b7..8015b8fdb 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -1036,8 +1036,9 @@ StmtDiff BaseForwardModeVisitor::VisitDeclRefExpr(const DeclRefExpr* DRE) { // Sema::BuildDeclRefExpr is responsible for adding captured fields // to the underlying struct of a lambda. 
     if (clonedDRE->getDecl()->getDeclContext() != m_Sema.CurContext) {
-      auto referencedDecl = cast(clonedDRE->getDecl());
-      clonedDRE = cast(BuildDeclRef(referencedDecl));
+      NestedNameSpecifier* NNS = DRE->getQualifier();
+      auto* referencedDecl = cast(clonedDRE->getDecl());
+      clonedDRE = BuildDeclRef(referencedDecl, NNS);
     }
   } else
     clonedDRE = cast(Clone(DRE));
@@ -1052,7 +1053,7 @@ StmtDiff BaseForwardModeVisitor::VisitDeclRefExpr(const DeclRefExpr* DRE) {
   if (auto dVarDRE = dyn_cast(dExpr)) {
     auto dVar = cast(dVarDRE->getDecl());
     if (dVar->getDeclContext() != m_Sema.CurContext)
-      dExpr = BuildDeclRef(dVar);
+      dExpr = BuildDeclRef(dVar, DRE->getQualifier());
   }
   return StmtDiff(clonedDRE, dExpr);
 }
diff --git a/lib/Differentiator/ConstantFolder.cpp b/lib/Differentiator/ConstantFolder.cpp
index e75b918ad..900e87a90 100644
--- a/lib/Differentiator/ConstantFolder.cpp
+++ b/lib/Differentiator/ConstantFolder.cpp
@@ -150,7 +150,7 @@ namespace clad {
     SourceLocation noLoc;
     Expr* cast = CXXStaticCastExpr::Create(
         C, QT, CLAD_COMPAT_ExprValueKind_R_or_PR_Value,
-        clang::CastKind::CK_IntegralCast, Result, nullptr,
+        clang::CastKind::CK_IntegralCast, Result, /*CXXCastPath=*/nullptr,
        C.getTrivialTypeSourceInfo(QT, noLoc)
            CLAD_COMPAT_CLANG12_CastExpr_DefaultFPO,
        noLoc, noLoc, SourceRange());
diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp
index 518e667ba..2c3baec86 100644
--- a/lib/Differentiator/ReverseModeVisitor.cpp
+++ b/lib/Differentiator/ReverseModeVisitor.cpp
@@ -106,12 +106,11 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
 bool ReverseModeVisitor::shouldUseCudaAtomicOps(const Expr* E) {
   // Same as checking whether this is a function executed by the GPU
-  if (!m_GlobalArgs.empty())
+  if (!m_CUDAGlobalArgs.empty())
     if (const auto* DRE = dyn_cast(E))
       if (const auto* PVD = dyn_cast(DRE->getDecl()))
-        // we need to check whether this param is in the global memory of the
-        // GPU
-        return m_GlobalArgs.find(PVD) != m_GlobalArgs.end();
+        // Check whether this param is in the global memory of the GPU
+        return m_CUDAGlobalArgs.find(PVD) != m_CUDAGlobalArgs.end();
   return false;
 }
@@ -454,8 +453,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
   // if the function is a global kernel, all its parameters reside in the
   // global memory of the GPU
   if (m_DiffReq->hasAttr())
-    for (auto param : params)
-      m_GlobalArgs.emplace(param);
+    for (auto* param : params)
+      m_CUDAGlobalArgs.emplace(param);
   llvm::ArrayRef paramsRef =
       clad_compat::makeArrayRef(params.data(), params.size());
@@ -563,7 +562,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
   auto derivativeName =
       utils::ComputeEffectiveFnName(m_DiffReq.Function) + "_pullback";
-  for (auto index : m_DiffReq.GlobalArgsIndexes)
+  for (auto index : m_DiffReq.CUDAGlobalArgsIndexes)
     derivativeName += "_" + std::to_string(index);
   auto DNI = utils::BuildDeclarationNameInfo(m_Sema, derivativeName);
@@ -608,14 +607,14 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
   m_Derivative->setParams(params);
   // Match the global arguments of the call to the device function to the
   // pullback function's parameters.
-  if (!m_DiffReq.GlobalArgsIndexes.empty())
-    for (auto index : m_DiffReq.GlobalArgsIndexes)
-      m_GlobalArgs.emplace(m_Derivative->getParamDecl(index));
+  if (!m_DiffReq.CUDAGlobalArgsIndexes.empty())
+    for (auto index : m_DiffReq.CUDAGlobalArgsIndexes)
+      m_CUDAGlobalArgs.emplace(m_Derivative->getParamDecl(index));
   // If the function is a global kernel, all its parameters reside in the
   // global memory of the GPU
   else if (m_DiffReq->hasAttr())
     for (auto param : params)
-      m_GlobalArgs.emplace(param);
+      m_CUDAGlobalArgs.emplace(param);
   m_Derivative->setBody(nullptr);
   if (!m_DiffReq.DeclarationOnly) {
@@ -1573,8 +1572,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
     // with Sema::BuildDeclRefExpr. This is required in some cases, e.g.
     // Sema::BuildDeclRefExpr is responsible for adding captured fields
     // to the underlying struct of a lambda.
-    if (VD->getDeclContext() != m_Sema.CurContext)
-      clonedDRE = cast(BuildDeclRef(VD));
+    if (VD->getDeclContext() != m_Sema.CurContext) {
+      auto* ccDRE = dyn_cast(clonedDRE);
+      NestedNameSpecifier* NNS = DRE->getQualifier();
+      auto* referencedDecl = cast(ccDRE->getDecl());
+      clonedDRE = BuildDeclRef(referencedDecl, NNS, DRE->getValueKind());
+    }
     // This case happens when ref-type variables have to become function
     // global. Ref-type declarations cannot be moved to the function global
     // scope because they can't be separated from their inits.
@@ -1900,9 +1903,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
   }
   Expr* OverloadedDerivedFn = nullptr;
-  // If the function has a single arg and does not returns a reference or take
+  // If the function has a single arg and does not return a reference or take
   // arg by reference, we look for a derivative w.r.t. to this arg using the
-  // forward mode(it is unlikely that we need gradient of a one-dimensional'
+  // forward mode(it is unlikely that we need gradient of a one-dimensional
   // function).
   bool asGrad = true;
@@ -2000,11 +2003,11 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
     std::string customPullback =
         clad::utils::ComputeEffectiveFnName(FD) + "_pullback";
     // Add the indexes of the global args to the custom pullback name
-    if (!m_GlobalArgs.empty())
+    if (!m_CUDAGlobalArgs.empty())
       for (size_t i = 0; i < pullbackCallArgs.size(); i++)
         if (auto* DRE = dyn_cast(pullbackCallArgs[i]))
           if (auto* param = dyn_cast(DRE->getDecl()))
-            if (m_GlobalArgs.find(param) != m_GlobalArgs.end()) {
+            if (m_CUDAGlobalArgs.find(param) != m_CUDAGlobalArgs.end()) {
               customPullback += "_" + std::to_string(i);
               globalCallArgs.emplace_back(i);
             }
@@ -2049,7 +2052,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       // Mark the indexes of the global args. Necessary if the argument of the
      // call has a different name than the function's signature parameter.
-      pullbackRequest.GlobalArgsIndexes = globalCallArgs;
+      pullbackRequest.CUDAGlobalArgsIndexes = globalCallArgs;
       pullbackRequest.BaseFunctionName = clad::utils::ComputeEffectiveFnName(FD);
@@ -2214,8 +2217,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       StmtDiff argDiff = Visit(arg);
       CallArgs.push_back(argDiff.getExpr_dx());
     }
-    if (baseDiff.getExpr()) {
-      Expr* baseE = baseDiff.getExpr();
+    if (Expr* baseE = baseDiff.getExpr()) {
       call = BuildCallExprToMemFn(baseE, calleeFnForwPassFD->getName(),
                                   CallArgs, Loc);
     } else {
@@ -2232,6 +2234,28 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
         utils::BuildMemberExpr(m_Sema, getCurrentScope(), callRes, "adjoint");
     return StmtDiff(resValue, resAdjoint, resAdjoint);
   }
   // Recreate the original call expression.
+
+  if (const auto* OCE = dyn_cast(CE)) {
+    auto* FD = const_cast(
+        dyn_cast(OCE->getCalleeDecl()));
+
+    NestedNameSpecifierLoc NNS(FD->getQualifier(),
+                               /*Data=*/nullptr);
+    auto DAP = DeclAccessPair::make(FD, FD->getAccess());
+    auto* memberExpr = MemberExpr::Create(
+        m_Context, Clone(OCE->getArg(0)), /*isArrow=*/false, Loc, NNS, noLoc,
+        FD, DAP, FD->getNameInfo(),
+        /*TemplateArgs=*/nullptr, m_Context.BoundMemberTy,
+        CLAD_COMPAT_ExprValueKind_R_or_PR_Value,
+        ExprObjectKind::OK_Ordinary CLAD_COMPAT_CLANG9_MemberExpr_ExtraParams(
+            NOUR_None));
+    call = m_Sema
+               .BuildCallToMemberFunction(getCurrentScope(), memberExpr, Loc,
+                                          CallArgs, Loc)
+               .get();
+    return StmtDiff(call);
+  }
+
   call = m_Sema
              .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), Loc,
                             CallArgs, Loc)
@@ -2668,11 +2692,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
           ConstantFolder::synthesizeLiteral(m_Context.IntTy, m_Context, i);
       Expr* gradElem = BuildArraySubscript(gradRef, {idx});
       Expr* gradExpr = BuildOp(BO_Mul, dfdx, gradElem);
-      if (shouldUseCudaAtomicOps(outputArgs[i]))
-        PostCallStmts.push_back(
-            BuildCallToCudaAtomicAdd(outputArgs[i], gradExpr));
-      else
-        PostCallStmts.push_back(BuildOp(BO_AddAssign, outputArgs[i], gradExpr));
+      // Inputs were not pointers, so the output args are not in global GPU
+      // memory. Hence, no need to use atomic ops.
+      PostCallStmts.push_back(BuildOp(BO_AddAssign, outputArgs[i], gradExpr));
       NumDiffArgs.push_back(args[i]);
     }
     std::string Name = "central_difference";
@@ -2779,7 +2801,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
     else {
       derivedE = BuildOp(UnaryOperatorKind::UO_Deref, diff_dx);
       // Create the (target += dfdx) statement.
-      if (dfdx()) {
+      if (dfdx() && derivedE) {
        if (shouldUseCudaAtomicOps(diff_dx)) {
          Expr* atomicCall = BuildCallToCudaAtomicAdd(diff_dx, dfdx());
          // Add it to the body statements.
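
Note (not part of the patch): the index-suffix scheme above can be illustrated with a small hypothetical sketch. The helper function, its body, and all names below are made up; only the naming convention (appending "_<arg index>" for call arguments that live in global GPU memory) and the clad::custom_derivatives lookup namespace come from this patch. The pullback signature is assumed to follow clad's usual convention of original parameters followed by their adjoints.

// Hypothetical device helper called from a __global__ kernel; "out" points
// into global GPU memory, so the pullback request records
// CUDAGlobalArgsIndexes = {0} and clad looks for "scale_pullback_0".
__device__ void scale(double* out, double v) { *out = 2.0 * v; }

namespace clad {
namespace custom_derivatives {
// Sketch of a matching user-supplied custom pullback using the suffixed name.
__device__ void scale_pullback_0(double* out, double v, double* d_out,
                                 double* d_v) {
  *d_v += 2.0 * *d_out; // adjoint of *out = 2.0 * v
  *d_out = 0.;          // *out was overwritten, so its incoming adjoint is consumed
}
} // namespace custom_derivatives
} // namespace clad
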
diff --git a/lib/Differentiator/VisitorBase.cpp b/lib/Differentiator/VisitorBase.cpp index b37efe5dd..0f22245d4 100644 --- a/lib/Differentiator/VisitorBase.cpp +++ b/lib/Differentiator/VisitorBase.cpp @@ -236,11 +236,38 @@ namespace clad { } DeclRefExpr* VisitorBase::BuildDeclRef(DeclaratorDecl* D, - const CXXScopeSpec* SS /*=nullptr*/) { + const CXXScopeSpec* SS /*=nullptr*/, + ExprValueKind VK /*=VK_LValue*/) { QualType T = D->getType(); T = T.getNonReferenceType(); return cast(clad_compat::GetResult( - m_Sema.BuildDeclRefExpr(D, T, VK_LValue, D->getBeginLoc(), SS))); + m_Sema.BuildDeclRefExpr(D, T, VK, D->getBeginLoc(), SS))); + } + + DeclRefExpr* VisitorBase::BuildDeclRef(DeclaratorDecl* D, + NestedNameSpecifier* NNS, + ExprValueKind VK /*=VK_LValue*/) { + std::vector NNChain; + CXXScopeSpec CSS; + while (NNS) { + NNChain.push_back(NNS); + NNS = NNS->getPrefix(); + } + + std::reverse(NNChain.begin(), NNChain.end()); + + for (size_t i = 0; i < NNChain.size(); ++i) { + NNS = NNChain[i]; + // FIXME: this needs to be extended to support more NNS kinds. An + // inspiration can be take from getFullyQualifiedNestedNameSpecifier in + // llvm-project/clang/lib/AST/QualTypeNames.cpp + if (NNS->getKind() == NestedNameSpecifier::Namespace) { + NamespaceDecl* NS = NNS->getAsNamespace(); + CSS.Extend(m_Context, NS, noLoc, noLoc); + } + } + + return BuildDeclRef(D, &CSS, VK); } IdentifierInfo* diff --git a/test/Gradient/Lambdas.C b/test/Gradient/Lambdas.C index f9b06aeeb..35776e2d6 100644 --- a/test/Gradient/Lambdas.C +++ b/test/Gradient/Lambdas.C @@ -13,7 +13,7 @@ double f1(double i, double j) { } // CHECK: inline void operator_call_pullback(double t, double _d_y, double *_d_t) const; -// CHECK-NEXT: void f1_grad(double i, double j, double *_d_i, double *_d_j) { +// CHECK: void f1_grad(double i, double j, double *_d_i, double *_d_j) { // CHECK-NEXT: auto _f = []{{ ?}}(double t) { // CHECK-NEXT: return t * t + 1.; // CHECK-NEXT: }{{;?}} @@ -34,12 +34,12 @@ double f2(double i, double j) { } // CHECK: inline void operator_call_pullback(double t, double k, double _d_y, double *_d_t, double *_d_k) const; -// CHECK-NEXT: void f2_grad(double i, double j, double *_d_i, double *_d_j) { +// CHECK: void f2_grad(double i, double j, double *_d_i, double *_d_j) { // CHECK-NEXT: auto _f = []{{ ?}}(double t, double k) { // CHECK-NEXT: return t + k; // CHECK-NEXT: }{{;?}} // CHECK: double _d_x = 0.; -// CHECK-NEXT: double x = operator()(i + j, i); +// CHECK-NEXT: double x = _f.operator()(i + j, i); // CHECK-NEXT: _d_x += 1; // CHECK-NEXT: { // CHECK-NEXT: double _r0 = 0.; diff --git a/test/Gradient/Switch.C b/test/Gradient/Switch.C index 6e18bc04d..98a176807 100644 --- a/test/Gradient/Switch.C +++ b/test/Gradient/Switch.C @@ -682,6 +682,146 @@ double fn7(double u, double v) { // CHECK-NEXT: } // CHECK-NEXT: } +enum Op { + Add, + Sub, + Mul, + Div +}; + +double fn24(double x, double y, Op op) { + double res = 0; + switch (op) { + case Add: + res = x + y; + break; + case Sub: + res = x - y; + break; + case Mul: + res = x * y; + break; + case Div: + res = x / y; + break; + } + return res; +} + +// CHECK: void fn24_grad_0_1(double x, double y, Op op, double *_d_x, double *_d_y) { +// CHECK-NEXT: Op _d_op = static_cast(0U); +// CHECK-NEXT: Op _cond0; +// CHECK-NEXT: double _t0; +// CHECK-NEXT: clad::tape _t1 = {}; +// CHECK-NEXT: double _t2; +// CHECK-NEXT: double _t3; +// CHECK-NEXT: double _t4; +// CHECK-NEXT: double _d_res = 0.; +// CHECK-NEXT: double res = 0; +// CHECK-NEXT: { +// CHECK-NEXT: _cond0 = op; +// 
CHECK-NEXT: switch (_cond0) { +// CHECK-NEXT: { +// CHECK-NEXT: case Add: +// CHECK-NEXT: res = x + y; +// CHECK-NEXT: _t0 = res; +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: clad::push(_t1, {{1U|1UL}}); +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: case Sub: +// CHECK-NEXT: res = x - y; +// CHECK-NEXT: _t2 = res; +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: clad::push(_t1, {{2U|2UL}}); +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: case Mul: +// CHECK-NEXT: res = x * y; +// CHECK-NEXT: _t3 = res; +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: clad::push(_t1, {{3U|3UL}}); +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: case Div: +// CHECK-NEXT: res = x / y; +// CHECK-NEXT: _t4 = res; +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: clad::push(_t1, {{4U|4UL}}); +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: clad::push(_t1, {{5U|5UL}}); +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: _d_res += 1; +// CHECK-NEXT: { +// CHECK-NEXT: switch (clad::pop(_t1)) { +// CHECK-NEXT: case {{5U|5UL}}: +// CHECK-NEXT: ; +// CHECK-NEXT: case {{4U|4UL}}: +// CHECK-NEXT: ; +// CHECK-NEXT: { +// CHECK-NEXT: { +// CHECK-NEXT: res = _t4; +// CHECK-NEXT: double _r_d3 = _d_res; +// CHECK-NEXT: _d_res = 0.; +// CHECK-NEXT: *_d_x += _r_d3 / y; +// CHECK-NEXT: double _r0 = _r_d3 * -(x / (y * y)); +// CHECK-NEXT: _d_y += _r0; +// CHECK-NEXT: } +// CHECK-NEXT: if (Div == _cond0) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: case {{3U|3UL}}: +// CHECK-NEXT: ; +// CHECK-NEXT: { +// CHECK-NEXT: { +// CHECK-NEXT: res = _t3; +// CHECK-NEXT: double _r_d2 = _d_res; +// CHECK-NEXT: _d_res = 0.; +// CHECK-NEXT: *_d_x += _r_d2 * y; +// CHECK-NEXT: _d_y += x * _r_d2; +// CHECK-NEXT: } +// CHECK-NEXT: if (Mul == _cond0) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: case {{2U|2UL}}: +// CHECK-NEXT: ; +// CHECK-NEXT: { +// CHECK-NEXT: { +// CHECK-NEXT: res = _t2; +// CHECK-NEXT: double _r_d1 = _d_res; +// CHECK-NEXT: _d_res = 0.; +// CHECK-NEXT: *_d_x += _r_d1; +// CHECK-NEXT: _d_y += -_r_d1; +// CHECK-NEXT: } +// CHECK-NEXT: if (Sub == _cond0) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: case {{1U|1UL}}: +// CHECK-NEXT: ; +// CHECK-NEXT: { +// CHECK-NEXT: { +// CHECK-NEXT: res = _t0; +// CHECK-NEXT: double _r_d0 = _d_res; +// CHECK-NEXT: _d_res = 0.; +// CHECK-NEXT: *_d_x += _r_d0; +// CHECK-NEXT: _d_y += _r_d0; +// CHECK-NEXT: } +// CHECK-NEXT: if (Add == _cond0) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT:} + #define TEST_2(F, x, y) \ { \ @@ -691,6 +831,14 @@ double fn7(double u, double v) { printf("{%.2f, %.2f}\n", result[0], result[1]); \ } +#define TEST_2_Op(F, x, y, op) \ +{ \ + result[0] = result[1] = 0; \ + auto d_##F = clad::gradient(F, "x, y"); \ + d_##F.execute(x, y, op, result, result + 1); \ + printf("{%.2f, %.2f}\n", result[0], result[1]); \ +} + int main() { double result[2] = {}; @@ -705,4 +853,9 @@ int main() { TEST_GRADIENT(fn6, 2, 3, 5, &result[0], &result[1]); // CHECK-EXEC: {5.00, 3.00} TEST_GRADIENT(fn7, 2, 3, 5, &result[0], &result[1]); // CHECK-EXEC: {3.00, 2.00} + + TEST_2_Op(fn24, 3, 5, Add); // CHECK-EXEC: {1.00, 1.00} + TEST_2_Op(fn24, 3, 5, Sub); // CHECK-EXEC: {1.00, -1.00} + TEST_2_Op(fn24, 3, 5, Mul); // CHECK-EXEC: {5.00, 3.00} + TEST_2_Op(fn24, 3, 5, Div); // CHECK-EXEC: {0.20, -0.12} } diff --git a/test/ValidCodeGen/ValidCodeGen.C b/test/ValidCodeGen/ValidCodeGen.C new file mode 100644 index 
000000000..11f1f6fc7 --- /dev/null +++ b/test/ValidCodeGen/ValidCodeGen.C @@ -0,0 +1,76 @@ +// RUN: %cladclang -std=c++14 %s -I%S/../../include -oValidCodeGen.out 2>&1 | %filecheck %s +// RUN: ./ValidCodeGen.out | %filecheck_exec %s +// RUN: %cladclang -std=c++14 -Xclang -plugin-arg-clad -Xclang -enable-tbr %s -I%S/../../include -oValidCodeGenWithTBR.out +// RUN: ./ValidCodeGenWithTBR.out | %filecheck_exec %s +// CHECK-NOT: {{.*error|warning|note:.*}} + +#include "clad/Differentiator/Differentiator.h" +#include "clad/Differentiator/STLBuiltins.h" +#include "../TestUtils.h" +#include "../PrintOverloads.h" + +namespace TN { + int coefficient = 3; + + template + struct Test2 { + T operator[](T x) { + return 4*x; + } + }; +} + +namespace clad { +namespace custom_derivatives { +namespace class_functions { + template + void operator_subscript_pullback(::TN::Test2* obj, T x, T d_u, ::TN::Test2* d_obj, T* d_x) { + (*d_x) += 4*d_u; + } +}}} + +double fn(double x) { + // fwd and rvs mode test + return x*TN::coefficient; // in this test, it's important that this nested name is copied into the generated code properly in both modes +} + +double fn2(double x, double y) { + // rvs mode test + TN::Test2 t; // this type needs to be copied into the derived code properly + auto q = t[x]; // in this test, it's important that this operator call is copied into the generated code properly and that the pullback function is called with all the needed namespace prefixes + return q; +} + +int main() { + double dx, dy; + INIT_DIFFERENTIATE(fn, "x"); + INIT_GRADIENT(fn); + INIT_GRADIENT(fn2); + + TEST_GRADIENT(fn, /*numOfDerivativeArgs=*/1, 3, &dx); // CHECK-EXEC: {3.00} + TEST_GRADIENT(fn2, /*numOfDerivativeArgs=*/2, 3, 4, &dx, &dy); // CHECK-EXEC: {4.00, 0.00} + TEST_DIFFERENTIATE(fn, 3) // CHECK-EXEC: {3.00} +} + +//CHECK: double fn_darg0(double x) { +//CHECK-NEXT: double _d_x = 1; +//CHECK-NEXT: return _d_x * TN::coefficient + x * 0; +//CHECK-NEXT: } + +//CHECK: void fn_grad(double x, double *_d_x) { +//CHECK-NEXT: *_d_x += 1 * TN::coefficient; +//CHECK-NEXT: } + +//CHECK: void fn2_grad(double x, double y, double *_d_x, double *_d_y) { +//CHECK-NEXT: TN::Test2 _d_t({}); +//CHECK-NEXT: TN::Test2 t; +//CHECK-NEXT: TN::Test2 _t0 = t; +//CHECK-NEXT: double _d_q = 0.; +//CHECK-NEXT: double q = t.operator[](x); +//CHECK-NEXT: _d_q += 1; +//CHECK-NEXT: { +//CHECK-NEXT: double _r0 = 0.; +//CHECK-NEXT: clad::custom_derivatives::class_functions::operator_subscript_pullback(&_t0, x, _d_q, &_d_t, &_r0); +//CHECK-NEXT: *_d_x += _r0; +//CHECK-NEXT: } +//CHECK-NEXT: } diff --git a/unittests/Kokkos/ViewAccess.cpp b/unittests/Kokkos/ViewAccess.cpp index e77b278f0..e42475ccd 100644 --- a/unittests/Kokkos/ViewAccess.cpp +++ b/unittests/Kokkos/ViewAccess.cpp @@ -14,11 +14,11 @@ double f(double x, double y) { Kokkos::View b("b", N1); a(0, 0) = x; - b(0, 0) = y; + b(1, 1) = y; - b(0, 0) += a(0, 0) * b(0, 0); + b(1, 1) += a(0, 0) * b(1, 1); - return a(0, 0) * a(0, 0) * b(0, 0) + b(0, 0); + return a(0, 0) * a(0, 0) * b(1, 1) + b(1, 1); } double f_2(double x, double y) { @@ -37,6 +37,22 @@ double f_2(double x, double y) { return a(0, 0); } +double f_3(double x, double y) { + + const int N1 = 4; + + Kokkos::View a("a", N1); + Kokkos::View b("b", N1); + + Kokkos::deep_copy(a, 3 * x + y); + b(0, 0) = y; + Kokkos::deep_copy(b, a); + + b(0, 0) += a(0, 0) * b(0, 0); + + return a(0, 0) + b(0, 0); +} + TEST(ViewAccess, Test1) { EXPECT_NEAR(f(0, 1), 1, 1e-8); EXPECT_NEAR(f(0, 2), 2, 1e-8); @@ -51,7 +67,6 @@ TEST(ViewAccess, Test2) { std::function 
f_tmp = [](double x) { return f(x, 4.); }; double dx_f_FD = finite_difference_tangent(f_tmp, 3., epsilon); - EXPECT_NEAR(f_x.execute(3, 4), dx_f_FD, tolerance * dx_f_FD); auto f_2_x = clad::differentiate(f_2, "x"); @@ -60,14 +75,24 @@ TEST(ViewAccess, Test2) { double dx_f_2_FD = finite_difference_tangent(f_2_tmp, 3., epsilon); EXPECT_NEAR(f_2_x.execute(3, 4), dx_f_2_FD, tolerance * dx_f_2_FD); - // TODO: uncomment this once it has been implemented - // auto f_grad_exe = clad::gradient(f); - // double dx, dy; - // f_grad_exe.execute(3., 4., &dx, &dy); - // EXPECT_NEAR(f_x.execute(3, 4),dx,tolerance*dx); + auto f_3_y = clad::differentiate(f_3, "y"); + + std::function f_3_tmp = [](double y) { return f_3(3., y); }; + double dy_f_3_FD = finite_difference_tangent(f_3_tmp, 4., epsilon); + EXPECT_NEAR(f_3_y.execute(3, 4), dy_f_3_FD, tolerance * dy_f_3_FD); + + auto f_grad_exe = clad::gradient(f); + double dx, dy; + f_grad_exe.execute(3., 4., &dx, &dy); + EXPECT_NEAR(f_x.execute(3, 4), dx, tolerance * dx); + + double dx_2, dy_2; + auto f_2_grad_exe = clad::gradient(f_2); + f_2_grad_exe.execute(3., 4., &dx_2, &dy_2); + EXPECT_NEAR(f_2_x.execute(3, 4), dx_2, tolerance * dx_2); - // double dx_2, dy_2; - // auto f_2_grad_exe = clad::gradient(f_2); - // f_2_grad_exe.execute(3., 4., &dx_2, &dy_2); - // EXPECT_NEAR(f_2_x.execute(3, 4),dx_2,tolerance*dx_2); + double dx_3, dy_3; + auto f_3_grad_exe = clad::gradient(f_3); + f_3_grad_exe.execute(3., 4., &dx_3, &dy_3); + EXPECT_NEAR(f_3_y.execute(3, 4), dy_3, tolerance * dy_3); } \ No newline at end of file
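
Closing usage sketch (not part of the patch): the reverse-mode Kokkos support added above can be exercised end to end roughly as follows. The view template arguments and extents are assumptions, since they are elided in the unit tests, and the expected derivatives follow from f = 3*x + y.

#include <Kokkos_Core.hpp>
#include "clad/Differentiator/Differentiator.h"
#include "clad/Differentiator/KokkosBuiltins.h"

// Assumed: a rank-2 host-space view filled via the scalar deep_copy overload.
double g(double x, double y) {
  Kokkos::View<double**, Kokkos::HostSpace> a("a", 4, 4);
  Kokkos::deep_copy(a, 3 * x + y); // reverse pass handled by deep_copy_pullback
  return a(0, 0);                  // element read uses the operator_call builtins
}

int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    auto dg = clad::gradient(g);
    double dx = 0., dy = 0.;
    dg.execute(3., 4., &dx, &dy); // expected: dx == 3, dy == 1
  }
  Kokkos::finalize();
  return 0;
}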