From 082bb20535f2ce91e0bc24142039fbdecf8cccb5 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 22 Nov 2023 13:49:54 -0700 Subject: [PATCH 01/75] Modify Clad to use Kokkos views in forward mode --- .../Differentiator/BaseForwardModeVisitor.h | 2 + lib/Differentiator/BaseForwardModeVisitor.cpp | 175 +++++++++++++++++- 2 files changed, 173 insertions(+), 4 deletions(-) diff --git a/include/clad/Differentiator/BaseForwardModeVisitor.h b/include/clad/Differentiator/BaseForwardModeVisitor.h index 375ae88d5..6eb9fc1cb 100644 --- a/include/clad/Differentiator/BaseForwardModeVisitor.h +++ b/include/clad/Differentiator/BaseForwardModeVisitor.h @@ -100,6 +100,8 @@ class BaseForwardModeVisitor StmtDiff VisitCXXStaticCastExpr(const clang::CXXStaticCastExpr* CSE); StmtDiff VisitCXXFunctionalCastExpr(const clang::CXXFunctionalCastExpr* FCE); StmtDiff VisitCXXBindTemporaryExpr(const clang::CXXBindTemporaryExpr* BTE); + StmtDiff VisitLambdaExpr(const clang::LambdaExpr* LE); + StmtDiff VisitValueStmt(const clang::ValueStmt* VS); StmtDiff VisitCXXNullPtrLiteralExpr(const clang::CXXNullPtrLiteralExpr* NPL); StmtDiff VisitUnaryExprOrTypeTraitExpr(const clang::UnaryExprOrTypeTraitExpr* UE); diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index 37f32bf81..d460c4322 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -31,6 +31,8 @@ #include "clad/Differentiator/Compatibility.h" +#include + using namespace clang; namespace clad { @@ -961,6 +963,87 @@ DiffMode BaseForwardModeVisitor::GetPushForwardMode() { } StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { + if (isa(CE)) { + auto MCE = dyn_cast(CE); + + if (MCE->getObjectType().getAsString().find("Kokkos::View") != std::string::npos) { + //std::cout << "Member function called from a Kokkos::View; nothing to do here" << std::endl; + return StmtDiff(Clone(CE)); + } + } + if (isa(CE)) { + auto OCE = dyn_cast(CE); + const Expr* baseOriginalE = OCE->getArg(0); + + bool isKokkosViewAccess = false; + std::string kokkosViewName; + + if (isa(baseOriginalE)) { + auto SE = baseOriginalE->IgnoreImpCasts(); + if (auto DRE = dyn_cast(SE)) { + std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); + std::cout << constructedTypeName << std::endl; + if (constructedTypeName.find("Kokkos::View") != std::string::npos) { + isKokkosViewAccess = true; + kokkosViewName = DRE->getNameInfo().getName().getAsString (); + } + } + } + + // Returning the function call and zero derivative + if (isKokkosViewAccess) { + + llvm::SmallVector ClonedArgs; + for (unsigned i = 1, e = CE->getNumArgs(); i < e; ++i) + ClonedArgs.push_back(Clone(CE->getArg(i))); + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getArg(0)), + noLoc, ClonedArgs, noLoc) + .get(); + + // replace kokkosViewName with "_d_"+kokkosViewName + + Expr* dView = Visit(CE->getArg(0)).getExpr_dx(); + + dView->dump(); + + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), dView, + noLoc, ClonedArgs, noLoc) + .get(); + + //std::cout << " kokkosViewName = " << kokkosViewName << std::endl; + return StmtDiff(Call, dCall); + } + } + + auto SE = CE->getCallee()->IgnoreImpCasts(); + if (auto DRE = dyn_cast(SE)) { + if (auto FD = dyn_cast(DRE->getDecl())) { + if (FD->getQualifiedNameAsString().find("Kokkos::deep_copy") != std::string::npos) { + + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + ClonedArgs.push_back(Clone(CE->getArg(i))); + ClonedDArgs.push_back(Visit(CE->getArg(i)).getExpr_dx()); + } + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } + } + } + const FunctionDecl* FD = CE->getDirectCallee(); if (!FD) { diag(DiagnosticsEngine::Warning, CE->getBeginLoc(), @@ -1054,6 +1137,18 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { CallArgs.push_back(argDiff.getExpr()); if (BaseForwardModeVisitor::IsDifferentiableType(arg->getType())) { Expr* dArg = argDiff.getExpr_dx(); + QualType CallArgTy = CallArgs.back()->getType(); + + std::string error_message = "Type mismatch, we might fail to instantiate a pullback with types " + + QualType::getAsString(CallArgTy.split(), PrintingPolicy{ {} }) + " and " + + QualType::getAsString(dArg->getType().split(), PrintingPolicy{ {} }); + if (!(!dArg || m_Context.hasSameType(CallArgTy, dArg->getType()))) { + std::cout << error_message.c_str() << std::endl; + CE->dump(); + } + assert((!dArg || m_Context.hasSameType(CallArgTy, dArg->getType())) && + "Type mismatch, we might fail to instantiate a pullback"); + (void)CallArgTy; // FIXME: What happens when dArg is nullptr? diffArgs.push_back(dArg); } @@ -1219,6 +1314,8 @@ StmtDiff BaseForwardModeVisitor::VisitUnaryOperator(const UnaryOperator* UnOp) { return StmtDiff(op, BuildOp(opKind, diff.getExpr_dx())); } else if (opKind == UnaryOperatorKind::UO_AddrOf) { return StmtDiff(op, BuildOp(opKind, diff.getExpr_dx())); + } else if (opKind == UnaryOperatorKind::UO_Not) { + return StmtDiff(op, BuildOp(opKind, diff.getExpr_dx())); } else { unsupportedOpWarn(UnOp->getEndLoc()); auto zero = @@ -1781,10 +1878,52 @@ StmtDiff BaseForwardModeVisitor::VisitBreakStmt(const BreakStmt* stmt) { StmtDiff BaseForwardModeVisitor::VisitCXXConstructExpr(const CXXConstructExpr* CE) { llvm::SmallVector clonedArgs, derivedArgs; - for (auto arg : CE->arguments()) { - auto argDiff = Visit(arg); - clonedArgs.push_back(argDiff.getExpr()); - derivedArgs.push_back(argDiff.getExpr_dx()); + //CE->dump (); + //std::string className = CE->getStmtClassName(); + //std::cout << className << std::endl; + //CE->getConstructor ()->dump(); + //std::cout << className << std::endl; + //CE->getType()->dump(); + //std::cout << CE->getType()->getAsString () << std::endl; + std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); + + // Check if we are in a Kokkos View construction. + if (constructedTypeName.rfind("Kokkos::View", 0) == 0) { + size_t runTimeDim = 0; + std::vector compileTimeDims; + bool read = false; + for (size_t i = 0; i < constructedTypeName.size(); ++i) { + if (read && constructedTypeName[i] == '*') + ++runTimeDim; + if (read && constructedTypeName[i] == '[') + compileTimeDims.push_back(std::stoi(&constructedTypeName[i+1])); + if (!read && constructedTypeName[i] == ' ') + read = true; + } + //std::cout << "runTimeDim = " << runTimeDim << std::endl; + //std::cout << "compileTimeDim = " << compileTimeDims.size() << std::endl; + //for (auto compileTimeDim : compileTimeDims) + // std::cout << " compileTimeDim = " << compileTimeDim << std::endl; + + size_t i = 0; + for (auto arg : CE->arguments()) { + if (i == runTimeDim + 1) + break; + auto argDiff = Visit(arg); + clonedArgs.push_back(argDiff.getExpr()); + if (i==0) + derivedArgs.push_back(argDiff.getExpr_dx()); + else + derivedArgs.push_back(argDiff.getExpr()); + ++i; + } + } + else { + for (auto arg : CE->arguments()) { + auto argDiff = Visit(arg); + clonedArgs.push_back(argDiff.getExpr()); + derivedArgs.push_back(argDiff.getExpr_dx()); + } } Expr* clonedArgsE = nullptr; Expr* derivedArgsE = nullptr; @@ -1953,6 +2092,34 @@ StmtDiff BaseForwardModeVisitor::VisitCXXBindTemporaryExpr( return BTEDiff; } +StmtDiff BaseForwardModeVisitor::VisitLambdaExpr( + const clang::LambdaExpr* LE) { + //for (auto TP : LE->getExplicitTemplateParameters()) + StmtDiff LEDiff = Visit(LE->getBody()); + return LEDiff; +} + +StmtDiff BaseForwardModeVisitor::VisitValueStmt( + const clang::ValueStmt* VS) { + // This is most likely a name provided in a Kokkos::view construction + VS->dump (); + // Test if StringLiteral + if (isa(VS)) { + std::cout << "This is a StringLiteral!" << std::endl; + auto SL = dyn_cast(VS); + + std::string name_str("_d_"+ SL->getString().str()); + StringRef name(name_str); + + Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); + VS->dump (); + derivedVS->dump (); + return {Clone(VS), derivedVS}; + } + return {Clone(VS), Clone(VS)}; +} + + StmtDiff BaseForwardModeVisitor::VisitCXXNullPtrLiteralExpr( const clang::CXXNullPtrLiteralExpr* NPL) { return {Clone(NPL), Clone(NPL)}; From 9b9612e0d81dea8604a11bec27799778c5094be8 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 29 Nov 2023 21:15:45 -0700 Subject: [PATCH 02/75] Add parallel_for --- .../Differentiator/BaseForwardModeVisitor.h | 1 - lib/Differentiator/BaseForwardModeVisitor.cpp | 30 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/include/clad/Differentiator/BaseForwardModeVisitor.h b/include/clad/Differentiator/BaseForwardModeVisitor.h index 6eb9fc1cb..28c81f2e0 100644 --- a/include/clad/Differentiator/BaseForwardModeVisitor.h +++ b/include/clad/Differentiator/BaseForwardModeVisitor.h @@ -100,7 +100,6 @@ class BaseForwardModeVisitor StmtDiff VisitCXXStaticCastExpr(const clang::CXXStaticCastExpr* CSE); StmtDiff VisitCXXFunctionalCastExpr(const clang::CXXFunctionalCastExpr* FCE); StmtDiff VisitCXXBindTemporaryExpr(const clang::CXXBindTemporaryExpr* BTE); - StmtDiff VisitLambdaExpr(const clang::LambdaExpr* LE); StmtDiff VisitValueStmt(const clang::ValueStmt* VS); StmtDiff VisitCXXNullPtrLiteralExpr(const clang::CXXNullPtrLiteralExpr* NPL); StmtDiff diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index d460c4322..94d8d9b42 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -1030,6 +1030,29 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { ClonedDArgs.push_back(Visit(CE->getArg(i)).getExpr_dx()); } + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } + if (FD->getQualifiedNameAsString().find("Kokkos::parallel_for") != std::string::npos) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto visitedArg = Visit(CE->getArg(i)); + ClonedArgs.push_back(visitedArg.getExpr()); + if (i==0) + ClonedDArgs.push_back(visitedArg.getExpr()); + else + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + } + Expr* Call = m_Sema .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), noLoc, ClonedArgs, noLoc) @@ -2092,13 +2115,6 @@ StmtDiff BaseForwardModeVisitor::VisitCXXBindTemporaryExpr( return BTEDiff; } -StmtDiff BaseForwardModeVisitor::VisitLambdaExpr( - const clang::LambdaExpr* LE) { - //for (auto TP : LE->getExplicitTemplateParameters()) - StmtDiff LEDiff = Visit(LE->getBody()); - return LEDiff; -} - StmtDiff BaseForwardModeVisitor::VisitValueStmt( const clang::ValueStmt* VS) { // This is most likely a name provided in a Kokkos::view construction From 9284719e008db3779dbd74558662a829546a9af0 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 7 Dec 2023 22:13:43 -0700 Subject: [PATCH 03/75] Add Kokkos subviews --- lib/Differentiator/BaseForwardModeVisitor.cpp | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index 94d8d9b42..84755e5a3 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -214,6 +214,14 @@ BaseForwardModeVisitor::Derive(const FunctionDecl* FD, derivedFD->setParams(paramsRef); derivedFD->setBody(nullptr); + //AnnotateAttr* A = FD->getAttr(); + //if (A && + // (A->getAnnotation().equals("KOKKOS_INLINE_FUNCTION") || A->getAnnotation().equals("KOKKOS_FUNCTION"))) { + // std::cout << "This is a Kokkos function!" << std::endl; + // //derivedFD->addAttr(A); + // derivedFD->dump(); + // } + // Function body scope beginScope(Scope::FnScope | Scope::DeclScope); m_DerivativeFnScope = getCurrentScope(); @@ -1026,8 +1034,33 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { - ClonedArgs.push_back(Clone(CE->getArg(i))); - ClonedDArgs.push_back(Visit(CE->getArg(i)).getExpr_dx()); + auto visitedArg = Visit(CE->getArg(i)); + ClonedArgs.push_back(visitedArg.getExpr()); + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + } + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } + if (FD->getQualifiedNameAsString().find("Kokkos::subview") != std::string::npos) { + + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto visitedArg = Visit(CE->getArg(i)); + ClonedArgs.push_back(visitedArg.getExpr()); + if (i==0) + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + else + ClonedDArgs.push_back(visitedArg.getExpr()); } Expr* Call = m_Sema From 65557a9d7e997348db50dd5b659233be2d3f56a7 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 12 Dec 2023 15:24:44 -0700 Subject: [PATCH 04/75] Add Kokkos examples --- kokkos/functor_for.hpp | 32 +++++++++++++++++++++++++++++ kokkos/lambda_reduction.hpp | 19 +++++++++++++++++ kokkos/lambda_reduction_subview.hpp | 21 +++++++++++++++++++ kokkos/main.cpp | 24 ++++++++++++++++++++++ 4 files changed, 96 insertions(+) create mode 100644 kokkos/functor_for.hpp create mode 100644 kokkos/lambda_reduction.hpp create mode 100644 kokkos/lambda_reduction_subview.hpp create mode 100644 kokkos/main.cpp diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp new file mode 100644 index 000000000..9cba760a1 --- /dev/null +++ b/kokkos/functor_for.hpp @@ -0,0 +1,32 @@ +template +struct ParallelFunctor { + VT a; + double x, y; + + ParallelFunctor(VT _a, double _x, double _y) : a(_a), x(_x), y(_y) {} + + KOKKOS_INLINE_FUNCTION void operator()(const int i) const { + for (size_t j =0; j a("a", N1); + + Kokkos::deep_copy(a, 0); + + size_t N1n = a.extent(0); + + ParallelFunctor functor(a,x,y); + + Kokkos::parallel_for(N1n, functor); + + return a(0,0); +} \ No newline at end of file diff --git a/kokkos/lambda_reduction.hpp b/kokkos/lambda_reduction.hpp new file mode 100644 index 000000000..e122f2c9e --- /dev/null +++ b/kokkos/lambda_reduction.hpp @@ -0,0 +1,19 @@ + +template +typename ViewtypeA::value_type weightedDotProduct_2(ViewtypeA A, Viewtypex x, Viewtypey y) { + // Application: = y^T*A*x + + typename ViewtypeA::value_type result = 0; + + Kokkos::parallel_reduce( A.extent(0), KOKKOS_LAMBDA ( int j, typename ViewtypeA::value_type &update ) { + typename ViewtypeA::value_type temp2 = 0; + + for ( int i = 0; i < A.extent(1); ++i ) { + temp2 += A( j, i ) * x( i ); + } + + update += y( j ) * temp2; + }, result ); + + return result; +} \ No newline at end of file diff --git a/kokkos/lambda_reduction_subview.hpp b/kokkos/lambda_reduction_subview.hpp new file mode 100644 index 000000000..eac7fc95f --- /dev/null +++ b/kokkos/lambda_reduction_subview.hpp @@ -0,0 +1,21 @@ + +template +typename ViewtypeA::value_type weightedDotProduct_1(ViewtypeA A, Viewtypex x, Viewtypey y) { + // Application: = y^T*A*x + + typename ViewtypeA::value_type result = 0; + + Kokkos::parallel_reduce( A.extent(0), KOKKOS_LAMBDA ( int j, typename ViewtypeA::value_type &update ) { + typename ViewtypeA::value_type temp2 = 0; + + auto A_row_j = Kokkos::subview( A, j, Kokkos::ALL ); + + for ( int i = 0; i < A.extent(1); ++i ) { + temp2 += A_row_j( i ) * x( i ); + } + + update += y( j ) * temp2; + }, result ); + + return result; +} \ No newline at end of file diff --git a/kokkos/main.cpp b/kokkos/main.cpp new file mode 100644 index 000000000..258f0dd6e --- /dev/null +++ b/kokkos/main.cpp @@ -0,0 +1,24 @@ +#include +#include "functor_for.hpp" +#include "lambda_reduction.hpp" +#include "lambda_reduction_subview.hpp" + +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); + { + Kokkos::View A("A", 10, 10); + Kokkos::View x("x", 10); + Kokkos::View y("y", 10); + + Kokkos::deep_copy(A, 3); + Kokkos::deep_copy(x, 2); + Kokkos::deep_copy(y, 4); + + std::cout << f(3.,4.) << std::endl; + std::cout << weightedDotProduct_1(A, x, y) << std::endl; + std::cout << weightedDotProduct_2(A, x, y) << std::endl; + + } + Kokkos::finalize(); + +} \ No newline at end of file From 322ca213da0693d488696b92e67baa20b29e962d Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 14 Dec 2023 10:09:21 -0700 Subject: [PATCH 05/75] Add CMakeLists.txt for the Kokkos example and push the generated file --- kokkos/CMakeLists.txt | 9 +++++++++ kokkos/generated/Derivatives.cpp | 19 +++++++++++++++++++ kokkos/main.cpp | 6 ++++++ 3 files changed, 34 insertions(+) create mode 100644 kokkos/CMakeLists.txt create mode 100644 kokkos/generated/Derivatives.cpp diff --git a/kokkos/CMakeLists.txt b/kokkos/CMakeLists.txt new file mode 100644 index 000000000..af0099265 --- /dev/null +++ b/kokkos/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.16.3) + +project(clad_example) + +add_executable ( clad_example main.cpp ) +find_package(Kokkos REQUIRED) +set (CMAKE_CXX_STANDARD 17) +target_include_directories( clad_example PRIVATE ${CLAD_INCLUDE_PATH}) +target_link_libraries(clad_example PRIVATE Kokkos::kokkos) diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp new file mode 100644 index 000000000..1748cd326 --- /dev/null +++ b/kokkos/generated/Derivatives.cpp @@ -0,0 +1,19 @@ +inline double f_darg0(double x, double y) { + double _d_x = 1; + double _d_y = 0; + const int _d_N1 = 0; + const int N1 = 4; + const int _d_N2 = 0; + const int N2 = 4; + Kokkos::View _d_a("_d_a", N1); + Kokkos::View a("a", N1); + Kokkos::deep_copy(_d_a, 0, nullptr); + Kokkos::deep_copy(a, 0, nullptr); + size_t _d_N1n; + size_t N1n = a.extent(0); + ParallelFunctor > _d_functor(_d_a, _d_x, _d_y); + ParallelFunctor > functor(a, x, y); + Kokkos::parallel_for(N1n, _d_functor); + Kokkos::parallel_for(N1n, functor); + return _d_a(0, 0); +} diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 258f0dd6e..1605118c3 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -1,3 +1,4 @@ +#include "clad/Differentiator/Differentiator.h" #include #include "functor_for.hpp" #include "lambda_reduction.hpp" @@ -18,6 +19,11 @@ int main(int argc, char* argv[]) { std::cout << weightedDotProduct_1(A, x, y) << std::endl; std::cout << weightedDotProduct_2(A, x, y) << std::endl; + auto f_dx = clad::differentiate(f, "x"); + // Any of the two below will generate an "error: Attempted differentiation w.r.t. member 'x' which is not of real type." + //auto weightedDotProduct_1_dx = clad::differentiate(weightedDotProduct_1, "x"); + //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); + } Kokkos::finalize(); From 02dd2ce9911996865ecfc6284fd0ee82ace0cec4 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 14 Dec 2023 23:34:57 -0700 Subject: [PATCH 06/75] Start to work on the reverse mode --- .../clad/Differentiator/ReverseModeVisitor.h | 1 + kokkos/functor_for.hpp | 12 +- kokkos/generated/Derivatives.cpp | 45 ++++- kokkos/main.cpp | 10 +- lib/Differentiator/ReverseModeVisitor.cpp | 161 +++++++++++++++++- 5 files changed, 213 insertions(+), 16 deletions(-) diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index 1cd1c0bfa..bef1fdb45 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -362,6 +362,7 @@ namespace clad { StmtDiff VisitParenExpr(const clang::ParenExpr* PE); virtual StmtDiff VisitReturnStmt(const clang::ReturnStmt* RS); StmtDiff VisitStmt(const clang::Stmt* S); + StmtDiff VisitValueStmt(const clang::ValueStmt* S); virtual StmtDiff VisitUnaryOperator(const clang::UnaryOperator* UnOp); StmtDiff VisitExprWithCleanups(const clang::ExprWithCleanups* EWC); /// Decl is not Stmt, so it cannot be visited directly. diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 9cba760a1..465cc20e5 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -20,13 +20,17 @@ double f(double x, double y) { Kokkos::View a("a", N1); - Kokkos::deep_copy(a, 0); + double tmp = x * x + y; + + a(0,0) = tmp; + + //Kokkos::deep_copy(a, 0); size_t N1n = a.extent(0); - ParallelFunctor functor(a,x,y); + //ParallelFunctor functor(a,x,y); - Kokkos::parallel_for(N1n, functor); + //Kokkos::parallel_for(N1n, functor); - return a(0,0); + return tmp; } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 1748cd326..866c20362 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -7,13 +7,44 @@ inline double f_darg0(double x, double y) { const int N2 = 4; Kokkos::View _d_a("_d_a", N1); Kokkos::View a("a", N1); - Kokkos::deep_copy(_d_a, 0, nullptr); - Kokkos::deep_copy(a, 0, nullptr); + double _d_tmp = _d_x * x + x * _d_x + _d_y; + double tmp = x * x + y; + _d_a(0, 0) = _d_tmp; + a(0, 0) = tmp; size_t _d_N1n; size_t N1n = a.extent(0); - ParallelFunctor > _d_functor(_d_a, _d_x, _d_y); - ParallelFunctor > functor(a, x, y); - Kokkos::parallel_for(N1n, _d_functor); - Kokkos::parallel_for(N1n, functor); - return _d_a(0, 0); + return _d_tmp; +} +inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { + int _d_N1 = 0; + int _d_N2 = 0; + Kokkos::View _d_a("_d_a", N1); + double _t0; + double _t1; + double _d_tmp = 0; + size_t _d_N1n = 0; + const int N1 = 4; + const int N2 = 4; + Kokkos::View a("a", N1); + _t1 = x; + _t0 = x; + double tmp = _t1 * _t0 + y; + a(0, 0) = tmp; + size_t N1n = a.extent(0); + goto _label0; + _label0: + _d_tmp += 1; + { + double _r_d0 = _d_a(0, 0); + _d_tmp += _r_d0; + _d_a(0, 0) -= _r_d0; + _d_a(0, 0); + } + { + double _r0 = _d_tmp * _t0; + * _d_x += _r0; + double _r1 = _t1 * _d_tmp; + * _d_x += _r1; + * _d_y += _d_tmp; + } } diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 1605118c3..84414155b 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -19,11 +19,19 @@ int main(int argc, char* argv[]) { std::cout << weightedDotProduct_1(A, x, y) << std::endl; std::cout << weightedDotProduct_2(A, x, y) << std::endl; - auto f_dx = clad::differentiate(f, "x"); + auto f_dx_exe = clad::differentiate(f, "x"); + auto f_grad_exe = clad::gradient(f); // Any of the two below will generate an "error: Attempted differentiation w.r.t. member 'x' which is not of real type." //auto weightedDotProduct_1_dx = clad::differentiate(weightedDotProduct_1, "x"); //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); + std::cout << f_dx_exe.execute(3.,4.) << std::endl; + + double dx = 0, dy = 0; + // After this call, dx and dy will store the derivatives of x and y respectively. + f_grad_exe.execute(3., 4., &dx, &dy); + std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; + } Kokkos::finalize(); diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 6e3f1af48..0f9414932 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -36,6 +36,8 @@ #include "clad/Differentiator/CladUtils.h" #include "clad/Differentiator/Compatibility.h" +#include + using namespace clang; namespace clad { @@ -748,6 +750,26 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Clone(S)); } + StmtDiff ReverseModeVisitor::VisitValueStmt( + const clang::ValueStmt* VS) { + // This is most likely a name provided in a Kokkos::view construction + //VS->dump (); + // Test if StringLiteral + if (isa(VS)) { + //std::cout << "This is a StringLiteral!" << std::endl; + auto SL = dyn_cast(VS); + + std::string name_str("_d_"+ SL->getString().str()); + StringRef name(name_str); + + Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); + //VS->dump (); + //derivedVS->dump (); + return {Clone(VS), derivedVS}; + } + return {Clone(VS), Clone(VS)}; + } + StmtDiff ReverseModeVisitor::VisitCompoundStmt(const CompoundStmt* CS) { int scopeFlags = Scope::DeclScope; // If this is the outermost compound statement of the function, @@ -1175,18 +1197,26 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, // Initially, df/df = 1. const Expr* value = RS->getRetValue(); QualType type = value->getType(); + std::cout << "return type is " << type.getAsString() << std::endl; + if (type.getAsString().find("Kokkos::View") != std::string::npos) { + std::cout << "return value is a view!" << std::endl; + } auto* dfdf = m_Pullback; if (isa(dfdf) || isa(dfdf)) { + std::cout << "isa(dfdf) || isa(dfdf) is true" << std::endl; ExprResult tmp = dfdf; dfdf = m_Sema .ImpCastExprToType(tmp.get(), type, m_Sema.PrepareScalarCast(tmp, type)) .get(); } + else + std::cout << "isa(dfdf) || isa(dfdf) is false" << std::endl; auto ReturnResult = DifferentiateSingleExpr(value, dfdf); StmtDiff ReturnDiff = ReturnResult.first; StmtDiff ExprDiff = ReturnResult.second; Stmt* Reverse = ReturnDiff.getStmt_dx(); + Reverse->dump(); // If the original function returns at this point, some part of the reverse // pass (corresponding to other branches that do not return here) must be // skipped. We create a label in the reverse pass and jump to it via goto. @@ -1375,6 +1405,60 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } StmtDiff ReverseModeVisitor::VisitCallExpr(const CallExpr* CE) { + if (isa(CE)) { + auto MCE = dyn_cast(CE); + + if (MCE->getObjectType().getAsString().find("Kokkos::View") != std::string::npos) { + //std::cout << "Member function called from a Kokkos::View; nothing to do here" << std::endl; + return StmtDiff(Clone(CE)); + } + } + if (isa(CE)) { + auto OCE = dyn_cast(CE); + const Expr* baseOriginalE = OCE->getArg(0); + + bool isKokkosViewAccess = false; + std::string kokkosViewName; + + if (isa(baseOriginalE)) { + auto SE = baseOriginalE->IgnoreImpCasts(); + if (auto DRE = dyn_cast(SE)) { + std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); + //std::cout << constructedTypeName << std::endl; + if (constructedTypeName.find("Kokkos::View") != std::string::npos) { + isKokkosViewAccess = true; + kokkosViewName = DRE->getNameInfo().getName().getAsString (); + } + } + } + + // Returning the function call and zero derivative + if (isKokkosViewAccess) { + + llvm::SmallVector ClonedArgs; + for (unsigned i = 1, e = CE->getNumArgs(); i < e; ++i) + ClonedArgs.push_back(Clone(CE->getArg(i))); + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getArg(0)), + noLoc, ClonedArgs, noLoc) + .get(); + + // replace kokkosViewName with "_d_"+kokkosViewName + + Expr* dView = Visit(CE->getArg(0)).getExpr_dx(); + + dView->dump(); + + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), dView, + noLoc, ClonedArgs, noLoc) + .get(); + + //std::cout << " kokkosViewName = " << kokkosViewName << std::endl; + return StmtDiff(Call, dCall); + } + } const FunctionDecl* FD = CE->getDirectCallee(); if (!FD) { diag(DiagnosticsEngine::Warning, @@ -2574,9 +2658,54 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, VarDecl* VDDerived = nullptr; bool isPointerType = VD->getType()->isPointerType(); + + std::string constructedTypeName = QualType::getAsString(VD->getType().split(), PrintingPolicy{ {} }); + if (constructedTypeName.rfind("Kokkos::View", 0) == 0) { + size_t runTimeDim = 0; + std::vector compileTimeDims; + bool read = false; + for (size_t i = 0; i < constructedTypeName.size(); ++i) { + if (read && constructedTypeName[i] == '*') + ++runTimeDim; + if (read && constructedTypeName[i] == '[') + compileTimeDims.push_back(std::stoi(&constructedTypeName[i+1])); + if (!read && constructedTypeName[i] == ' ') + read = true; + } + size_t i = 0; + if (isa(VD->getInit())) { + auto CE = dyn_cast(VD->getInit()); + llvm::SmallVector clonedArgs; + for (auto arg : CE->arguments()) { + if (i == runTimeDim + 1) + break; + auto argDiff = Visit(arg, dfdx()); + if (i == 0) + clonedArgs.push_back(argDiff.getExpr_dx()); + else + clonedArgs.push_back(argDiff.getExpr()); + ++i; + } + //VDDerivedInit = m_Sema.ActOnInitList(noLoc, clonedArgs, noLoc).get(); + + VDDerivedInit = + m_Sema.ActOnParenListExpr(noLoc, noLoc, clonedArgs).get(); + + + if (VDDerivedType->isRecordType()) + VDDerived = + BuildVarDecl(VDDerivedType, "_d_" + VD->getNameAsString(), + VDDerivedInit, VD->isDirectInit(), + m_Context.getTrivialTypeSourceInfo(VDDerivedType), + VD->getInitStyle()); + else + VDDerived = BuildVarDecl(VDDerivedType, "_d_" + VD->getNameAsString(), + VDDerivedInit); + } + // VDDerivedInit now serves two purposes -- as the initial derivative value // or the size of the derivative array -- depending on the primal type. - if (const auto* AT = dyn_cast(VD->getType())) { + } else if (const auto* AT = dyn_cast(VD->getType())) { VDDerivedInit = getArraySizeExpr(AT, m_Context, *this); VDDerived = BuildGlobalVarDecl( VDDerivedType, "_d_" + VD->getNameAsString(), VDDerivedInit, false, @@ -3708,9 +3837,33 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, StmtDiff ReverseModeVisitor::VisitCXXConstructExpr(const CXXConstructExpr* CE) { llvm::SmallVector clonedArgs; - for (const auto* arg : CE->arguments()) { - auto argDiff = Visit(arg, dfdx()); - clonedArgs.push_back(argDiff.getExpr()); + std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); + if (constructedTypeName.rfind("Kokkos::View", 0) == 0) { + size_t runTimeDim = 0; + std::vector compileTimeDims; + bool read = false; + for (size_t i = 0; i < constructedTypeName.size(); ++i) { + if (read && constructedTypeName[i] == '*') + ++runTimeDim; + if (read && constructedTypeName[i] == '[') + compileTimeDims.push_back(std::stoi(&constructedTypeName[i+1])); + if (!read && constructedTypeName[i] == ' ') + read = true; + } + size_t i = 0; + for (auto arg : CE->arguments()) { + if (i == runTimeDim + 1) + break; + auto argDiff = Visit(arg, dfdx()); + clonedArgs.push_back(argDiff.getExpr()); + ++i; + } + } + else { + for (const auto* arg : CE->arguments()) { + auto argDiff = Visit(arg, dfdx()); + clonedArgs.push_back(argDiff.getExpr()); + } } Expr* clonedArgsE = nullptr; From 7e9c54023a0b6511cedb658db41511d31a0ea225 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Fri, 15 Dec 2023 13:17:06 -0700 Subject: [PATCH 07/75] Propagate derivatives inside Kokkos view --- kokkos/functor_for.hpp | 2 +- kokkos/generated/Derivatives.cpp | 4 ++-- lib/Differentiator/ReverseModeVisitor.cpp | 10 ++++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 465cc20e5..9c7f4185c 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -32,5 +32,5 @@ double f(double x, double y) { //Kokkos::parallel_for(N1n, functor); - return tmp; + return a(0,0); } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 866c20362..b9317718d 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -13,7 +13,7 @@ inline double f_darg0(double x, double y) { a(0, 0) = tmp; size_t _d_N1n; size_t N1n = a.extent(0); - return _d_tmp; + return _d_a(0, 0); } inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; @@ -33,7 +33,7 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array size_t N1n = a.extent(0); goto _label0; _label0: - _d_tmp += 1; + _d_a(0, 0) += 1; { double _r_d0 = _d_a(0, 0); _d_tmp += _r_d0; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 0f9414932..55b93c71b 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1216,7 +1216,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, StmtDiff ReturnDiff = ReturnResult.first; StmtDiff ExprDiff = ReturnResult.second; Stmt* Reverse = ReturnDiff.getStmt_dx(); - Reverse->dump(); // If the original function returns at this point, some part of the reverse // pass (corresponding to other branches that do not return here) must be // skipped. We create a label in the reverse pass and jump to it via goto. @@ -1446,15 +1445,18 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, // replace kokkosViewName with "_d_"+kokkosViewName - Expr* dView = Visit(CE->getArg(0)).getExpr_dx(); - - dView->dump(); + auto visited = Visit(CE->getArg(0), dfdx()); + Expr* dView = visited.getExpr_dx(); Expr* dCall = m_Sema .ActOnCallExpr(getCurrentScope(), dView, noLoc, ClonedArgs, noLoc) .get(); + if (dfdx()) { + Expr* add_assign = BuildOp(BO_AddAssign, dCall, dfdx()); + addToCurrentBlock(add_assign, direction::reverse); + } //std::cout << " kokkosViewName = " << kokkosViewName << std::endl; return StmtDiff(Call, dCall); } From 2cd5231f15f26e20c097126e6e80966c2b729d81 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Fri, 15 Dec 2023 13:43:02 -0700 Subject: [PATCH 08/75] Modify the test to use the generated file --- kokkos/functor_for.hpp | 7 +++++-- kokkos/generated/Derivatives.cpp | 24 ++++++++++++++++-------- kokkos/main.cpp | 14 +++++++++++++- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 9c7f4185c..99a24bb27 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -22,7 +22,10 @@ double f(double x, double y) { double tmp = x * x + y; - a(0,0) = tmp; + const int i = 0; + const int j = 0; + + a(i,j) = tmp; //Kokkos::deep_copy(a, 0); @@ -32,5 +35,5 @@ double f(double x, double y) { //Kokkos::parallel_for(N1n, functor); - return a(0,0); + return a(i,j); } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index b9317718d..a34c71a4b 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -9,11 +9,15 @@ inline double f_darg0(double x, double y) { Kokkos::View a("a", N1); double _d_tmp = _d_x * x + x * _d_x + _d_y; double tmp = x * x + y; - _d_a(0, 0) = _d_tmp; - a(0, 0) = tmp; + const int _d_i = 0; + const int i = 0; + const int _d_j = 0; + const int j = 0; + _d_a(i, j) = _d_tmp; + a(i, j) = tmp; size_t _d_N1n; size_t N1n = a.extent(0); - return _d_a(0, 0); + return _d_a(i, j); } inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; @@ -22,6 +26,8 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double _t0; double _t1; double _d_tmp = 0; + int _d_i = 0; + int _d_j = 0; size_t _d_N1n = 0; const int N1 = 4; const int N2 = 4; @@ -29,16 +35,18 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array _t1 = x; _t0 = x; double tmp = _t1 * _t0 + y; - a(0, 0) = tmp; + const int i = 0; + const int j = 0; + a(i, j) = tmp; size_t N1n = a.extent(0); goto _label0; _label0: - _d_a(0, 0) += 1; + _d_a(i, j) += 1; { - double _r_d0 = _d_a(0, 0); + double _r_d0 = _d_a(i, j); _d_tmp += _r_d0; - _d_a(0, 0) -= _r_d0; - _d_a(0, 0); + _d_a(i, j) -= _r_d0; + _d_a(i, j); } { double _r0 = _d_tmp * _t0; diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 84414155b..30955fdb8 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -4,6 +4,12 @@ #include "lambda_reduction.hpp" #include "lambda_reduction_subview.hpp" +//#define use_generated_file + +#ifdef use_generated_file +#include "generated/Derivatives.cpp" +#endif + int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); { @@ -19,6 +25,7 @@ int main(int argc, char* argv[]) { std::cout << weightedDotProduct_1(A, x, y) << std::endl; std::cout << weightedDotProduct_2(A, x, y) << std::endl; +#ifndef use_generated_file auto f_dx_exe = clad::differentiate(f, "x"); auto f_grad_exe = clad::gradient(f); // Any of the two below will generate an "error: Attempted differentiation w.r.t. member 'x' which is not of real type." @@ -31,7 +38,12 @@ int main(int argc, char* argv[]) { // After this call, dx and dy will store the derivatives of x and y respectively. f_grad_exe.execute(3., 4., &dx, &dy); std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; - +#else + double dx = 0, dy = 0; + std::cout << f_darg0(3.,4.) << std::endl; + f_grad(3., 4., &dx, &dy); + std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; +#endif } Kokkos::finalize(); From db086a587daf77d108f2bb04409ee982edd732e5 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Fri, 15 Dec 2023 15:47:25 -0700 Subject: [PATCH 09/75] Add a first implementation of a deep_copy --- kokkos/functor_for.hpp | 7 +- kokkos/generated/Derivatives.cpp | 8 ++ lib/Differentiator/ReverseModeVisitor.cpp | 90 ++++++++++++++++++++++- 3 files changed, 101 insertions(+), 4 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 99a24bb27..79b61154a 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -25,9 +25,12 @@ double f(double x, double y) { const int i = 0; const int j = 0; - a(i,j) = tmp; + double zero = 0.; + Kokkos::deep_copy(a, zero); + //Kokkos::deep_copy(a, 0); does not work + //auto a_row_0 = Kokkos::subview( a, 0, Kokkos::ALL ); - //Kokkos::deep_copy(a, 0); + a(i,j) = tmp; size_t N1n = a.extent(0); diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index a34c71a4b..ec2f02f59 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -13,6 +13,10 @@ inline double f_darg0(double x, double y) { const int i = 0; const int _d_j = 0; const int j = 0; + double _d_zero = 0.; + double zero = 0.; + Kokkos::deep_copy(_d_a, _d_zero, nullptr); + Kokkos::deep_copy(a, zero, nullptr); _d_a(i, j) = _d_tmp; a(i, j) = tmp; size_t _d_N1n; @@ -28,6 +32,7 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double _d_tmp = 0; int _d_i = 0; int _d_j = 0; + double _d_zero = 0; size_t _d_N1n = 0; const int N1 = 4; const int N2 = 4; @@ -37,6 +42,8 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double tmp = _t1 * _t0 + y; const int i = 0; const int j = 0; + double zero = 0.; + Kokkos::deep_copy(a, zero, nullptr); a(i, j) = tmp; size_t N1n = a.extent(0); goto _label0; @@ -48,6 +55,7 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array _d_a(i, j) -= _r_d0; _d_a(i, j); } + Kokkos::deep_copy(_d_a, _d_zero, nullptr); { double _r0 = _d_tmp * _t0; * _d_x += _r0; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 55b93c71b..3263b91b8 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1461,6 +1461,93 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Call, dCall); } } + auto SE = CE->getCallee()->IgnoreImpCasts(); + if (auto DRE = dyn_cast(SE)) { + if (auto FD = dyn_cast(DRE->getDecl())) { + if (FD->getQualifiedNameAsString().find("Kokkos::deep_copy") != std::string::npos) { + + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto visitedArg = Visit(CE->getArg(i), dfdx()); + ClonedArgs.push_back(visitedArg.getExpr()); + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + std::cout << "Kokkos::deep_copy visitedArg.getExpr()->dump() start with i = " << i << std::endl; + visitedArg.getExpr()->dump(); + std::cout << "Kokkos::deep_copy visitedArg.getExpr()->dump() end with i = " << i << std::endl; + std::cout << "Kokkos::deep_copy visitedArg.getExpr_dx()->dump() start with i = " << i << std::endl; + visitedArg.getExpr_dx()->dump(); + std::cout << "Kokkos::deep_copy visitedArg.getExpr_dx()->dump() end with i = " << i << std::endl; + } + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } + if (FD->getQualifiedNameAsString().find("Kokkos::subview") != std::string::npos) { + + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto visitedArg = Visit(CE->getArg(i)); + ClonedArgs.push_back(visitedArg.getExpr()); + if (i==0) + ClonedDArgs.push_back(visitedArg.getExpr()); + else + ClonedDArgs.push_back(visitedArg.getExpr()); + + std::cout << "Kokkos::subview visitedArg.getExpr()->dump() start with i = " << i << std::endl; + visitedArg.getExpr()->dump(); + std::cout << "Kokkos::subview visitedArg.getExpr()->dump() end with i = " << i << std::endl; + std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() start with i = " << i << std::endl; + visitedArg.getExpr_dx()->dump(); + std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() end with i = " << i << std::endl; + } + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } + if (FD->getQualifiedNameAsString().find("Kokkos::parallel_for") != std::string::npos) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto visitedArg = Visit(CE->getArg(i)); + ClonedArgs.push_back(visitedArg.getExpr()); + if (i==0) + ClonedDArgs.push_back(visitedArg.getExpr()); + else + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + } + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } + } + } + const FunctionDecl* FD = CE->getDirectCallee(); if (!FD) { diag(DiagnosticsEngine::Warning, @@ -2681,14 +2768,13 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, for (auto arg : CE->arguments()) { if (i == runTimeDim + 1) break; - auto argDiff = Visit(arg, dfdx()); + auto argDiff = Visit(arg); if (i == 0) clonedArgs.push_back(argDiff.getExpr_dx()); else clonedArgs.push_back(argDiff.getExpr()); ++i; } - //VDDerivedInit = m_Sema.ActOnInitList(noLoc, clonedArgs, noLoc).get(); VDDerivedInit = m_Sema.ActOnParenListExpr(noLoc, noLoc, clonedArgs).get(); From ef1d117f07b6c68691b008ce42a2fdba85c46b12 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Fri, 15 Dec 2023 20:23:35 -0700 Subject: [PATCH 10/75] Update the implementation of the deep_copy --- kokkos/functor_for.hpp | 43 ++++- kokkos/generated/Derivatives.cpp | 40 ++-- kokkos/main.cpp | 4 +- lib/Differentiator/ReverseModeVisitor.cpp | 222 +++++++++++++++++++++- 4 files changed, 279 insertions(+), 30 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 79b61154a..681fa3b2f 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -1,3 +1,22 @@ + +namespace kokkos_builtin_derivative { + +template +void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { + double tmp_sum = sum; + sum = 0.; + //to be updated to be rank independent + Kokkos::parallel_reduce( A.extent(0), KOKKOS_LAMBDA ( int i, typename ViewtypeA::value_type &update ) { + + for ( int j = 0; j < A.extent(1); ++j ) { + update += A( i, j ); + } + }, sum ); + sum += tmp_sum; +} + +} + template struct ParallelFunctor { VT a; @@ -12,6 +31,10 @@ struct ParallelFunctor { }; +double f2(double x, double y) { + return x; +} + KOKKOS_INLINE_FUNCTION double f(double x, double y) { @@ -19,6 +42,7 @@ double f(double x, double y) { constexpr int N2 = 4; Kokkos::View a("a", N1); + Kokkos::View b("b", N1); double tmp = x * x + y; @@ -26,17 +50,28 @@ double f(double x, double y) { const int j = 0; double zero = 0.; - Kokkos::deep_copy(a, zero); - //Kokkos::deep_copy(a, 0); does not work + //Kokkos::deep_copy(a, tmp); //auto a_row_0 = Kokkos::subview( a, 0, Kokkos::ALL ); - a(i,j) = tmp; + //b(i,j) = tmp; - size_t N1n = a.extent(0); + //Kokkos::deep_copy(a, tmp); + //Kokkos::deep_copy(a, tmp); + + //Kokkos::deep_copy(a, x); + Kokkos::deep_copy(b, x * x + y); + Kokkos::deep_copy(a, b); + + //a(i,j) = x; + //a(i,j) = x * x + y; + + //size_t N1n = a.extent(0); //ParallelFunctor functor(a,x,y); //Kokkos::parallel_for(N1n, functor); + // double sum = f2(x, y); + return a(i,j); } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index ec2f02f59..568f28526 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -7,6 +7,8 @@ inline double f_darg0(double x, double y) { const int N2 = 4; Kokkos::View _d_a("_d_a", N1); Kokkos::View a("a", N1); + Kokkos::View _d_b("_d_b", N1); + Kokkos::View b("b", N1); double _d_tmp = _d_x * x + x * _d_x + _d_y; double tmp = x * x + y; const int _d_i = 0; @@ -15,47 +17,55 @@ inline double f_darg0(double x, double y) { const int j = 0; double _d_zero = 0.; double zero = 0.; - Kokkos::deep_copy(_d_a, _d_zero, nullptr); - Kokkos::deep_copy(a, zero, nullptr); - _d_a(i, j) = _d_tmp; - a(i, j) = tmp; - size_t _d_N1n; - size_t N1n = a.extent(0); + Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y, nullptr); + Kokkos::deep_copy(b, x * x + y, nullptr); + Kokkos::deep_copy(_d_a, _d_b, nullptr); + Kokkos::deep_copy(a, b, nullptr); return _d_a(i, j); } inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; int _d_N2 = 0; Kokkos::View _d_a("_d_a", N1); + Kokkos::View _d_b("_d_b", N1); double _t0; double _t1; double _d_tmp = 0; int _d_i = 0; int _d_j = 0; double _d_zero = 0; - size_t _d_N1n = 0; + double _t2; + double _t3; + double _t4; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); + Kokkos::View b("b", N1); _t1 = x; _t0 = x; double tmp = _t1 * _t0 + y; const int i = 0; const int j = 0; double zero = 0.; - Kokkos::deep_copy(a, zero, nullptr); - a(i, j) = tmp; - size_t N1n = a.extent(0); + _t2 = x; + _t4 = x; + _t3 = x; + Kokkos::deep_copy(b, x * _t2 + y, nullptr); + Kokkos::deep_copy(a, b, nullptr); goto _label0; _label0: _d_a(i, j) += 1; + Kokkos::deep_copy(_d_b, _d_a, nullptr); { - double _r_d0 = _d_a(i, j); - _d_tmp += _r_d0; - _d_a(i, j) -= _r_d0; - _d_a(i, j); + double _grad0 = 0.; + kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); + double _r2 = _grad0; + double _r3 = _r2 * _t3; + * _d_x += _r3; + double _r4 = _t4 * _r2; + * _d_x += _r4; + * _d_y += _r2; } - Kokkos::deep_copy(_d_a, _d_zero, nullptr); { double _r0 = _d_tmp * _t0; * _d_x += _r0; diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 30955fdb8..511088349 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -32,12 +32,14 @@ int main(int argc, char* argv[]) { //auto weightedDotProduct_1_dx = clad::differentiate(weightedDotProduct_1, "x"); //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); - std::cout << f_dx_exe.execute(3.,4.) << std::endl; + double dx_f = f_dx_exe.execute(3.,4.); + std::cout << "dx: " << dx_f << std::endl; double dx = 0, dy = 0; // After this call, dx and dy will store the derivatives of x and y respectively. f_grad_exe.execute(3., 4., &dx, &dy); std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; + assert(dx==dx_f && "error"); #else double dx = 0, dy = 0; std::cout << f_darg0(3.,4.) << std::endl; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 3263b91b8..50936ec2f 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1403,6 +1403,18 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Clone(FL)); } + bool isKokkosView(const Expr* E) { + std::string constructedTypeName = QualType::getAsString(E->getType().split(), PrintingPolicy{ {} }); + if (isa(E)) { + auto SE = E->IgnoreImpCasts(); + if (auto DRE = dyn_cast(SE)) { + std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); + return constructedTypeName.find("Kokkos::View") != std::string::npos; + } + } + return constructedTypeName.rfind("Kokkos::View", 0) == 0; + } + StmtDiff ReverseModeVisitor::VisitCallExpr(const CallExpr* CE) { if (isa(CE)) { auto MCE = dyn_cast(CE); @@ -1468,16 +1480,206 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; - for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { - auto visitedArg = Visit(CE->getArg(i), dfdx()); - ClonedArgs.push_back(visitedArg.getExpr()); - ClonedDArgs.push_back(visitedArg.getExpr_dx()); - std::cout << "Kokkos::deep_copy visitedArg.getExpr()->dump() start with i = " << i << std::endl; - visitedArg.getExpr()->dump(); - std::cout << "Kokkos::deep_copy visitedArg.getExpr()->dump() end with i = " << i << std::endl; - std::cout << "Kokkos::deep_copy visitedArg.getExpr_dx()->dump() start with i = " << i << std::endl; - visitedArg.getExpr_dx()->dump(); - std::cout << "Kokkos::deep_copy visitedArg.getExpr_dx()->dump() end with i = " << i << std::endl; + bool viewToView = isKokkosView(CE->getArg(1)); + + if (viewToView) { + auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); + auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); + auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + + ClonedArgs.push_back(visitedArg_0.getExpr()); + ClonedArgs.push_back(visitedArg_1.getExpr()); + ClonedArgs.push_back(visitedArg_2.getExpr()); + ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + ClonedDArgs.push_back(visitedArg_2.getExpr_dx()); + } + else { + auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); + auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); + auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + + ClonedArgs.push_back(visitedArg_0.getExpr()); + ClonedArgs.push_back(visitedArg_1.getExpr()); + ClonedArgs.push_back(visitedArg_2.getExpr()); + if (visitedArg_1.getExpr_dx()) { + ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + + // Here we need to do: + // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); + + NamespaceDecl* DC = utils::LookupNSD(m_Sema, "kokkos_builtin_derivative", /*shouldExist=*/true); + + CXXScopeSpec SS; + + utils::BuildNNS(m_Sema, DC, SS); + IdentifierInfo* II = &m_Context.Idents.get("parallel_sum"); + + DeclarationName name(II); + DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); + + LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); + m_Sema.LookupQualifiedName(R, DC); + if (!R.empty()) { + Expr* UnresolvedLookup = + m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); + + return StmtDiff(Call, dCall); + } + + } else { + //ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); + //ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + //beginBlock(direction::reverse); + + QualType argResultValueType = + utils::GetValueType(visitedArg_1.getExpr()->getType()) + .getNonReferenceType(); + + + + VarDecl* argDerivativeVar = BuildVarDecl(argResultValueType, CreateUniqueIdentifier("_r"), visitedArg_1.getExpr_dx()); + Expr* argDerivative = BuildDeclRef(argDerivativeVar); + //Expr* argDerivative = StoreAndRef(visitedArg_1.getExpr_dx(), argResultValueType, + // direction::reverse, "_r", + // /*forceDeclCreation=*/true); + + llvm::SmallVector ArgResultDecls{}; + ArgResultDecls.push_back( + cast(cast(argDerivative)->getDecl())); + + llvm::SmallVector ArgDeclStmts{}; + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + + // Here we need to do: + // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); + + NamespaceDecl* DC = utils::LookupNSD(m_Sema, "kokkos_builtin_derivative", /*shouldExist=*/true); + + CXXScopeSpec SS; + + utils::BuildNNS(m_Sema, DC, SS); + IdentifierInfo* II = &m_Context.Idents.get("parallel_sum"); + + DeclarationName name(II); + DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); + + LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); + m_Sema.LookupQualifiedName(R, DC); + if (!R.empty()) { + Expr* UnresolvedLookup = + m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + + llvm::SmallVector, 4> argResultsAndGrads; + + VarDecl* gradVarDecl = nullptr; + Expr* gradVarExpr = nullptr; + Expr* gradArgExpr = nullptr; + IdentifierInfo* gradVarII = nullptr; + Expr* OverloadedDerivedFn = nullptr; + + { + gradVarII = CreateUniqueIdentifier(funcPostfix()); + + auto PVD = FD->getParamDecl(1); + { + // Declare: diffArgType _grad; + Expr* initVal = nullptr; + if (!visitedArg_1.getExpr()->getType()->isRecordType()) { + // If the argument is not a class type, then initialize the grad + // variable with 0. + initVal = + ConstantFolder::synthesizeLiteral(visitedArg_1.getExpr()->getType(), m_Context, 0); + } + //gradVarDecl = BuildVarDecl(PVD->getType(), gradVarII, visitedArg_1.getExpr()); + gradVarDecl = BuildVarDecl(visitedArg_1.getExpr()->getType(), gradVarII, initVal); + // Pass the address of the declared variable + gradVarExpr = BuildDeclRef(gradVarDecl); + gradArgExpr = + BuildOp(UO_AddrOf, gradVarExpr, m_Function->getLocation()); + argResultsAndGrads.push_back({ArgResultDecls[0], gradVarExpr}); + ArgDeclStmts.push_back(BuildDeclStmt(gradVarDecl)); + } + } + + //ClonedDArgs.push_back(argResultsAndGrads[0].second); + //ClonedDArgs.push_back(BuildDeclRef(ArgResultDecls[0])); + ClonedDArgs.push_back(BuildDeclRef(gradVarDecl)); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); + + auto& block = getCurrentBlock(direction::reverse); + std::size_t insertionPoint = getCurrentBlock(direction::reverse).size(); + auto it = std::begin(block) + insertionPoint; + + // Insert the _gradX declaration statements + it = block.insert(it, ArgDeclStmts.begin(), ArgDeclStmts.end()); + it += ArgDeclStmts.size(); + + it = block.insert(it, dCall); + it += 1; + + it = block.insert(it, BuildDeclStmt(argDerivativeVar)); + + for (auto resAndGrad : argResultsAndGrads) { + VarDecl* argRes = resAndGrad.first; + Expr* grad = resAndGrad.second; + argRes->dump(); + grad->dump(); + PerformImplicitConversionAndAssign(argRes, grad); + } + + Visit(CE->getArg(1), argDerivative); + //Stmt* Reverse = endBlock(direction::reverse); + + return StmtDiff(Call); + } + } + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + + // Here we need to do: + // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); + + NamespaceDecl* DC = utils::LookupNSD(m_Sema, "kokkos_builtin_derivative", /*shouldExist=*/true); + + CXXScopeSpec SS; + + utils::BuildNNS(m_Sema, DC, SS); + IdentifierInfo* II = &m_Context.Idents.get("parallel_sum"); + + DeclarationName name(II); + DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); + + LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); + m_Sema.LookupQualifiedName(R, DC); + if (!R.empty()) { + Expr* UnresolvedLookup = + m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); + + return StmtDiff(Call, dCall); + } } Expr* Call = m_Sema From 43fa4d68744d07beb33b7b4a85a2657ffd1a7700 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 18 Dec 2023 23:48:48 -0700 Subject: [PATCH 11/75] Reset _d_a to zero after a reverse deep_copy --- kokkos/functor_for.hpp | 6 +- kokkos/generated/Derivatives.cpp | 20 +++- lib/Differentiator/ReverseModeVisitor.cpp | 119 ++++++++++++---------- 3 files changed, 85 insertions(+), 60 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 681fa3b2f..5d27a9ec2 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -54,11 +54,9 @@ double f(double x, double y) { //auto a_row_0 = Kokkos::subview( a, 0, Kokkos::ALL ); //b(i,j) = tmp; + Kokkos::deep_copy(a, tmp); - //Kokkos::deep_copy(a, tmp); - //Kokkos::deep_copy(a, tmp); - - //Kokkos::deep_copy(a, x); + Kokkos::deep_copy(a, x); Kokkos::deep_copy(b, x * x + y); Kokkos::deep_copy(a, b); diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 568f28526..1274e4f9c 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -17,6 +17,10 @@ inline double f_darg0(double x, double y) { const int j = 0; double _d_zero = 0.; double zero = 0.; + Kokkos::deep_copy(_d_a, _d_tmp, nullptr); + Kokkos::deep_copy(a, tmp, nullptr); + Kokkos::deep_copy(_d_a, _d_x, nullptr); + Kokkos::deep_copy(a, x, nullptr); Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y, nullptr); Kokkos::deep_copy(b, x * x + y, nullptr); Kokkos::deep_copy(_d_a, _d_b, nullptr); @@ -47,6 +51,8 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array const int i = 0; const int j = 0; double zero = 0.; + Kokkos::deep_copy(a, tmp, nullptr); + Kokkos::deep_copy(a, x, nullptr); _t2 = x; _t4 = x; _t3 = x; @@ -55,10 +61,14 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array goto _label0; _label0: _d_a(i, j) += 1; - Kokkos::deep_copy(_d_b, _d_a, nullptr); + { + Kokkos::deep_copy(_d_b, _d_a, nullptr); + Kokkos::deep_copy(_d_a, 0., nullptr); + } { double _grad0 = 0.; kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); + Kokkos::deep_copy(_d_b, 0., nullptr); double _r2 = _grad0; double _r3 = _r2 * _t3; * _d_x += _r3; @@ -66,6 +76,14 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array * _d_x += _r4; * _d_y += _r2; } + { + kokkos_builtin_derivative::parallel_sum(* _d_x, _d_a); + Kokkos::deep_copy(_d_a, 0., nullptr); + } + { + kokkos_builtin_derivative::parallel_sum(_d_tmp, _d_a); + Kokkos::deep_copy(_d_a, 0., nullptr); + } { double _r0 = _d_tmp * _t0; * _d_x += _r0; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 50936ec2f..50003b0b6 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1480,25 +1480,62 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; + llvm::SmallVector ClonedDArgsZero; bool viewToView = isKokkosView(CE->getArg(1)); - if (viewToView) { - auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); - auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); - auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); + auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); + auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + + ClonedDArgsZero.push_back(visitedArg_0.getExpr_dx()); + auto zero = + ConstantFolder::synthesizeLiteral(m_Context.DoubleTy, m_Context, 0); + ClonedDArgsZero.push_back(zero); + ClonedDArgsZero.push_back(visitedArg_2.getExpr_dx()); + if (viewToView) { ClonedArgs.push_back(visitedArg_0.getExpr()); ClonedArgs.push_back(visitedArg_1.getExpr()); ClonedArgs.push_back(visitedArg_2.getExpr()); ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); ClonedDArgs.push_back(visitedArg_2.getExpr_dx()); + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + NamespaceDecl* DC = utils::LookupNSD(m_Sema, "Kokkos", /*shouldExist=*/true); + + CXXScopeSpec SS; + + utils::BuildNNS(m_Sema, DC, SS); + IdentifierInfo* II = &m_Context.Idents.get("deep_copy"); + + DeclarationName name(II); + DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); + + LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); + m_Sema.LookupQualifiedName(R, DC); + + Expr* UnresolvedLookup = + m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + + Expr* dCallZero = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgsZero, noLoc).get(); + + + addToCurrentBlock(dCall, direction::reverse); + addToCurrentBlock(dCallZero, direction::reverse); + + return StmtDiff(Call); } else { - auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); - auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); - auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); - ClonedArgs.push_back(visitedArg_0.getExpr()); ClonedArgs.push_back(visitedArg_1.getExpr()); ClonedArgs.push_back(visitedArg_2.getExpr()); @@ -1533,7 +1570,15 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, Expr* dCall = m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); - return StmtDiff(Call, dCall); + Expr* dCallZero = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgsZero, noLoc) + .get(); + + addToCurrentBlock(dCall, direction::reverse); + addToCurrentBlock(dCallZero, direction::reverse); + + return StmtDiff(Call); } } else { @@ -1587,14 +1632,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, VarDecl* gradVarDecl = nullptr; Expr* gradVarExpr = nullptr; - Expr* gradArgExpr = nullptr; + //gradArgExprExpr* gradArgExpr = nullptr; IdentifierInfo* gradVarII = nullptr; - Expr* OverloadedDerivedFn = nullptr; { gradVarII = CreateUniqueIdentifier(funcPostfix()); - auto PVD = FD->getParamDecl(1); { // Declare: diffArgType _grad; Expr* initVal = nullptr; @@ -1608,8 +1651,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, gradVarDecl = BuildVarDecl(visitedArg_1.getExpr()->getType(), gradVarII, initVal); // Pass the address of the declared variable gradVarExpr = BuildDeclRef(gradVarDecl); - gradArgExpr = - BuildOp(UO_AddrOf, gradVarExpr, m_Function->getLocation()); + //gradArgExpr = + // BuildOp(UO_AddrOf, gradVarExpr, m_Function->getLocation()); argResultsAndGrads.push_back({ArgResultDecls[0], gradVarExpr}); ArgDeclStmts.push_back(BuildDeclStmt(gradVarDecl)); } @@ -1623,6 +1666,11 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, Expr* dCall = m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); + Expr* dCallZero = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgsZero, noLoc) + .get(); + auto& block = getCurrentBlock(direction::reverse); std::size_t insertionPoint = getCurrentBlock(direction::reverse).size(); auto it = std::begin(block) + insertionPoint; @@ -1633,6 +1681,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, it = block.insert(it, dCall); it += 1; + it = block.insert(it, dCallZero); + it += 1; it = block.insert(it, BuildDeclStmt(argDerivativeVar)); @@ -1650,48 +1700,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Call); } } - - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedArgs, noLoc) - .get(); - - // Here we need to do: - // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); - - NamespaceDecl* DC = utils::LookupNSD(m_Sema, "kokkos_builtin_derivative", /*shouldExist=*/true); - - CXXScopeSpec SS; - - utils::BuildNNS(m_Sema, DC, SS); - IdentifierInfo* II = &m_Context.Idents.get("parallel_sum"); - - DeclarationName name(II); - DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); - - LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); - m_Sema.LookupQualifiedName(R, DC); - if (!R.empty()) { - Expr* UnresolvedLookup = - m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); - - Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); - - return StmtDiff(Call, dCall); - } } - - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedArgs, noLoc) - .get(); - Expr* dCall = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedDArgs, noLoc) - .get(); - - return StmtDiff(Call, dCall); } if (FD->getQualifiedNameAsString().find("Kokkos::subview") != std::string::npos) { From 092f425a598f0347f4fca83a08f3e3447e537f53 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 19 Dec 2023 11:23:57 -0700 Subject: [PATCH 12/75] Fix generated deep_copy by removing not needed arguments --- kokkos/functor_for.hpp | 9 +--- kokkos/generated/Derivatives.cpp | 24 ++++++----- kokkos/main.cpp | 4 +- lib/Differentiator/BaseForwardModeVisitor.cpp | 41 +++++++++++++------ lib/Differentiator/ReverseModeVisitor.cpp | 19 ++++----- 5 files changed, 54 insertions(+), 43 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 5d27a9ec2..52a00fe58 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -50,26 +50,19 @@ double f(double x, double y) { const int j = 0; double zero = 0.; - //Kokkos::deep_copy(a, tmp); //auto a_row_0 = Kokkos::subview( a, 0, Kokkos::ALL ); - //b(i,j) = tmp; Kokkos::deep_copy(a, tmp); Kokkos::deep_copy(a, x); Kokkos::deep_copy(b, x * x + y); Kokkos::deep_copy(a, b); - //a(i,j) = x; - //a(i,j) = x * x + y; - - //size_t N1n = a.extent(0); + size_t N1n = a.extent(0); //ParallelFunctor functor(a,x,y); //Kokkos::parallel_for(N1n, functor); - // double sum = f2(x, y); - return a(i,j); } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 1274e4f9c..9c0ee4961 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -17,14 +17,16 @@ inline double f_darg0(double x, double y) { const int j = 0; double _d_zero = 0.; double zero = 0.; - Kokkos::deep_copy(_d_a, _d_tmp, nullptr); - Kokkos::deep_copy(a, tmp, nullptr); - Kokkos::deep_copy(_d_a, _d_x, nullptr); - Kokkos::deep_copy(a, x, nullptr); - Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y, nullptr); - Kokkos::deep_copy(b, x * x + y, nullptr); - Kokkos::deep_copy(_d_a, _d_b, nullptr); - Kokkos::deep_copy(a, b, nullptr); + Kokkos::deep_copy(_d_a, _d_tmp); + Kokkos::deep_copy(a, tmp); + Kokkos::deep_copy(_d_a, _d_x); + Kokkos::deep_copy(a, x); + Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y); + Kokkos::deep_copy(b, x * x + y); + Kokkos::deep_copy(_d_a, _d_b); + Kokkos::deep_copy(a, b); + size_t _d_N1n; + size_t N1n = a.extent(0); return _d_a(i, j); } inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { @@ -41,6 +43,7 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double _t2; double _t3; double _t4; + size_t _d_N1n = 0; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -57,12 +60,13 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array _t4 = x; _t3 = x; Kokkos::deep_copy(b, x * _t2 + y, nullptr); - Kokkos::deep_copy(a, b, nullptr); + Kokkos::deep_copy(a, b); + size_t N1n = a.extent(0); goto _label0; _label0: _d_a(i, j) += 1; { - Kokkos::deep_copy(_d_b, _d_a, nullptr); + Kokkos::deep_copy(_d_b, _d_a); Kokkos::deep_copy(_d_a, 0., nullptr); } { diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 511088349..a6f2f75e9 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -42,9 +42,11 @@ int main(int argc, char* argv[]) { assert(dx==dx_f && "error"); #else double dx = 0, dy = 0; - std::cout << f_darg0(3.,4.) << std::endl; + double dx_f = f_darg0(3.,4.); + std::cout << "dx: " << dx_f << std::endl; f_grad(3., 4., &dx, &dy); std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; + assert(dx==dx_f && "error"); #endif } Kokkos::finalize(); diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index 84755e5a3..e663172f7 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -1033,20 +1033,35 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; - for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { - auto visitedArg = Visit(CE->getArg(i)); - ClonedArgs.push_back(visitedArg.getExpr()); - ClonedDArgs.push_back(visitedArg.getExpr_dx()); - } - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedArgs, noLoc) - .get(); - Expr* dCall = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedDArgs, noLoc) - .get(); + auto visitedArg_0 = Visit(CE->getArg(0)); + auto visitedArg_1 = Visit(CE->getArg(1)); + ClonedArgs.push_back(visitedArg_0.getExpr()); + ClonedArgs.push_back(visitedArg_1.getExpr()); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); + + NamespaceDecl* DC = utils::LookupNSD(m_Sema, "Kokkos", /*shouldExist=*/true); + + CXXScopeSpec SS; + + utils::BuildNNS(m_Sema, DC, SS); + IdentifierInfo* II = &m_Context.Idents.get("deep_copy"); + + DeclarationName name(II); + DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); + + LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); + m_Sema.LookupQualifiedName(R, DC); + + Expr* UnresolvedLookup = + m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + + Expr* Call = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedArgs, noLoc).get(); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); return StmtDiff(Call, dCall); } diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 50003b0b6..027452a15 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -753,7 +753,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, StmtDiff ReverseModeVisitor::VisitValueStmt( const clang::ValueStmt* VS) { // This is most likely a name provided in a Kokkos::view construction + //std::cout << "VisitValueStmt VS->dump start" << std::endl; //VS->dump (); + //std::cout << "VisitValueStmt VS->dump end" << std::endl; // Test if StringLiteral if (isa(VS)) { //std::cout << "This is a StringLiteral!" << std::endl; @@ -1496,19 +1498,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (viewToView) { ClonedArgs.push_back(visitedArg_0.getExpr()); ClonedArgs.push_back(visitedArg_1.getExpr()); - ClonedArgs.push_back(visitedArg_2.getExpr()); ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - ClonedDArgs.push_back(visitedArg_2.getExpr_dx()); - - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedArgs, noLoc) - .get(); - Expr* dCall = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedDArgs, noLoc) - .get(); NamespaceDecl* DC = utils::LookupNSD(m_Sema, "Kokkos", /*shouldExist=*/true); @@ -1526,6 +1517,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, Expr* UnresolvedLookup = m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + Expr* Call = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedArgs, noLoc).get(); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); + Expr* dCallZero = m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgsZero, noLoc).get(); From 1eb525eb536858530d0b58e0db52ae2d4db13967 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 19 Dec 2023 11:44:52 -0700 Subject: [PATCH 13/75] Clean a bit of the copy paste --- include/clad/Differentiator/CladUtils.h | 5 + kokkos/generated/Derivatives.cpp | 14 +- lib/Differentiator/CladUtils.cpp | 17 ++ lib/Differentiator/ReverseModeVisitor.cpp | 218 ++++++++-------------- 4 files changed, 106 insertions(+), 148 deletions(-) diff --git a/include/clad/Differentiator/CladUtils.h b/include/clad/Differentiator/CladUtils.h index 5690c3913..48c656276 100644 --- a/include/clad/Differentiator/CladUtils.h +++ b/include/clad/Differentiator/CladUtils.h @@ -153,6 +153,11 @@ namespace clad { clang::DeclContext* GetOutermostDC(clang::Sema& semaRef, clang::DeclContext* DC); + clang::Expr* GetUnresolvedLookup(clang::Sema& semaRef, + clang::ASTContext& C, + std::string NS, + std::string FN); + /// Creates a `StringLiteral` node to represent string literal /// "`str`". /// diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 9c0ee4961..ab2a0e6d3 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -54,12 +54,12 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array const int i = 0; const int j = 0; double zero = 0.; - Kokkos::deep_copy(a, tmp, nullptr); - Kokkos::deep_copy(a, x, nullptr); + Kokkos::deep_copy(a, tmp); + Kokkos::deep_copy(a, x); _t2 = x; _t4 = x; _t3 = x; - Kokkos::deep_copy(b, x * _t2 + y, nullptr); + Kokkos::deep_copy(b, x * _t2 + y); Kokkos::deep_copy(a, b); size_t N1n = a.extent(0); goto _label0; @@ -67,12 +67,12 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array _d_a(i, j) += 1; { Kokkos::deep_copy(_d_b, _d_a); - Kokkos::deep_copy(_d_a, 0., nullptr); + Kokkos::deep_copy(_d_a, 0.); } { double _grad0 = 0.; kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); - Kokkos::deep_copy(_d_b, 0., nullptr); + Kokkos::deep_copy(_d_b, 0.); double _r2 = _grad0; double _r3 = _r2 * _t3; * _d_x += _r3; @@ -82,11 +82,11 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array } { kokkos_builtin_derivative::parallel_sum(* _d_x, _d_a); - Kokkos::deep_copy(_d_a, 0., nullptr); + Kokkos::deep_copy(_d_a, 0.); } { kokkos_builtin_derivative::parallel_sum(_d_tmp, _d_a); - Kokkos::deep_copy(_d_a, 0., nullptr); + Kokkos::deep_copy(_d_a, 0.); } { double _r0 = _d_tmp * _t0; diff --git a/lib/Differentiator/CladUtils.cpp b/lib/Differentiator/CladUtils.cpp index fbddd535b..5bcfdc16c 100644 --- a/lib/Differentiator/CladUtils.cpp +++ b/lib/Differentiator/CladUtils.cpp @@ -264,6 +264,23 @@ namespace clad { return DC; } + clang::Expr* GetUnresolvedLookup(Sema& S, ASTContext& C, std::string NS, std::string FN) { + NamespaceDecl* DC = utils::LookupNSD(S, NS, /*shouldExist=*/true); + + CXXScopeSpec SS; + + utils::BuildNNS(S, DC, SS); + IdentifierInfo* II = &C.Idents.get(FN); + + DeclarationName name(II); + DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(S)); + + LookupResult R(S, DNInfo, Sema::LookupOrdinaryName); + S.LookupQualifiedName(R, DC); + + return S.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + } + StringLiteral* CreateStringLiteral(ASTContext& C, llvm::StringRef str) { // Copied and adapted from clang::Sema::ActOnStringLiteral. QualType CharTyConst = C.CharTy.withConst(); diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 027452a15..2ca81142a 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1487,44 +1487,31 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); - auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + //auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + + ClonedArgs.push_back(visitedArg_0.getExpr()); + ClonedArgs.push_back(visitedArg_1.getExpr()); ClonedDArgsZero.push_back(visitedArg_0.getExpr_dx()); auto zero = ConstantFolder::synthesizeLiteral(m_Context.DoubleTy, m_Context, 0); ClonedDArgsZero.push_back(zero); - ClonedDArgsZero.push_back(visitedArg_2.getExpr_dx()); + + Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); + Expr* kokkos_builtin_derivative_parallel_sum = utils::GetUnresolvedLookup(m_Sema, m_Context, "kokkos_builtin_derivative", "parallel_sum"); if (viewToView) { - ClonedArgs.push_back(visitedArg_0.getExpr()); - ClonedArgs.push_back(visitedArg_1.getExpr()); ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - NamespaceDecl* DC = utils::LookupNSD(m_Sema, "Kokkos", /*shouldExist=*/true); - - CXXScopeSpec SS; - - utils::BuildNNS(m_Sema, DC, SS); - IdentifierInfo* II = &m_Context.Idents.get("deep_copy"); - - DeclarationName name(II); - DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); - - LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); - m_Sema.LookupQualifiedName(R, DC); - - Expr* UnresolvedLookup = - m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); - Expr* Call = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedArgs, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedArgs, noLoc).get(); Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgs, noLoc).get(); Expr* dCallZero = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgsZero, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgsZero, noLoc).get(); addToCurrentBlock(dCall, direction::reverse); @@ -1533,55 +1520,31 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Call); } else { - ClonedArgs.push_back(visitedArg_0.getExpr()); - ClonedArgs.push_back(visitedArg_1.getExpr()); - ClonedArgs.push_back(visitedArg_2.getExpr()); if (visitedArg_1.getExpr_dx()) { ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedArgs, noLoc) .get(); // Here we need to do: // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); - NamespaceDecl* DC = utils::LookupNSD(m_Sema, "kokkos_builtin_derivative", /*shouldExist=*/true); - - CXXScopeSpec SS; - - utils::BuildNNS(m_Sema, DC, SS); - IdentifierInfo* II = &m_Context.Idents.get("parallel_sum"); - - DeclarationName name(II); - DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); - - LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); - m_Sema.LookupQualifiedName(R, DC); - if (!R.empty()) { - Expr* UnresolvedLookup = - m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); - - Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); - - Expr* dCallZero = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedDArgsZero, noLoc) - .get(); + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); - addToCurrentBlock(dCall, direction::reverse); - addToCurrentBlock(dCallZero, direction::reverse); + Expr* dCallZero = m_Sema + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, + noLoc, ClonedDArgsZero, noLoc) + .get(); - return StmtDiff(Call); - } + addToCurrentBlock(dCall, direction::reverse); + addToCurrentBlock(dCallZero, direction::reverse); + return StmtDiff(Call); } else { - //ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); - //ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - //beginBlock(direction::reverse); QualType argResultValueType = utils::GetValueType(visitedArg_1.getExpr()->getType()) @@ -1591,9 +1554,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, VarDecl* argDerivativeVar = BuildVarDecl(argResultValueType, CreateUniqueIdentifier("_r"), visitedArg_1.getExpr_dx()); Expr* argDerivative = BuildDeclRef(argDerivativeVar); - //Expr* argDerivative = StoreAndRef(visitedArg_1.getExpr_dx(), argResultValueType, - // direction::reverse, "_r", - // /*forceDeclCreation=*/true); llvm::SmallVector ArgResultDecls{}; ArgResultDecls.push_back( @@ -1602,100 +1562,76 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector ArgDeclStmts{}; Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedArgs, noLoc) .get(); // Here we need to do: // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); - NamespaceDecl* DC = utils::LookupNSD(m_Sema, "kokkos_builtin_derivative", /*shouldExist=*/true); - - CXXScopeSpec SS; - - utils::BuildNNS(m_Sema, DC, SS); - IdentifierInfo* II = &m_Context.Idents.get("parallel_sum"); + llvm::SmallVector, 4> argResultsAndGrads; - DeclarationName name(II); - DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); + VarDecl* gradVarDecl = nullptr; + Expr* gradVarExpr = nullptr; + IdentifierInfo* gradVarII = nullptr; - LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); - m_Sema.LookupQualifiedName(R, DC); - if (!R.empty()) { - Expr* UnresolvedLookup = - m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); - - llvm::SmallVector, 4> argResultsAndGrads; - - VarDecl* gradVarDecl = nullptr; - Expr* gradVarExpr = nullptr; - //gradArgExprExpr* gradArgExpr = nullptr; - IdentifierInfo* gradVarII = nullptr; + { + gradVarII = CreateUniqueIdentifier(funcPostfix()); { - gradVarII = CreateUniqueIdentifier(funcPostfix()); - - { - // Declare: diffArgType _grad; - Expr* initVal = nullptr; - if (!visitedArg_1.getExpr()->getType()->isRecordType()) { - // If the argument is not a class type, then initialize the grad - // variable with 0. - initVal = - ConstantFolder::synthesizeLiteral(visitedArg_1.getExpr()->getType(), m_Context, 0); - } - //gradVarDecl = BuildVarDecl(PVD->getType(), gradVarII, visitedArg_1.getExpr()); - gradVarDecl = BuildVarDecl(visitedArg_1.getExpr()->getType(), gradVarII, initVal); - // Pass the address of the declared variable - gradVarExpr = BuildDeclRef(gradVarDecl); - //gradArgExpr = - // BuildOp(UO_AddrOf, gradVarExpr, m_Function->getLocation()); - argResultsAndGrads.push_back({ArgResultDecls[0], gradVarExpr}); - ArgDeclStmts.push_back(BuildDeclStmt(gradVarDecl)); + // Declare: diffArgType _grad; + Expr* initVal = nullptr; + if (!visitedArg_1.getExpr()->getType()->isRecordType()) { + // If the argument is not a class type, then initialize the grad + // variable with 0. + initVal = + ConstantFolder::synthesizeLiteral(visitedArg_1.getExpr()->getType(), m_Context, 0); } + gradVarDecl = BuildVarDecl(visitedArg_1.getExpr()->getType(), gradVarII, initVal); + // Pass the address of the declared variable + gradVarExpr = BuildDeclRef(gradVarDecl); + argResultsAndGrads.push_back({ArgResultDecls[0], gradVarExpr}); + ArgDeclStmts.push_back(BuildDeclStmt(gradVarDecl)); } - - //ClonedDArgs.push_back(argResultsAndGrads[0].second); - //ClonedDArgs.push_back(BuildDeclRef(ArgResultDecls[0])); - ClonedDArgs.push_back(BuildDeclRef(gradVarDecl)); - ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - - Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); - - Expr* dCallZero = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedDArgsZero, noLoc) - .get(); - - auto& block = getCurrentBlock(direction::reverse); - std::size_t insertionPoint = getCurrentBlock(direction::reverse).size(); - auto it = std::begin(block) + insertionPoint; - - // Insert the _gradX declaration statements - it = block.insert(it, ArgDeclStmts.begin(), ArgDeclStmts.end()); - it += ArgDeclStmts.size(); - - it = block.insert(it, dCall); - it += 1; - it = block.insert(it, dCallZero); - it += 1; - - it = block.insert(it, BuildDeclStmt(argDerivativeVar)); - - for (auto resAndGrad : argResultsAndGrads) { - VarDecl* argRes = resAndGrad.first; - Expr* grad = resAndGrad.second; - argRes->dump(); - grad->dump(); - PerformImplicitConversionAndAssign(argRes, grad); - } - - Visit(CE->getArg(1), argDerivative); - //Stmt* Reverse = endBlock(direction::reverse); - - return StmtDiff(Call); } + + ClonedDArgs.push_back(BuildDeclRef(gradVarDecl)); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); + + Expr* dCallZero = m_Sema + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, + noLoc, ClonedDArgsZero, noLoc) + .get(); + + auto& block = getCurrentBlock(direction::reverse); + std::size_t insertionPoint = getCurrentBlock(direction::reverse).size(); + auto it = std::begin(block) + insertionPoint; + + // Insert the _gradX declaration statements + it = block.insert(it, ArgDeclStmts.begin(), ArgDeclStmts.end()); + it += ArgDeclStmts.size(); + + it = block.insert(it, dCall); + it += 1; + it = block.insert(it, dCallZero); + it += 1; + + it = block.insert(it, BuildDeclStmt(argDerivativeVar)); + + for (auto resAndGrad : argResultsAndGrads) { + VarDecl* argRes = resAndGrad.first; + Expr* grad = resAndGrad.second; + argRes->dump(); + grad->dump(); + PerformImplicitConversionAndAssign(argRes, grad); + } + + Visit(CE->getArg(1), argDerivative); + + return StmtDiff(Call); } } } From 91ec308f33264d7cfdfc25516ef0037128dd81e2 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 19 Dec 2023 11:57:01 -0700 Subject: [PATCH 14/75] Move towards the usage of kokkos_builtin_derivative::parallel_sum inside the forward pass --- kokkos/functor_for.hpp | 5 ++- kokkos/generated/Derivatives.cpp | 37 +++-------------- kokkos/main.cpp | 14 +++---- lib/Differentiator/ReverseModeVisitor.cpp | 50 +++++++++++++++++++++-- 4 files changed, 63 insertions(+), 43 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 52a00fe58..0c3f69244 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -64,5 +64,8 @@ double f(double x, double y) { //Kokkos::parallel_for(N1n, functor); - return a(i,j); + double sum; + kokkos_builtin_derivative::parallel_sum(sum, a); + + return sum; } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index ab2a0e6d3..b3083a758 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -1,34 +1,3 @@ -inline double f_darg0(double x, double y) { - double _d_x = 1; - double _d_y = 0; - const int _d_N1 = 0; - const int N1 = 4; - const int _d_N2 = 0; - const int N2 = 4; - Kokkos::View _d_a("_d_a", N1); - Kokkos::View a("a", N1); - Kokkos::View _d_b("_d_b", N1); - Kokkos::View b("b", N1); - double _d_tmp = _d_x * x + x * _d_x + _d_y; - double tmp = x * x + y; - const int _d_i = 0; - const int i = 0; - const int _d_j = 0; - const int j = 0; - double _d_zero = 0.; - double zero = 0.; - Kokkos::deep_copy(_d_a, _d_tmp); - Kokkos::deep_copy(a, tmp); - Kokkos::deep_copy(_d_a, _d_x); - Kokkos::deep_copy(a, x); - Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y); - Kokkos::deep_copy(b, x * x + y); - Kokkos::deep_copy(_d_a, _d_b); - Kokkos::deep_copy(a, b); - size_t _d_N1n; - size_t N1n = a.extent(0); - return _d_a(i, j); -} inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; int _d_N2 = 0; @@ -44,6 +13,7 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double _t3; double _t4; size_t _d_N1n = 0; + double _d_sum = 0; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -62,9 +32,12 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array Kokkos::deep_copy(b, x * _t2 + y); Kokkos::deep_copy(a, b); size_t N1n = a.extent(0); + double sum; + kokkos_builtin_derivative::parallel_sum(sum, a); goto _label0; _label0: - _d_a(i, j) += 1; + _d_sum += 1; + Kokkos::deep_copy(_d_a, _d_sum); { Kokkos::deep_copy(_d_b, _d_a); Kokkos::deep_copy(_d_a, 0.); diff --git a/kokkos/main.cpp b/kokkos/main.cpp index a6f2f75e9..7c50df1bd 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -26,27 +26,27 @@ int main(int argc, char* argv[]) { std::cout << weightedDotProduct_2(A, x, y) << std::endl; #ifndef use_generated_file - auto f_dx_exe = clad::differentiate(f, "x"); + //auto f_dx_exe = clad::differentiate(f, "x"); auto f_grad_exe = clad::gradient(f); // Any of the two below will generate an "error: Attempted differentiation w.r.t. member 'x' which is not of real type." //auto weightedDotProduct_1_dx = clad::differentiate(weightedDotProduct_1, "x"); //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); - double dx_f = f_dx_exe.execute(3.,4.); - std::cout << "dx: " << dx_f << std::endl; + //double dx_f = f_dx_exe.execute(3.,4.); + //std::cout << "dx: " << dx_f << std::endl; double dx = 0, dy = 0; // After this call, dx and dy will store the derivatives of x and y respectively. f_grad_exe.execute(3., 4., &dx, &dy); std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; - assert(dx==dx_f && "error"); + //assert(dx==dx_f && "error"); #else double dx = 0, dy = 0; - double dx_f = f_darg0(3.,4.); - std::cout << "dx: " << dx_f << std::endl; + //double dx_f = f_darg0(3.,4.); + //std::cout << "dx: " << dx_f << std::endl; f_grad(3., 4., &dx, &dy); std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; - assert(dx==dx_f && "error"); + //assert(dx==dx_f && "error"); #endif } Kokkos::finalize(); diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 2ca81142a..73ff348dd 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -769,6 +769,19 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, //derivedVS->dump (); return {Clone(VS), derivedVS}; } + if (isa(VS)) { + std::cout << "This is probably a view!" << std::endl; + auto CBTE = dyn_cast(VS); + + auto tmp = Visit(CBTE->getSubExpr()); + + std::cout << "Start dump probably a view!" << std::endl; + tmp.getExpr()->dump(); + tmp.getExpr_dx()->dump(); + std::cout << "End dump probably a view!" << std::endl; + + return tmp; + } return {Clone(VS), Clone(VS)}; } @@ -1414,7 +1427,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return constructedTypeName.find("Kokkos::View") != std::string::npos; } } - return constructedTypeName.rfind("Kokkos::View", 0) == 0; + return constructedTypeName.find("Kokkos::View") != std::string::npos; } StmtDiff ReverseModeVisitor::VisitCallExpr(const CallExpr* CE) { @@ -1478,6 +1491,32 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, auto SE = CE->getCallee()->IgnoreImpCasts(); if (auto DRE = dyn_cast(SE)) { if (auto FD = dyn_cast(DRE->getDecl())) { + if (FD->getQualifiedNameAsString().find("kokkos_builtin_derivative::parallel_sum") != std::string::npos) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + + auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); + auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); + + ClonedArgs.push_back(visitedArg_0.getExpr()); + ClonedArgs.push_back(visitedArg_1.getExpr()); + + ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + + Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); + Expr* kokkos_builtin_derivative_parallel_sum = utils::GetUnresolvedLookup(m_Sema, m_Context, "kokkos_builtin_derivative", "parallel_sum"); + + Expr* Call = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedArgs, noLoc).get(); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgs, noLoc).get(); + + addToCurrentBlock(dCall, direction::reverse); + + return StmtDiff(Call); + } if (FD->getQualifiedNameAsString().find("Kokkos::deep_copy") != std::string::npos) { llvm::SmallVector ClonedArgs; @@ -2893,7 +2932,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, std::string constructedTypeName = QualType::getAsString(VD->getType().split(), PrintingPolicy{ {} }); - if (constructedTypeName.rfind("Kokkos::View", 0) == 0) { + if (constructedTypeName.find("Kokkos::View") != std::string::npos) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; @@ -4069,8 +4108,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, StmtDiff ReverseModeVisitor::VisitCXXConstructExpr(const CXXConstructExpr* CE) { llvm::SmallVector clonedArgs; + llvm::SmallVector clonedDArgs; std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); - if (constructedTypeName.rfind("Kokkos::View", 0) == 0) { + if (constructedTypeName.find("Kokkos::View") != std::string::npos) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; @@ -4088,8 +4128,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, break; auto argDiff = Visit(arg, dfdx()); clonedArgs.push_back(argDiff.getExpr()); + clonedDArgs.push_back(argDiff.getExpr_dx()); ++i; } + if (CE->getNumArgs() == 1) { + return StmtDiff(clonedArgs[0], clonedDArgs[0]); + } } else { for (const auto* arg : CE->arguments()) { From f87ccd2d8a0900a3d5e34091cc87169d61a9e2be Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 19 Dec 2023 22:16:20 -0700 Subject: [PATCH 15/75] Move towards the usage of subview --- kokkos/functor_for.hpp | 26 +- kokkos/generated/Derivatives.cpp | 16 +- kokkos/main.cpp | 45 ++- lib/Differentiator/ReverseModeVisitor.cpp | 416 ++++++++++++---------- 4 files changed, 282 insertions(+), 221 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 0c3f69244..9c8b43538 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -15,6 +15,17 @@ void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { sum += tmp_sum; } +template +void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { + + Kokkos::parallel_for( A.extent(0), KOKKOS_LAMBDA ( int i) { + + for ( int j = 0; j < A.extent(1); ++j ) { + A( i, j ) += b; + } + }); +} + } template @@ -49,23 +60,18 @@ double f(double x, double y) { const int i = 0; const int j = 0; - double zero = 0.; - //auto a_row_0 = Kokkos::subview( a, 0, Kokkos::ALL ); - Kokkos::deep_copy(a, tmp); Kokkos::deep_copy(a, x); Kokkos::deep_copy(b, x * x + y); Kokkos::deep_copy(a, b); - size_t N1n = a.extent(0); - - //ParallelFunctor functor(a,x,y); - - //Kokkos::parallel_for(N1n, functor); - double sum; - kokkos_builtin_derivative::parallel_sum(sum, a); + auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); + + sum = a_row_0(0,0); + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + //sum = a_row_0(0,0); return sum; } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index b3083a758..f5bb5cf32 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -8,12 +8,11 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double _d_tmp = 0; int _d_i = 0; int _d_j = 0; - double _d_zero = 0; double _t2; double _t3; double _t4; - size_t _d_N1n = 0; double _d_sum = 0; + Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -23,7 +22,6 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double tmp = _t1 * _t0 + y; const int i = 0; const int j = 0; - double zero = 0.; Kokkos::deep_copy(a, tmp); Kokkos::deep_copy(a, x); _t2 = x; @@ -31,13 +29,19 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array _t3 = x; Kokkos::deep_copy(b, x * _t2 + y); Kokkos::deep_copy(a, b); - size_t N1n = a.extent(0); double sum; - kokkos_builtin_derivative::parallel_sum(sum, a); + Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); + sum = a_row_0(0, 0); + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); goto _label0; _label0: _d_sum += 1; - Kokkos::deep_copy(_d_a, _d_sum); + kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); + { + double _r_d0 = _d_sum; + _d_a_row_0(0, 0) += _r_d0; + _d_sum -= _r_d0; + } { Kokkos::deep_copy(_d_b, _d_a); Kokkos::deep_copy(_d_a, 0.); diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 7c50df1bd..0ee6a9a98 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -5,6 +5,7 @@ #include "lambda_reduction_subview.hpp" //#define use_generated_file +//#define use_forward_mode #ifdef use_generated_file #include "generated/Derivatives.cpp" @@ -25,29 +26,47 @@ int main(int argc, char* argv[]) { std::cout << weightedDotProduct_1(A, x, y) << std::endl; std::cout << weightedDotProduct_2(A, x, y) << std::endl; + double epsilon = 1e-6; + + double f_pe = f(3.+epsilon,4.); + double f_me = f(3.-epsilon,4.); + double dx_f_FD = (f_pe-f_me) / (2 * epsilon); + + double tolerance = 1e-6; + + std::cout << "dx_f_FD: " << dx_f_FD << std::endl; + + double dx = 0, dy = 0; + double dx_f; + #ifndef use_generated_file - //auto f_dx_exe = clad::differentiate(f, "x"); + #ifdef use_forward_mode + auto f_dx_exe = clad::differentiate(f, "x"); + #endif auto f_grad_exe = clad::gradient(f); // Any of the two below will generate an "error: Attempted differentiation w.r.t. member 'x' which is not of real type." //auto weightedDotProduct_1_dx = clad::differentiate(weightedDotProduct_1, "x"); //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); - - //double dx_f = f_dx_exe.execute(3.,4.); - //std::cout << "dx: " << dx_f << std::endl; - - double dx = 0, dy = 0; + #ifdef use_forward_mode + dx_f = f_dx_exe.execute(3.,4.); + #endif // After this call, dx and dy will store the derivatives of x and y respectively. f_grad_exe.execute(3., 4., &dx, &dy); - std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; - //assert(dx==dx_f && "error"); #else - double dx = 0, dy = 0; - //double dx_f = f_darg0(3.,4.); - //std::cout << "dx: " << dx_f << std::endl; + #ifdef use_forward_mode + dx_f = f_darg0(3.,4.); + #endif f_grad(3., 4., &dx, &dy); - std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; - //assert(dx==dx_f && "error"); #endif + + #ifdef use_forward_mode + std::cout << "dx: " << dx_f << std::endl; + #endif + std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; + #ifdef use_forward_mode + assert(dx==dx_f && "error"); + #endif + assert(std::abs(dx-dx_f_FD)(VS)) { - std::cout << "This is probably a view!" << std::endl; + //std::cout << "This is probably a view!" << std::endl; auto CBTE = dyn_cast(VS); auto tmp = Visit(CBTE->getSubExpr()); - std::cout << "Start dump probably a view!" << std::endl; - tmp.getExpr()->dump(); - tmp.getExpr_dx()->dump(); - std::cout << "End dump probably a view!" << std::endl; + //std::cout << "Start dump probably a view!" << std::endl; + //tmp.getExpr()->dump(); + //tmp.getExpr_dx()->dump(); + //std::cout << "End dump probably a view!" << std::endl; return tmp; } @@ -1488,248 +1488,248 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Call, dCall); } } - auto SE = CE->getCallee()->IgnoreImpCasts(); - if (auto DRE = dyn_cast(SE)) { - if (auto FD = dyn_cast(DRE->getDecl())) { - if (FD->getQualifiedNameAsString().find("kokkos_builtin_derivative::parallel_sum") != std::string::npos) { - llvm::SmallVector ClonedArgs; - llvm::SmallVector ClonedDArgs; - - auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); - auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); - - ClonedArgs.push_back(visitedArg_0.getExpr()); - ClonedArgs.push_back(visitedArg_1.getExpr()); - - ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); - ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - - Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); - Expr* kokkos_builtin_derivative_parallel_sum = utils::GetUnresolvedLookup(m_Sema, m_Context, "kokkos_builtin_derivative", "parallel_sum"); - - Expr* Call = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedArgs, noLoc).get(); + auto SE = CE->getCallee()->IgnoreImpCasts(); + if (auto DRE = dyn_cast(SE)) { + if (auto FD = dyn_cast(DRE->getDecl())) { + if (FD->getQualifiedNameAsString().find("kokkos_builtin_derivative::parallel_sum") != std::string::npos) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; - Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgs, noLoc).get(); - - addToCurrentBlock(dCall, direction::reverse); - - return StmtDiff(Call); - } - if (FD->getQualifiedNameAsString().find("Kokkos::deep_copy") != std::string::npos) { - - llvm::SmallVector ClonedArgs; - llvm::SmallVector ClonedDArgs; - llvm::SmallVector ClonedDArgsZero; - bool viewToView = isKokkosView(CE->getArg(1)); + auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); + auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); - auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); - auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); - //auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + ClonedArgs.push_back(visitedArg_0.getExpr()); + ClonedArgs.push_back(visitedArg_1.getExpr()); - ClonedArgs.push_back(visitedArg_0.getExpr()); - ClonedArgs.push_back(visitedArg_1.getExpr()); - - ClonedDArgsZero.push_back(visitedArg_0.getExpr_dx()); - auto zero = - ConstantFolder::synthesizeLiteral(m_Context.DoubleTy, m_Context, 0); - ClonedDArgsZero.push_back(zero); - - Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); - Expr* kokkos_builtin_derivative_parallel_sum = utils::GetUnresolvedLookup(m_Sema, m_Context, "kokkos_builtin_derivative", "parallel_sum"); - - if (viewToView) { ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + //Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); + Expr* kokkos_builtin_derivative_parallel_sum = utils::GetUnresolvedLookup(m_Sema, m_Context, "kokkos_builtin_derivative", "parallel_sum"); + Expr* Call = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedArgs, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedArgs, noLoc).get(); Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgs, noLoc).get(); - - Expr* dCallZero = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgsZero, noLoc).get(); - + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); addToCurrentBlock(dCall, direction::reverse); - addToCurrentBlock(dCallZero, direction::reverse); return StmtDiff(Call); } - else { - if (visitedArg_1.getExpr_dx()) { + if (FD->getQualifiedNameAsString().find("Kokkos::deep_copy") != std::string::npos) { + + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + llvm::SmallVector ClonedDArgsZero; + bool viewToView = isKokkosView(CE->getArg(1)); + + auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); + auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); + //auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); + + ClonedArgs.push_back(visitedArg_0.getExpr()); + ClonedArgs.push_back(visitedArg_1.getExpr()); + + ClonedDArgsZero.push_back(visitedArg_0.getExpr_dx()); + auto zero = + ConstantFolder::synthesizeLiteral(m_Context.DoubleTy, m_Context, 0); + ClonedDArgsZero.push_back(zero); + + Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); + Expr* kokkos_builtin_derivative_parallel_sum = utils::GetUnresolvedLookup(m_Sema, m_Context, "kokkos_builtin_derivative", "parallel_sum"); + + if (viewToView) { ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, - noLoc, ClonedArgs, noLoc) - .get(); - - // Here we need to do: - // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); + Expr* Call = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedArgs, noLoc).get(); Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgs, noLoc).get(); + + Expr* dCallZero = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgsZero, noLoc).get(); - Expr* dCallZero = m_Sema - .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, - noLoc, ClonedDArgsZero, noLoc) - .get(); addToCurrentBlock(dCall, direction::reverse); addToCurrentBlock(dCallZero, direction::reverse); - + return StmtDiff(Call); - } else { - - QualType argResultValueType = - utils::GetValueType(visitedArg_1.getExpr()->getType()) - .getNonReferenceType(); + } + else { + if (visitedArg_1.getExpr_dx()) { + ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, + noLoc, ClonedArgs, noLoc) + .get(); + + // Here we need to do: + // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); + + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); + + Expr* dCallZero = m_Sema + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, + noLoc, ClonedDArgsZero, noLoc) + .get(); + + addToCurrentBlock(dCall, direction::reverse); + addToCurrentBlock(dCallZero, direction::reverse); + + return StmtDiff(Call); + } else { + QualType argResultValueType = + utils::GetValueType(visitedArg_1.getExpr()->getType()) + .getNonReferenceType(); - VarDecl* argDerivativeVar = BuildVarDecl(argResultValueType, CreateUniqueIdentifier("_r"), visitedArg_1.getExpr_dx()); - Expr* argDerivative = BuildDeclRef(argDerivativeVar); - llvm::SmallVector ArgResultDecls{}; - ArgResultDecls.push_back( - cast(cast(argDerivative)->getDecl())); + VarDecl* argDerivativeVar = BuildVarDecl(argResultValueType, CreateUniqueIdentifier("_r"), visitedArg_1.getExpr_dx()); + Expr* argDerivative = BuildDeclRef(argDerivativeVar); - llvm::SmallVector ArgDeclStmts{}; + llvm::SmallVector ArgResultDecls{}; + ArgResultDecls.push_back( + cast(cast(argDerivative)->getDecl())); - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, - noLoc, ClonedArgs, noLoc) - .get(); - - // Here we need to do: - // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); + llvm::SmallVector ArgDeclStmts{}; - llvm::SmallVector, 4> argResultsAndGrads; + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, + noLoc, ClonedArgs, noLoc) + .get(); + + // Here we need to do: + // visitedArg_1.getExpr_dx() = parallel_sum(visitedArg_0.getExpr_dx()); - VarDecl* gradVarDecl = nullptr; - Expr* gradVarExpr = nullptr; - IdentifierInfo* gradVarII = nullptr; + llvm::SmallVector, 4> argResultsAndGrads; - { - gradVarII = CreateUniqueIdentifier(funcPostfix()); + VarDecl* gradVarDecl = nullptr; + Expr* gradVarExpr = nullptr; + IdentifierInfo* gradVarII = nullptr; { - // Declare: diffArgType _grad; - Expr* initVal = nullptr; - if (!visitedArg_1.getExpr()->getType()->isRecordType()) { - // If the argument is not a class type, then initialize the grad - // variable with 0. - initVal = - ConstantFolder::synthesizeLiteral(visitedArg_1.getExpr()->getType(), m_Context, 0); + gradVarII = CreateUniqueIdentifier(funcPostfix()); + + { + // Declare: diffArgType _grad; + Expr* initVal = nullptr; + if (!visitedArg_1.getExpr()->getType()->isRecordType()) { + // If the argument is not a class type, then initialize the grad + // variable with 0. + initVal = + ConstantFolder::synthesizeLiteral(visitedArg_1.getExpr()->getType(), m_Context, 0); + } + gradVarDecl = BuildVarDecl(visitedArg_1.getExpr()->getType(), gradVarII, initVal); + // Pass the address of the declared variable + gradVarExpr = BuildDeclRef(gradVarDecl); + argResultsAndGrads.push_back({ArgResultDecls[0], gradVarExpr}); + ArgDeclStmts.push_back(BuildDeclStmt(gradVarDecl)); } - gradVarDecl = BuildVarDecl(visitedArg_1.getExpr()->getType(), gradVarII, initVal); - // Pass the address of the declared variable - gradVarExpr = BuildDeclRef(gradVarDecl); - argResultsAndGrads.push_back({ArgResultDecls[0], gradVarExpr}); - ArgDeclStmts.push_back(BuildDeclStmt(gradVarDecl)); } - } - ClonedDArgs.push_back(BuildDeclRef(gradVarDecl)); - ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); + ClonedDArgs.push_back(BuildDeclRef(gradVarDecl)); + ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); + Expr* dCall = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); - Expr* dCallZero = m_Sema - .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, - noLoc, ClonedDArgsZero, noLoc) - .get(); + Expr* dCallZero = m_Sema + .ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, + noLoc, ClonedDArgsZero, noLoc) + .get(); - auto& block = getCurrentBlock(direction::reverse); - std::size_t insertionPoint = getCurrentBlock(direction::reverse).size(); - auto it = std::begin(block) + insertionPoint; + auto& block = getCurrentBlock(direction::reverse); + std::size_t insertionPoint = getCurrentBlock(direction::reverse).size(); + auto it = std::begin(block) + insertionPoint; - // Insert the _gradX declaration statements - it = block.insert(it, ArgDeclStmts.begin(), ArgDeclStmts.end()); - it += ArgDeclStmts.size(); + // Insert the _gradX declaration statements + it = block.insert(it, ArgDeclStmts.begin(), ArgDeclStmts.end()); + it += ArgDeclStmts.size(); - it = block.insert(it, dCall); - it += 1; - it = block.insert(it, dCallZero); - it += 1; + it = block.insert(it, dCall); + it += 1; + it = block.insert(it, dCallZero); + it += 1; - it = block.insert(it, BuildDeclStmt(argDerivativeVar)); - - for (auto resAndGrad : argResultsAndGrads) { - VarDecl* argRes = resAndGrad.first; - Expr* grad = resAndGrad.second; - argRes->dump(); - grad->dump(); - PerformImplicitConversionAndAssign(argRes, grad); - } + it = block.insert(it, BuildDeclStmt(argDerivativeVar)); + + for (auto resAndGrad : argResultsAndGrads) { + VarDecl* argRes = resAndGrad.first; + Expr* grad = resAndGrad.second; + argRes->dump(); + grad->dump(); + PerformImplicitConversionAndAssign(argRes, grad); + } - Visit(CE->getArg(1), argDerivative); + Visit(CE->getArg(1), argDerivative); - return StmtDiff(Call); + return StmtDiff(Call); + } } } - } - if (FD->getQualifiedNameAsString().find("Kokkos::subview") != std::string::npos) { - - llvm::SmallVector ClonedArgs; - llvm::SmallVector ClonedDArgs; - for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { - auto visitedArg = Visit(CE->getArg(i)); - ClonedArgs.push_back(visitedArg.getExpr()); - if (i==0) - ClonedDArgs.push_back(visitedArg.getExpr()); - else - ClonedDArgs.push_back(visitedArg.getExpr()); - - std::cout << "Kokkos::subview visitedArg.getExpr()->dump() start with i = " << i << std::endl; - visitedArg.getExpr()->dump(); - std::cout << "Kokkos::subview visitedArg.getExpr()->dump() end with i = " << i << std::endl; - std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() start with i = " << i << std::endl; - visitedArg.getExpr_dx()->dump(); - std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() end with i = " << i << std::endl; - } + if (FD->getQualifiedNameAsString().find("Kokkos::subview") != std::string::npos) { + + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto visitedArg = Visit(CE->getArg(i)); + ClonedArgs.push_back(visitedArg.getExpr()); + if (i==0) + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + else + ClonedDArgs.push_back(visitedArg.getExpr()); + + std::cout << "Kokkos::subview visitedArg.getExpr()->dump() start with i = " << i << std::endl; + visitedArg.getExpr()->dump(); + std::cout << "Kokkos::subview visitedArg.getExpr()->dump() end with i = " << i << std::endl; + std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() start with i = " << i << std::endl; + visitedArg.getExpr_dx()->dump(); + std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() end with i = " << i << std::endl; + } - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedArgs, noLoc) - .get(); - Expr* dCall = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedDArgs, noLoc) - .get(); + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); - return StmtDiff(Call, dCall); - } - if (FD->getQualifiedNameAsString().find("Kokkos::parallel_for") != std::string::npos) { - llvm::SmallVector ClonedArgs; - llvm::SmallVector ClonedDArgs; - for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { - auto visitedArg = Visit(CE->getArg(i)); - ClonedArgs.push_back(visitedArg.getExpr()); - if (i==0) - ClonedDArgs.push_back(visitedArg.getExpr()); - else - ClonedDArgs.push_back(visitedArg.getExpr_dx()); + return StmtDiff(Call, dCall); } + if (FD->getQualifiedNameAsString().find("Kokkos::parallel_for") != std::string::npos) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto visitedArg = Visit(CE->getArg(i)); + ClonedArgs.push_back(visitedArg.getExpr()); + if (i==0) + ClonedDArgs.push_back(visitedArg.getExpr()); + else + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + } - Expr* Call = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedArgs, noLoc) - .get(); - Expr* dCall = m_Sema - .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), - noLoc, ClonedDArgs, noLoc) - .get(); + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); - return StmtDiff(Call, dCall); + return StmtDiff(Call, dCall); + } } } - } const FunctionDecl* FD = CE->getDirectCallee(); if (!FD) { @@ -2973,6 +2973,32 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, VDDerived = BuildVarDecl(VDDerivedType, "_d_" + VD->getNameAsString(), VDDerivedInit); } + else { + auto SE = VD->getInit()->IgnoreImpCasts(); + + if (auto CBTE = dyn_cast(SE)) { + auto tmp = Visit(CBTE); + // This is a subview + + VarDecl* VDClone = BuildVarDecl(VD->getType(), VD->getNameAsString(), + tmp.getExpr(), VD->isDirectInit()); + + VarDecl* VDDerived = BuildVarDecl(VD->getType(), "_d_" + VD->getNameAsString(), + tmp.getExpr_dx()); + + //VarDecl* VDDerived = BuildVarDecl(VD->getType()->getContainedAutoType(), "_d_" + VD->getNameAsString(), + // tmp.getExpr_dx()); + + + Expr* derivedVDE = BuildDeclRef(VDDerived); + m_Variables.emplace(VDClone, derivedVDE); + + return VarDeclDiff(VDClone, VDDerived); + } + else + assert(false && + "Not supported yet!"); + } // VDDerivedInit now serves two purposes -- as the initial derivative value // or the size of the derivative array -- depending on the primal type. @@ -4127,6 +4153,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (i == runTimeDim + 1) break; auto argDiff = Visit(arg, dfdx()); + + std::cout << "Start dump argDiff i = " << i << std::endl; + argDiff.getExpr()->dump(); + argDiff.getExpr_dx()->dump(); + std::cout << "End dump argDiff i = " << i << std::endl; + clonedArgs.push_back(argDiff.getExpr()); clonedDArgs.push_back(argDiff.getExpr_dx()); ++i; From ee88c2e2d594a989be88c33367015d2b28a7398f Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 20 Dec 2023 14:00:34 -0700 Subject: [PATCH 16/75] Update the isKokkosView logic --- lib/Differentiator/ReverseModeVisitor.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index f13f36620..16be6abbc 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1208,12 +1208,17 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Clone(NPE), Clone(NPE)); } + bool isKokkosView(const std::string constructedTypeName){ + return constructedTypeName.find("Kokkos::View") == 0 || constructedTypeName.find("class Kokkos::View") == 0; + //return constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("getRetValue(); QualType type = value->getType(); std::cout << "return type is " << type.getAsString() << std::endl; - if (type.getAsString().find("Kokkos::View") != std::string::npos) { + if (isKokkosView(type.getAsString())) { std::cout << "return value is a view!" << std::endl; } auto* dfdf = m_Pullback; @@ -1419,22 +1424,20 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } bool isKokkosView(const Expr* E) { - std::string constructedTypeName = QualType::getAsString(E->getType().split(), PrintingPolicy{ {} }); if (isa(E)) { auto SE = E->IgnoreImpCasts(); if (auto DRE = dyn_cast(SE)) { - std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); - return constructedTypeName.find("Kokkos::View") != std::string::npos; + return isKokkosView(QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} })); } } - return constructedTypeName.find("Kokkos::View") != std::string::npos; + return isKokkosView(QualType::getAsString(E->getType().split(), PrintingPolicy{ {} })); } StmtDiff ReverseModeVisitor::VisitCallExpr(const CallExpr* CE) { if (isa(CE)) { auto MCE = dyn_cast(CE); - if (MCE->getObjectType().getAsString().find("Kokkos::View") != std::string::npos) { + if (isKokkosView(MCE->getObjectType().getAsString())) { //std::cout << "Member function called from a Kokkos::View; nothing to do here" << std::endl; return StmtDiff(Clone(CE)); } @@ -1451,7 +1454,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (auto DRE = dyn_cast(SE)) { std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); //std::cout << constructedTypeName << std::endl; - if (constructedTypeName.find("Kokkos::View") != std::string::npos) { + if (isKokkosView(constructedTypeName)) { isKokkosViewAccess = true; kokkosViewName = DRE->getNameInfo().getName().getAsString (); } @@ -2932,7 +2935,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, std::string constructedTypeName = QualType::getAsString(VD->getType().split(), PrintingPolicy{ {} }); - if (constructedTypeName.find("Kokkos::View") != std::string::npos) { + if (isKokkosView(constructedTypeName)) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; @@ -4136,7 +4139,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector clonedArgs; llvm::SmallVector clonedDArgs; std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); - if (constructedTypeName.find("Kokkos::View") != std::string::npos) { + if (isKokkosView(constructedTypeName)) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; From 1118bbe9cb1417160a0654e3c0d69d4f4082637f Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 08:52:09 -0700 Subject: [PATCH 17/75] Use the fact that a Kokkos view is passed by reference --- include/clad/Differentiator/CladUtils.h | 6 +++ kokkos/functor_for.hpp | 25 ++++++++++++- kokkos/generated/Derivatives.cpp | 37 ++++++++++++++++++- lib/Differentiator/BaseForwardModeVisitor.cpp | 4 +- lib/Differentiator/CladUtils.cpp | 11 +++++- lib/Differentiator/ReverseModeVisitor.cpp | 24 +++++++----- lib/Differentiator/VisitorBase.cpp | 3 ++ 7 files changed, 95 insertions(+), 15 deletions(-) diff --git a/include/clad/Differentiator/CladUtils.h b/include/clad/Differentiator/CladUtils.h index 48c656276..18e63f82f 100644 --- a/include/clad/Differentiator/CladUtils.h +++ b/include/clad/Differentiator/CladUtils.h @@ -179,6 +179,12 @@ namespace clad { /// otherwise returns false. bool HasAnyReferenceOrPointerArgument(const clang::FunctionDecl* FD); + /// Returns true if `constructedTypeName` is a string describing Kokkos::View type. + bool IsKokkosView(const std::string constructedTypeName); + + /// Returns true if `T` is a Kokkos::View type. + bool IsKokkosView(clang::QualType T); + /// Returns true if `T` is a reference, pointer or array type. /// /// \note Please note that this function returns true for array types as diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 9c8b43538..3d515081a 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -46,6 +46,24 @@ double f2(double x, double y) { return x; } +template +KOKKOS_INLINE_FUNCTION +double f_view(ViewtypeA a) { + double sum; + auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); + + sum = a_row_0(0,0); + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + return 1e-6*sum*sum; +} + +template +KOKKOS_INLINE_FUNCTION +void f_view_2(ViewtypeA a, double tmp) { + Kokkos::deep_copy(a, tmp); +} + + KOKKOS_INLINE_FUNCTION double f(double x, double y) { @@ -60,6 +78,10 @@ double f(double x, double y) { const int i = 0; const int j = 0; + // These 2 lines do not work. Is it because nothing is returned by f_view_2? + //f_view_2(a, tmp); + //return f_view(a); + Kokkos::deep_copy(a, tmp); Kokkos::deep_copy(a, x); @@ -71,7 +93,6 @@ double f(double x, double y) { sum = a_row_0(0,0); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - //sum = a_row_0(0,0); - return sum; + return f_view(a); } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index f5bb5cf32..8214e7157 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -1,3 +1,33 @@ +inline void f_view_pullback(Kokkos::View a, double _d_y, clad::array_ref > _d_a) { + double _d_sum = 0; + Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); + double _t0; + double _t1; + double _t2; + double sum; + Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); + sum = a_row_0(0, 0); + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + _t1 = sum; + _t2 = 9.9999999999999995E-7 * _t1; + _t0 = sum; + goto _label0; + _label0: + { + double _r0 = _d_y * _t0; + double _r1 = _r0 * _t1; + double _r2 = 9.9999999999999995E-7 * _r0; + _d_sum += _r2; + double _r3 = _t2 * _d_y; + _d_sum += _r3; + } + kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); + { + double _r_d0 = _d_sum; + _d_a_row_0(0, 0) += _r_d0; + _d_sum -= _r_d0; + } +} inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; int _d_N2 = 0; @@ -13,6 +43,7 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double _t4; double _d_sum = 0; Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); + Kokkos::View _t5; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -33,9 +64,13 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); sum = a_row_0(0, 0); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + _t5 = a; goto _label0; _label0: - _d_sum += 1; + { + f_view_pullback(_t5, 1, &_d_a); + Kokkos::View _r5 = _d_a; + } kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); { double _r_d0 = _d_sum; diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index e663172f7..44e873d5b 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -974,7 +974,7 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { if (isa(CE)) { auto MCE = dyn_cast(CE); - if (MCE->getObjectType().getAsString().find("Kokkos::View") != std::string::npos) { + if (utils::IsKokkosView(MCE->getObjectType().getAsString())) { //std::cout << "Member function called from a Kokkos::View; nothing to do here" << std::endl; return StmtDiff(Clone(CE)); } @@ -991,7 +991,7 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { if (auto DRE = dyn_cast(SE)) { std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); std::cout << constructedTypeName << std::endl; - if (constructedTypeName.find("Kokkos::View") != std::string::npos) { + if (utils::IsKokkosView(constructedTypeName)) { isKokkosViewAccess = true; kokkosViewName = DRE->getNameInfo().getName().getAsString (); } diff --git a/lib/Differentiator/CladUtils.cpp b/lib/Differentiator/CladUtils.cpp index 5bcfdc16c..73f039fa1 100644 --- a/lib/Differentiator/CladUtils.cpp +++ b/lib/Differentiator/CladUtils.cpp @@ -317,8 +317,17 @@ namespace clad { return false; } + bool IsKokkosView(const std::string constructedTypeName){ + return constructedTypeName.find("Kokkos::View") == 0 || constructedTypeName.find("class Kokkos::View") == 0; + //return constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("isReferenceType() || isArrayOrPointerType(T); + return T->isReferenceType() || isArrayOrPointerType(T) || IsKokkosView(T); } bool SameCanonicalType(clang::QualType T1, clang::QualType T2) { diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 16be6abbc..5cb69757d 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1218,7 +1218,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, const Expr* value = RS->getRetValue(); QualType type = value->getType(); std::cout << "return type is " << type.getAsString() << std::endl; - if (isKokkosView(type.getAsString())) { + if (utils::IsKokkosView(type.getAsString())) { std::cout << "return value is a view!" << std::endl; } auto* dfdf = m_Pullback; @@ -1427,17 +1427,17 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (isa(E)) { auto SE = E->IgnoreImpCasts(); if (auto DRE = dyn_cast(SE)) { - return isKokkosView(QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} })); + return utils::IsKokkosView(QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} })); } } - return isKokkosView(QualType::getAsString(E->getType().split(), PrintingPolicy{ {} })); + return utils::IsKokkosView(QualType::getAsString(E->getType().split(), PrintingPolicy{ {} })); } StmtDiff ReverseModeVisitor::VisitCallExpr(const CallExpr* CE) { if (isa(CE)) { auto MCE = dyn_cast(CE); - if (isKokkosView(MCE->getObjectType().getAsString())) { + if (utils::IsKokkosView(MCE->getObjectType().getAsString())) { //std::cout << "Member function called from a Kokkos::View; nothing to do here" << std::endl; return StmtDiff(Clone(CE)); } @@ -1454,7 +1454,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (auto DRE = dyn_cast(SE)) { std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); //std::cout << constructedTypeName << std::endl; - if (isKokkosView(constructedTypeName)) { + if (utils::IsKokkosView(constructedTypeName)) { isKokkosViewAccess = true; kokkosViewName = DRE->getNameInfo().getName().getAsString (); } @@ -2015,8 +2015,14 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, !isa(FD)) { std::string customPushforward = FD->getNameAsString() + "_pushforward"; auto pushforwardCallArgs = DerivedCallArgs; - pushforwardCallArgs.push_back(ConstantFolder::synthesizeLiteral( - DerivedCallArgs.front()->getType(), m_Context, 1)); + if (utils::IsKokkosView(DerivedCallArgs.front()->getType())) { + // KL: Is it useful? + pushforwardCallArgs.push_back(DerivedCallArgs.front()); + } + else { + pushforwardCallArgs.push_back(ConstantFolder::synthesizeLiteral( + DerivedCallArgs.front()->getType(), m_Context, 1)); + } OverloadedDerivedFn = m_Builder.BuildCallToCustomDerivativeOrNumericalDiff( customPushforward, pushforwardCallArgs, getCurrentScope(), @@ -2935,7 +2941,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, std::string constructedTypeName = QualType::getAsString(VD->getType().split(), PrintingPolicy{ {} }); - if (isKokkosView(constructedTypeName)) { + if (utils::IsKokkosView(constructedTypeName)) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; @@ -4139,7 +4145,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector clonedArgs; llvm::SmallVector clonedDArgs; std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); - if (isKokkosView(constructedTypeName)) { + if (utils::IsKokkosView(constructedTypeName)) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; diff --git a/lib/Differentiator/VisitorBase.cpp b/lib/Differentiator/VisitorBase.cpp index eef3e2353..f2139bde3 100644 --- a/lib/Differentiator/VisitorBase.cpp +++ b/lib/Differentiator/VisitorBase.cpp @@ -688,6 +688,9 @@ namespace clad { } QualType VisitorBase::GetCladArrayRefOfType(clang::QualType T) { + //KL: needed ? + //if (utils::IsKokkosView(T)) + // return T; return InstantiateTemplate(GetCladArrayRefDecl(), {T}); } From 3fe31226f8cded9db87573bc7cd12cb0e097d5d6 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 09:16:46 -0700 Subject: [PATCH 18/75] Add a gradient of a 2D->double function --- kokkos/generated/Derivatives.cpp | 30 ++++++++++++++++++++++++++++++ kokkos/main.cpp | 9 +++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 8214e7157..61dc12e0d 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -108,3 +108,33 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array * _d_y += _d_tmp; } } +inline void f_view_grad(Kokkos::View a, clad::array_ref > _d_a) { + double _d_sum = 0; + Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); + double _t0; + double _t1; + double _t2; + double sum; + Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); + sum = a_row_0(0, 0); + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + _t1 = sum; + _t2 = 9.9999999999999995E-7 * _t1; + _t0 = sum; + goto _label0; + _label0: + { + double _r0 = 1 * _t0; + double _r1 = _r0 * _t1; + double _r2 = 9.9999999999999995E-7 * _r0; + _d_sum += _r2; + double _r3 = _t2 * 1; + _d_sum += _r3; + } + kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); + { + double _r_d0 = _d_sum; + _d_a_row_0(0, 0) += _r_d0; + _d_sum -= _r_d0; + } +} diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 0ee6a9a98..b3dd0f5c1 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -15,6 +15,7 @@ int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); { Kokkos::View A("A", 10, 10); + Kokkos::View dA("dA", 10, 10); Kokkos::View x("x", 10); Kokkos::View y("y", 10); @@ -26,6 +27,8 @@ int main(int argc, char* argv[]) { std::cout << weightedDotProduct_1(A, x, y) << std::endl; std::cout << weightedDotProduct_2(A, x, y) << std::endl; + std::cout << f_view(A) << std::endl; + double epsilon = 1e-6; double f_pe = f(3.+epsilon,4.); @@ -42,16 +45,18 @@ int main(int argc, char* argv[]) { #ifndef use_generated_file #ifdef use_forward_mode auto f_dx_exe = clad::differentiate(f, "x"); - #endif - auto f_grad_exe = clad::gradient(f); // Any of the two below will generate an "error: Attempted differentiation w.r.t. member 'x' which is not of real type." //auto weightedDotProduct_1_dx = clad::differentiate(weightedDotProduct_1, "x"); //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); + #endif + auto f_grad_exe = clad::gradient(f); + auto f_view_grad_exe = clad::gradient(f_view>); #ifdef use_forward_mode dx_f = f_dx_exe.execute(3.,4.); #endif // After this call, dx and dy will store the derivatives of x and y respectively. f_grad_exe.execute(3., 4., &dx, &dy); + f_view_grad_exe.execute(A, &dA); #else #ifdef use_forward_mode dx_f = f_darg0(3.,4.); From d6c4633ee4f6322ef60218efc17a869fda78cfa4 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 10:24:04 -0700 Subject: [PATCH 19/75] Use the gradient of the 2D-> double function to solve a simple optimization problem with a simple algorithm --- kokkos/main.cpp | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/kokkos/main.cpp b/kokkos/main.cpp index b3dd0f5c1..b1940da63 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -11,6 +11,58 @@ #include "generated/Derivatives.cpp" #endif +template +typename ViewtypeA::value_type solve(ViewtypeA A, typename ViewtypeA::value_type (*objective)(ViewtypeA), CladFunctionType gradient) { + ViewtypeA gradA("gradA", A.extent(0), A.extent(1)); + ViewtypeA tmp("tmp", A.extent(0), A.extent(1)); + + std::vector objective_history; + + int n_iterations = 10; + int n_line_search = 10; + + double epsilon_min = 0.; + double epsilon_tmp = 0.; + double epsilon_max = 4000.; + double epsilon_delta = (epsilon_max-epsilon_min)/n_line_search; + + typename ViewtypeA::value_type obj_min = objective(A); + + objective_history.push_back(obj_min); + + for (int i = 0; i < n_iterations; ++i) { + + gradient.execute(A, &gradA); + + epsilon_min = 0.; + + for (int j = 0; j < n_line_search; ++j) { + epsilon_tmp = epsilon_delta * (j+1); + Kokkos::parallel_for( A.extent(0), KOKKOS_LAMBDA ( int i) { + + for ( int j = 0; j < A.extent(1); ++j ) { + tmp( i, j ) = A( i, j ) - epsilon_tmp * gradA( i, j ); + } + }); + + typename ViewtypeA::value_type obj_tmp = objective(tmp); + + if ( obj_tmp < obj_min) { + obj_min = obj_tmp; + epsilon_min = epsilon_tmp; + } + } + + objective_history.push_back(obj_min); + } + + + for (int i = 0; i < n_iterations + 1; ++i) { + std::cout << "Objective value " << objective_history[i] << " iteration " << i << std::endl; + } + return obj_min; +} + int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); { @@ -57,6 +109,8 @@ int main(int argc, char* argv[]) { // After this call, dx and dy will store the derivatives of x and y respectively. f_grad_exe.execute(3., 4., &dx, &dy); f_view_grad_exe.execute(A, &dA); + + solve(A, &f_view, f_view_grad_exe); #else #ifdef use_forward_mode dx_f = f_darg0(3.,4.); From 814e46d96235a5731bbaf4e00be12bb2f06562dc Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 11:43:45 -0700 Subject: [PATCH 20/75] Reenable a forward test --- kokkos/functor_for.hpp | 3 +- kokkos/generated/Derivatives.cpp | 69 ++++++++++--------- kokkos/main.cpp | 2 +- lib/Differentiator/BaseForwardModeVisitor.cpp | 20 +----- 4 files changed, 42 insertions(+), 52 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 3d515081a..f6bc76046 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -92,7 +92,6 @@ double f(double x, double y) { auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); sum = a_row_0(0,0); - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - return f_view(a); + return sum*sum; } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 61dc12e0d..9ad95486b 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -1,32 +1,35 @@ -inline void f_view_pullback(Kokkos::View a, double _d_y, clad::array_ref > _d_a) { - double _d_sum = 0; - Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); - double _t0; - double _t1; - double _t2; +inline double f_darg0(double x, double y) { + double _d_x = 1; + double _d_y = 0; + const int _d_N1 = 0; + const int N1 = 4; + const int _d_N2 = 0; + const int N2 = 4; + Kokkos::View _d_a("_d_a", N1); + Kokkos::View a("a", N1); + Kokkos::View _d_b("_d_b", N1); + Kokkos::View b("b", N1); + double _d_tmp = _d_x * x + x * _d_x + _d_y; + double tmp = x * x + y; + const int _d_i = 0; + const int i = 0; + const int _d_j = 0; + const int j = 0; + Kokkos::deep_copy(_d_a, _d_tmp); + Kokkos::deep_copy(a, tmp); + Kokkos::deep_copy(_d_a, _d_x); + Kokkos::deep_copy(a, x); + Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y); + Kokkos::deep_copy(b, x * x + y); + Kokkos::deep_copy(_d_a, _d_b); + Kokkos::deep_copy(a, b); + double _d_sum; double sum; + Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); + _d_sum = _d_a_row_0(0, 0); sum = a_row_0(0, 0); - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - _t1 = sum; - _t2 = 9.9999999999999995E-7 * _t1; - _t0 = sum; - goto _label0; - _label0: - { - double _r0 = _d_y * _t0; - double _r1 = _r0 * _t1; - double _r2 = 9.9999999999999995E-7 * _r0; - _d_sum += _r2; - double _r3 = _t2 * _d_y; - _d_sum += _r3; - } - kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); - { - double _r_d0 = _d_sum; - _d_a_row_0(0, 0) += _r_d0; - _d_sum -= _r_d0; - } + return _d_sum * sum + sum * _d_sum; } inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; @@ -43,7 +46,8 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double _t4; double _d_sum = 0; Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); - Kokkos::View _t5; + double _t5; + double _t6; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -63,15 +67,16 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array double sum; Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); sum = a_row_0(0, 0); - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - _t5 = a; + _t6 = sum; + _t5 = sum; goto _label0; _label0: { - f_view_pullback(_t5, 1, &_d_a); - Kokkos::View _r5 = _d_a; + double _r5 = 1 * _t5; + _d_sum += _r5; + double _r6 = _t6 * 1; + _d_sum += _r6; } - kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); { double _r_d0 = _d_sum; _d_a_row_0(0, 0) += _r_d0; diff --git a/kokkos/main.cpp b/kokkos/main.cpp index b1940da63..7e8971568 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -5,7 +5,7 @@ #include "lambda_reduction_subview.hpp" //#define use_generated_file -//#define use_forward_mode +#define use_forward_mode #ifdef use_generated_file #include "generated/Derivatives.cpp" diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index 44e873d5b..b30e4c0f0 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -1041,27 +1041,13 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); - NamespaceDecl* DC = utils::LookupNSD(m_Sema, "Kokkos", /*shouldExist=*/true); - - CXXScopeSpec SS; - - utils::BuildNNS(m_Sema, DC, SS); - IdentifierInfo* II = &m_Context.Idents.get("deep_copy"); - - DeclarationName name(II); - DeclarationNameInfo DNInfo(name, utils::GetValidSLoc(m_Sema)); - - LookupResult R(m_Sema, DNInfo, Sema::LookupOrdinaryName); - m_Sema.LookupQualifiedName(R, DC); - - Expr* UnresolvedLookup = - m_Sema.BuildDeclarationNameExpr(SS, R, /*ADL*/ false).get(); + Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); Expr* Call = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedArgs, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedArgs, noLoc).get(); Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), UnresolvedLookup, noLoc, ClonedDArgs, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgs, noLoc).get(); return StmtDiff(Call, dCall); } From aef1422f771ccb84fe008646fdab0676afadf620 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 11:58:00 -0700 Subject: [PATCH 21/75] Clean files --- lib/Differentiator/BaseForwardModeVisitor.cpp | 5 -- lib/Differentiator/ReverseModeVisitor.cpp | 48 +++---------------- 2 files changed, 6 insertions(+), 47 deletions(-) diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index b30e4c0f0..3ed80ca75 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -2151,19 +2151,14 @@ StmtDiff BaseForwardModeVisitor::VisitCXXBindTemporaryExpr( StmtDiff BaseForwardModeVisitor::VisitValueStmt( const clang::ValueStmt* VS) { - // This is most likely a name provided in a Kokkos::view construction - VS->dump (); // Test if StringLiteral if (isa(VS)) { - std::cout << "This is a StringLiteral!" << std::endl; auto SL = dyn_cast(VS); std::string name_str("_d_"+ SL->getString().str()); StringRef name(name_str); Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); - VS->dump (); - derivedVS->dump (); return {Clone(VS), derivedVS}; } return {Clone(VS), Clone(VS)}; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 5cb69757d..5133fad12 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -750,37 +750,22 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Clone(S)); } - StmtDiff ReverseModeVisitor::VisitValueStmt( - const clang::ValueStmt* VS) { - // This is most likely a name provided in a Kokkos::view construction - //std::cout << "VisitValueStmt VS->dump start" << std::endl; - //VS->dump (); - //std::cout << "VisitValueStmt VS->dump end" << std::endl; + StmtDiff ReverseModeVisitor::VisitValueStmt(const clang::ValueStmt* VS) { // Test if StringLiteral if (isa(VS)) { - //std::cout << "This is a StringLiteral!" << std::endl; auto SL = dyn_cast(VS); std::string name_str("_d_"+ SL->getString().str()); StringRef name(name_str); Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); - //VS->dump (); - //derivedVS->dump (); + return {Clone(VS), derivedVS}; } if (isa(VS)) { - //std::cout << "This is probably a view!" << std::endl; auto CBTE = dyn_cast(VS); - auto tmp = Visit(CBTE->getSubExpr()); - - //std::cout << "Start dump probably a view!" << std::endl; - //tmp.getExpr()->dump(); - //tmp.getExpr_dx()->dump(); - //std::cout << "End dump probably a view!" << std::endl; - - return tmp; + return Visit(CBTE->getSubExpr()); } return {Clone(VS), Clone(VS)}; } @@ -1217,21 +1202,18 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, // Initially, df/df = 1. const Expr* value = RS->getRetValue(); QualType type = value->getType(); - std::cout << "return type is " << type.getAsString() << std::endl; + if (utils::IsKokkosView(type.getAsString())) { std::cout << "return value is a view!" << std::endl; } auto* dfdf = m_Pullback; if (isa(dfdf) || isa(dfdf)) { - std::cout << "isa(dfdf) || isa(dfdf) is true" << std::endl; ExprResult tmp = dfdf; dfdf = m_Sema .ImpCastExprToType(tmp.get(), type, m_Sema.PrepareScalarCast(tmp, type)) .get(); } - else - std::cout << "isa(dfdf) || isa(dfdf) is false" << std::endl; auto ReturnResult = DifferentiateSingleExpr(value, dfdf); StmtDiff ReturnDiff = ReturnResult.first; StmtDiff ExprDiff = ReturnResult.second; @@ -1438,7 +1420,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, auto MCE = dyn_cast(CE); if (utils::IsKokkosView(MCE->getObjectType().getAsString())) { - //std::cout << "Member function called from a Kokkos::View; nothing to do here" << std::endl; + // Member function called from a Kokkos::View; nothing to do here return StmtDiff(Clone(CE)); } } @@ -1452,9 +1434,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (isa(baseOriginalE)) { auto SE = baseOriginalE->IgnoreImpCasts(); if (auto DRE = dyn_cast(SE)) { - std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); - //std::cout << constructedTypeName << std::endl; - if (utils::IsKokkosView(constructedTypeName)) { + if (utils::IsKokkosView(DRE->getType())) { isKokkosViewAccess = true; kokkosViewName = DRE->getNameInfo().getName().getAsString (); } @@ -1487,7 +1467,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, Expr* add_assign = BuildOp(BO_AddAssign, dCall, dfdx()); addToCurrentBlock(add_assign, direction::reverse); } - //std::cout << " kokkosViewName = " << kokkosViewName << std::endl; return StmtDiff(Call, dCall); } } @@ -1507,7 +1486,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, ClonedDArgs.push_back(visitedArg_1.getExpr_dx()); ClonedDArgs.push_back(visitedArg_0.getExpr_dx()); - //Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); Expr* kokkos_builtin_derivative_parallel_sum = utils::GetUnresolvedLookup(m_Sema, m_Context, "kokkos_builtin_derivative", "parallel_sum"); Expr* Call = @@ -1666,8 +1644,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, for (auto resAndGrad : argResultsAndGrads) { VarDecl* argRes = resAndGrad.first; Expr* grad = resAndGrad.second; - argRes->dump(); - grad->dump(); PerformImplicitConversionAndAssign(argRes, grad); } @@ -1688,13 +1664,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, ClonedDArgs.push_back(visitedArg.getExpr_dx()); else ClonedDArgs.push_back(visitedArg.getExpr()); - - std::cout << "Kokkos::subview visitedArg.getExpr()->dump() start with i = " << i << std::endl; - visitedArg.getExpr()->dump(); - std::cout << "Kokkos::subview visitedArg.getExpr()->dump() end with i = " << i << std::endl; - std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() start with i = " << i << std::endl; - visitedArg.getExpr_dx()->dump(); - std::cout << "Kokkos::subview visitedArg.getExpr_dx()->dump() end with i = " << i << std::endl; } Expr* Call = m_Sema @@ -4163,11 +4132,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, break; auto argDiff = Visit(arg, dfdx()); - std::cout << "Start dump argDiff i = " << i << std::endl; - argDiff.getExpr()->dump(); - argDiff.getExpr_dx()->dump(); - std::cout << "End dump argDiff i = " << i << std::endl; - clonedArgs.push_back(argDiff.getExpr()); clonedDArgs.push_back(argDiff.getExpr_dx()); ++i; From 230baba7a0b4b7453d42920b35e511c50a045c38 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 14:53:30 -0700 Subject: [PATCH 22/75] Add a python script to postprocess the generated Derivative.cpp --- kokkos/generated/Derivatives.hpp | 146 +++++++++++++++++++++++++++++++ kokkos/main.cpp | 2 +- kokkos/postProcess.py | 143 ++++++++++++++++++++++++++++++ 3 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 kokkos/generated/Derivatives.hpp create mode 100644 kokkos/postProcess.py diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp new file mode 100644 index 000000000..f0d6f7167 --- /dev/null +++ b/kokkos/generated/Derivatives.hpp @@ -0,0 +1,146 @@ +KOKKOS_INLINE_FUNCTION double f_darg0(double x, double y) { + double _d_x = 1; + double _d_y = 0; + const int _d_N1 = 0; + const int N1 = 4; + const int _d_N2 = 0; + const int N2 = 4; + Kokkos::View _d_a("_d_a", N1); + Kokkos::View a("a", N1); + Kokkos::View _d_b("_d_b", N1); + Kokkos::View b("b", N1); + double _d_tmp = _d_x * x + x * _d_x + _d_y; + double tmp = x * x + y; + const int _d_i = 0; + const int i = 0; + const int _d_j = 0; + const int j = 0; + Kokkos::deep_copy(_d_a, _d_tmp); + Kokkos::deep_copy(a, tmp); + Kokkos::deep_copy(_d_a, _d_x); + Kokkos::deep_copy(a, x); + Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y); + Kokkos::deep_copy(b, x * x + y); + Kokkos::deep_copy(_d_a, _d_b); + Kokkos::deep_copy(a, b); + double _d_sum; + double sum; + auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); + auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); + _d_sum = _d_a_row_0(0, 0); + sum = a_row_0(0, 0); + return _d_sum * sum + sum * _d_sum; +} +KOKKOS_INLINE_FUNCTION void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { + const int N1 = 4; + int _d_N1 = 0; + int _d_N2 = 0; + Kokkos::View _d_a("_d_a", N1); + Kokkos::View _d_b("_d_b", N1); + double _t0; + double _t1; + double _d_tmp = 0; + int _d_i = 0; + int _d_j = 0; + double _t2; + double _t3; + double _t4; + double _d_sum = 0; + auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); + double _t5; + double _t6; + const int N2 = 4; + Kokkos::View a("a", N1); + Kokkos::View b("b", N1); + _t1 = x; + _t0 = x; + double tmp = _t1 * _t0 + y; + const int i = 0; + const int j = 0; + Kokkos::deep_copy(a, tmp); + Kokkos::deep_copy(a, x); + _t2 = x; + _t4 = x; + _t3 = x; + Kokkos::deep_copy(b, x * _t2 + y); + Kokkos::deep_copy(a, b); + double sum; + auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); + sum = a_row_0(0, 0); + _t6 = sum; + _t5 = sum; + goto _label0; + _label0: + { + double _r5 = 1 * _t5; + _d_sum += _r5; + double _r6 = _t6 * 1; + _d_sum += _r6; + } + { + double _r_d0 = _d_sum; + _d_a_row_0(0, 0) += _r_d0; + _d_sum -= _r_d0; + } + { + Kokkos::deep_copy(_d_b, _d_a); + Kokkos::deep_copy(_d_a, 0.); + } + { + double _grad0 = 0.; + kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); + Kokkos::deep_copy(_d_b, 0.); + double _r2 = _grad0; + double _r3 = _r2 * _t3; + * _d_x += _r3; + double _r4 = _t4 * _r2; + * _d_x += _r4; + * _d_y += _r2; + } + { + kokkos_builtin_derivative::parallel_sum(* _d_x, _d_a); + Kokkos::deep_copy(_d_a, 0.); + } + { + kokkos_builtin_derivative::parallel_sum(_d_tmp, _d_a); + Kokkos::deep_copy(_d_a, 0.); + } + { + double _r0 = _d_tmp * _t0; + * _d_x += _r0; + double _r1 = _t1 * _d_tmp; + * _d_x += _r1; + * _d_y += _d_tmp; + } +} +template +KOKKOS_INLINE_FUNCTION void f_view_grad(type_a a, clad::array_ref _d_a) { + double _d_sum = 0; + auto _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), Kokkos::ALL); + double _t0; + double _t1; + double _t2; + double sum; + auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); + sum = a_row_0(0, 0); + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + _t1 = sum; + _t2 = 9.9999999999999995E-7 * _t1; + _t0 = sum; + goto _label0; + _label0: + { + double _r0 = 1 * _t0; + double _r1 = _r0 * _t1; + double _r2 = 9.9999999999999995E-7 * _r0; + _d_sum += _r2; + double _r3 = _t2 * 1; + _d_sum += _r3; + } + kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); + { + double _r_d0 = _d_sum; + _d_a_row_0(0, 0) += _r_d0; + _d_sum -= _r_d0; + } +} diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 7e8971568..a91faa9a5 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -8,7 +8,7 @@ #define use_forward_mode #ifdef use_generated_file -#include "generated/Derivatives.cpp" +#include "generated/Derivatives.hpp" #endif template diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py new file mode 100644 index 000000000..d79bcefc1 --- /dev/null +++ b/kokkos/postProcess.py @@ -0,0 +1,143 @@ +import getopt, sys + +def replaceKokkosInlineFunction(stringIn): + string_old = 'inline' + string_new = 'KOKKOS_INLINE_FUNCTION' + return stringIn.replace(string_old, string_new) + +def useAutoInSubview(stringIn): + if stringIn.find('Kokkos::subview') == -1: + return stringIn + index_eq = stringIn.find('=') - 1 + for index in range(1, index_eq): + if stringIn[index_eq-index] == ' ': + index1 = index_eq-index + break + index0 = 0 + for index in range(0, index_eq): + if stringIn[index] != ' ': + index0 = index + break + stringOut = stringIn[0:index0] + 'auto ' + stringIn[index1+1:-1] + stringIn[-1] + return stringOut + +def useKokkosNamespace(stringIn): + return stringIn.replace(' ALL', ' Kokkos::ALL').replace('': + bracket_lvl += 1 + if linesIn[index0][index_end-index] == '<': + bracket_lvl -= 1 + return linesIn[index0][index_begin:index_end] + + +def swapTypeForTemplate(linesIn, fucntionName, variableName, index0=-1, index1=-1): + if index0 == -1 or index1 == -1: + index0, index1 = getFunctionLineIDs(linesIn, fucntionName) + typeVar = getType(linesIn, fucntionName, variableName) + template = 'type_' + variableName + linesIn[index0] = 'template \n' + linesIn[index0] + + for index in range(index0, index1): + linesIn[index] = linesIn[index].replace(typeVar, template) + + +def transform(filenameIn, filenameOut): + + fileIn = open(filenameIn, "r") + linesIn = fileIn.readlines() + fileOut = open(filenameOut, "w") + + swapLinesForVariableDecl(linesIn, 'f_grad', 'N1') + + for i in range(0, len(linesIn)): + linesIn[i] = replaceKokkosInlineFunction(linesIn[i]) + linesIn[i] = useAutoInSubview(linesIn[i]) + linesIn[i] = useKokkosNamespace(linesIn[i]) + + swapTypeForTemplate(linesIn, 'f_view_grad', 'a') + + for line in linesIn: + fileOut.write(line) + fileIn.close() + fileOut.close() + +argumentList = sys.argv[1:] + +options = "hi:o:" + +long_options = ["help", "filenameIn=", "filenameOut="] + + +filenameIn = '' +filenameOut = '' + +try: + arguments, values = getopt.getopt(argumentList, options, long_options) + + for currentArgument, currentValue in arguments: + + if currentArgument in ("-h", "--help"): + print ("Displaying Help") + + elif currentArgument in ("-i", "--filenameIn"): + filenameIn = currentValue + + elif currentArgument in ("-o", "--filenameOut"): + filenameOut = currentValue + +except getopt.error as err: + # output error, and return with an error code + print (str(err)) + +if filenameIn != '' and filenameOut != '' : + transform(filenameIn, filenameOut) +else: + print("Missing arguments") From f5b2f4be043ec03b125a2a7dfcd1f38f0655af39 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 19:45:51 -0700 Subject: [PATCH 23/75] update the gradient descent test --- kokkos/main.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kokkos/main.cpp b/kokkos/main.cpp index a91faa9a5..adecbfaee 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -23,7 +23,7 @@ typename ViewtypeA::value_type solve(ViewtypeA A, typename ViewtypeA::value_type double epsilon_min = 0.; double epsilon_tmp = 0.; - double epsilon_max = 4000.; + double epsilon_max = 1000.; double epsilon_delta = (epsilon_max-epsilon_min)/n_line_search; typename ViewtypeA::value_type obj_min = objective(A); @@ -53,6 +53,13 @@ typename ViewtypeA::value_type solve(ViewtypeA A, typename ViewtypeA::value_type } } + Kokkos::parallel_for( A.extent(0), KOKKOS_LAMBDA ( int i) { + + for ( int j = 0; j < A.extent(1); ++j ) { + A( i, j ) -= epsilon_min * gradA( i, j ); + } + }); + objective_history.push_back(obj_min); } From 09956bcc1dda267146a65248294f0ad4241bc1ad Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 21 Dec 2023 21:22:42 -0700 Subject: [PATCH 24/75] add timers --- kokkos/main.cpp | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/kokkos/main.cpp b/kokkos/main.cpp index adecbfaee..a9e479237 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -3,6 +3,7 @@ #include "functor_for.hpp" #include "lambda_reduction.hpp" #include "lambda_reduction_subview.hpp" +#include //#define use_generated_file #define use_forward_mode @@ -73,10 +74,11 @@ typename ViewtypeA::value_type solve(ViewtypeA A, typename ViewtypeA::value_type int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); { - Kokkos::View A("A", 10, 10); - Kokkos::View dA("dA", 10, 10); - Kokkos::View x("x", 10); - Kokkos::View y("y", 10); + constexpr int N = 100; + Kokkos::View A("A", N, N); + Kokkos::View dA("dA", N, N); + Kokkos::View x("x", N); + Kokkos::View y("y", N); Kokkos::deep_copy(A, 3); Kokkos::deep_copy(x, 2); @@ -86,7 +88,10 @@ int main(int argc, char* argv[]) { std::cout << weightedDotProduct_1(A, x, y) << std::endl; std::cout << weightedDotProduct_2(A, x, y) << std::endl; - std::cout << f_view(A) << std::endl; + auto t0_f_view = std::chrono::high_resolution_clock::now(); + double obj = f_view(A); + auto t1_f_view = std::chrono::high_resolution_clock::now(); + std::cout << obj << std::endl; double epsilon = 1e-6; @@ -115,7 +120,10 @@ int main(int argc, char* argv[]) { #endif // After this call, dx and dy will store the derivatives of x and y respectively. f_grad_exe.execute(3., 4., &dx, &dy); + + auto t0_f_view_grad = std::chrono::high_resolution_clock::now(); f_view_grad_exe.execute(A, &dA); + auto t1_f_view_grad = std::chrono::high_resolution_clock::now(); solve(A, &f_view, f_view_grad_exe); #else @@ -123,8 +131,18 @@ int main(int argc, char* argv[]) { dx_f = f_darg0(3.,4.); #endif f_grad(3., 4., &dx, &dy); + + auto t0_f_view_grad = std::chrono::high_resolution_clock::now(); + f_view_grad>(A, &dA); + auto t1_f_view_grad = std::chrono::high_resolution_clock::now(); #endif + double time_f_view = (t1_f_view-t0_f_view).count()*1E-9 ; + double time_f_view_grad = (t1_f_view_grad-t0_f_view_grad).count()*1E-9 ; + std::cout << " f_view took "<< time_f_view <<" second(s)."<< std::endl; + std::cout << " f_view_grad took "<< time_f_view_grad <<" second(s)."<< std::endl; + std::cout << " f_view_grad took "<< time_f_view_grad/time_f_view <<" the wall-clock time of f_view."<< std::endl; + #ifdef use_forward_mode std::cout << "dx: " << dx_f << std::endl; #endif From b7f2d7128ef4f149247ce3ae835bb9be9442d5b0 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 2 Jan 2024 10:02:24 -0700 Subject: [PATCH 25/75] Modify the test to be run on GPU and generalize the parallel_sum functions --- kokkos/functor_for.hpp | 85 ++++++++++++++++++++++++++------ kokkos/generated/Derivatives.cpp | 12 ++--- kokkos/generated/Derivatives.hpp | 14 ++---- kokkos/postProcess.py | 19 ++++++- 4 files changed, 96 insertions(+), 34 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index f6bc76046..d4c72d8c2 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -5,11 +5,41 @@ template void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { double tmp_sum = sum; sum = 0.; - //to be updated to be rank independent - Kokkos::parallel_reduce( A.extent(0), KOKKOS_LAMBDA ( int i, typename ViewtypeA::value_type &update ) { - - for ( int j = 0; j < A.extent(1); ++j ) { - update += A( i, j ); + + const int s0 = A.stride(0); + const int s1 = A.stride(1); + const int s2 = A.stride(2); + const int s3 = A.stride(3); + const int s4 = A.stride(4); + const int s5 = A.stride(5); + const int s6 = A.stride(6); + const int s7 = A.stride(7); + + const int e0 = A.extent_int(0); + const int e1 = A.extent_int(1); + const int e2 = A.extent_int(2); + const int e3 = A.extent_int(3); + const int e4 = A.extent_int(4); + const int e5 = A.extent_int(5); + const int e6 = A.extent_int(6); + const int e7 = A.extent_int(7); + + Kokkos::Array begins = {0, 0, 0, 0, 0, 0}; + Kokkos::Array ends = {e0, e1, e2, e3, e4, e5}; + + Kokkos::parallel_reduce(Kokkos::MDRangePolicy< Kokkos::Rank<6> > (begins, ends), + KOKKOS_LAMBDA (const int i0, + const int i1, + const int i2, + const int i3, + const int i4, + const int i5, + typename ViewtypeA::value_type& update) { + const int offset = i0*s0 + i1*s1 + i2*s2 + i3*s3 + i4*s4 + i5*s5; + for ( int i6 = 0; i6 < e6; ++i6 ) { + for ( int i7 = 0; i7 < e7; ++i7 ) { + update += A.data()[offset + i6*s6 + i7*s7]; + } } }, sum ); sum += tmp_sum; @@ -18,10 +48,39 @@ void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { template void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { - Kokkos::parallel_for( A.extent(0), KOKKOS_LAMBDA ( int i) { - - for ( int j = 0; j < A.extent(1); ++j ) { - A( i, j ) += b; + const int s0 = A.stride(0); + const int s1 = A.stride(1); + const int s2 = A.stride(2); + const int s3 = A.stride(3); + const int s4 = A.stride(4); + const int s5 = A.stride(5); + const int s6 = A.stride(6); + const int s7 = A.stride(7); + + const int e0 = A.extent_int(0); + const int e1 = A.extent_int(1); + const int e2 = A.extent_int(2); + const int e3 = A.extent_int(3); + const int e4 = A.extent_int(4); + const int e5 = A.extent_int(5); + const int e6 = A.extent_int(6); + const int e7 = A.extent_int(7); + + Kokkos::Array begins = {0, 0, 0, 0, 0, 0}; + Kokkos::Array ends = {e0, e1, e2, e3, e4, e5}; + + Kokkos::parallel_for(Kokkos::MDRangePolicy< Kokkos::Rank<6> > (begins, ends), + KOKKOS_LAMBDA (const int i0, + const int i1, + const int i2, + const int i3, + const int i4, + const int i5) { + const int offset = i0*s0 + i1*s1 + i2*s2 + i3*s3 + i4*s4 + i5*s5; + for ( int i6 = 0; i6 < e6; ++i6 ) { + for ( int i7 = 0; i7 < e7; ++i7 ) { + A.data()[offset + i6*s6 + i7*s7] += b; + } } }); } @@ -47,24 +106,22 @@ double f2(double x, double y) { } template -KOKKOS_INLINE_FUNCTION double f_view(ViewtypeA a) { double sum; auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); - - sum = a_row_0(0,0); + //auto h_a_row_0 = Kokkos::create_mirror_view(a_row_0); //create_mirror_view_and_copy + //Kokkos::deep_copy(h_a_row_0, a_row_0); + //sum = h_a_row_0(0,0); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); return 1e-6*sum*sum; } template -KOKKOS_INLINE_FUNCTION void f_view_2(ViewtypeA a, double tmp) { Kokkos::deep_copy(a, tmp); } -KOKKOS_INLINE_FUNCTION double f(double x, double y) { const int N1 = 4; diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 9ad95486b..83e678739 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -1,4 +1,4 @@ -inline double f_darg0(double x, double y) { +double f_darg0(double x, double y) { double _d_x = 1; double _d_y = 0; const int _d_N1 = 0; @@ -31,7 +31,7 @@ inline double f_darg0(double x, double y) { sum = a_row_0(0, 0); return _d_sum * sum + sum * _d_sum; } -inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { +void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; int _d_N2 = 0; Kokkos::View _d_a("_d_a", N1); @@ -113,7 +113,7 @@ inline void f_grad(double x, double y, clad::array_ref _d_x, clad::array * _d_y += _d_tmp; } } -inline void f_view_grad(Kokkos::View a, clad::array_ref > _d_a) { +void f_view_grad(Kokkos::View a, clad::array_ref > _d_a) { double _d_sum = 0; Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); double _t0; @@ -121,7 +121,6 @@ inline void f_view_grad(Kokkos::View a, clad::array_ref, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - sum = a_row_0(0, 0); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); _t1 = sum; _t2 = 9.9999999999999995E-7 * _t1; @@ -137,9 +136,4 @@ inline void f_view_grad(Kokkos::View a, clad::array_ref _d_x, clad::array_ref _d_y) { +void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { const int N1 = 4; int _d_N1 = 0; int _d_N2 = 0; @@ -114,15 +114,14 @@ KOKKOS_INLINE_FUNCTION void f_grad(double x, double y, clad::array_ref _ } } template -KOKKOS_INLINE_FUNCTION void f_view_grad(type_a a, clad::array_ref _d_a) { +void f_view_grad(type_a a, type_a _d_a) { double _d_sum = 0; - auto _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), Kokkos::ALL); + auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); double _t0; double _t1; double _t2; double sum; auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); - sum = a_row_0(0, 0); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); _t1 = sum; _t2 = 9.9999999999999995E-7 * _t1; @@ -138,9 +137,4 @@ KOKKOS_INLINE_FUNCTION void f_view_grad(type_a a, clad::array_ref _d_a) _d_sum += _r3; } kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); - { - double _r_d0 = _d_sum; - _d_a_row_0(0, 0) += _r_d0; - _d_sum -= _r_d0; - } } diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py index d79bcefc1..7a5e2d7f6 100644 --- a/kokkos/postProcess.py +++ b/kokkos/postProcess.py @@ -6,7 +6,7 @@ def replaceKokkosInlineFunction(stringIn): return stringIn.replace(string_old, string_new) def useAutoInSubview(stringIn): - if stringIn.find('Kokkos::subview') == -1: + if stringIn.find('Kokkos::subview') == -1 and stringIn.find('Kokkos::create_mirror_view') == -1: return stringIn index_eq = stringIn.find('=') - 1 for index in range(1, index_eq): @@ -88,6 +88,23 @@ def swapTypeForTemplate(linesIn, fucntionName, variableName, index0=-1, index1=- for index in range(index0, index1): linesIn[index] = linesIn[index].replace(typeVar, template) + # Get the _d_ names and replace the clad::array_ref by Kokkos::view directly. + derivativeVarNames = [] + while linesIn[index0].find('clad::array_ref<' + template + ' >') != -1: + indexVarName0 = linesIn[index0].find('clad::array_ref<' + template + ' >') + len('clad::array_ref<' + template + ' >') + 1 + for indexVarName in range(indexVarName0, len(linesIn[index0])): + if linesIn[index0][indexVarName] == ',': + indexVarName1 = indexVarName + break + if linesIn[index0][indexVarName] == ')': + indexVarName1 = indexVarName + break + derivativeVarNames.append(linesIn[index0][indexVarName0:indexVarName1]) + linesIn[index0] = linesIn[index0].replace('clad::array_ref<' + template + ' >', template) + for index in range(index0, index1): + for derivativeVarName in derivativeVarNames: + linesIn[index] = linesIn[index].replace('(* ' + derivativeVarName + ')', derivativeVarName) + def transform(filenameIn, filenameOut): From 09d3dec6428a93a3fe5895f9e7b650b4c40d90c7 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 2 Jan 2024 10:31:35 -0700 Subject: [PATCH 26/75] Use more template type member to deduce some of the temporary types --- kokkos/functor_for.hpp | 6 +++--- kokkos/generated/Derivatives.cpp | 8 ++++---- kokkos/generated/Derivatives.hpp | 8 ++++---- kokkos/postProcess.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index d4c72d8c2..bda499ccc 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -3,7 +3,7 @@ namespace kokkos_builtin_derivative { template void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { - double tmp_sum = sum; + typename ViewtypeA::value_type tmp_sum = sum; sum = 0.; const int s0 = A.stride(0); @@ -106,8 +106,8 @@ double f2(double x, double y) { } template -double f_view(ViewtypeA a) { - double sum; +typename ViewtypeA::value_type f_view(ViewtypeA a) { + typename ViewtypeA::value_type sum; auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); //auto h_a_row_0 = Kokkos::create_mirror_view(a_row_0); //create_mirror_view_and_copy //Kokkos::deep_copy(h_a_row_0, a_row_0); diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 83e678739..2bdfd1ca9 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -114,12 +114,12 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref a, clad::array_ref > _d_a) { - double _d_sum = 0; + typename View::value_type _d_sum = 0; Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); - double _t0; - double _t1; + typename View::value_type _t0; + typename View::value_type _t1; double _t2; - double sum; + typename View::value_type sum; Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); _t1 = sum; diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index 86b61549f..bf33251bf 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -115,12 +115,12 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref void f_view_grad(type_a a, type_a _d_a) { - double _d_sum = 0; + typename type_a::value_type _d_sum = 0; auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); - double _t0; - double _t1; + typename type_a::value_type _t0; + typename type_a::value_type _t1; double _t2; - double sum; + typename type_a::value_type sum; auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); _t1 = sum; diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py index 7a5e2d7f6..0ce0ddf72 100644 --- a/kokkos/postProcess.py +++ b/kokkos/postProcess.py @@ -22,7 +22,7 @@ def useAutoInSubview(stringIn): return stringOut def useKokkosNamespace(stringIn): - return stringIn.replace(' ALL', ' Kokkos::ALL').replace(' Date: Tue, 2 Jan 2024 11:33:39 -0700 Subject: [PATCH 27/75] Add create_mirror_view --- kokkos/functor_for.hpp | 2 ++ kokkos/postProcess.py | 2 +- lib/Differentiator/ReverseModeVisitor.cpp | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index bda499ccc..496278039 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -112,6 +112,8 @@ typename ViewtypeA::value_type f_view(ViewtypeA a) { //auto h_a_row_0 = Kokkos::create_mirror_view(a_row_0); //create_mirror_view_and_copy //Kokkos::deep_copy(h_a_row_0, a_row_0); //sum = h_a_row_0(0,0); + //kokkos_builtin_derivative::parallel_sum(sum, Kokkos::subview(a_row_0,0,0)); + //sum = 10 * sum * sum * sum; kokkos_builtin_derivative::parallel_sum(sum, a_row_0); return 1e-6*sum*sum; } diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py index 0ce0ddf72..2fc2f5b02 100644 --- a/kokkos/postProcess.py +++ b/kokkos/postProcess.py @@ -6,7 +6,7 @@ def replaceKokkosInlineFunction(stringIn): return stringIn.replace(string_old, string_new) def useAutoInSubview(stringIn): - if stringIn.find('Kokkos::subview') == -1 and stringIn.find('Kokkos::create_mirror_view') == -1: + if stringIn.find('= Kokkos::subview') == -1 and stringIn.find('= Kokkos::create_mirror_view') == -1: return stringIn index_eq = stringIn.find('=') - 1 for index in range(1, index_eq): diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 5133fad12..b8e208e1b 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1653,7 +1653,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } } } - if (FD->getQualifiedNameAsString().find("Kokkos::subview") != std::string::npos) { + if (FD->getQualifiedNameAsString().find("Kokkos::subview") != std::string::npos || FD->getQualifiedNameAsString().find("Kokkos::create_mirror_view") != std::string::npos) { llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; From ddd5763667554f6ebf79282731187d452ead4ed5 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 2 Jan 2024 11:56:22 -0700 Subject: [PATCH 28/75] Use f_view in double f(double x, double y) --- kokkos/functor_for.hpp | 4 +- kokkos/generated/Derivatives.cpp | 71 ++++++++++++-------------------- kokkos/generated/Derivatives.hpp | 70 ++++++++++++------------------- kokkos/main.cpp | 2 +- kokkos/postProcess.py | 6 ++- 5 files changed, 59 insertions(+), 94 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 496278039..09d9344d6 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -150,7 +150,5 @@ double f(double x, double y) { double sum; auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); - sum = a_row_0(0,0); - - return sum*sum; + return f_view(a_row_0); } \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 2bdfd1ca9..a12169e67 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -1,35 +1,26 @@ -double f_darg0(double x, double y) { - double _d_x = 1; - double _d_y = 0; - const int _d_N1 = 0; - const int N1 = 4; - const int _d_N2 = 0; - const int N2 = 4; - Kokkos::View _d_a("_d_a", N1); - Kokkos::View a("a", N1); - Kokkos::View _d_b("_d_b", N1); - Kokkos::View b("b", N1); - double _d_tmp = _d_x * x + x * _d_x + _d_y; - double tmp = x * x + y; - const int _d_i = 0; - const int i = 0; - const int _d_j = 0; - const int j = 0; - Kokkos::deep_copy(_d_a, _d_tmp); - Kokkos::deep_copy(a, tmp); - Kokkos::deep_copy(_d_a, _d_x); - Kokkos::deep_copy(a, x); - Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y); - Kokkos::deep_copy(b, x * x + y); - Kokkos::deep_copy(_d_a, _d_b); - Kokkos::deep_copy(a, b); - double _d_sum; - double sum; - Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); +void f_view_pullback(Kokkos::View, Kokkos::MemoryTraits<0> > a, typename View, MemoryTraits<0> >::value_type _d_y, clad::array_ref, MemoryTraits<0> > > _d_a) { + typename View, MemoryTraits<0> >::value_type _d_sum = 0; + Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); + typename View, MemoryTraits<0> >::value_type _t0; + typename View, MemoryTraits<0> >::value_type _t1; + double _t2; + typename View, MemoryTraits<0> >::value_type sum; Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - _d_sum = _d_a_row_0(0, 0); - sum = a_row_0(0, 0); - return _d_sum * sum + sum * _d_sum; + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + _t1 = sum; + _t2 = 9.9999999999999995E-7 * _t1; + _t0 = sum; + goto _label0; + _label0: + { + double _r0 = _d_y * _t0; + double _r1 = _r0 * _t1; + double _r2 = 9.9999999999999995E-7 * _r0; + _d_sum += _r2; + double _r3 = _t2 * _d_y; + _d_sum += _r3; + } + kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); } void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { int _d_N1 = 0; @@ -46,8 +37,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); - double _t5; - double _t6; + Kokkos::View, MemoryTraits<0> > _t5; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -66,21 +56,12 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - sum = a_row_0(0, 0); - _t6 = sum; - _t5 = sum; + _t5 = a_row_0; goto _label0; _label0: { - double _r5 = 1 * _t5; - _d_sum += _r5; - double _r6 = _t6 * 1; - _d_sum += _r6; - } - { - double _r_d0 = _d_sum; - _d_a_row_0(0, 0) += _r_d0; - _d_sum -= _r_d0; + f_view_pullback(_t5, 1, &_d_a_row_0); + Kokkos::View, MemoryTraits<0> > _r5 = _d_a_row_0; } { Kokkos::deep_copy(_d_b, _d_a); diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index bf33251bf..ee7a44a35 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -1,35 +1,27 @@ -double f_darg0(double x, double y) { - double _d_x = 1; - double _d_y = 0; - const int _d_N1 = 0; - const int N1 = 4; - const int _d_N2 = 0; - const int N2 = 4; - Kokkos::View _d_a("_d_a", N1); - Kokkos::View a("a", N1); - Kokkos::View _d_b("_d_b", N1); - Kokkos::View b("b", N1); - double _d_tmp = _d_x * x + x * _d_x + _d_y; - double tmp = x * x + y; - const int _d_i = 0; - const int i = 0; - const int _d_j = 0; - const int j = 0; - Kokkos::deep_copy(_d_a, _d_tmp); - Kokkos::deep_copy(a, tmp); - Kokkos::deep_copy(_d_a, _d_x); - Kokkos::deep_copy(a, x); - Kokkos::deep_copy(_d_b, _d_x * x + x * _d_x + _d_y); - Kokkos::deep_copy(b, x * x + y); - Kokkos::deep_copy(_d_a, _d_b); - Kokkos::deep_copy(a, b); - double _d_sum; - double sum; +template +void f_view_pullback(type_a a, typename type_a::value_type _d_y, type_a _d_a) { + typename type_a::value_type _d_sum = 0; auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); + typename type_a::value_type _t0; + typename type_a::value_type _t1; + double _t2; + typename type_a::value_type sum; auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); - _d_sum = _d_a_row_0(0, 0); - sum = a_row_0(0, 0); - return _d_sum * sum + sum * _d_sum; + kokkos_builtin_derivative::parallel_sum(sum, a_row_0); + _t1 = sum; + _t2 = 9.9999999999999995E-7 * _t1; + _t0 = sum; + goto _label0; + _label0: + { + double _r0 = _d_y * _t0; + double _r1 = _r0 * _t1; + double _r2 = 9.9999999999999995E-7 * _r0; + _d_sum += _r2; + double _r3 = _t2 * _d_y; + _d_sum += _r3; + } + kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); } void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { const int N1 = 4; @@ -47,8 +39,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, Kokkos::MemoryTraits<0> > _t5; const int N2 = 4; Kokkos::View a("a", N1); Kokkos::View b("b", N1); @@ -66,21 +57,12 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, Kokkos::MemoryTraits<0> > _r5 = _d_a_row_0; } { Kokkos::deep_copy(_d_b, _d_a); diff --git a/kokkos/main.cpp b/kokkos/main.cpp index a9e479237..bad73e6a6 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -6,7 +6,7 @@ #include //#define use_generated_file -#define use_forward_mode +//#define use_forward_mode #ifdef use_generated_file #include "generated/Derivatives.hpp" diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py index 2fc2f5b02..99cb01caf 100644 --- a/kokkos/postProcess.py +++ b/kokkos/postProcess.py @@ -22,7 +22,10 @@ def useAutoInSubview(stringIn): return stringOut def useKokkosNamespace(stringIn): - return stringIn.replace(' ALL', ' Kokkos::ALL').replace(' Date: Wed, 3 Jan 2024 08:38:11 -0700 Subject: [PATCH 29/75] Use a partial template instanciation of ViewSum and ViewAdd --- kokkos/functor_for.hpp | 94 +------------------ kokkos/main.cpp | 1 + kokkos/parallel_sum.hpp | 201 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 89 deletions(-) create mode 100644 kokkos/parallel_sum.hpp diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 09d9344d6..5d4f9a65c 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -1,92 +1,3 @@ - -namespace kokkos_builtin_derivative { - -template -void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { - typename ViewtypeA::value_type tmp_sum = sum; - sum = 0.; - - const int s0 = A.stride(0); - const int s1 = A.stride(1); - const int s2 = A.stride(2); - const int s3 = A.stride(3); - const int s4 = A.stride(4); - const int s5 = A.stride(5); - const int s6 = A.stride(6); - const int s7 = A.stride(7); - - const int e0 = A.extent_int(0); - const int e1 = A.extent_int(1); - const int e2 = A.extent_int(2); - const int e3 = A.extent_int(3); - const int e4 = A.extent_int(4); - const int e5 = A.extent_int(5); - const int e6 = A.extent_int(6); - const int e7 = A.extent_int(7); - - Kokkos::Array begins = {0, 0, 0, 0, 0, 0}; - Kokkos::Array ends = {e0, e1, e2, e3, e4, e5}; - - Kokkos::parallel_reduce(Kokkos::MDRangePolicy< Kokkos::Rank<6> > (begins, ends), - KOKKOS_LAMBDA (const int i0, - const int i1, - const int i2, - const int i3, - const int i4, - const int i5, - typename ViewtypeA::value_type& update) { - const int offset = i0*s0 + i1*s1 + i2*s2 + i3*s3 + i4*s4 + i5*s5; - for ( int i6 = 0; i6 < e6; ++i6 ) { - for ( int i7 = 0; i7 < e7; ++i7 ) { - update += A.data()[offset + i6*s6 + i7*s7]; - } - } - }, sum ); - sum += tmp_sum; -} - -template -void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { - - const int s0 = A.stride(0); - const int s1 = A.stride(1); - const int s2 = A.stride(2); - const int s3 = A.stride(3); - const int s4 = A.stride(4); - const int s5 = A.stride(5); - const int s6 = A.stride(6); - const int s7 = A.stride(7); - - const int e0 = A.extent_int(0); - const int e1 = A.extent_int(1); - const int e2 = A.extent_int(2); - const int e3 = A.extent_int(3); - const int e4 = A.extent_int(4); - const int e5 = A.extent_int(5); - const int e6 = A.extent_int(6); - const int e7 = A.extent_int(7); - - Kokkos::Array begins = {0, 0, 0, 0, 0, 0}; - Kokkos::Array ends = {e0, e1, e2, e3, e4, e5}; - - Kokkos::parallel_for(Kokkos::MDRangePolicy< Kokkos::Rank<6> > (begins, ends), - KOKKOS_LAMBDA (const int i0, - const int i1, - const int i2, - const int i3, - const int i4, - const int i5) { - const int offset = i0*s0 + i1*s1 + i2*s2 + i3*s3 + i4*s4 + i5*s5; - for ( int i6 = 0; i6 < e6; ++i6 ) { - for ( int i7 = 0; i7 < e7; ++i7 ) { - A.data()[offset + i6*s6 + i7*s7] += b; - } - } - }); -} - -} - template struct ParallelFunctor { VT a; @@ -147,6 +58,11 @@ double f(double x, double y) { Kokkos::deep_copy(b, x * x + y); Kokkos::deep_copy(a, b); + //Kokkos::parallel_for( 2, KOKKOS_LAMBDA ( int j) { + // //printf("work item %d\n", j); + // a(j,0) = b(j,0); + //}); + double sum; auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); diff --git a/kokkos/main.cpp b/kokkos/main.cpp index bad73e6a6..3e987b389 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -1,5 +1,6 @@ #include "clad/Differentiator/Differentiator.h" #include +#include "parallel_sum.hpp" #include "functor_for.hpp" #include "lambda_reduction.hpp" #include "lambda_reduction_subview.hpp" diff --git a/kokkos/parallel_sum.hpp b/kokkos/parallel_sum.hpp new file mode 100644 index 000000000..951ac3005 --- /dev/null +++ b/kokkos/parallel_sum.hpp @@ -0,0 +1,201 @@ +#include + +namespace kokkos_builtin_derivative { + +/* Things to do: + +- use span_is_contiguous corner case (regardless of the rank) +- check the span of the thing, do we need more than int32. +- deduce iterate base on layout: done? +- If you give me an execution space: non-blocking (in theory) (use an unmaged view if scalar argument) +- If no execution space: blocking. +*/ + +// Parallel sum: + +template +struct ViewSum; + +template +struct ViewSum { + + template + static auto execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + + using policy_type = Kokkos::RangePolicy>; + using value_type = typename Viewtype::value_type; + + value_type sum; + + Kokkos::parallel_reduce( + "ViewSum-1D", + policy_type(space, 0, v.extent(0)), + KOKKOS_LAMBDA ( + const iType& i0, + value_type& update) { + update += v(i0); + }, + sum ); + + result += sum; + } +}; + +template +struct ViewSum { + + template + static auto execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + using value_type = typename Viewtype::value_type; + + value_type sum; + + Kokkos::parallel_reduce( + "ViewSum-2D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1, + value_type& update) { + update += v(i0, i1); + }, + sum ); + + result += sum; + } +}; + +template +struct ViewSum { + + template + static auto execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + using value_type = typename Viewtype::value_type; + + value_type sum; + + Kokkos::parallel_reduce( + "ViewSum-3D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1, + const iType& i2, + value_type& update) { + update += v(i0, i1, i2); + }, + sum ); + + result += sum; + } +}; + + +template +void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { + ViewSum::template execute(sum, A); +} + + +// Parallel add + +template +struct ViewAdd; + +template +struct ViewAdd { + + template + static auto execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + using policy_type = Kokkos::RangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-1D", + policy_type(space, 0, v.extent(0)), + KOKKOS_LAMBDA ( + const iType& i0) { + v(i0) += update; + }); + } +}; + +template +struct ViewAdd { + + template + static auto execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-2D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1) { + v(i0, i1) += update; + }); + } +}; + +template +struct ViewAdd { + + template + static auto execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-3D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1, + const iType& i2) { + v(i0, i1, i2) += update; + }); + } +}; + + + +template +void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { + ViewAdd::template execute(A, b); +} + +} \ No newline at end of file From 36aec0a52636b56840578e134a70be8a5fe3dbd6 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 3 Jan 2024 08:52:25 -0700 Subject: [PATCH 30/75] Modify ViewSum and ViewAdd to be compiled with CUDA --- kokkos/parallel_sum.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kokkos/parallel_sum.hpp b/kokkos/parallel_sum.hpp index 951ac3005..79bef8085 100644 --- a/kokkos/parallel_sum.hpp +++ b/kokkos/parallel_sum.hpp @@ -20,7 +20,7 @@ template struct ViewSum { template - static auto execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { using policy_type = Kokkos::RangePolicy>; using value_type = typename Viewtype::value_type; @@ -45,7 +45,7 @@ template struct ViewSum { template - static auto execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector::outer_iteration_pattern; @@ -78,7 +78,7 @@ template struct ViewSum { template - static auto execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector::outer_iteration_pattern; @@ -124,7 +124,7 @@ template struct ViewAdd { template - static auto execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { using policy_type = Kokkos::RangePolicy>; @@ -142,7 +142,7 @@ template struct ViewAdd { template - static auto execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector::outer_iteration_pattern; @@ -168,7 +168,7 @@ template struct ViewAdd { template - static auto execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector::outer_iteration_pattern; From 281fb5aa86585c87eadde4f178298bb9fffea32a Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 3 Jan 2024 09:13:49 -0700 Subject: [PATCH 31/75] parallel_sum to use flat views if contiguous span --- kokkos/parallel_sum.hpp | 47 +++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/kokkos/parallel_sum.hpp b/kokkos/parallel_sum.hpp index 79bef8085..0449a05c4 100644 --- a/kokkos/parallel_sum.hpp +++ b/kokkos/parallel_sum.hpp @@ -4,9 +4,9 @@ namespace kokkos_builtin_derivative { /* Things to do: -- use span_is_contiguous corner case (regardless of the rank) +- use span_is_contiguous corner case (regardless of the rank): done - check the span of the thing, do we need more than int32. -- deduce iterate base on layout: done? +- deduce iterate base on layout: done - If you give me an execution space: non-blocking (in theory) (use an unmaged view if scalar argument) - If no execution space: blocking. */ @@ -108,13 +108,6 @@ struct ViewSum { } }; - -template -void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { - ViewSum::template execute(sum, A); -} - - // Parallel add template @@ -192,10 +185,46 @@ struct ViewAdd { }; +template +void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { + if (A.span_is_contiguous()) { + + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewSum::template execute(sum, A_flat); + return; + } + + ViewSum::template execute(sum, A); + return; +} template void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { + if (A.span_is_contiguous()) { + + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewAdd::template execute(A_flat, b); + return; + } + ViewAdd::template execute(A, b); + return; } } \ No newline at end of file From 06f4ae0a2a27c5700102c145e17da65366cd99bd Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 3 Jan 2024 09:25:41 -0700 Subject: [PATCH 32/75] parallel_sum add fences --- kokkos/parallel_sum.hpp | 62 +++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/kokkos/parallel_sum.hpp b/kokkos/parallel_sum.hpp index 0449a05c4..136e9862e 100644 --- a/kokkos/parallel_sum.hpp +++ b/kokkos/parallel_sum.hpp @@ -7,8 +7,8 @@ namespace kokkos_builtin_derivative { - use span_is_contiguous corner case (regardless of the rank): done - check the span of the thing, do we need more than int32. - deduce iterate base on layout: done -- If you give me an execution space: non-blocking (in theory) (use an unmaged view if scalar argument) -- If no execution space: blocking. +- If you give me an execution space: non-blocking (in theory) (use an unmaged view if scalar argument): done +- If no execution space: blocking: done */ // Parallel sum: @@ -187,6 +187,7 @@ struct ViewAdd { template void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { + Kokkos::fence("parallel_sum: pre sum fence"); if (A.span_is_contiguous()) { using ViewTypeFlat = Kokkos::View< @@ -199,15 +200,38 @@ void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { ViewTypeFlat A_flat(A.data(), A.size()); ViewSum::template execute(sum, A_flat); - return; } + else { + ViewSum::template execute(sum, A); + } + Kokkos::fence("parallel_sum: post sum fence"); +} + +template +void parallel_sum(const ExecSpace& space, typename ViewtypeA::value_type &sum, const ViewtypeA A) { + space.fence("parallel_sum: pre sum fence"); + if (A.span_is_contiguous()) { - ViewSum::template execute(sum, A); - return; + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewSum::template execute(sum, A_flat, space); + } + else { + ViewSum::template execute(sum, A, space); + } + space.fence("parallel_sum: post sum fence"); } template void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { + Kokkos::fence("parallel_sum: pre add fence"); if (A.span_is_contiguous()) { using ViewTypeFlat = Kokkos::View< @@ -220,11 +244,33 @@ void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { ViewTypeFlat A_flat(A.data(), A.size()); ViewAdd::template execute(A_flat, b); - return; } + else { + ViewAdd::template execute(A, b); + } + Kokkos::fence("parallel_sum: post add fence"); +} + +template +void parallel_sum(const ExecSpace& space, ViewtypeA A, const typename ViewtypeA::value_type b) { + space.fence("parallel_sum: pre add fence"); + if (A.span_is_contiguous()) { - ViewAdd::template execute(A, b); - return; + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewAdd::template execute(A_flat, b, space); + } + else { + ViewAdd::template execute(A, b, space); + } + space.fence("parallel_sum: post add fence"); } } \ No newline at end of file From 6ff32fa09304c0764218e47e04a933e22825292a Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 3 Jan 2024 14:40:51 -0700 Subject: [PATCH 33/75] start working on lambdas --- .../clad/Differentiator/ReverseModeVisitor.h | 1 + kokkos/functor_for.hpp | 13 +- kokkos/generated/Derivatives.cpp | 33 +- kokkos/generated/Derivatives.hpp | 35 ++- kokkos/main.cpp | 2 +- kokkos/postProcess.py | 12 + lib/Differentiator/CladUtils.cpp | 4 +- lib/Differentiator/ReverseModeVisitor.cpp | 282 +++++++++++++++++- 8 files changed, 358 insertions(+), 24 deletions(-) diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index bef1fdb45..b148aa106 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -363,6 +363,7 @@ namespace clad { virtual StmtDiff VisitReturnStmt(const clang::ReturnStmt* RS); StmtDiff VisitStmt(const clang::Stmt* S); StmtDiff VisitValueStmt(const clang::ValueStmt* S); + StmtDiff VisitLambdaExpr(const clang::LambdaExpr* LE); virtual StmtDiff VisitUnaryOperator(const clang::UnaryOperator* UnOp); StmtDiff VisitExprWithCleanups(const clang::ExprWithCleanups* EWC); /// Decl is not Stmt, so it cannot be visited directly. diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 5d4f9a65c..0c5e40f9e 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -56,12 +56,15 @@ double f(double x, double y) { Kokkos::deep_copy(a, x); Kokkos::deep_copy(b, x * x + y); - Kokkos::deep_copy(a, b); + //Kokkos::deep_copy(a, b); - //Kokkos::parallel_for( 2, KOKKOS_LAMBDA ( int j) { - // //printf("work item %d\n", j); - // a(j,0) = b(j,0); - //}); + Kokkos::parallel_for( b.extent(0), KOKKOS_LAMBDA ( const int j0) { + b(j0,0) += j0*3.53; + }); + + Kokkos::parallel_for( a.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { + a(j1,0) += b(j1+1,0)*6.89 + b(j1,1); + }); double sum; auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index a12169e67..abc6981ef 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -53,7 +53,12 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); _t5 = a_row_0; @@ -61,12 +66,28 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, MemoryTraits<0> > _r5 = _d_a_row_0; - } - { - Kokkos::deep_copy(_d_b, _d_a); - Kokkos::deep_copy(_d_a, 0.); + Kokkos::View, MemoryTraits<0> > _r7 = _d_a_row_0; } + Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { + { + double _r_d1 = _d_a(j1, 0); + _d_a(j1, 0) += _r_d1; + double _r6 = _r_d1 * 6.8899999999999997; + _d_b(j1 + 1, 0) += _r6; + _d_b(j1, 1) += _r_d1; + _d_a(j1, 0) -= _r_d1; + _d_a(j1, 0); + } + }); + Kokkos::parallel_for(b.extent(0), [=](const int j0) { + { + double _r_d0 = _d_b(j0, 0); + _d_b(j0, 0) += _r_d0; + double _r5 = _r_d0 * 3.5299999999999998; + _d_b(j0, 0) -= _r_d0; + _d_b(j0, 0); + } + }); { double _grad0 = 0.; kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index ee7a44a35..2961f8b39 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -54,20 +54,41 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, Kokkos::MemoryTraits<0> > _r5 = _d_a_row_0; - } - { - Kokkos::deep_copy(_d_b, _d_a); - Kokkos::deep_copy(_d_a, 0.); + f_view_pullback(_t5, 1, _d_a_row_0); + Kokkos::View, Kokkos::MemoryTraits<0> > _r7 = _d_a_row_0; } + Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { + { + double _r_d1 = _d_a(j1, 0); + _d_a(j1, 0) += _r_d1; + double _r6 = _r_d1 * 6.8899999999999997; + _d_b(j1 + 1, 0) += _r6; + _d_b(j1, 1) += _r_d1; + _d_a(j1, 0) -= _r_d1; + _d_a(j1, 0); + } + }); + Kokkos::parallel_for(b.extent(0), KOKKOS_LAMBDA(const int j0) { + { + double _r_d0 = _d_b(j0, 0); + _d_b(j0, 0) += _r_d0; + double _r5 = _r_d0 * 3.5299999999999998; + _d_b(j0, 0) -= _r_d0; + _d_b(j0, 0); + } + }); { double _grad0 = 0.; kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 3e987b389..808f843f6 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -134,7 +134,7 @@ int main(int argc, char* argv[]) { f_grad(3., 4., &dx, &dy); auto t0_f_view_grad = std::chrono::high_resolution_clock::now(); - f_view_grad>(A, &dA); + f_view_grad>(A, dA); auto t1_f_view_grad = std::chrono::high_resolution_clock::now(); #endif diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py index 99cb01caf..772c80204 100644 --- a/kokkos/postProcess.py +++ b/kokkos/postProcess.py @@ -5,6 +5,13 @@ def replaceKokkosInlineFunction(stringIn): string_new = 'KOKKOS_INLINE_FUNCTION' return stringIn.replace(string_old, string_new) +def replaceKokkosLambda(stringIn): + if stringIn.find('Kokkos::parallel_for') == -1 and stringIn.find('Kokkos::parallel_reduce') == -1: + return stringIn + string_old = '[=]' + string_new = 'KOKKOS_LAMBDA' + return stringIn.replace(string_old, string_new) + def useAutoInSubview(stringIn): if stringIn.find('= Kokkos::subview') == -1 and stringIn.find('= Kokkos::create_mirror_view') == -1: return stringIn @@ -108,6 +115,10 @@ def swapTypeForTemplate(linesIn, fucntionName, variableName, index0=-1, index1=- for derivativeVarName in derivativeVarNames: linesIn[index] = linesIn[index].replace('(* ' + derivativeVarName + ')', derivativeVarName) + for index in range(0, len(linesIn)): + #to be improved! + if linesIn[index].find(fucntionName) != -1 and linesIn[index].find(';') != -1: + linesIn[index] = linesIn[index].replace('&', '') def transform(filenameIn, filenameOut): @@ -119,6 +130,7 @@ def transform(filenameIn, filenameOut): for i in range(0, len(linesIn)): linesIn[i] = replaceKokkosInlineFunction(linesIn[i]) + linesIn[i] = replaceKokkosLambda(linesIn[i]) linesIn[i] = useAutoInSubview(linesIn[i]) linesIn[i] = useKokkosNamespace(linesIn[i]) diff --git a/lib/Differentiator/CladUtils.cpp b/lib/Differentiator/CladUtils.cpp index 73f039fa1..c846a1f9f 100644 --- a/lib/Differentiator/CladUtils.cpp +++ b/lib/Differentiator/CladUtils.cpp @@ -317,8 +317,8 @@ namespace clad { return false; } - bool IsKokkosView(const std::string constructedTypeName){ - return constructedTypeName.find("Kokkos::View") == 0 || constructedTypeName.find("class Kokkos::View") == 0; + bool IsKokkosView(const std::string constructedTypeName) { + return constructedTypeName.find("Kokkos::View") == 0 || constructedTypeName.find("class Kokkos::View") == 0 || constructedTypeName.find("const class Kokkos::View") == 0; //return constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("getBody(); + //Stmt* reverseBody + auto bodyV = Visit(body); + + /* + std::cout << " bodyV.getStmt() dump start " << std::endl; + bodyV.getStmt()->dump(); + std::cout << " bodyV.getStmt() dump end " << std::endl; + std::cout << " bodyV.getStmt_dx() dump start " << std::endl; + bodyV.getStmt_dx()->dump(); + std::cout << " bodyV.getStmt_dx() dump end " << std::endl; + + std::cout << " body dump start " << std::endl; + body->dump(); + std::cout << " body dump end " << std::endl; + */ + + auto children_iterator_range = LE->children(); + + std::vector children_Exp; + std::vector children_Exp_dx; + + for (auto children : children_iterator_range) { + auto children_expr = const_cast(dyn_cast(children)); + if (children_expr) { + auto children_exprV = Visit(children_expr); + /* + std::cout << " children dump start " << std::endl; + children->dump(); + std::cout << " children dump end " << std::endl; + */ + children_Exp.push_back(children_exprV.getExpr()); + children_Exp_dx.push_back(children_exprV.getExpr()); + children_Exp_dx.push_back(children_exprV.getExpr_dx()); + + /* + std::cout << " children_expr dump start " << std::endl; + children_expr->dump(); + std::cout << " children_expr dump end " << std::endl; + std::cout << " children_exprV.getExpr() dump start " << std::endl; + children_exprV.getExpr()->dump(); + std::cout << " children_exprV.getExpr() dump end " << std::endl; + std::cout << " children_exprV.getExpr_dx() dump start " << std::endl; + children_exprV.getExpr_dx()->dump(); + std::cout << " children_exprV.getExpr_dx() dump end " << std::endl; + */ + } + //else { + /* + std::cout << " children body dump start " << std::endl; + children->dump(); + std::cout << " children body dump end " << std::endl; + */ + + //auto children_body = Visit(children); + + /* + std::cout << " children_body.getExpr() dump start " << std::endl; + children_body.getStmt()->dump(); + std::cout << " children_body.getExpr() dump end " << std::endl; + std::cout << " children_body.getExpr_dx() dump start " << std::endl; + children_body.getStmt_dx()->dump(); + std::cout << " children_body.getExpr_dx() dump end " << std::endl; + */ + //} + } + + llvm::ArrayRef childrenRef_Exp = + clad_compat::makeArrayRef(children_Exp.data(), children_Exp.size()); + + llvm::ArrayRef childrenRef_Exp_dx = + clad_compat::makeArrayRef(children_Exp_dx.data(), children_Exp_dx.size()); + + //std::cout << " children_Exp.size() = " << children_Exp.size() << std::endl; + + auto forwardLambdaClass = LE->getLambdaClass(); + //auto reverseLambdaClass = LE->getLambdaClass(); + //reverseLambdaClass->CallOperator = bodyV.getStmt_dx(); + //auto reverseLambdaClass = CXXRecordDecl::CreateLambda (m_Context, + // forwardLambdaClass->getDeclContext(), + // forwardLambdaClass->getLambdaTypeInfo (), // can be set + // forwardLambdaClass->getLocation(), + // forwardLambdaClass->isDependentLambda (), + // forwardLambdaClass->isGenericLambda (), + // forwardLambdaClass->getLambdaCaptureDefault ()); + + /* + std::cout << " forwardLambdaClass start dump" << std::endl; + forwardLambdaClass->dump(); + std::cout << " forwardLambdaClass end dump" << std::endl; + + std::cout << " reverseLambdaClass start dump" << std::endl; + reverseLambdaClass->dump(); + std::cout << " reverseLambdaClass end dump" << std::endl; + */ + + auto forwardLE = LambdaExpr::Create(m_Context, + forwardLambdaClass, + LE->getIntroducerRange(), + LE->getCaptureDefault(), + LE->getCaptureDefaultLoc(), + LE->hasExplicitParameters(), + LE->hasExplicitResultType(), + childrenRef_Exp, + LE->getEndLoc(), + false); + + //auto reverseLE = LambdaExpr::Create(m_Context, + // reverseLambdaClass, + // LE->getIntroducerRange(), + // LE->getCaptureDefault(), + // LE->getCaptureDefaultLoc(), + // LE->hasExplicitParameters(), + // LE->hasExplicitResultType(), + // childrenRef_Exp_dx, + // LE->getEndLoc(), + // false); + + clang::Expr * reverseLE; + { + clang::LambdaIntroducer Intro; + Intro.Default = forwardLambdaClass->getLambdaCaptureDefault (); + Intro.Range.setBegin(bodyV.getStmt_dx()->getBeginLoc()); + Intro.Range.setEnd(bodyV.getStmt_dx()->getEndLoc()); + + clang::AttributeFactory AttrFactory; + const clang::DeclSpec DS(AttrFactory); + clang::Declarator D(DS, + CLAD_COMPAT_CLANG15_Declarator_DeclarationAttrs_ExtraParam + CLAD_COMPAT_CLANG12_Declarator_LambdaExpr); + clang::sema::LambdaScopeInfo * LSI = m_Sema.PushLambdaScope(); + beginScope(clang::Scope::BlockScope | clang::Scope::FnScope | + clang::Scope::DeclScope); + m_Sema.ActOnStartOfLambdaDefinition(Intro, D, + clad_compat::Sema_ActOnStartOfLambdaDefinition_ScopeOrDeclSpec(getCurrentScope(), DS)); + + //beginBlock(); + //addToCurrentBlock(bodyV.getStmt_dx(), direction::reverse); + //endBlock(); + //clang::Expr* lambda = + // m_Sema.ActOnLambdaExpr(noLoc, bodyV.getStmt_dx(), getCurrentScope()).get(); + + //clang::sema::LambdaScopeInfo LSI = *cast(m_Sema.FunctionScopes.back()); + + //clang::sema::LambdaScopeInfo LSI = m_Sema.RebuildLambdaScopeInfo(LE); + + //for (auto Var : children_iterator_range) + // LSI.addCapture(Var, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), + // /*isNested=*/false, noLoc, SourceLocation(), + // Var->getType(), /*Invalid=*/false); + + //for (auto Var : childrenRef_Exp) + // LSI.addCapture(Var, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), + // /*isNested=*/false, noLoc, SourceLocation(), + // Var->getType(), /*Invalid=*/false); + + + //LSI->Lambda = forwardLambdaClass; + + m_Sema.buildLambdaScope(LSI, + //bodyV.getStmt_dx(), + LE->getCallOperator(), + LE->getIntroducerRange(), + LE->getCaptureDefault(), + LE->getCaptureDefaultLoc(), + LE->hasExplicitParameters(), + LE->hasExplicitResultType(), + LE->isMutable()); + + m_Sema.ActOnFinishFunctionBody(LSI->CallOperator, bodyV.getStmt_dx()); + clang::Expr* lambda = m_Sema.BuildLambdaExpr(noLoc, noLoc, LSI).get(); + endScope(); + //reverseLE = m_Sema.ActOnCallExpr(getCurrentScope(), lambda, noLoc, {}, noLoc).get(); + + /* + std::cout << " lambda start dump" << std::endl; + + lambda->dump(); + + std::cout << " lambda end dump" << std::endl; + */ + auto reverseLambdaClassNew = dyn_cast(dyn_cast(lambda)->getSubExpr())->getLambdaClass(); + /* + std::cout << " forwardLambdaClass start dump" << std::endl; + + forwardLambdaClass->dump(); + + std::cout << " forwardLambdaClass end dump" << std::endl; + + std::cout << " reverseLambdaClassNew start dump" << std::endl; + + reverseLambdaClassNew->dump(); + + std::cout << " reverseLambdaClassNew end dump" << std::endl; + */ + reverseLE = LambdaExpr::Create(m_Context, + reverseLambdaClassNew, //forwardLambdaClass,//dyn_cast(lambda)->getLambdaClass(), + LE->getIntroducerRange(), + LE->getCaptureDefault(), + LE->getCaptureDefaultLoc(), + LE->hasExplicitParameters(), + LE->hasExplicitResultType(), + childrenRef_Exp_dx, + LE->getEndLoc(), + false); + + } + + //auto reverseLE = m_Sema.ActOnLambdaExpr(noLoc, bodyV.getStmt_dx(), getCurrentScope()).get(); + + /* + std::cout << " forwardLE start dump" << std::endl; + + forwardLE->dump(); + + std::cout << " forwardLE end dump" << std::endl; + + std::cout << " reverseLE start dump" << std::endl; + + reverseLE->dump(); + + std::cout << " reverseLE end dump" << std::endl; + + std::cout << " LE->getLambdaClass() start dump" << std::endl; + LE->getLambdaClass()->dump(); + std::cout << " LE->getLambdaClass() end dump" << std::endl; + */ + return {forwardLE, reverseLE}; + } + StmtDiff ReverseModeVisitor::VisitCompoundStmt(const CompoundStmt* CS) { int scopeFlags = Scope::DeclScope; // If this is the outermost compound statement of the function, @@ -1416,6 +1650,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } StmtDiff ReverseModeVisitor::VisitCallExpr(const CallExpr* CE) { + //std::cout << " CE dump start" << std::endl; + //CE->dump(); + //std::cout << " CE dump end" << std::endl; if (isa(CE)) { auto MCE = dyn_cast(CE); @@ -1429,17 +1666,39 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, const Expr* baseOriginalE = OCE->getArg(0); bool isKokkosViewAccess = false; - std::string kokkosViewName; + //std::string kokkosViewName; + + //std::cout << " OCE dump start" << std::endl; + //OCE->dump(); + //std::cout << " OCE dump start" << std::endl; + //std::cout << " baseOriginalE dump start" << std::endl; + //baseOriginalE->dump(); + //std::cout << " baseOriginalE dump start" << std::endl; + if (isa(baseOriginalE)) { + //std::cout << "true 1" << std::endl; auto SE = baseOriginalE->IgnoreImpCasts(); if (auto DRE = dyn_cast(SE)) { + //std::cout << "true 2" << std::endl; if (utils::IsKokkosView(DRE->getType())) { + //std::cout << "true 3" << std::endl; isKokkosViewAccess = true; - kokkosViewName = DRE->getNameInfo().getName().getAsString (); + //kokkosViewName = DRE->getNameInfo().getName().getAsString (); } } } + if (auto DRE = dyn_cast(baseOriginalE)) { + //std::cout << "true 2" << std::endl; + //DRE->getType()->dump(); + if (utils::IsKokkosView(DRE->getType())) { + //std::cout << "true 3" << std::endl; + isKokkosViewAccess = true; + //kokkosViewName = DRE->getNameInfo().getName().getAsString (); + } + isKokkosViewAccess = true; + } + //std::cout << " isKokkosViewAccess = " << isKokkosViewAccess << std::endl; // Returning the function call and zero derivative if (isKokkosViewAccess) { @@ -1681,7 +1940,24 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { - auto visitedArg = Visit(CE->getArg(i)); + //std::cout << "Start CE->getArg("<dump()" << std::endl; + //CE->getArg(i)->dump(); + //std::cout << "end CE->getArg("<dump()" << std::endl; + + auto arg = CE->getArg(i); + if (const auto* MTE = dyn_cast(arg)) + arg = clad_compat::GetSubExpr(MTE); + + if (const auto* ICE = dyn_cast(arg)) + arg = ICE->getSubExpr(); + + if (const auto* BTE = dyn_cast(arg)) + arg = BTE->getSubExpr(); + + if (isa(arg)) + std::cout << " is a lambda " << std::endl; + + auto visitedArg = Visit(arg); ClonedArgs.push_back(visitedArg.getExpr()); if (i==0) ClonedDArgs.push_back(visitedArg.getExpr()); From fa7f261ce628d79497f5d7e3d2394d0b5cc2df3e Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 3 Jan 2024 15:42:47 -0700 Subject: [PATCH 34/75] add a multi layers example --- kokkos/functor_for.hpp | 28 ++++++++++++ kokkos/generated/Derivatives.cpp | 52 ++++++++++++++++++++++ kokkos/generated/Derivatives.hpp | 53 +++++++++++++++++++++++ kokkos/main.cpp | 4 +- kokkos/postProcess.py | 1 + lib/Differentiator/ReverseModeVisitor.cpp | 4 +- 6 files changed, 139 insertions(+), 3 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 0c5e40f9e..850662805 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -29,6 +29,34 @@ typename ViewtypeA::value_type f_view(ViewtypeA a) { return 1e-6*sum*sum; } + +template +typename ViewtypeX::value_type f_multilevel(ViewtypeX x) { + typename ViewtypeX::value_type mean_x, sum; + kokkos_builtin_derivative::parallel_sum(mean_x, x); + + ViewtypeX y("y", x.extent(0)); + + Kokkos::parallel_for( x.extent(0), KOKKOS_LAMBDA ( const int j0) { + x(j0) = 3*x(j0) - mean_x; + }); + + Kokkos::parallel_for( x.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { + //if (j1 != x.extent(0)-1) // does not work yet + y(j1+1) = 2.6*x(j1); + //else + // y(j1) = 2.6*x(0); + }); + + const int n_max = 10; + const int n = x.extent(0) > n_max ? n_max : x.extent(0); + + auto x_n_rows = Kokkos::subview( x, Kokkos::make_pair(0, n)); + kokkos_builtin_derivative::parallel_sum(sum, x_n_rows); + return sum; +} + + template void f_view_2(ViewtypeA a, double tmp) { Kokkos::deep_copy(a, tmp); diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index abc6981ef..a7d2c9cf1 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -115,6 +115,58 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref x, clad::array_ref > _d_x) { + typename View::value_type _d_mean_x = 0, _d_sum = 0; + Kokkos::View _d_y("_d_y", x.extent(0)); + double _t0; + double _t1; + int _d_n_max = 0; + bool _cond0; + int _d_n = 0; + Kokkos::View, MemoryTraits<0> > _d_x_n_rows = Kokkos::subview((* _d_x), Kokkos::make_pair(0, n)); + typename View::value_type mean_x, sum; + kokkos_builtin_derivative::parallel_sum(mean_x, x); + Kokkos::View y("y", x.extent(0)); + Kokkos::parallel_for(x.extent(0), [=](const int j0) { + x(j0) = 3 * x(j0) - mean_x; + }); + Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { + y(j1 + 1) = 2.6000000000000001 * x(j1); + }); + const int n_max = 10; + _cond0 = x.extent(0) > n_max; + const int n = _cond0 ? n_max : x.extent(0); + Kokkos::View, MemoryTraits<0> > x_n_rows = Kokkos::subview(x, Kokkos::make_pair(0, n)); + kokkos_builtin_derivative::parallel_sum(sum, x_n_rows); + goto _label0; + _label0: + _d_sum += 1; + kokkos_builtin_derivative::parallel_sum(_d_x_n_rows, _d_sum); + if (_cond0) + _d_n_max += _d_n; + Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { + { + double _r_d1 = _d_y(j1 + 1); + double _r2 = _r_d1 * _t1; + double _r3 = 2.6000000000000001 * _r_d1; + (* _d_x)(j1) += _r3; + _d_y(j1 + 1) -= _r_d1; + _d_y(j1 + 1); + } + }); + Kokkos::parallel_for(x.extent(0), [=](const int j0) { + { + double _r_d0 = (* _d_x)(j0); + double _r0 = _r_d0 * _t0; + double _r1 = 3 * _r_d0; + (* _d_x)(j0) += _r1; + _d_mean_x += -_r_d0; + (* _d_x)(j0) -= _r_d0; + (* _d_x)(j0); + } + }); + kokkos_builtin_derivative::parallel_sum((* _d_x), _d_mean_x); +} void f_view_grad(Kokkos::View a, clad::array_ref > _d_a) { typename View::value_type _d_sum = 0; Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index 2961f8b39..08c130dc2 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -116,6 +116,59 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref +void f_multilevel_grad(type_x x, type_x _d_x) { + typename type_x::value_type _d_mean_x = 0, _d_sum = 0; + type_x _d_y("_d_y", x.extent(0)); + double _t0; + double _t1; + int _d_n_max = 0; + bool _cond0; + int _d_n = 0; + const int n_max = 10; + _cond0 = x.extent(0) > n_max; + const int n = _cond0 ? n_max : x.extent(0); + auto _d_x_n_rows = Kokkos::subview(_d_x, Kokkos::make_pair(0, n)); + typename type_x::value_type mean_x, sum; + kokkos_builtin_derivative::parallel_sum(mean_x, x); + type_x y("y", x.extent(0)); + Kokkos::parallel_for(x.extent(0), KOKKOS_LAMBDA(const int j0) { + x(j0) = 3 * x(j0) - mean_x; + }); + Kokkos::parallel_for(x.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { + y(j1 + 1) = 2.6000000000000001 * x(j1); + }); + auto x_n_rows = Kokkos::subview(x, Kokkos::make_pair(0, n)); + kokkos_builtin_derivative::parallel_sum(sum, x_n_rows); + goto _label0; + _label0: + _d_sum += 1; + kokkos_builtin_derivative::parallel_sum(_d_x_n_rows, _d_sum); + if (_cond0) + _d_n_max += _d_n; + Kokkos::parallel_for(x.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { + { + double _r_d1 = _d_y(j1 + 1); + double _r2 = _r_d1 * _t1; + double _r3 = 2.6000000000000001 * _r_d1; + _d_x(j1) += _r3; + _d_y(j1 + 1) -= _r_d1; + _d_y(j1 + 1); + } + }); + Kokkos::parallel_for(x.extent(0), KOKKOS_LAMBDA(const int j0) { + { + double _r_d0 = _d_x(j0); + double _r0 = _r_d0 * _t0; + double _r1 = 3 * _r_d0; + _d_x(j0) += _r1; + _d_mean_x += -_r_d0; + _d_x(j0) -= _r_d0; + _d_x(j0); + } + }); + kokkos_builtin_derivative::parallel_sum(_d_x, _d_mean_x); +} template void f_view_grad(type_a a, type_a _d_a) { typename type_a::value_type _d_sum = 0; diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 808f843f6..3182f6abc 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -6,7 +6,7 @@ #include "lambda_reduction_subview.hpp" #include -//#define use_generated_file +#define use_generated_file //#define use_forward_mode #ifdef use_generated_file @@ -115,6 +115,8 @@ int main(int argc, char* argv[]) { //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); #endif auto f_grad_exe = clad::gradient(f); + + auto f_multilevel_grad_exe = clad::gradient(f_multilevel>); auto f_view_grad_exe = clad::gradient(f_view>); #ifdef use_forward_mode dx_f = f_dx_exe.execute(3.,4.); diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py index 772c80204..3b2bf7129 100644 --- a/kokkos/postProcess.py +++ b/kokkos/postProcess.py @@ -136,6 +136,7 @@ def transform(filenameIn, filenameOut): swapTypeForTemplate(linesIn, 'f_view_grad', 'a') swapTypeForTemplate(linesIn, 'f_view_pullback', 'a') + swapTypeForTemplate(linesIn, 'f_multilevel_grad', 'x') for line in linesIn: fileOut.write(line) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 4f125509f..8a4d74e9f 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -3199,8 +3199,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, read = true; } size_t i = 0; - if (isa(VD->getInit())) { - auto CE = dyn_cast(VD->getInit()); + if (isa(VD->getInit()->IgnoreImpCasts())) { + auto CE = dyn_cast(VD->getInit()->IgnoreImpCasts()); llvm::SmallVector clonedArgs; for (auto arg : CE->arguments()) { if (i == runTimeDim + 1) From fe14c824ec132d41ff7ca1d9a30ae108e289040a Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 3 Jan 2024 16:01:56 -0700 Subject: [PATCH 35/75] update the multi layer example to run on the GPU --- kokkos/functor_for.hpp | 7 ++++--- kokkos/generated/Derivatives.cpp | 9 +++------ kokkos/generated/Derivatives.hpp | 9 +++------ kokkos/main.cpp | 2 +- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 850662805..75f9acf44 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -32,13 +32,14 @@ typename ViewtypeA::value_type f_view(ViewtypeA a) { template typename ViewtypeX::value_type f_multilevel(ViewtypeX x) { - typename ViewtypeX::value_type mean_x, sum; - kokkos_builtin_derivative::parallel_sum(mean_x, x); + typename ViewtypeX::value_type sum; + //kokkos_builtin_derivative::parallel_sum(mean_x, x); ViewtypeX y("y", x.extent(0)); Kokkos::parallel_for( x.extent(0), KOKKOS_LAMBDA ( const int j0) { - x(j0) = 3*x(j0) - mean_x; + x(j0) = 3*x(j0); + //x(j0) = 3*x(j0) - mean_x; }); Kokkos::parallel_for( x.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index a7d2c9cf1..d6931f4e9 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -116,7 +116,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref x, clad::array_ref > _d_x) { - typename View::value_type _d_mean_x = 0, _d_sum = 0; + typename View::value_type _d_sum = 0; Kokkos::View _d_y("_d_y", x.extent(0)); double _t0; double _t1; @@ -124,11 +124,10 @@ void f_multilevel_grad(Kokkos::View x, clad::array_ref bool _cond0; int _d_n = 0; Kokkos::View, MemoryTraits<0> > _d_x_n_rows = Kokkos::subview((* _d_x), Kokkos::make_pair(0, n)); - typename View::value_type mean_x, sum; - kokkos_builtin_derivative::parallel_sum(mean_x, x); + typename View::value_type sum; Kokkos::View y("y", x.extent(0)); Kokkos::parallel_for(x.extent(0), [=](const int j0) { - x(j0) = 3 * x(j0) - mean_x; + x(j0) = 3 * x(j0); }); Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { y(j1 + 1) = 2.6000000000000001 * x(j1); @@ -160,12 +159,10 @@ void f_multilevel_grad(Kokkos::View x, clad::array_ref double _r0 = _r_d0 * _t0; double _r1 = 3 * _r_d0; (* _d_x)(j0) += _r1; - _d_mean_x += -_r_d0; (* _d_x)(j0) -= _r_d0; (* _d_x)(j0); } }); - kokkos_builtin_derivative::parallel_sum((* _d_x), _d_mean_x); } void f_view_grad(Kokkos::View a, clad::array_ref > _d_a) { typename View::value_type _d_sum = 0; diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index 08c130dc2..deac5f8e7 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -118,7 +118,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref void f_multilevel_grad(type_x x, type_x _d_x) { - typename type_x::value_type _d_mean_x = 0, _d_sum = 0; + typename type_x::value_type _d_sum = 0; type_x _d_y("_d_y", x.extent(0)); double _t0; double _t1; @@ -129,11 +129,10 @@ void f_multilevel_grad(type_x x, type_x _d_x) { _cond0 = x.extent(0) > n_max; const int n = _cond0 ? n_max : x.extent(0); auto _d_x_n_rows = Kokkos::subview(_d_x, Kokkos::make_pair(0, n)); - typename type_x::value_type mean_x, sum; - kokkos_builtin_derivative::parallel_sum(mean_x, x); + typename type_x::value_type sum; type_x y("y", x.extent(0)); Kokkos::parallel_for(x.extent(0), KOKKOS_LAMBDA(const int j0) { - x(j0) = 3 * x(j0) - mean_x; + x(j0) = 3 * x(j0); }); Kokkos::parallel_for(x.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { y(j1 + 1) = 2.6000000000000001 * x(j1); @@ -162,12 +161,10 @@ void f_multilevel_grad(type_x x, type_x _d_x) { double _r0 = _r_d0 * _t0; double _r1 = 3 * _r_d0; _d_x(j0) += _r1; - _d_mean_x += -_r_d0; _d_x(j0) -= _r_d0; _d_x(j0); } }); - kokkos_builtin_derivative::parallel_sum(_d_x, _d_mean_x); } template void f_view_grad(type_a a, type_a _d_a) { diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 3182f6abc..7e6d01173 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -6,7 +6,7 @@ #include "lambda_reduction_subview.hpp" #include -#define use_generated_file +//#define use_generated_file //#define use_forward_mode #ifdef use_generated_file From 0cbb81133dc90b45d0626c9fcb786b8c78feead2 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 4 Jan 2024 09:12:19 -0700 Subject: [PATCH 36/75] fix one of the lambda warning --- kokkos/functor_for.hpp | 2 +- kokkos/generated/Derivatives.cpp | 9 +- kokkos/generated/Derivatives.hpp | 9 +- lib/Differentiator/ReverseModeVisitor.cpp | 131 +--------------------- 4 files changed, 12 insertions(+), 139 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 75f9acf44..077731cc6 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -88,7 +88,7 @@ double f(double x, double y) { //Kokkos::deep_copy(a, b); Kokkos::parallel_for( b.extent(0), KOKKOS_LAMBDA ( const int j0) { - b(j0,0) += j0*3.53; + b(j0,0) += 3.53; }); Kokkos::parallel_for( a.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index d6931f4e9..02e830470 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -54,7 +54,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref, MemoryTraits<0> > _r7 = _d_a_row_0; + Kokkos::View, MemoryTraits<0> > _r6 = _d_a_row_0; } Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { { double _r_d1 = _d_a(j1, 0); _d_a(j1, 0) += _r_d1; - double _r6 = _r_d1 * 6.8899999999999997; - _d_b(j1 + 1, 0) += _r6; + double _r5 = _r_d1 * 6.8899999999999997; + _d_b(j1 + 1, 0) += _r5; _d_b(j1, 1) += _r_d1; _d_a(j1, 0) -= _r_d1; _d_a(j1, 0); @@ -83,7 +83,6 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref, Kokkos::MemoryTraits<0> > _r7 = _d_a_row_0; + Kokkos::View, Kokkos::MemoryTraits<0> > _r6 = _d_a_row_0; } Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { { double _r_d1 = _d_a(j1, 0); _d_a(j1, 0) += _r_d1; - double _r6 = _r_d1 * 6.8899999999999997; - _d_b(j1 + 1, 0) += _r6; + double _r5 = _r_d1 * 6.8899999999999997; + _d_b(j1 + 1, 0) += _r5; _d_b(j1, 1) += _r_d1; _d_a(j1, 0) -= _r_d1; _d_a(j1, 0); @@ -84,7 +84,6 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_refdump(); - std::cout << " bodyV.getStmt() dump end " << std::endl; - std::cout << " bodyV.getStmt_dx() dump start " << std::endl; - bodyV.getStmt_dx()->dump(); - std::cout << " bodyV.getStmt_dx() dump end " << std::endl; - - std::cout << " body dump start " << std::endl; - body->dump(); - std::cout << " body dump end " << std::endl; - */ - auto children_iterator_range = LE->children(); std::vector children_Exp; @@ -800,45 +787,11 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, auto children_expr = const_cast(dyn_cast(children)); if (children_expr) { auto children_exprV = Visit(children_expr); - /* - std::cout << " children dump start " << std::endl; - children->dump(); - std::cout << " children dump end " << std::endl; - */ children_Exp.push_back(children_exprV.getExpr()); children_Exp_dx.push_back(children_exprV.getExpr()); children_Exp_dx.push_back(children_exprV.getExpr_dx()); - /* - std::cout << " children_expr dump start " << std::endl; - children_expr->dump(); - std::cout << " children_expr dump end " << std::endl; - std::cout << " children_exprV.getExpr() dump start " << std::endl; - children_exprV.getExpr()->dump(); - std::cout << " children_exprV.getExpr() dump end " << std::endl; - std::cout << " children_exprV.getExpr_dx() dump start " << std::endl; - children_exprV.getExpr_dx()->dump(); - std::cout << " children_exprV.getExpr_dx() dump end " << std::endl; - */ } - //else { - /* - std::cout << " children body dump start " << std::endl; - children->dump(); - std::cout << " children body dump end " << std::endl; - */ - - //auto children_body = Visit(children); - - /* - std::cout << " children_body.getExpr() dump start " << std::endl; - children_body.getStmt()->dump(); - std::cout << " children_body.getExpr() dump end " << std::endl; - std::cout << " children_body.getExpr_dx() dump start " << std::endl; - children_body.getStmt_dx()->dump(); - std::cout << " children_body.getExpr_dx() dump end " << std::endl; - */ - //} } llvm::ArrayRef childrenRef_Exp = @@ -847,28 +800,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::ArrayRef childrenRef_Exp_dx = clad_compat::makeArrayRef(children_Exp_dx.data(), children_Exp_dx.size()); - //std::cout << " children_Exp.size() = " << children_Exp.size() << std::endl; - auto forwardLambdaClass = LE->getLambdaClass(); - //auto reverseLambdaClass = LE->getLambdaClass(); - //reverseLambdaClass->CallOperator = bodyV.getStmt_dx(); - //auto reverseLambdaClass = CXXRecordDecl::CreateLambda (m_Context, - // forwardLambdaClass->getDeclContext(), - // forwardLambdaClass->getLambdaTypeInfo (), // can be set - // forwardLambdaClass->getLocation(), - // forwardLambdaClass->isDependentLambda (), - // forwardLambdaClass->isGenericLambda (), - // forwardLambdaClass->getLambdaCaptureDefault ()); - - /* - std::cout << " forwardLambdaClass start dump" << std::endl; - forwardLambdaClass->dump(); - std::cout << " forwardLambdaClass end dump" << std::endl; - - std::cout << " reverseLambdaClass start dump" << std::endl; - reverseLambdaClass->dump(); - std::cout << " reverseLambdaClass end dump" << std::endl; - */ auto forwardLE = LambdaExpr::Create(m_Context, forwardLambdaClass, @@ -881,16 +813,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, LE->getEndLoc(), false); - //auto reverseLE = LambdaExpr::Create(m_Context, - // reverseLambdaClass, - // LE->getIntroducerRange(), - // LE->getCaptureDefault(), - // LE->getCaptureDefaultLoc(), - // LE->hasExplicitParameters(), - // LE->hasExplicitResultType(), - // childrenRef_Exp_dx, - // LE->getEndLoc(), - // false); clang::Expr * reverseLE; { @@ -910,15 +832,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, m_Sema.ActOnStartOfLambdaDefinition(Intro, D, clad_compat::Sema_ActOnStartOfLambdaDefinition_ScopeOrDeclSpec(getCurrentScope(), DS)); - //beginBlock(); - //addToCurrentBlock(bodyV.getStmt_dx(), direction::reverse); - //endBlock(); - //clang::Expr* lambda = - // m_Sema.ActOnLambdaExpr(noLoc, bodyV.getStmt_dx(), getCurrentScope()).get(); - - //clang::sema::LambdaScopeInfo LSI = *cast(m_Sema.FunctionScopes.back()); - - //clang::sema::LambdaScopeInfo LSI = m_Sema.RebuildLambdaScopeInfo(LE); //for (auto Var : children_iterator_range) // LSI.addCapture(Var, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), @@ -943,34 +856,15 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, LE->hasExplicitResultType(), LE->isMutable()); - m_Sema.ActOnFinishFunctionBody(LSI->CallOperator, bodyV.getStmt_dx()); + FunctionDecl *FD = LSI->CallOperator->getAsFunction(); + FD->setBody(bodyV.getStmt_dx()); clang::Expr* lambda = m_Sema.BuildLambdaExpr(noLoc, noLoc, LSI).get(); endScope(); - //reverseLE = m_Sema.ActOnCallExpr(getCurrentScope(), lambda, noLoc, {}, noLoc).get(); - - /* - std::cout << " lambda start dump" << std::endl; - lambda->dump(); - std::cout << " lambda end dump" << std::endl; - */ auto reverseLambdaClassNew = dyn_cast(dyn_cast(lambda)->getSubExpr())->getLambdaClass(); - /* - std::cout << " forwardLambdaClass start dump" << std::endl; - - forwardLambdaClass->dump(); - - std::cout << " forwardLambdaClass end dump" << std::endl; - - std::cout << " reverseLambdaClassNew start dump" << std::endl; - - reverseLambdaClassNew->dump(); - - std::cout << " reverseLambdaClassNew end dump" << std::endl; - */ reverseLE = LambdaExpr::Create(m_Context, - reverseLambdaClassNew, //forwardLambdaClass,//dyn_cast(lambda)->getLambdaClass(), + reverseLambdaClassNew, LE->getIntroducerRange(), LE->getCaptureDefault(), LE->getCaptureDefaultLoc(), @@ -982,25 +876,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } - //auto reverseLE = m_Sema.ActOnLambdaExpr(noLoc, bodyV.getStmt_dx(), getCurrentScope()).get(); - - /* - std::cout << " forwardLE start dump" << std::endl; - - forwardLE->dump(); - - std::cout << " forwardLE end dump" << std::endl; - - std::cout << " reverseLE start dump" << std::endl; - - reverseLE->dump(); - - std::cout << " reverseLE end dump" << std::endl; - - std::cout << " LE->getLambdaClass() start dump" << std::endl; - LE->getLambdaClass()->dump(); - std::cout << " LE->getLambdaClass() end dump" << std::endl; - */ return {forwardLE, reverseLE}; } From 6a7bf986e9b86fec85f1f1b0f30f353a11145351 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 4 Jan 2024 09:41:57 -0700 Subject: [PATCH 37/75] clean a little bit the test --- kokkos/functor_for.hpp | 3 --- kokkos/generated/Derivatives.cpp | 4 ---- kokkos/generated/Derivatives.hpp | 4 ---- 3 files changed, 11 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 077731cc6..7dc6b6f26 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -74,9 +74,6 @@ double f(double x, double y) { double tmp = x * x + y; - const int i = 0; - const int j = 0; - // These 2 lines do not work. Is it because nothing is returned by f_view_2? //f_view_2(a, tmp); //return f_view(a); diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 02e830470..5317154d0 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -30,8 +30,6 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref Date: Thu, 4 Jan 2024 13:09:47 -0700 Subject: [PATCH 38/75] add a include dir --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index abe8adf64..21914eb24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -233,6 +233,7 @@ if (NOT CLAD_BUILT_STANDALONE) include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_BINARY_DIR}/../clang/include ${CMAKE_CURRENT_SOURCE_DIR}/../clang/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../../clang/include ) endif() From f80f4a5fd86e00d83461275a2dcdf596bfd0e02f Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 8 Jan 2024 11:22:54 -0700 Subject: [PATCH 39/75] Update the lambda visit --- lib/Differentiator/ReverseModeVisitor.cpp | 78 +++++++++++++++++++---- 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 6261ee3b9..88e7cd88d 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -818,8 +818,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, { clang::LambdaIntroducer Intro; Intro.Default = forwardLambdaClass->getLambdaCaptureDefault (); - Intro.Range.setBegin(bodyV.getStmt_dx()->getBeginLoc()); - Intro.Range.setEnd(bodyV.getStmt_dx()->getEndLoc()); + Intro.Range.setBegin(LE->getBeginLoc()); + Intro.Range.setEnd(LE->getEndLoc()); clang::AttributeFactory AttrFactory; const clang::DeclSpec DS(AttrFactory); @@ -832,9 +832,15 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, m_Sema.ActOnStartOfLambdaDefinition(Intro, D, clad_compat::Sema_ActOnStartOfLambdaDefinition_ScopeOrDeclSpec(getCurrentScope(), DS)); - - //for (auto Var : children_iterator_range) - // LSI.addCapture(Var, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), + //beginBlock(); + ////addToCurrentBlock(bodyV.getStmt_dx()); + ////addToCurrentBlock(LE->getCallOperator()); + //clang::CompoundStmt* body_tmp = endBlock(); + //clang::Expr* lambda = + // m_Sema.ActOnLambdaExpr(noLoc, bodyV.getStmt_dx(), getCurrentScope()).get(); + //endScope(); + //return {forwardLE, forwardLE}; + // LSI->addCapture(Var, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), // /*isNested=*/false, noLoc, SourceLocation(), // Var->getType(), /*Invalid=*/false); @@ -846,25 +852,73 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, //LSI->Lambda = forwardLambdaClass; + LSI->CallOperator = LE->getCallOperator(); + + FunctionDecl *FD = LSI->CallOperator->getAsFunction(); + FD->setBody(bodyV.getStmt_dx()); + + //std::cout << "dump LE->getCallOperator()->getParent()" << std::endl; + //LE->getCallOperator()->getParent()->dump(); + //std::cout << "LE->getCallOperator()->getParent()->capture_size() = " << LE->getCallOperator()->getParent()->capture_size() << std::endl; + + //std::cout << "dump LSI->CallOperator->getParent()" << std::endl; + //LSI->CallOperator->getParent()->dump(); + //std::cout << "LSI->CallOperator->getParent()->capture_size() = " << LSI->CallOperator->getParent()->capture_size() << std::endl; + + + std::vector children_LC_Exp_dx; + + for (auto children_expr : children_Exp_dx) { + if (isa(children_expr)) { + auto VD = dyn_cast(dyn_cast(children_expr)->getDecl()); + children_LC_Exp_dx.push_back(LambdaCapture(SourceLocation(), true, LambdaCaptureKind::LCK_ByRef, VD)); + } + else { + if(isa(children_expr)) { + auto PE = dyn_cast(children_expr); + auto OCE = dyn_cast(PE->getSubExpr()); + + auto VD = dyn_cast(dyn_cast(OCE->getArg(0))->getDecl()); + children_LC_Exp_dx.push_back(LambdaCapture(SourceLocation(), true, LambdaCaptureKind::LCK_ByRef, VD)); + } + } + } + + llvm::ArrayRef childrenRef_LC_Exp_dx = + clad_compat::makeArrayRef(children_LC_Exp_dx.data(), children_LC_Exp_dx.size()); + + LSI->CallOperator->getParent()->setCaptures(m_Context, childrenRef_LC_Exp_dx); + m_Sema.buildLambdaScope(LSI, //bodyV.getStmt_dx(), - LE->getCallOperator(), + LSI->CallOperator, LE->getIntroducerRange(), LE->getCaptureDefault(), LE->getCaptureDefaultLoc(), LE->hasExplicitParameters(), LE->hasExplicitResultType(), LE->isMutable()); - - FunctionDecl *FD = LSI->CallOperator->getAsFunction(); - FD->setBody(bodyV.getStmt_dx()); - clang::Expr* lambda = m_Sema.BuildLambdaExpr(noLoc, noLoc, LSI).get(); + + //for (auto Var : children_Exp_dx) { + // auto VD = dyn_cast(dyn_cast(Var)->getDecl()); + // LSI->addCapture(VD, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), + // /*isNested=*/false, noLoc, SourceLocation(), + // VD->getType(), /*Invalid=*/false); + //} + + //std::cout << "dump 2 LSI->CallOperator->getParent()" << std::endl; + //LSI->CallOperator->getParent()->dump(); + //std::cout << "LSI->CallOperator->getParent()->capture_size() = " << LSI->CallOperator->getParent()->capture_size() << std::endl; + + //clang::Expr* lambda = m_Sema.BuildLambdaExpr(noLoc, noLoc, LSI).get(); endScope(); + //std::cout << "dump LSI->Lambda" << std::endl; + //LSI->Lambda->dump(); - auto reverseLambdaClassNew = dyn_cast(dyn_cast(lambda)->getSubExpr())->getLambdaClass(); + //auto reverseLambdaClassNew = dyn_cast(dyn_cast(lambda)->getSubExpr())->getLambdaClass(); reverseLE = LambdaExpr::Create(m_Context, - reverseLambdaClassNew, + LSI->Lambda, LE->getIntroducerRange(), LE->getCaptureDefault(), LE->getCaptureDefaultLoc(), From aa6a1047c8c61a0d3880ed84b839aad46bfe53fa Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 9 Jan 2024 10:18:12 -0700 Subject: [PATCH 40/75] try the isInsideLoop --- lib/Differentiator/ReverseModeVisitor.cpp | 70 +++++++---------------- 1 file changed, 22 insertions(+), 48 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 88e7cd88d..cc85312af 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -774,6 +774,10 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, StmtDiff ReverseModeVisitor::VisitLambdaExpr(const clang::LambdaExpr* LE) { + // Save the isInsideLoop value (we may be inside another loop). + //llvm::SaveAndRestore SaveIsInsideLoop(isInsideLoop); + //isInsideLoop = true; + const Stmt* body = LE->getBody(); //Stmt* reverseBody auto bodyV = Visit(body); @@ -832,39 +836,26 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, m_Sema.ActOnStartOfLambdaDefinition(Intro, D, clad_compat::Sema_ActOnStartOfLambdaDefinition_ScopeOrDeclSpec(getCurrentScope(), DS)); - //beginBlock(); - ////addToCurrentBlock(bodyV.getStmt_dx()); - ////addToCurrentBlock(LE->getCallOperator()); - //clang::CompoundStmt* body_tmp = endBlock(); - //clang::Expr* lambda = - // m_Sema.ActOnLambdaExpr(noLoc, bodyV.getStmt_dx(), getCurrentScope()).get(); - //endScope(); - //return {forwardLE, forwardLE}; - // LSI->addCapture(Var, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), - // /*isNested=*/false, noLoc, SourceLocation(), - // Var->getType(), /*Invalid=*/false); - - //for (auto Var : childrenRef_Exp) - // LSI.addCapture(Var, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), - // /*isNested=*/false, noLoc, SourceLocation(), - // Var->getType(), /*Invalid=*/false); - - - //LSI->Lambda = forwardLambdaClass; - + // needed for CLAD_COMPAT_CLANG10_FunctionDecl_Create_ExtraParams + //clad::VisitorBase& VB = *this; + // + //LSI->CallOperator = CXXMethodDecl::Create(m_Context, LE->getCallOperator()->getParent(), + // noLoc, + // DNI, + // LE->getCallOperator()->getType(), LE->getCallOperator()->getTypeSourceInfo(), + // LE->getCallOperator()->getStorageClass(), + // LE->getCallOperator()->isInlineSpecified(), + // clad_compat::Function_GetConstexprKind(LE->getCallOperator()), + // noLoc + // CLAD_COMPAT_CLANG10_FunctionDecl_Create_ExtraParams(LE->getCallOperator()->getTrailingRequiresClause())); + + + // This will replace the calloperator of the forward mode in the AST LSI->CallOperator = LE->getCallOperator(); FunctionDecl *FD = LSI->CallOperator->getAsFunction(); FD->setBody(bodyV.getStmt_dx()); - //std::cout << "dump LE->getCallOperator()->getParent()" << std::endl; - //LE->getCallOperator()->getParent()->dump(); - //std::cout << "LE->getCallOperator()->getParent()->capture_size() = " << LE->getCallOperator()->getParent()->capture_size() << std::endl; - - //std::cout << "dump LSI->CallOperator->getParent()" << std::endl; - //LSI->CallOperator->getParent()->dump(); - //std::cout << "LSI->CallOperator->getParent()->capture_size() = " << LSI->CallOperator->getParent()->capture_size() << std::endl; - std::vector children_LC_Exp_dx; @@ -899,24 +890,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, LE->hasExplicitResultType(), LE->isMutable()); - //for (auto Var : children_Exp_dx) { - // auto VD = dyn_cast(dyn_cast(Var)->getDecl()); - // LSI->addCapture(VD, /*isBlock=*/false, forwardLambdaClass->getLambdaCaptureDefault (), - // /*isNested=*/false, noLoc, SourceLocation(), - // VD->getType(), /*Invalid=*/false); - //} - - //std::cout << "dump 2 LSI->CallOperator->getParent()" << std::endl; - //LSI->CallOperator->getParent()->dump(); - //std::cout << "LSI->CallOperator->getParent()->capture_size() = " << LSI->CallOperator->getParent()->capture_size() << std::endl; - - //clang::Expr* lambda = m_Sema.BuildLambdaExpr(noLoc, noLoc, LSI).get(); - endScope(); - - //std::cout << "dump LSI->Lambda" << std::endl; - //LSI->Lambda->dump(); - - //auto reverseLambdaClassNew = dyn_cast(dyn_cast(lambda)->getSubExpr())->getLambdaClass(); reverseLE = LambdaExpr::Create(m_Context, LSI->Lambda, LE->getIntroducerRange(), @@ -928,6 +901,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, LE->getEndLoc(), false); + endScope(); } return {forwardLE, reverseLE}; @@ -1883,8 +1857,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (const auto* BTE = dyn_cast(arg)) arg = BTE->getSubExpr(); - if (isa(arg)) - std::cout << " is a lambda " << std::endl; + //if (isa(arg)) + // std::cout << " is a lambda " << std::endl; auto visitedArg = Visit(arg); ClonedArgs.push_back(visitedArg.getExpr()); From f0448e5a03d047818cfb7e25bdcc3f364a798e8d Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 9 Jan 2024 10:51:27 -0700 Subject: [PATCH 41/75] Use a 2 steps compilation process --- kokkos/CMakeLists.txt | 6 ++++++ kokkos/functor_for.hpp | 7 ++++++- kokkos/generated/Derivatives.hpp | 8 ++++---- kokkos/main.cpp | 1 - kokkos/postProcess.py | 13 +++++++++---- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/kokkos/CMakeLists.txt b/kokkos/CMakeLists.txt index af0099265..4e2025d8f 100644 --- a/kokkos/CMakeLists.txt +++ b/kokkos/CMakeLists.txt @@ -2,6 +2,12 @@ cmake_minimum_required(VERSION 3.16.3) project(clad_example) +option(USE_GENERATED_FILE "If" OFF) + +IF(USE_GENERATED_FILE) + add_compile_definitions(use_generated_file) +ENDIF() + add_executable ( clad_example main.cpp ) find_package(Kokkos REQUIRED) set (CMAKE_CXX_STANDARD 17) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 7dc6b6f26..ef2bd1f66 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -63,6 +63,11 @@ void f_view_2(ViewtypeA a, double tmp) { Kokkos::deep_copy(a, tmp); } +template +KOKKOS_INLINE_FUNCTION +T pow(T a) { + return a*a; +} double f(double x, double y) { @@ -89,7 +94,7 @@ double f(double x, double y) { }); Kokkos::parallel_for( a.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { - a(j1,0) += b(j1+1,0)*6.89 + b(j1,1); + a(j1,0) += b(j1+1,0)*6.89 + b(j1,1);// + pow(b(j1,1)); }); double sum; diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index ee99ae5cb..f7198a19b 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -113,16 +113,16 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref void f_multilevel_grad(type_x x, type_x _d_x) { + bool _cond0; + const int n_max = 10; + _cond0 = x.extent(0) > n_max; + const int n = _cond0 ? n_max : x.extent(0); typename type_x::value_type _d_sum = 0; type_x _d_y("_d_y", x.extent(0)); double _t0; double _t1; int _d_n_max = 0; - bool _cond0; int _d_n = 0; - const int n_max = 10; - _cond0 = x.extent(0) > n_max; - const int n = _cond0 ? n_max : x.extent(0); auto _d_x_n_rows = Kokkos::subview(_d_x, Kokkos::make_pair(0, n)); typename type_x::value_type sum; type_x y("y", x.extent(0)); diff --git a/kokkos/main.cpp b/kokkos/main.cpp index 7e6d01173..715ec2519 100644 --- a/kokkos/main.cpp +++ b/kokkos/main.cpp @@ -6,7 +6,6 @@ #include "lambda_reduction_subview.hpp" #include -//#define use_generated_file //#define use_forward_mode #ifdef use_generated_file diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py index 3b2bf7129..0f2ae78a3 100644 --- a/kokkos/postProcess.py +++ b/kokkos/postProcess.py @@ -46,20 +46,23 @@ def getFunctionLineIDs(linesIn, fucntionName): return index0, index1 def getVariableDeclLineID(linesIn, variableName, index0, index1): + for index in range(index0, index1): + if linesIn[index].find('=') == -1 and linesIn[index].find(' ' + variableName + ';') != -1: + return index for index in range(index0, index1): if linesIn[index].find(' ' + variableName + ' =') != -1: return index return 0 -def swapLinesForVariableDecl(linesIn, fucntionName, variableName, index0=-1, index1=-1): +def swapLinesForVariableDecl(linesIn, fucntionName, variableName, numLines=1, index0=-1, index1=-1): if index0 == -1 or index1 == -1: index0, index1 = getFunctionLineIDs(linesIn, fucntionName) indexVar = getVariableDeclLineID(linesIn, variableName, index0, index1) - tmpLine = linesIn[indexVar] + tmpLine = linesIn[indexVar:indexVar+numLines] for index in range(0, indexVar-index0): - linesIn[indexVar-index] = linesIn[indexVar-index-1] - linesIn[index0+1] = tmpLine + linesIn[indexVar-index+numLines-1] = linesIn[indexVar-index-1] + linesIn[index0+1:index0+1+numLines] = tmpLine def getType(linesIn, fucntionName, variableName, index0=-1, index1=-1): @@ -127,6 +130,8 @@ def transform(filenameIn, filenameOut): fileOut = open(filenameOut, "w") swapLinesForVariableDecl(linesIn, 'f_grad', 'N1') + swapLinesForVariableDecl(linesIn, 'f_multilevel_grad', 'n_max', 3) + swapLinesForVariableDecl(linesIn, 'f_multilevel_grad', '_cond0') for i in range(0, len(linesIn)): linesIn[i] = replaceKokkosInlineFunction(linesIn[i]) From 492536d1d47f20f5dde3815f1980b0871c08a40c Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 9 Jan 2024 16:12:28 -0700 Subject: [PATCH 42/75] Start working on KokkosViewAccessVisitor --- .../Differentiator/KokkosViewAccessVisitor.h | 56 +++++++++++++++++++ .../clad/Differentiator/ReverseModeVisitor.h | 2 + lib/Differentiator/ReverseModeVisitor.cpp | 17 +++++- 3 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 include/clad/Differentiator/KokkosViewAccessVisitor.h diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h new file mode 100644 index 000000000..41c17cc0a --- /dev/null +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -0,0 +1,56 @@ +//--------------------------------------------------------------------*- C++ -*- +// clad - the C++ Clang-based Automatic Differentiator +// version: $Id: ClangPlugin.cpp 7 2013-06-01 22:48:03Z v.g.vassilev@gmail.com $ +// author: Vassil Vassilev +//------------------------------------------------------------------------------ + +#ifndef CLAD_KOKKOS_VIEW_ACCESS_VISITOR_H +#define CLAD_KOKKOS_VIEW_ACCESS_VISITOR_H + +namespace clad { + + class KokkosViewAccessVisitor { + public: + KokkosViewAccessVisitor (){} + + void Visit(const clang::Stmt *Node) { + if (llvm::isa(Node)) { + auto OCE = llvm::dyn_cast(Node); + + std::string constructedTypeName = OCE->getDirectCallee()->getQualifiedNameAsString(); + + if(constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("::operator()") != std::string::npos) { + //std::cout << "This can be a Kokkos view access " << std::endl; + view_access.push_back(OCE); + } + } + if (llvm::isa(Node)) { + auto DRE = llvm::dyn_cast(Node); + auto VD = llvm::dyn_cast(DRE->getDecl()); + + if(VD) { + std::string constructedTypeName = clang::QualType::getAsString(VD->getType().split(), clang::PrintingPolicy{ {} }); + if (utils::IsKokkosView(constructedTypeName)) { + std::string name = DRE->getNameInfo().getName().getAsString(); + + if(!std::count(view_names.begin(), view_names.end(), name)) { + view_names.push_back(name); + } + } + return; + } + } + + for (const clang::Stmt *SubStmt : Node->children()) + Visit(SubStmt); + } + + private: + + std::vector view_names; + std::vector view_access; + }; +} // end namespace clad + + +#endif // CLAD_KOKKOS_VIEW_ACCESS_VISITOR_H diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index b148aa106..3f9a6ddb3 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -56,6 +56,8 @@ namespace clad { std::set m_ToBeRecorded; /// A flag indicating if the Stmt we are currently visiting is inside loop. bool isInsideLoop = false; + /// A flag indicating if the Stmt we are currently visiting is inside a parallel region. + bool isInsideParallelRegion = false; /// Output variable of vector-valued function std::string outputArrayStr; std::vector m_LoopBlock; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index cc85312af..71dcaa84f 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -39,6 +39,8 @@ #include +#include "clad/Differentiator/KokkosViewAccessVisitor.h" + using namespace clang; namespace clad { @@ -774,9 +776,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, StmtDiff ReverseModeVisitor::VisitLambdaExpr(const clang::LambdaExpr* LE) { - // Save the isInsideLoop value (we may be inside another loop). - //llvm::SaveAndRestore SaveIsInsideLoop(isInsideLoop); - //isInsideLoop = true; + llvm::SaveAndRestore SaveIsInsideParallelRegion(isInsideParallelRegion); + isInsideParallelRegion = true; const Stmt* body = LE->getBody(); //Stmt* reverseBody @@ -3599,6 +3600,16 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, assert(E && "must be provided, otherwise use DelayedGlobalStoreAndRef"); if (!force && !UsefulToStoreGlobal(E)) return {E, E}; + + if (isInsideParallelRegion) { + auto kVAV = KokkosViewAccessVisitor(); + kVAV.Visit(E); + + auto CladTape = MakeCladTapeFor(E); + Expr* Push = CladTape.Push; + Expr* Pop = CladTape.Pop; + return {Push, Pop}; + } auto pushPop = BuildPushPop(E, Type, prefix, force); if (!isInsideLoop) { From db084ee34f696e01fedc772dd849dd6f16981af9 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 10 Jan 2024 10:55:36 -0700 Subject: [PATCH 43/75] Precompute which views need to be recorded --- .../Differentiator/KokkosViewAccessVisitor.h | 26 ++++++++++--------- lib/Differentiator/ReverseModeVisitor.cpp | 18 ++++++++++++- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index 41c17cc0a..6c5bf5dd7 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -13,22 +13,26 @@ namespace clad { public: KokkosViewAccessVisitor (){} - void Visit(const clang::Stmt *Node) { - if (llvm::isa(Node)) { - auto OCE = llvm::dyn_cast(Node); + void Visit(const clang::Stmt *Node, double record_view_names = false) { + if (llvm::isa(Node)) { + if (llvm::isa(Node)) { + auto OCE = llvm::dyn_cast(Node); - std::string constructedTypeName = OCE->getDirectCallee()->getQualifiedNameAsString(); + std::string constructedTypeName = OCE->getDirectCallee()->getQualifiedNameAsString(); - if(constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("::operator()") != std::string::npos) { - //std::cout << "This can be a Kokkos view access " << std::endl; - view_access.push_back(OCE); + if(constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("::operator()") != std::string::npos) { + view_accesses.push_back(OCE); + } + } + else { + record_view_names = true; } } if (llvm::isa(Node)) { auto DRE = llvm::dyn_cast(Node); auto VD = llvm::dyn_cast(DRE->getDecl()); - if(VD) { + if(VD && record_view_names) { std::string constructedTypeName = clang::QualType::getAsString(VD->getType().split(), clang::PrintingPolicy{ {} }); if (utils::IsKokkosView(constructedTypeName)) { std::string name = DRE->getNameInfo().getName().getAsString(); @@ -42,13 +46,11 @@ namespace clad { } for (const clang::Stmt *SubStmt : Node->children()) - Visit(SubStmt); + Visit(SubStmt, record_view_names); } - - private: std::vector view_names; - std::vector view_access; + std::vector view_accesses; }; } // end namespace clad diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 71dcaa84f..b5f0e1447 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -780,6 +780,22 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, isInsideParallelRegion = true; const Stmt* body = LE->getBody(); + + auto kVAV = KokkosViewAccessVisitor(); + kVAV.Visit(body, false); + + std::cout << "This Lambda access those views "; + for (auto view_name : kVAV.view_names) { + std::cout << view_name << " "; + } + std::cout << std::endl; + + //std::cout << "This Lambda has those accesses "<< std::endl; + //for (auto view_access : kVAV.view_accesses) { + // view_access->dump(); + //} + //std::cout << std::endl; + //Stmt* reverseBody auto bodyV = Visit(body); @@ -3603,7 +3619,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (isInsideParallelRegion) { auto kVAV = KokkosViewAccessVisitor(); - kVAV.Visit(E); + kVAV.Visit(E, true); auto CladTape = MakeCladTapeFor(E); Expr* Push = CladTape.Push; From 1bf65b1132ab8c062aa2084c788d786ea3c2249d Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 10 Jan 2024 13:24:17 -0700 Subject: [PATCH 44/75] Copy the required views and restore them before the reverse pass --- .../Differentiator/KokkosViewAccessVisitor.h | 2 + lib/Differentiator/ReverseModeVisitor.cpp | 54 ++++++++++++------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index 6c5bf5dd7..6c5d366b5 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -39,6 +39,7 @@ namespace clad { if(!std::count(view_names.begin(), view_names.end(), name)) { view_names.push_back(name); + view_DeclRefExpr.push_back(DRE); } } return; @@ -50,6 +51,7 @@ namespace clad { } std::vector view_names; + std::vector view_DeclRefExpr; std::vector view_accesses; }; } // end namespace clad diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index b5f0e1447..c1fc8c8ba 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -781,15 +781,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, const Stmt* body = LE->getBody(); - auto kVAV = KokkosViewAccessVisitor(); - kVAV.Visit(body, false); - - std::cout << "This Lambda access those views "; - for (auto view_name : kVAV.view_names) { - std::cout << view_name << " "; - } - std::cout << std::endl; - //std::cout << "This Lambda has those accesses "<< std::endl; //for (auto view_access : kVAV.view_accesses) { // view_access->dump(); @@ -1859,6 +1850,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (FD->getQualifiedNameAsString().find("Kokkos::parallel_for") != std::string::npos) { llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; + + auto kVAV = KokkosViewAccessVisitor(); + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { //std::cout << "Start CE->getArg("<dump()" << std::endl; //CE->getArg(i)->dump(); @@ -1874,8 +1868,36 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (const auto* BTE = dyn_cast(arg)) arg = BTE->getSubExpr(); - //if (isa(arg)) - // std::cout << " is a lambda " << std::endl; + if (isa(arg)) { + kVAV.Visit(dyn_cast(arg)->getBody(), false); + + for (auto DRE : kVAV.view_DeclRefExpr) { + VarDecl* recordedView = BuildVarDecl(DRE->getType(), "_t", const_cast(DRE), /*DirectInit=*/true); + addToCurrentBlock(BuildDeclStmt(recordedView), direction::forward); + + + Expr* kokkos_deep_copy = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "deep_copy"); + + //llvm::SmallVector ClonedDCArgs; + llvm::SmallVector ClonedDDCArgs; + + //ClonedDCArgs.push_back(BuildDeclRef(recordedView)); + //ClonedDCArgs.push_back(const_cast(DRE)); + + ClonedDDCArgs.push_back(const_cast(DRE)); + ClonedDDCArgs.push_back(BuildDeclRef(recordedView)); + + //Expr* CallDC = + // m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDCArgs, noLoc).get(); + + Expr* CallDDC = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDDCArgs, noLoc).get(); + + //addToCurrentBlock(CallDC, direction::forward); + addToCurrentBlock(CallDDC, direction::reverse); + } + } + auto visitedArg = Visit(arg); ClonedArgs.push_back(visitedArg.getExpr()); @@ -3616,15 +3638,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, assert(E && "must be provided, otherwise use DelayedGlobalStoreAndRef"); if (!force && !UsefulToStoreGlobal(E)) return {E, E}; - - if (isInsideParallelRegion) { - auto kVAV = KokkosViewAccessVisitor(); - kVAV.Visit(E, true); - auto CladTape = MakeCladTapeFor(E); - Expr* Push = CladTape.Push; - Expr* Pop = CladTape.Pop; - return {Push, Pop}; + if (isInsideParallelRegion) { + return {E, E}; } auto pushPop = BuildPushPop(E, Type, prefix, force); From 6ee305429f2b269f08075c5a08ffbf8d9ae9eee9 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 10 Jan 2024 13:24:39 -0700 Subject: [PATCH 45/75] update the example update the example --- kokkos/functor_for.hpp | 2 +- kokkos/generated/Derivatives.cpp | 55 ++++++++++++++++++++++---------- kokkos/generated/Derivatives.hpp | 55 ++++++++++++++++++++++---------- 3 files changed, 79 insertions(+), 33 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index ef2bd1f66..2d2f55749 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -94,7 +94,7 @@ double f(double x, double y) { }); Kokkos::parallel_for( a.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { - a(j1,0) += b(j1+1,0)*6.89 + b(j1,1);// + pow(b(j1,1)); + a(j1,0) += b(j1+1,0)*6.89 + b(j1,1) + pow(b(j1,1) + b(j1+1,0)); }); double sum; diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 5317154d0..421416c50 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -1,3 +1,17 @@ +inline void pow_pullback(double a, double _d_y, clad::array_ref _d_a) { + double _t0; + double _t1; + _t1 = a; + _t0 = a; + goto _label0; + _label0: + { + double _r0 = _d_y * _t0; + * _d_a += _r0; + double _r1 = _t1 * _d_y; + * _d_a += _r1; + } +} void f_view_pullback(Kokkos::View, Kokkos::MemoryTraits<0> > a, typename View, MemoryTraits<0> >::value_type _d_y, clad::array_ref, MemoryTraits<0> > > _d_a) { typename View, MemoryTraits<0> >::value_type _d_sum = 0; Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); @@ -35,7 +49,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); - Kokkos::View, MemoryTraits<0> > _t5; + Kokkos::View, MemoryTraits<0> > _t6; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -52,29 +66,38 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _t5 = b; Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1); + a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + b(j1 + 1, 0)); }); double sum; Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - _t5 = a_row_0; + _t6 = a_row_0; goto _label0; _label0: { - f_view_pullback(_t5, 1, &_d_a_row_0); - Kokkos::View, MemoryTraits<0> > _r6 = _d_a_row_0; + f_view_pullback(_t6, 1, &_d_a_row_0); + Kokkos::View, MemoryTraits<0> > _r7 = _d_a_row_0; + } + { + Kokkos::deep_copy(b, _t5); + Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { + { + double _r_d1 = _d_a(j1, 0); + _d_a(j1, 0) += _r_d1; + double _r5 = _r_d1 * 6.8899999999999997; + _d_b(j1 + 1, 0) += _r5; + _d_b(j1, 1) += _r_d1; + double _grad1 = 0.; + pow_pullback(b(j1, 1) + b(j1 + 1, 0), _r_d1, &_grad1); + double _r6 = _grad1; + _d_b(j1, 1) += _r6; + _d_b(j1 + 1, 0) += _r6; + _d_a(j1, 0) -= _r_d1; + _d_a(j1, 0); + } + }); } - Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { - { - double _r_d1 = _d_a(j1, 0); - _d_a(j1, 0) += _r_d1; - double _r5 = _r_d1 * 6.8899999999999997; - _d_b(j1 + 1, 0) += _r5; - _d_b(j1, 1) += _r_d1; - _d_a(j1, 0) -= _r_d1; - _d_a(j1, 0); - } - }); Kokkos::parallel_for(b.extent(0), [=](const int j0) { { double _r_d0 = _d_b(j0, 0); diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index f7198a19b..7e1629649 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -1,3 +1,17 @@ +KOKKOS_INLINE_FUNCTION void pow_pullback(double a, double _d_y, clad::array_ref _d_a) { + double _t0; + double _t1; + _t1 = a; + _t0 = a; + goto _label0; + _label0: + { + double _r0 = _d_y * _t0; + * _d_a += _r0; + double _r1 = _t1 * _d_y; + * _d_a += _r1; + } +} template void f_view_pullback(type_a a, typename type_a::value_type _d_y, type_a _d_a) { typename type_a::value_type _d_sum = 0; @@ -37,7 +51,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, Kokkos::MemoryTraits<0> > _t5; + Kokkos::View, Kokkos::MemoryTraits<0> > _t6; const int N2 = 4; Kokkos::View a("a", N1); Kokkos::View b("b", N1); @@ -53,29 +67,38 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _t5 = b; Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1); + a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + b(j1 + 1, 0)); }); double sum; auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); - _t5 = a_row_0; + _t6 = a_row_0; goto _label0; _label0: { - f_view_pullback(_t5, 1, _d_a_row_0); - Kokkos::View, Kokkos::MemoryTraits<0> > _r6 = _d_a_row_0; + f_view_pullback(_t6, 1, _d_a_row_0); + Kokkos::View, Kokkos::MemoryTraits<0> > _r7 = _d_a_row_0; + } + { + Kokkos::deep_copy(b, _t5); + Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { + { + double _r_d1 = _d_a(j1, 0); + _d_a(j1, 0) += _r_d1; + double _r5 = _r_d1 * 6.8899999999999997; + _d_b(j1 + 1, 0) += _r5; + _d_b(j1, 1) += _r_d1; + double _grad1 = 0.; + pow_pullback(b(j1, 1) + b(j1 + 1, 0), _r_d1, &_grad1); + double _r6 = _grad1; + _d_b(j1, 1) += _r6; + _d_b(j1 + 1, 0) += _r6; + _d_a(j1, 0) -= _r_d1; + _d_a(j1, 0); + } + }); } - Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - { - double _r_d1 = _d_a(j1, 0); - _d_a(j1, 0) += _r_d1; - double _r5 = _r_d1 * 6.8899999999999997; - _d_b(j1 + 1, 0) += _r5; - _d_b(j1, 1) += _r_d1; - _d_a(j1, 0) -= _r_d1; - _d_a(j1, 0); - } - }); Kokkos::parallel_for(b.extent(0), KOKKOS_LAMBDA(const int j0) { { double _r_d0 = _d_b(j0, 0); From c80fc4e540680e806f03123464a7867d932c9360 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 11 Jan 2024 07:57:51 -0700 Subject: [PATCH 46/75] Update DelayedStoreResult --- .../clad/Differentiator/ReverseModeVisitor.h | 7 +++-- lib/Differentiator/ReverseModeVisitor.cpp | 27 ++++++++++++++++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index 3f9a6ddb3..bdf41d34f 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -271,11 +271,14 @@ namespace clad { bool isConstant; bool isInsideLoop; bool needsUpdate; + bool isInsideParallelRegion; DelayedStoreResult(ReverseModeVisitor& pV, StmtDiff pResult, bool pIsConstant, bool pIsInsideLoop, - bool pNeedsUpdate = false) + bool pNeedsUpdate = false, + bool pIsInsideParallelRegion = false) : V(pV), Result(pResult), isConstant(pIsConstant), - isInsideLoop(pIsInsideLoop), needsUpdate(pNeedsUpdate) {} + isInsideLoop(pIsInsideLoop), needsUpdate(pNeedsUpdate), + isInsideParallelRegion(pIsInsideParallelRegion) {} void Finalize(clang::Expr* New); }; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index c1fc8c8ba..699fbca6e 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -3713,7 +3713,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } void ReverseModeVisitor::DelayedStoreResult::Finalize(Expr* New) { - if (isConstant || !needsUpdate) + if (isConstant || isInsideParallelRegion || !needsUpdate) return; if (isInsideLoop) { auto* Push = cast(Result.getExpr()); @@ -3737,7 +3737,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return DelayedStoreResult{*this, Ediff, /*isConstant*/ isConst, /*isInsideLoop*/ false, - /*pNeedsUpdate=*/false}; + /*pNeedsUpdate=*/false, + /*isInsideParallelRegion*/ false}; } if (isInsideLoop) { Expr* dummy = E; @@ -3746,7 +3747,27 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, Expr* Pop = CladTape.Pop; return DelayedStoreResult{*this, StmtDiff{Push, nullptr, nullptr, Pop}, /*isConstant*/ false, - /*isInsideLoop*/ true, /*pNeedsUpdate=*/true}; + /*isInsideLoop*/ true, + /*pNeedsUpdate=*/ false, + /*isInsideParallelRegion*/ false}; + } else if (isInsideParallelRegion) { + Expr* Cloned = Clone(E); + return DelayedStoreResult{*this, + StmtDiff{Cloned, Cloned}, + /*isConstant*/ false, + /*isInsideLoop*/ false, + /*pNeedsUpdate=*/ false, + /*isInsideParallelRegion*/ true}; + } else { + Expr* Ref = BuildDeclRef(GlobalStoreImpl( + getNonConstType(E->getType(), m_Context, m_Sema), prefix)); + // Return reference to the declaration instead of original expression. + return DelayedStoreResult{*this, + StmtDiff{Ref, Ref}, + /*isConstant*/ false, + /*isInsideLoop*/ false, + /*pNeedsUpdate=*/ true, + /*isInsideParallelRegion*/ false}; } Expr* Ref = BuildDeclRef(GlobalStoreImpl( getNonConstType(E->getType(), m_Context, m_Sema), prefix)); From 4d0050fde3f3db5155cd18863553d540bf24a9f7 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 11 Jan 2024 09:35:52 -0700 Subject: [PATCH 47/75] Update BuildVarDecl --- lib/Differentiator/VisitorBase.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/Differentiator/VisitorBase.cpp b/lib/Differentiator/VisitorBase.cpp index f2139bde3..0a707451a 100644 --- a/lib/Differentiator/VisitorBase.cpp +++ b/lib/Differentiator/VisitorBase.cpp @@ -137,7 +137,9 @@ namespace clad { TypeSourceInfo* TSI, VarDecl::InitializationStyle IS) { // add namespace specifier in variable declaration if needed. - Type = utils::AddNamespaceSpecifier(m_Sema, m_Context, Type); + // KL: Temporary comment the AddNamespaceSpecifier to avoid cases of + // "Kokkos::const View" + //Type = utils::AddNamespaceSpecifier(m_Sema, m_Context, Type); auto VD = VarDecl::Create(m_Context, m_Sema.CurContext, m_Function->getLocation(), m_Function->getLocation(), Identifier, Type, TSI, From a9c0b171d99e9ad00b8f274633b032d69550e13c Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 11 Jan 2024 08:03:19 -0700 Subject: [PATCH 48/75] update the example update the example update the example --- kokkos/functor_for.hpp | 14 ++--- kokkos/generated/Derivatives.cpp | 89 +++++++++++++++++++++----------- kokkos/generated/Derivatives.hpp | 77 ++++++++++++++++++--------- kokkos/main.cpp | 2 +- 4 files changed, 118 insertions(+), 64 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 2d2f55749..0301b0e57 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -43,17 +43,17 @@ typename ViewtypeX::value_type f_multilevel(ViewtypeX x) { }); Kokkos::parallel_for( x.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { - //if (j1 != x.extent(0)-1) // does not work yet - y(j1+1) = 2.6*x(j1); - //else - // y(j1) = 2.6*x(0); + if (j1 != x.extent(0)-1) + y(j1+1) = 2.6*x(j1)*x(j1); + else + y(j1) = 2.6*x(0)*x(0); }); const int n_max = 10; const int n = x.extent(0) > n_max ? n_max : x.extent(0); - auto x_n_rows = Kokkos::subview( x, Kokkos::make_pair(0, n)); - kokkos_builtin_derivative::parallel_sum(sum, x_n_rows); + auto y_n_rows = Kokkos::subview( y, Kokkos::make_pair(0, n)); + kokkos_builtin_derivative::parallel_sum(sum, y_n_rows); return sum; } @@ -94,7 +94,7 @@ double f(double x, double y) { }); Kokkos::parallel_for( a.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { - a(j1,0) += b(j1+1,0)*6.89 + b(j1,1) + pow(b(j1,1) + b(j1+1,0)); + a(j1,0) += b(j1+1,0)*6.89 + b(j1,1) + pow(b(j1,1) + a(j1,1) + b(j1+1,0) * b(j1+1,0) * a(j1,2)); }); double sum; diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index 421416c50..e6eb62eb9 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -14,12 +14,12 @@ inline void pow_pullback(double a, double _d_y, clad::array_ref _d_a) { } void f_view_pullback(Kokkos::View, Kokkos::MemoryTraits<0> > a, typename View, MemoryTraits<0> >::value_type _d_y, clad::array_ref, MemoryTraits<0> > > _d_a) { typename View, MemoryTraits<0> >::value_type _d_sum = 0; - Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); + Kokkos::View, Kokkos::MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); typename View, MemoryTraits<0> >::value_type _t0; typename View, MemoryTraits<0> >::value_type _t1; double _t2; typename View, MemoryTraits<0> >::value_type sum; - Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); + Kokkos::View, Kokkos::MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); _t1 = sum; _t2 = 9.9999999999999995E-7 * _t1; @@ -48,8 +48,8 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); - Kokkos::View, MemoryTraits<0> > _t6; + Kokkos::View, Kokkos::MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); + Kokkos::View, Kokkos::MemoryTraits<0> > _t7; const int N1 = 4; const int N2 = 4; Kokkos::View a("a", N1); @@ -67,20 +67,22 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _t5 = b; + const Kokkos::View _t6 = a; Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + b(j1 + 1, 0)); + a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); }); double sum; - Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - _t6 = a_row_0; + Kokkos::View, Kokkos::MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); + _t7 = a_row_0; goto _label0; _label0: { - f_view_pullback(_t6, 1, &_d_a_row_0); - Kokkos::View, MemoryTraits<0> > _r7 = _d_a_row_0; + f_view_pullback(_t7, 1, &_d_a_row_0); + Kokkos::View, Kokkos::MemoryTraits<0> > _r11 = _d_a_row_0; } { Kokkos::deep_copy(b, _t5); + Kokkos::deep_copy(a, _t6); Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { { double _r_d1 = _d_a(j1, 0); @@ -89,10 +91,17 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref x, clad::array_ref > _d_x) { typename View::value_type _d_sum = 0; Kokkos::View _d_y("_d_y", x.extent(0)); - double _t0; - double _t1; int _d_n_max = 0; bool _cond0; int _d_n = 0; - Kokkos::View, MemoryTraits<0> > _d_x_n_rows = Kokkos::subview((* _d_x), Kokkos::make_pair(0, n)); + Kokkos::View, Kokkos::MemoryTraits<0> > _d_y_n_rows = Kokkos::subview(_d_y, Kokkos::make_pair(0, n)); typename View::value_type sum; Kokkos::View y("y", x.extent(0)); Kokkos::parallel_for(x.extent(0), [=](const int j0) { x(j0) = 3 * x(j0); }); + const Kokkos::View _t0 = x; Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { - y(j1 + 1) = 2.6000000000000001 * x(j1); + if (j1 != x.extent(0) - 1) + y(j1 + 1) = 2.6000000000000001 * x(j1) * x(j1); + else + y(j1) = 2.6000000000000001 * x(0) * x(0); }); const int n_max = 10; _cond0 = x.extent(0) > n_max; const int n = _cond0 ? n_max : x.extent(0); - Kokkos::View, MemoryTraits<0> > x_n_rows = Kokkos::subview(x, Kokkos::make_pair(0, n)); - kokkos_builtin_derivative::parallel_sum(sum, x_n_rows); + Kokkos::View, Kokkos::MemoryTraits<0> > y_n_rows = Kokkos::subview(y, Kokkos::make_pair(0, n)); + kokkos_builtin_derivative::parallel_sum(sum, y_n_rows); goto _label0; _label0: _d_sum += 1; - kokkos_builtin_derivative::parallel_sum(_d_x_n_rows, _d_sum); + kokkos_builtin_derivative::parallel_sum(_d_y_n_rows, _d_sum); if (_cond0) _d_n_max += _d_n; - Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { - { - double _r_d1 = _d_y(j1 + 1); - double _r2 = _r_d1 * _t1; - double _r3 = 2.6000000000000001 * _r_d1; - (* _d_x)(j1) += _r3; - _d_y(j1 + 1) -= _r_d1; - _d_y(j1 + 1); - } - }); + { + Kokkos::deep_copy(x, _t0); + Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { + if (j1 != x.extent(0) - 1) { + double _r_d1 = _d_y(j1 + 1); + double _r2 = _r_d1 * x(j1); + double _r3 = _r2 * x(j1); + double _r4 = 2.6000000000000001 * _r2; + (* _d_x)(j1) += _r4; + double _r5 = 2.6000000000000001 * x(j1) * _r_d1; + (* _d_x)(j1) += _r5; + _d_y(j1 + 1) -= _r_d1; + _d_y(j1 + 1); + } else { + double _r_d2 = _d_y(j1); + double _r6 = _r_d2 * x(0); + double _r7 = _r6 * x(0); + double _r8 = 2.6000000000000001 * _r6; + (* _d_x)(0) += _r8; + double _r9 = 2.6000000000000001 * x(0) * _r_d2; + (* _d_x)(0) += _r9; + _d_y(j1) -= _r_d2; + _d_y(j1); + } + }); + } Kokkos::parallel_for(x.extent(0), [=](const int j0) { { double _r_d0 = (* _d_x)(j0); - double _r0 = _r_d0 * _t0; + double _r0 = _r_d0 * x(j0); double _r1 = 3 * _r_d0; (* _d_x)(j0) += _r1; (* _d_x)(j0) -= _r_d0; @@ -184,12 +211,12 @@ void f_multilevel_grad(Kokkos::View x, clad::array_ref } void f_view_grad(Kokkos::View a, clad::array_ref > _d_a) { typename View::value_type _d_sum = 0; - Kokkos::View, MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); + Kokkos::View, Kokkos::MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); typename View::value_type _t0; typename View::value_type _t1; double _t2; typename View::value_type sum; - Kokkos::View, MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); + Kokkos::View, Kokkos::MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); kokkos_builtin_derivative::parallel_sum(sum, a_row_0); _t1 = sum; _t2 = 9.9999999999999995E-7 * _t1; diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index 7e1629649..4b4be253f 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -51,7 +51,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref, Kokkos::MemoryTraits<0> > _t6; + Kokkos::View, Kokkos::MemoryTraits<0> > _t7; const int N2 = 4; Kokkos::View a("a", N1); Kokkos::View b("b", N1); @@ -68,20 +68,22 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _t5 = b; + const Kokkos::View _t6 = a; Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + b(j1 + 1, 0)); + a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); }); double sum; auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); - _t6 = a_row_0; + _t7 = a_row_0; goto _label0; _label0: { - f_view_pullback(_t6, 1, _d_a_row_0); - Kokkos::View, Kokkos::MemoryTraits<0> > _r7 = _d_a_row_0; + f_view_pullback(_t7, 1, _d_a_row_0); + Kokkos::View, Kokkos::MemoryTraits<0> > _r11 = _d_a_row_0; } { Kokkos::deep_copy(b, _t5); + Kokkos::deep_copy(a, _t6); Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { { double _r_d1 = _d_a(j1, 0); @@ -90,10 +92,17 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref Date: Fri, 12 Jan 2024 08:42:02 -0700 Subject: [PATCH 49/75] Start working on the accesses per thread Start working on the accesses per thread --- .../Differentiator/KokkosViewAccessVisitor.h | 47 ++++++++++++++++++- .../clad/Differentiator/ReverseModeVisitor.h | 4 ++ lib/Differentiator/ReverseModeVisitor.cpp | 30 +++++++++--- 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index 6c5d366b5..8aba9b4d9 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -7,13 +7,15 @@ #ifndef CLAD_KOKKOS_VIEW_ACCESS_VISITOR_H #define CLAD_KOKKOS_VIEW_ACCESS_VISITOR_H +#include "clad/Differentiator/CladUtils.h" + namespace clad { class KokkosViewAccessVisitor { public: - KokkosViewAccessVisitor (){} + KokkosViewAccessVisitor (clang::Sema& _semaRef) : semaRef(_semaRef) {} - void Visit(const clang::Stmt *Node, double record_view_names = false) { + void Visit(const clang::Stmt *Node, bool record_view_names = false) { if (llvm::isa(Node)) { if (llvm::isa(Node)) { auto OCE = llvm::dyn_cast(Node); @@ -50,8 +52,49 @@ namespace clad { Visit(SubStmt, record_view_names); } + bool VisitViewAccess(const clang::Stmt *Node, const clang::ParmVarDecl *param) { + if (llvm::isa(Node)) { + auto VD = llvm::dyn_cast(Node)->getDecl(); + if (llvm::isa(VD)) { + if (VD == param) + return true; + } + } + bool tmp = false; + for (const clang::Stmt *SubStmt : Node->children()) + tmp = tmp || VisitViewAccess(SubStmt, param); + return tmp; + } + + bool VisitViewAccess(const clang::CXXOperatorCallExpr* view_access, std::vector params) { + bool use_all_params = true; + for (auto PVD : params) { + use_all_params = use_all_params && VisitViewAccess(view_access, PVD); + } + if (!use_all_params) { + unsigned diagID = semaRef.Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, "The view access does not use all the parameters of the lambda call; an atomic will be required for the reverse mode. "); + clang::Sema::SemaDiagnosticBuilder stream = semaRef.Diag(view_access->getBeginLoc(), diagID); + } + return use_all_params; + } + + void VisitViewAccesses(std::vector params) { + for (size_t i = 0; i < view_accesses.size(); ++i) { + view_accesses_is_thread_safe.push_back(VisitViewAccess(view_accesses[i], params)); + } + } + + void clear() { + view_names.clear(); + view_DeclRefExpr.clear(); + view_accesses_is_thread_safe.clear(); + view_accesses.clear(); + } + + clang::Sema& semaRef; std::vector view_names; std::vector view_DeclRefExpr; + std::vector view_accesses_is_thread_safe; std::vector view_accesses; }; } // end namespace clad diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index bdf41d34f..4186be440 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -11,6 +11,7 @@ #include "clad/Differentiator/VisitorBase.h" #include "clad/Differentiator/ReverseModeVisitorDirectionKinds.h" #include "clad/Differentiator/ParseDiffArgsTypes.h" +#include "clad/Differentiator/KokkosViewAccessVisitor.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtVisitor.h" #include "clang/Sema/Sema.h" @@ -44,6 +45,9 @@ namespace clad { /// Stack is used to pass the arguments (dfdx) to further nodes /// in the Visit method. std::stack m_Stack; + // Used to pass a Kokkos view access visitor to further nodes + /// in the Visit method. + clad::KokkosViewAccessVisitor * m_KVAV; /// A sequence of DeclStmts containing "tape" variable declarations /// that will be put immediately in the beginning of derivative function /// block. diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 699fbca6e..ae549346d 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -39,8 +39,6 @@ #include -#include "clad/Differentiator/KokkosViewAccessVisitor.h" - using namespace clang; namespace clad { @@ -104,7 +102,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } ReverseModeVisitor::ReverseModeVisitor(DerivativeBuilder& builder) - : VisitorBase(builder), m_Result(nullptr) {} + : VisitorBase(builder), m_Result(nullptr) { + m_KVAV = new KokkosViewAccessVisitor(m_Sema); + } ReverseModeVisitor::~ReverseModeVisitor() { if (m_ExternalSource) { @@ -115,6 +115,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, // Free the external sources multiplexer since we own this resource. delete m_ExternalSource; } + delete m_KVAV; } FunctionDecl* ReverseModeVisitor::CreateGradientOverload() { @@ -1634,6 +1635,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, .get(); if (dfdx()) { + //Expr* kokkos_atomic_add = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "atomic_add"); + //llvm::SmallVector AtomicAddArgs; + //AtomicAddArgs.push_back(BuildOp(UnaryOperatorKind::UO_AddrOf, dCall)); + //AtomicAddArgs.push_back(dfdx()); + //Expr* add_assign = + // m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_atomic_add, noLoc, AtomicAddArgs, noLoc).get(); Expr* add_assign = BuildOp(BO_AddAssign, dCall, dfdx()); addToCurrentBlock(add_assign, direction::reverse); } @@ -1851,8 +1858,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; - auto kVAV = KokkosViewAccessVisitor(); - for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { //std::cout << "Start CE->getArg("<dump()" << std::endl; //CE->getArg(i)->dump(); @@ -1869,9 +1874,10 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, arg = BTE->getSubExpr(); if (isa(arg)) { - kVAV.Visit(dyn_cast(arg)->getBody(), false); + m_KVAV->clear(); + m_KVAV->Visit(dyn_cast(arg)->getBody(), false); - for (auto DRE : kVAV.view_DeclRefExpr) { + for (auto DRE : m_KVAV->view_DeclRefExpr) { VarDecl* recordedView = BuildVarDecl(DRE->getType(), "_t", const_cast(DRE), /*DirectInit=*/true); addToCurrentBlock(BuildDeclStmt(recordedView), direction::forward); @@ -1896,6 +1902,16 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, //addToCurrentBlock(CallDC, direction::forward); addToCurrentBlock(CallDDC, direction::reverse); } + + auto CMD = dyn_cast(arg)->getCallOperator(); + + std::vector params; + + for (size_t iParam = 0; iParam < CMD->getNumParams(); ++iParam) { + params.push_back(CMD->getParamDecl(iParam)); + } + + m_KVAV->VisitViewAccesses(params); } From c023697924cc7026e5e60a5c80ef7c2654c744e8 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Fri, 12 Jan 2024 14:26:26 -0700 Subject: [PATCH 50/75] Use getBeginLoc to compare view accesses --- .../Differentiator/KokkosViewAccessVisitor.h | 14 +++++++++++++ lib/Differentiator/ReverseModeVisitor.cpp | 21 ++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index 8aba9b4d9..8e931b687 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -23,7 +23,9 @@ namespace clad { std::string constructedTypeName = OCE->getDirectCallee()->getQualifiedNameAsString(); if(constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("::operator()") != std::string::npos) { + view_accesses.push_back(OCE); + view_accesses_location.push_back(OCE->getBeginLoc()); } } else { @@ -84,11 +86,22 @@ namespace clad { } } + bool isAccessThreadSafe(const clang::CXXOperatorCallExpr* view_access) { + for (size_t i = 0; i < view_accesses_location.size(); ++i) { + if (view_access->getBeginLoc() == view_accesses_location[i]) + return view_accesses_is_thread_safe[i]; + } + unsigned diagID = semaRef.Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, "The view access has not been visited yet. "); + clang::Sema::SemaDiagnosticBuilder stream = semaRef.Diag(view_access->getBeginLoc(), diagID); + return false; + } + void clear() { view_names.clear(); view_DeclRefExpr.clear(); view_accesses_is_thread_safe.clear(); view_accesses.clear(); + view_accesses_location.clear(); } clang::Sema& semaRef; @@ -96,6 +109,7 @@ namespace clad { std::vector view_DeclRefExpr; std::vector view_accesses_is_thread_safe; std::vector view_accesses; + std::vector view_accesses_location; }; } // end namespace clad diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index ae549346d..fb0ff4ee6 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1635,14 +1635,19 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, .get(); if (dfdx()) { - //Expr* kokkos_atomic_add = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "atomic_add"); - //llvm::SmallVector AtomicAddArgs; - //AtomicAddArgs.push_back(BuildOp(UnaryOperatorKind::UO_AddrOf, dCall)); - //AtomicAddArgs.push_back(dfdx()); - //Expr* add_assign = - // m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_atomic_add, noLoc, AtomicAddArgs, noLoc).get(); - Expr* add_assign = BuildOp(BO_AddAssign, dCall, dfdx()); - addToCurrentBlock(add_assign, direction::reverse); + if (m_KVAV->isAccessThreadSafe(dyn_cast(Call))) { + Expr* add_assign = BuildOp(BO_AddAssign, dCall, dfdx()); + addToCurrentBlock(add_assign, direction::reverse); + } + else { + Expr* kokkos_atomic_add = utils::GetUnresolvedLookup(m_Sema, m_Context, "Kokkos", "atomic_add"); + llvm::SmallVector AtomicAddArgs; + AtomicAddArgs.push_back(BuildOp(UnaryOperatorKind::UO_AddrOf, dCall)); + AtomicAddArgs.push_back(dfdx()); + Expr* add_assign = + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_atomic_add, noLoc, AtomicAddArgs, noLoc).get(); + addToCurrentBlock(add_assign, direction::reverse); + } } return StmtDiff(Call, dCall); } From b9a7a76ce1db916fa8a953ff6070cc5f35a78407 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Fri, 12 Jan 2024 14:27:08 -0700 Subject: [PATCH 51/75] Update the example --- kokkos/generated/Derivatives.cpp | 4 ++-- kokkos/generated/Derivatives.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index e6eb62eb9..fcdf35de5 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -190,9 +190,9 @@ void f_multilevel_grad(Kokkos::View x, clad::array_ref double _r6 = _r_d2 * x(0); double _r7 = _r6 * x(0); double _r8 = 2.6000000000000001 * _r6; - (* _d_x)(0) += _r8; + Kokkos::atomic_add(&(* _d_x)(0), _r8); double _r9 = 2.6000000000000001 * x(0) * _r_d2; - (* _d_x)(0) += _r9; + Kokkos::atomic_add(&(* _d_x)(0), _r9); _d_y(j1) -= _r_d2; _d_y(j1); } diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp index 4b4be253f..836f60051 100644 --- a/kokkos/generated/Derivatives.hpp +++ b/kokkos/generated/Derivatives.hpp @@ -192,9 +192,9 @@ void f_multilevel_grad(type_x x, type_x _d_x) { double _r6 = _r_d2 * x(0); double _r7 = _r6 * x(0); double _r8 = 2.6000000000000001 * _r6; - _d_x(0) += _r8; + Kokkos::atomic_add(&_d_x(0), _r8); double _r9 = 2.6000000000000001 * x(0) * _r_d2; - _d_x(0) += _r9; + Kokkos::atomic_add(&_d_x(0), _r9); _d_y(j1) -= _r_d2; _d_y(j1); } From 09e12117a8102c376e5bdc971c8820d40627952b Mon Sep 17 00:00:00 2001 From: kliegeois Date: Fri, 12 Jan 2024 22:51:00 -0700 Subject: [PATCH 52/75] Start to implement the logic --- .../Differentiator/KokkosViewAccessVisitor.h | 60 ++++++++++++++++++- lib/Differentiator/ReverseModeVisitor.cpp | 2 +- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index 8e931b687..acba75e20 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -8,12 +8,13 @@ #define CLAD_KOKKOS_VIEW_ACCESS_VISITOR_H #include "clad/Differentiator/CladUtils.h" +//#include "clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp" namespace clad { class KokkosViewAccessVisitor { public: - KokkosViewAccessVisitor (clang::Sema& _semaRef) : semaRef(_semaRef) {} + KokkosViewAccessVisitor (clang::Sema& _semaRef, clang::ASTContext& _m_Context) : semaRef(_semaRef), m_Context(_m_Context) {} void Visit(const clang::Stmt *Node, bool record_view_names = false) { if (llvm::isa(Node)) { @@ -84,6 +85,62 @@ namespace clad { for (size_t i = 0; i < view_accesses.size(); ++i) { view_accesses_is_thread_safe.push_back(VisitViewAccess(view_accesses[i], params)); } + // Check if two view accesses could be similar for two different param + // instance. + for (size_t i = 0; i < view_accesses.size(); ++i) { + if (!view_accesses_is_thread_safe[i]) + continue; + std::string name_i = dyn_cast(view_accesses[i]->getArg(0))->getNameInfo().getName().getAsString(); + for (size_t j = i+1; j < view_accesses.size(); ++j) { + + // If the two views have different name, continue + std::string name_j = dyn_cast(view_accesses[j]->getArg(0))->getNameInfo().getName().getAsString(); + if (name_i != name_j) + continue; + + bool updated_thread_safe = true; + bool all_other_same_args = true; + + for (size_t i_arg = 1; i_arg < view_accesses[i]->getNumArgs(); ++i_arg) { + bool include_param = false; + for (auto PVD : params) { + if (VisitViewAccess(view_accesses[i]->getArg(i_arg), PVD) && VisitViewAccess(view_accesses[j]->getArg(i_arg), PVD)) { + include_param = true; + if (view_accesses[i]->getArg(i_arg) != view_accesses[j]->getArg(i_arg)) { + updated_thread_safe = false; + } + } + } + if (!include_param) { + if (view_accesses[i]->getArg(i_arg) != view_accesses[j]->getArg(i_arg)) { + all_other_same_args = false; + } + } + } + + if (all_other_same_args) { + if (view_accesses_is_thread_safe[i]) { + + if (!updated_thread_safe) { + { + unsigned diagID1 = semaRef.Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, + "The view access might not be thread safe in reverse mode -- continued"); + clang::Sema::SemaDiagnosticBuilder stream1 = semaRef.Diag(view_accesses[i]->getBeginLoc(), diagID1); + } + { + unsigned diagID2 = semaRef.Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, + "continued -- due to this view access; an atomic will be required for the reverse mode. "); + clang::Sema::SemaDiagnosticBuilder stream2 = semaRef.Diag(view_accesses[j]->getBeginLoc(), diagID2); + } + } + + view_accesses_is_thread_safe[i] = updated_thread_safe; + } + if (view_accesses_is_thread_safe[j]) + view_accesses_is_thread_safe[j] = updated_thread_safe; + } + } + } } bool isAccessThreadSafe(const clang::CXXOperatorCallExpr* view_access) { @@ -105,6 +162,7 @@ namespace clad { } clang::Sema& semaRef; + clang::ASTContext& m_Context; std::vector view_names; std::vector view_DeclRefExpr; std::vector view_accesses_is_thread_safe; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index fb0ff4ee6..70cf2572b 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -103,7 +103,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, ReverseModeVisitor::ReverseModeVisitor(DerivativeBuilder& builder) : VisitorBase(builder), m_Result(nullptr) { - m_KVAV = new KokkosViewAccessVisitor(m_Sema); + m_KVAV = new KokkosViewAccessVisitor(m_Sema, m_Context); } ReverseModeVisitor::~ReverseModeVisitor() { From b25d91c4690d47a75538e6f923d61f68bc1571a4 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Sat, 13 Jan 2024 13:11:15 -0700 Subject: [PATCH 53/75] Use isIdenticalStmt --- .../Differentiator/KokkosViewAccessVisitor.h | 200 +++++++++++++++++- 1 file changed, 197 insertions(+), 3 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index acba75e20..723e6ceff 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -8,10 +8,204 @@ #define CLAD_KOKKOS_VIEW_ACCESS_VISITOR_H #include "clad/Differentiator/CladUtils.h" -//#include "clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp" namespace clad { +static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stmt1, + const clang::Stmt *Stmt2, bool IgnoreSideEffects) { + + using namespace clang; + + if (!Stmt1 || !Stmt2) { + return !Stmt1 && !Stmt2; + } + + // If Stmt1 & Stmt2 are of different class then they are not + // identical statements. + if (Stmt1->getStmtClass() != Stmt2->getStmtClass()) + return false; + + const Expr *Expr1 = dyn_cast(Stmt1); + const Expr *Expr2 = dyn_cast(Stmt2); + + if (Expr1 && Expr2) { + // If Stmt1 has side effects then don't warn even if expressions + // are identical. + if (!IgnoreSideEffects && Expr1->HasSideEffects(Ctx)) + return false; + // If either expression comes from a macro then don't warn even if + // the expressions are identical. + if ((Expr1->getExprLoc().isMacroID()) || (Expr2->getExprLoc().isMacroID())) + return false; + + // If all children of two expressions are identical, return true. + Expr::const_child_iterator I1 = Expr1->child_begin(); + Expr::const_child_iterator I2 = Expr2->child_begin(); + while (I1 != Expr1->child_end() && I2 != Expr2->child_end()) { + if (!*I1 || !*I2 || !isIdenticalStmt(Ctx, *I1, *I2, IgnoreSideEffects)) + return false; + ++I1; + ++I2; + } + // If there are different number of children in the statements, return + // false. + if (I1 != Expr1->child_end()) + return false; + if (I2 != Expr2->child_end()) + return false; + } + + switch (Stmt1->getStmtClass()) { + default: + return false; + case Stmt::CallExprClass: + case Stmt::ArraySubscriptExprClass: + case Stmt::OMPArraySectionExprClass: + case Stmt::OMPArrayShapingExprClass: + case Stmt::OMPIteratorExprClass: + case Stmt::ImplicitCastExprClass: + case Stmt::ParenExprClass: + case Stmt::BreakStmtClass: + case Stmt::ContinueStmtClass: + case Stmt::NullStmtClass: + return true; + case Stmt::CStyleCastExprClass: { + const CStyleCastExpr* CastExpr1 = cast(Stmt1); + const CStyleCastExpr* CastExpr2 = cast(Stmt2); + + return CastExpr1->getTypeAsWritten() == CastExpr2->getTypeAsWritten(); + } + case Stmt::ReturnStmtClass: { + const ReturnStmt *ReturnStmt1 = cast(Stmt1); + const ReturnStmt *ReturnStmt2 = cast(Stmt2); + + return isIdenticalStmt(Ctx, ReturnStmt1->getRetValue(), + ReturnStmt2->getRetValue(), IgnoreSideEffects); + } + case Stmt::ForStmtClass: { + const ForStmt *ForStmt1 = cast(Stmt1); + const ForStmt *ForStmt2 = cast(Stmt2); + + if (!isIdenticalStmt(Ctx, ForStmt1->getInit(), ForStmt2->getInit(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, ForStmt1->getCond(), ForStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, ForStmt1->getInc(), ForStmt2->getInc(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, ForStmt1->getBody(), ForStmt2->getBody(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::DoStmtClass: { + const DoStmt *DStmt1 = cast(Stmt1); + const DoStmt *DStmt2 = cast(Stmt2); + + if (!isIdenticalStmt(Ctx, DStmt1->getCond(), DStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, DStmt1->getBody(), DStmt2->getBody(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::WhileStmtClass: { + const WhileStmt *WStmt1 = cast(Stmt1); + const WhileStmt *WStmt2 = cast(Stmt2); + + if (!isIdenticalStmt(Ctx, WStmt1->getCond(), WStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, WStmt1->getBody(), WStmt2->getBody(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::IfStmtClass: { + const IfStmt *IStmt1 = cast(Stmt1); + const IfStmt *IStmt2 = cast(Stmt2); + + if (!isIdenticalStmt(Ctx, IStmt1->getCond(), IStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, IStmt1->getThen(), IStmt2->getThen(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, IStmt1->getElse(), IStmt2->getElse(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::CompoundStmtClass: { + const CompoundStmt *CompStmt1 = cast(Stmt1); + const CompoundStmt *CompStmt2 = cast(Stmt2); + + if (CompStmt1->size() != CompStmt2->size()) + return false; + + CompoundStmt::const_body_iterator I1 = CompStmt1->body_begin(); + CompoundStmt::const_body_iterator I2 = CompStmt2->body_begin(); + while (I1 != CompStmt1->body_end() && I2 != CompStmt2->body_end()) { + if (!isIdenticalStmt(Ctx, *I1, *I2, IgnoreSideEffects)) + return false; + ++I1; + ++I2; + } + + return true; + } + case Stmt::CompoundAssignOperatorClass: + case Stmt::BinaryOperatorClass: { + const BinaryOperator *BinOp1 = cast(Stmt1); + const BinaryOperator *BinOp2 = cast(Stmt2); + return BinOp1->getOpcode() == BinOp2->getOpcode(); + } + case Stmt::CharacterLiteralClass: { + const CharacterLiteral *CharLit1 = cast(Stmt1); + const CharacterLiteral *CharLit2 = cast(Stmt2); + return CharLit1->getValue() == CharLit2->getValue(); + } + case Stmt::DeclRefExprClass: { + const DeclRefExpr *DeclRef1 = cast(Stmt1); + const DeclRefExpr *DeclRef2 = cast(Stmt2); + return DeclRef1->getDecl() == DeclRef2->getDecl(); + } + case Stmt::IntegerLiteralClass: { + const IntegerLiteral *IntLit1 = cast(Stmt1); + const IntegerLiteral *IntLit2 = cast(Stmt2); + + llvm::APInt I1 = IntLit1->getValue(); + llvm::APInt I2 = IntLit2->getValue(); + if (I1.getBitWidth() != I2.getBitWidth()) + return false; + return I1 == I2; + } + case Stmt::FloatingLiteralClass: { + const FloatingLiteral *FloatLit1 = cast(Stmt1); + const FloatingLiteral *FloatLit2 = cast(Stmt2); + return FloatLit1->getValue().bitwiseIsEqual(FloatLit2->getValue()); + } + case Stmt::StringLiteralClass: { + const StringLiteral *StringLit1 = cast(Stmt1); + const StringLiteral *StringLit2 = cast(Stmt2); + return StringLit1->getBytes() == StringLit2->getBytes(); + } + case Stmt::MemberExprClass: { + const MemberExpr *MemberStmt1 = cast(Stmt1); + const MemberExpr *MemberStmt2 = cast(Stmt2); + return MemberStmt1->getMemberDecl() == MemberStmt2->getMemberDecl(); + } + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UnaryOp1 = cast(Stmt1); + const UnaryOperator *UnaryOp2 = cast(Stmt2); + return UnaryOp1->getOpcode() == UnaryOp2->getOpcode(); + } + } +} + class KokkosViewAccessVisitor { public: KokkosViewAccessVisitor (clang::Sema& _semaRef, clang::ASTContext& _m_Context) : semaRef(_semaRef), m_Context(_m_Context) {} @@ -106,13 +300,13 @@ namespace clad { for (auto PVD : params) { if (VisitViewAccess(view_accesses[i]->getArg(i_arg), PVD) && VisitViewAccess(view_accesses[j]->getArg(i_arg), PVD)) { include_param = true; - if (view_accesses[i]->getArg(i_arg) != view_accesses[j]->getArg(i_arg)) { + if (!isIdenticalStmt(m_Context, view_accesses[i]->getArg(i_arg), view_accesses[j]->getArg(i_arg), true)) { updated_thread_safe = false; } } } if (!include_param) { - if (view_accesses[i]->getArg(i_arg) != view_accesses[j]->getArg(i_arg)) { + if (!isIdenticalStmt(m_Context, view_accesses[i]->getArg(i_arg), view_accesses[j]->getArg(i_arg), true)) { all_other_same_args = false; } } From 6c628177d8e63945ded74a9851ec9013b1763c49 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Sat, 13 Jan 2024 13:14:39 -0700 Subject: [PATCH 54/75] Update the example --- kokkos/functor_for.hpp | 2 +- kokkos/generated/Derivatives.cpp | 6 +++--- kokkos/generated/Derivatives.hpp | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp index 0301b0e57..a527a9c50 100644 --- a/kokkos/functor_for.hpp +++ b/kokkos/functor_for.hpp @@ -94,7 +94,7 @@ double f(double x, double y) { }); Kokkos::parallel_for( a.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { - a(j1,0) += b(j1+1,0)*6.89 + b(j1,1) + pow(b(j1,1) + a(j1,1) + b(j1+1,0) * b(j1+1,0) * a(j1,2)); + a(j1,0) += b(j1+1,0)*6.89 + b(j1,0) + pow(b(j1,1) + a(j1,1) + b(j1+1,0) * b(j1+1,0) * a(j1,2)); }); double sum; diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp index fcdf35de5..aab00161d 100644 --- a/kokkos/generated/Derivatives.cpp +++ b/kokkos/generated/Derivatives.cpp @@ -69,7 +69,7 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _t5 = b; const Kokkos::View _t6 = a; Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); + a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 0) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); }); double sum; Kokkos::View, Kokkos::MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); @@ -88,8 +88,8 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_x, clad::array_ref _t5 = b; const Kokkos::View _t6 = a; Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 1) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); + a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 0) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); }); double sum; auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); @@ -89,8 +89,8 @@ void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref Date: Mon, 15 Jan 2024 05:56:45 -0700 Subject: [PATCH 55/75] nested view accesses --- .../Differentiator/KokkosViewAccessVisitor.h | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index 723e6ceff..c1b7201b5 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -279,16 +279,42 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm for (size_t i = 0; i < view_accesses.size(); ++i) { view_accesses_is_thread_safe.push_back(VisitViewAccess(view_accesses[i], params)); } + // Check for nested view accesses: + for (size_t i = 0; i < view_accesses.size(); ++i) { + for (size_t i_arg = 1; i_arg < view_accesses[i]->getNumArgs(); ++i_arg) { + + if (llvm::isa(view_accesses[i]->getArg(i_arg))) { + auto OCE = llvm::dyn_cast(view_accesses[i]->getArg(i_arg)); + + std::string constructedTypeName = OCE->getDirectCallee()->getQualifiedNameAsString(); + + if(constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("::operator()") != std::string::npos) { + view_accesses_is_thread_safe[i] = false; + { + unsigned diagID1 = semaRef.Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, + "The view access has a nested view access-- continued"); + clang::Sema::SemaDiagnosticBuilder stream1 = semaRef.Diag(view_accesses[i]->getBeginLoc(), diagID1); + } + { + unsigned diagID2 = semaRef.Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, + "continued -- here; an atomic will be required for the reverse mode. "); + clang::Sema::SemaDiagnosticBuilder stream2 = semaRef.Diag(view_accesses[i]->getArg(i_arg)->getBeginLoc(), diagID2); + } + break; + } + } + } + } // Check if two view accesses could be similar for two different param // instance. for (size_t i = 0; i < view_accesses.size(); ++i) { if (!view_accesses_is_thread_safe[i]) continue; - std::string name_i = dyn_cast(view_accesses[i]->getArg(0))->getNameInfo().getName().getAsString(); + std::string name_i = llvm::dyn_cast(view_accesses[i]->getArg(0))->getNameInfo().getName().getAsString(); for (size_t j = i+1; j < view_accesses.size(); ++j) { // If the two views have different name, continue - std::string name_j = dyn_cast(view_accesses[j]->getArg(0))->getNameInfo().getName().getAsString(); + std::string name_j = llvm::dyn_cast(view_accesses[j]->getArg(0))->getNameInfo().getName().getAsString(); if (name_i != name_j) continue; From ce5e8aff12c4efcbc73fe363882b93a376224ceb Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 15 Jan 2024 06:35:01 -0700 Subject: [PATCH 56/75] do not test the safety of a LHS view access of an AssignmentOp --- .../Differentiator/KokkosViewAccessVisitor.h | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index c1b7201b5..ffbd5e843 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -208,9 +208,10 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm class KokkosViewAccessVisitor { public: - KokkosViewAccessVisitor (clang::Sema& _semaRef, clang::ASTContext& _m_Context) : semaRef(_semaRef), m_Context(_m_Context) {} - - void Visit(const clang::Stmt *Node, bool record_view_names = false) { + KokkosViewAccessVisitor (clang::Sema& _semaRef, clang::ASTContext& _m_Context) : + semaRef(_semaRef), m_Context(_m_Context) {} + + void Visit(const clang::Stmt *Node, bool record_view_names = false, bool RHS = true) { if (llvm::isa(Node)) { if (llvm::isa(Node)) { auto OCE = llvm::dyn_cast(Node); @@ -221,6 +222,7 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm view_accesses.push_back(OCE); view_accesses_location.push_back(OCE->getBeginLoc()); + view_accesses_RHS.push_back(RHS); } } else { @@ -244,6 +246,11 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm return; } } + if (llvm::isa(Node) && llvm::dyn_cast(Node)->isAssignmentOp()) { + Visit(llvm::dyn_cast(Node)->getLHS(),record_view_names,false); + Visit(llvm::dyn_cast(Node)->getRHS(),record_view_names,RHS); + return; + } for (const clang::Stmt *SubStmt : Node->children()) Visit(SubStmt, record_view_names); @@ -277,7 +284,10 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm void VisitViewAccesses(std::vector params) { for (size_t i = 0; i < view_accesses.size(); ++i) { - view_accesses_is_thread_safe.push_back(VisitViewAccess(view_accesses[i], params)); + if (view_accesses_RHS[i]) + view_accesses_is_thread_safe.push_back(VisitViewAccess(view_accesses[i], params)); + else + view_accesses_is_thread_safe.push_back(false); } // Check for nested view accesses: for (size_t i = 0; i < view_accesses.size(); ++i) { @@ -378,6 +388,7 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm view_DeclRefExpr.clear(); view_accesses_is_thread_safe.clear(); view_accesses.clear(); + view_accesses_RHS.clear(); view_accesses_location.clear(); } @@ -387,6 +398,7 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm std::vector view_DeclRefExpr; std::vector view_accesses_is_thread_safe; std::vector view_accesses; + std::vector view_accesses_RHS; std::vector view_accesses_location; }; } // end namespace clad From a450d90a295ac732027dd13e9395b34d3c850a65 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 15 Jan 2024 09:53:57 -0700 Subject: [PATCH 57/75] use pushCodeSynthesisContext --- lib/Differentiator/ReverseModeVisitor.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 70cf2572b..5f082d177 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -782,13 +782,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, const Stmt* body = LE->getBody(); - //std::cout << "This Lambda has those accesses "<< std::endl; - //for (auto view_access : kVAV.view_accesses) { - // view_access->dump(); - //} - //std::cout << std::endl; - - //Stmt* reverseBody auto bodyV = Visit(body); auto children_iterator_range = LE->children(); @@ -1882,6 +1875,10 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, m_KVAV->clear(); m_KVAV->Visit(dyn_cast(arg)->getBody(), false); + Sema::CodeSynthesisContext Ctx; + Ctx.Entity = dyn_cast(arg)->getCallOperator(); + m_Sema.pushCodeSynthesisContext(Ctx); + for (auto DRE : m_KVAV->view_DeclRefExpr) { VarDecl* recordedView = BuildVarDecl(DRE->getType(), "_t", const_cast(DRE), /*DirectInit=*/true); addToCurrentBlock(BuildDeclStmt(recordedView), direction::forward); From e45f6d34d365a5d31c1058ff05acbec5321d1daf Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 15 Jan 2024 13:44:36 -0700 Subject: [PATCH 58/75] use CXXConstructExpr for the captures of the lambda --- lib/Differentiator/ReverseModeVisitor.cpp | 27 ++++++++++++++--------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 5f082d177..5a9919675 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -793,9 +793,13 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, auto children_expr = const_cast(dyn_cast(children)); if (children_expr) { auto children_exprV = Visit(children_expr); - children_Exp.push_back(children_exprV.getExpr()); - children_Exp_dx.push_back(children_exprV.getExpr()); - children_Exp_dx.push_back(children_exprV.getExpr_dx()); + children_Exp.push_back(children_expr); + + auto children_expr_copy = dyn_cast(Clone(children_expr)); + children_expr_copy->setArg(0, children_exprV.getExpr_dx()); + + children_Exp_dx.push_back(children_expr); + children_Exp_dx.push_back(children_expr_copy); } } @@ -862,13 +866,16 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, std::vector children_LC_Exp_dx; for (auto children_expr : children_Exp_dx) { - if (isa(children_expr)) { - auto VD = dyn_cast(dyn_cast(children_expr)->getDecl()); - children_LC_Exp_dx.push_back(LambdaCapture(SourceLocation(), true, LambdaCaptureKind::LCK_ByRef, VD)); - } - else { - if(isa(children_expr)) { - auto PE = dyn_cast(children_expr); + if(isa(children_expr)) { + + auto tmp = dyn_cast(children_expr)->getArg(0)->IgnoreImpCasts(); + + if (isa(tmp)) { + auto VD = dyn_cast(dyn_cast(tmp)->getDecl()); + children_LC_Exp_dx.push_back(LambdaCapture(SourceLocation(), true, LambdaCaptureKind::LCK_ByRef, VD)); + } + if(isa(tmp)) { + auto PE = dyn_cast(tmp); auto OCE = dyn_cast(PE->getSubExpr()); auto VD = dyn_cast(dyn_cast(OCE->getArg(0))->getDecl()); From b6724dab46cc4a53434bedaf70be196e10d3d038 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 15 Jan 2024 15:58:59 -0700 Subject: [PATCH 59/75] move towards the copy of the lambda operator () --- lib/Differentiator/ReverseModeVisitor.cpp | 42 ++++++++++++++--------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 5a9919675..bc640841b 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -824,7 +824,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, false); - clang::Expr * reverseLE; + clang::LambdaExpr * reverseLE; { clang::LambdaIntroducer Intro; Intro.Default = forwardLambdaClass->getLambdaCaptureDefault (); @@ -842,23 +842,33 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, m_Sema.ActOnStartOfLambdaDefinition(Intro, D, clad_compat::Sema_ActOnStartOfLambdaDefinition_ScopeOrDeclSpec(getCurrentScope(), DS)); + /* // needed for CLAD_COMPAT_CLANG10_FunctionDecl_Create_ExtraParams - //clad::VisitorBase& VB = *this; - // - //LSI->CallOperator = CXXMethodDecl::Create(m_Context, LE->getCallOperator()->getParent(), - // noLoc, - // DNI, - // LE->getCallOperator()->getType(), LE->getCallOperator()->getTypeSourceInfo(), - // LE->getCallOperator()->getStorageClass(), - // LE->getCallOperator()->isInlineSpecified(), - // clad_compat::Function_GetConstexprKind(LE->getCallOperator()), - // noLoc - // CLAD_COMPAT_CLANG10_FunctionDecl_Create_ExtraParams(LE->getCallOperator()->getTrailingRequiresClause())); - - - // This will replace the calloperator of the forward mode in the AST - LSI->CallOperator = LE->getCallOperator(); + clad::VisitorBase& VB = *this; + + auto DNI = utils::BuildDeclarationNameInfo(m_Sema, "operator_pullback"); + + CXXMethodDecl* CMD = LE->getCallOperator(); + DeclContext* DC = const_cast(CMD->getDeclContext()); + CXXRecordDecl* CXXRD = cast(DC); + DNI = CMD->getNameInfo(); + + LSI->CallOperator = CXXMethodDecl::Create(m_Context, + CXXRD, + noLoc, + DNI, + CMD->getType(), + CMD->getTypeSourceInfo(), + CMD->getStorageClass(), + CMD->isInlineSpecified(), + clad_compat::Function_GetConstexprKind(CMD), + noLoc + CLAD_COMPAT_CLANG10_FunctionDecl_Create_ExtraParams(CMD->getTrailingRequiresClause())); + LSI->CallOperator->setAccess(CMD->getAccess()); + */ + + LSI->CallOperator = LE->getCallOperator(); FunctionDecl *FD = LSI->CallOperator->getAsFunction(); FD->setBody(bodyV.getStmt_dx()); From 7d45a900ef68d248be349cef8a50b5f676bcadcd Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 15 Jan 2024 16:32:11 -0700 Subject: [PATCH 60/75] Add comments on the lambda operators --- lib/Differentiator/ReverseModeVisitor.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index bc640841b..bd07544c3 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -868,10 +868,22 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, LSI->CallOperator->setAccess(CMD->getAccess()); */ + // The following lines will modify the body of the + // LambdaExpr 0x16051c540 + // |-CXXRecordDecl 0x1409e2eb0 implicit class definition + // | |-CXXMethodDecl 0x1409e2ff0 used constexpr operator() 'void (const int) const' inline + // of the forward lambda but not its CompoundStmtBody (which are supposed to be consistent I think). + // + // However, the CompoundStmtBody and the body of the operator above for the reverse lambda are consistent. + // + // The generated C++ file seems to be fine as it seems to use the CompoundStmtBody. LSI->CallOperator = LE->getCallOperator(); FunctionDecl *FD = LSI->CallOperator->getAsFunction(); FD->setBody(bodyV.getStmt_dx()); + //LE->getCallOperator()->getAsFunction()->setBody(bodyV.getStmt_dx()); + //LE->getLambdaClass()->getLambdaCallOperator()->getAsFunction()->setBody(bodyV.getStmt_dx()); + std::vector children_LC_Exp_dx; @@ -920,6 +932,11 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, LE->getEndLoc(), false); + //std::cout << "forwardLE->dump()" << std::endl; + //forwardLE->dump(); + //std::cout << "reverseLE->dump()" << std::endl; + //reverseLE->dump(); + endScope(); } From bd0b9ca5e30f12fd17cbcbd403c7d690d1f7db3b Mon Sep 17 00:00:00 2001 From: kliegeois Date: Wed, 17 Jan 2024 15:03:10 -0700 Subject: [PATCH 61/75] Start the work towards hierarchical parallelism --- include/clad/Differentiator/CladUtils.h | 9 ++ lib/Differentiator/CladUtils.cpp | 27 +++- lib/Differentiator/ReverseModeVisitor.cpp | 163 ++++++++++++++++++++-- 3 files changed, 184 insertions(+), 15 deletions(-) diff --git a/include/clad/Differentiator/CladUtils.h b/include/clad/Differentiator/CladUtils.h index 18e63f82f..e7381a256 100644 --- a/include/clad/Differentiator/CladUtils.h +++ b/include/clad/Differentiator/CladUtils.h @@ -179,6 +179,15 @@ namespace clad { /// otherwise returns false. bool HasAnyReferenceOrPointerArgument(const clang::FunctionDecl* FD); + /// Returns true if `constructedTypeName` is a string describing Kokkos::TeamPolicy type. + bool IsKokkosTeamPolicy(const std::string constructedTypeName); + + /// Returns true if `constructedTypeName` is a string describing Kokkos::TeamThreadRange, Kokkos::ThreadVectorRange, or Kokkos::TeamVectorRange type. + bool IsKokkosRange(const std::string constructedTypeName); + + /// Returns true if `constructedTypeName` is a string describing Kokkos::Member type. + bool IsKokkosMember(const std::string constructedTypeName); + /// Returns true if `constructedTypeName` is a string describing Kokkos::View type. bool IsKokkosView(const std::string constructedTypeName); diff --git a/lib/Differentiator/CladUtils.cpp b/lib/Differentiator/CladUtils.cpp index c846a1f9f..3af5446f2 100644 --- a/lib/Differentiator/CladUtils.cpp +++ b/lib/Differentiator/CladUtils.cpp @@ -317,8 +317,33 @@ namespace clad { return false; } + bool IsKokkosTeamPolicy(const std::string constructedTypeName) { + return constructedTypeName.find("Kokkos::TeamPolicy") == 0 + || constructedTypeName.find("class Kokkos::TeamPolicy") == 0 + || constructedTypeName.find("const class Kokkos::TeamPolicy") == 0; + } + + bool IsKokkosRange(const std::string constructedTypeName) { + return constructedTypeName.find("Kokkos::TeamVectorRange") == 0 + || constructedTypeName.find("class Kokkos::TeamVectorRange") == 0 + || constructedTypeName.find("const class Kokkos::TeamVectorRange") == 0 + || constructedTypeName.find("Kokkos::TeamThreadRange") == 0 + || constructedTypeName.find("class Kokkos::TeamThreadRange") == 0 + || constructedTypeName.find("const class Kokkos::TeamThreadRange") == 0 + || constructedTypeName.find("Kokkos::ThreadVectorRange") == 0 + || constructedTypeName.find("class Kokkos::ThreadVectorRange") == 0 + || constructedTypeName.find("const class Kokkos::ThreadVectorRange") == 0; + } + + bool IsKokkosMember(const std::string constructedTypeName) { + return constructedTypeName.find("member_type") != -1; + } + bool IsKokkosView(const std::string constructedTypeName) { - return constructedTypeName.find("Kokkos::View") == 0 || constructedTypeName.find("class Kokkos::View") == 0 || constructedTypeName.find("const class Kokkos::View") == 0; + return constructedTypeName.find("Kokkos::View") == 0 + || constructedTypeName.find("class Kokkos::View") == 0 + || constructedTypeName.find("const Kokkos::View") == 0 + || constructedTypeName.find("const class Kokkos::View") == 0; //return constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("(dyn_cast(children)); if (children_expr) { - auto children_exprV = Visit(children_expr); children_Exp.push_back(children_expr); - auto children_expr_copy = dyn_cast(Clone(children_expr)); - children_expr_copy->setArg(0, children_exprV.getExpr_dx()); - children_Exp_dx.push_back(children_expr); - children_Exp_dx.push_back(children_expr_copy); + if(isa(children_expr)) { + std::string constructedTypeName = QualType::getAsString(dyn_cast(children_expr)->getType().split(), PrintingPolicy{ {} }); + if (!utils::IsKokkosTeamPolicy(constructedTypeName) && !utils::IsKokkosRange(constructedTypeName) && !utils::IsKokkosMember(constructedTypeName)) { + auto children_exprV = Visit(children_expr); + auto children_expr_copy = dyn_cast(Clone(children_expr)); + children_expr_copy->setArg(0, children_exprV.getExpr_dx()); + children_Exp_dx.push_back(children_expr_copy); + } + } + else if(isa(children_expr)) { + + } + else { + auto children_exprV = Visit(children_expr); + if (children_exprV.getExpr_dx()) { + children_Exp_dx.push_back(children_exprV.getExpr_dx()); + } + } } } @@ -904,7 +917,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, children_LC_Exp_dx.push_back(LambdaCapture(SourceLocation(), true, LambdaCaptureKind::LCK_ByRef, VD)); } } + if (isa(children_expr)) { + auto VD = dyn_cast(dyn_cast(children_expr)->getDecl()); + children_LC_Exp_dx.push_back(LambdaCapture(SourceLocation(), true, LambdaCaptureKind::LCK_ByRef, VD)); + } } + assert(children_Exp_dx.size() == children_LC_Exp_dx.size() && "Wrong number of captures"); llvm::ArrayRef childrenRef_LC_Exp_dx = clad_compat::makeArrayRef(children_LC_Exp_dx.data(), children_LC_Exp_dx.size()); @@ -1599,6 +1617,45 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, // Member function called from a Kokkos::View; nothing to do here return StmtDiff(Clone(CE)); } + if (utils::IsKokkosTeamPolicy(MCE->getObjectType().getAsString())) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + + for (size_t i = 0; i < MCE->getNumArgs(); ++i) { + auto visitedArg = Visit(MCE->getArg(i)); + + ClonedArgs.push_back(visitedArg.getExpr()); + if (i == 0) + ClonedDArgs.push_back(visitedArg.getExpr()); + else + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + } + + auto tmp = Visit(CE->getCallee()); + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), tmp.getExpr(), + noLoc, ClonedArgs, noLoc) + .get(); + + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), tmp.getExpr_dx(), + noLoc, ClonedDArgs, noLoc) + .get(); + + // We need to set the scratch pad sizes BEFORE the policy is used. + addToCurrentBlock(dCall, direction::forward); + + return StmtDiff(Call, nullptr); + } + if (utils::IsKokkosRange(MCE->getObjectType().getAsString()) || utils::IsKokkosMember(MCE->getObjectType().getAsString())) { + auto result = const_cast(MCE); + //CXXMemberCallExpr* result = dyn_cast(Clone(MCE)); + for (size_t i = 0; i < result->getNumArgs(); ++i) + result->setArg(i, Visit(result->getArg(i)).getExpr()); + + return StmtDiff(result, nullptr); + } } if (isa(CE)) { auto OCE = dyn_cast(CE); @@ -1886,6 +1943,65 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Call, dCall); } + if (FD->getQualifiedNameAsString().find("Kokkos::PerTeam") != std::string::npos || FD->getQualifiedNameAsString().find("Kokkos::PerThread") != std::string::npos) { + // What we do here depends whether we are in a parallel region or not. + if (!isInsideParallelRegion) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + + ClonedArgs.push_back(Clone(CE->getArg(0))); + + auto val2 = ConstantFolder::synthesizeLiteral(m_Context.IntTy, m_Context, 2); + ClonedDArgs.push_back(BuildOp(clang::BO_Mul, + val2, + Clone(CE->getArg(0)))); + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } + else { + return StmtDiff(Clone(CE), Clone(CE)); + } + } + if (FD->getQualifiedNameAsString().find("Kokkos::single") != std::string::npos) { + llvm::SmallVector ClonedArgs; + llvm::SmallVector ClonedDArgs; + + for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { + auto arg = CE->getArg(i); + + if (i==0) { + ClonedArgs.push_back(const_cast(arg)); + ClonedDArgs.push_back(const_cast(arg)); + } + else { + auto visitedArg = Visit(arg); + ClonedArgs.push_back(visitedArg.getExpr()); + ClonedDArgs.push_back(visitedArg.getExpr_dx()); + } + } + + Expr* Call = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedArgs, noLoc) + .get(); + + Expr* dCall = m_Sema + .ActOnCallExpr(getCurrentScope(), Clone(CE->getCallee()), + noLoc, ClonedDArgs, noLoc) + .get(); + + return StmtDiff(Call, dCall); + } if (FD->getQualifiedNameAsString().find("Kokkos::parallel_for") != std::string::npos) { llvm::SmallVector ClonedArgs; llvm::SmallVector ClonedDArgs; @@ -1905,7 +2021,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (const auto* BTE = dyn_cast(arg)) arg = BTE->getSubExpr(); - if (isa(arg)) { + if (isa(arg) && !isInsideParallelRegion) { m_KVAV->clear(); m_KVAV->Visit(dyn_cast(arg)->getBody(), false); @@ -1951,12 +2067,22 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } - auto visitedArg = Visit(arg); - ClonedArgs.push_back(visitedArg.getExpr()); - if (i==0) - ClonedDArgs.push_back(visitedArg.getExpr()); - else + bool copyArg = i==0; + + if (copyArg) { + if (isa(arg)) + copyArg = false; + } + + if (copyArg) { + ClonedArgs.push_back(const_cast(arg)); + ClonedDArgs.push_back(const_cast(arg)); + } + else { + auto visitedArg = Visit(arg); + ClonedArgs.push_back(visitedArg.getExpr()); ClonedDArgs.push_back(visitedArg.getExpr_dx()); + } } Expr* Call = m_Sema @@ -3585,8 +3711,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, StmtDiff ReverseModeVisitor::VisitMemberExpr(const MemberExpr* ME) { auto baseDiff = VisitWithExplicitNoDfDx(ME->getBase()); auto* field = ME->getMemberDecl(); - assert(!isa(field) && - "CXXMethodDecl nodes not supported yet!"); + //assert(!isa(field) && + // "CXXMethodDecl nodes not supported yet!"); MemberExpr* clonedME = utils::BuildMemberExpr( m_Sema, getCurrentScope(), baseDiff.getExpr(), field->getName()); if (!baseDiff.getExpr_dx()) @@ -4409,7 +4535,16 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector clonedArgs; llvm::SmallVector clonedDArgs; std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); - if (utils::IsKokkosView(constructedTypeName)) { + if (utils::IsKokkosTeamPolicy(constructedTypeName)) { + return {Clone(CE), Clone(CE)}; + } + else if (utils::IsKokkosRange(constructedTypeName)) { + return {Clone(CE), Clone(CE)}; + } + else if (utils::IsKokkosMember(constructedTypeName)) { + return {Clone(CE), Clone(CE)}; + } + else if (utils::IsKokkosView(constructedTypeName)) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; From 027dfe616fff75d51eb17836fd1c2ccfb799d85c Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 18 Jan 2024 11:01:54 -0700 Subject: [PATCH 62/75] clean --- include/clad/Differentiator/CladUtils.h | 3 +- .../Differentiator/KokkosViewAccessVisitor.h | 6 +- .../clad/Differentiator/ReverseModeVisitor.h | 2 +- kokkos/CMakeLists.txt | 15 - kokkos/functor_for.hpp | 104 ------- kokkos/generated/Derivatives.cpp | 235 --------------- kokkos/generated/Derivatives.hpp | 238 --------------- kokkos/lambda_reduction.hpp | 19 -- kokkos/lambda_reduction_subview.hpp | 21 -- kokkos/main.cpp | 159 ---------- kokkos/parallel_sum.hpp | 276 ------------------ kokkos/postProcess.py | 182 ------------ lib/Differentiator/BaseForwardModeVisitor.cpp | 38 +-- lib/Differentiator/CladUtils.cpp | 1 - lib/Differentiator/ReverseModeVisitor.cpp | 40 +-- lib/Differentiator/VisitorBase.cpp | 3 - 16 files changed, 14 insertions(+), 1328 deletions(-) delete mode 100644 kokkos/CMakeLists.txt delete mode 100644 kokkos/functor_for.hpp delete mode 100644 kokkos/generated/Derivatives.cpp delete mode 100644 kokkos/generated/Derivatives.hpp delete mode 100644 kokkos/lambda_reduction.hpp delete mode 100644 kokkos/lambda_reduction_subview.hpp delete mode 100644 kokkos/main.cpp delete mode 100644 kokkos/parallel_sum.hpp delete mode 100644 kokkos/postProcess.py diff --git a/include/clad/Differentiator/CladUtils.h b/include/clad/Differentiator/CladUtils.h index e7381a256..da6958b87 100644 --- a/include/clad/Differentiator/CladUtils.h +++ b/include/clad/Differentiator/CladUtils.h @@ -182,7 +182,8 @@ namespace clad { /// Returns true if `constructedTypeName` is a string describing Kokkos::TeamPolicy type. bool IsKokkosTeamPolicy(const std::string constructedTypeName); - /// Returns true if `constructedTypeName` is a string describing Kokkos::TeamThreadRange, Kokkos::ThreadVectorRange, or Kokkos::TeamVectorRange type. + /// Returns true if `constructedTypeName` is a string describing Kokkos::TeamThreadRange, + /// Kokkos::ThreadVectorRange, or Kokkos::TeamVectorRange type. bool IsKokkosRange(const std::string constructedTypeName); /// Returns true if `constructedTypeName` is a string describing Kokkos::Member type. diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index ffbd5e843..6943c494d 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -218,7 +218,8 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm std::string constructedTypeName = OCE->getDirectCallee()->getQualifiedNameAsString(); - if(constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("::operator()") != std::string::npos) { + if(constructedTypeName.find("Kokkos::View") != std::string::npos + && constructedTypeName.find("::operator()") != std::string::npos) { view_accesses.push_back(OCE); view_accesses_location.push_back(OCE->getBeginLoc()); @@ -298,7 +299,8 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm std::string constructedTypeName = OCE->getDirectCallee()->getQualifiedNameAsString(); - if(constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("::operator()") != std::string::npos) { + if(constructedTypeName.find("Kokkos::View") != std::string::npos + && constructedTypeName.find("::operator()") != std::string::npos) { view_accesses_is_thread_safe[i] = false; { unsigned diagID1 = semaRef.Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index 4186be440..c8cd366f7 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -45,7 +45,7 @@ namespace clad { /// Stack is used to pass the arguments (dfdx) to further nodes /// in the Visit method. std::stack m_Stack; - // Used to pass a Kokkos view access visitor to further nodes + /// Used to pass a Kokkos view access visitor to further nodes /// in the Visit method. clad::KokkosViewAccessVisitor * m_KVAV; /// A sequence of DeclStmts containing "tape" variable declarations diff --git a/kokkos/CMakeLists.txt b/kokkos/CMakeLists.txt deleted file mode 100644 index 4e2025d8f..000000000 --- a/kokkos/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -cmake_minimum_required(VERSION 3.16.3) - -project(clad_example) - -option(USE_GENERATED_FILE "If" OFF) - -IF(USE_GENERATED_FILE) - add_compile_definitions(use_generated_file) -ENDIF() - -add_executable ( clad_example main.cpp ) -find_package(Kokkos REQUIRED) -set (CMAKE_CXX_STANDARD 17) -target_include_directories( clad_example PRIVATE ${CLAD_INCLUDE_PATH}) -target_link_libraries(clad_example PRIVATE Kokkos::kokkos) diff --git a/kokkos/functor_for.hpp b/kokkos/functor_for.hpp deleted file mode 100644 index a527a9c50..000000000 --- a/kokkos/functor_for.hpp +++ /dev/null @@ -1,104 +0,0 @@ -template -struct ParallelFunctor { - VT a; - double x, y; - - ParallelFunctor(VT _a, double _x, double _y) : a(_a), x(_x), y(_y) {} - - KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - for (size_t j =0; j -typename ViewtypeA::value_type f_view(ViewtypeA a) { - typename ViewtypeA::value_type sum; - auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); - //auto h_a_row_0 = Kokkos::create_mirror_view(a_row_0); //create_mirror_view_and_copy - //Kokkos::deep_copy(h_a_row_0, a_row_0); - //sum = h_a_row_0(0,0); - //kokkos_builtin_derivative::parallel_sum(sum, Kokkos::subview(a_row_0,0,0)); - //sum = 10 * sum * sum * sum; - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - return 1e-6*sum*sum; -} - - -template -typename ViewtypeX::value_type f_multilevel(ViewtypeX x) { - typename ViewtypeX::value_type sum; - //kokkos_builtin_derivative::parallel_sum(mean_x, x); - - ViewtypeX y("y", x.extent(0)); - - Kokkos::parallel_for( x.extent(0), KOKKOS_LAMBDA ( const int j0) { - x(j0) = 3*x(j0); - //x(j0) = 3*x(j0) - mean_x; - }); - - Kokkos::parallel_for( x.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { - if (j1 != x.extent(0)-1) - y(j1+1) = 2.6*x(j1)*x(j1); - else - y(j1) = 2.6*x(0)*x(0); - }); - - const int n_max = 10; - const int n = x.extent(0) > n_max ? n_max : x.extent(0); - - auto y_n_rows = Kokkos::subview( y, Kokkos::make_pair(0, n)); - kokkos_builtin_derivative::parallel_sum(sum, y_n_rows); - return sum; -} - - -template -void f_view_2(ViewtypeA a, double tmp) { - Kokkos::deep_copy(a, tmp); -} - -template -KOKKOS_INLINE_FUNCTION -T pow(T a) { - return a*a; -} - -double f(double x, double y) { - - const int N1 = 4; - constexpr int N2 = 4; - - Kokkos::View a("a", N1); - Kokkos::View b("b", N1); - - double tmp = x * x + y; - - // These 2 lines do not work. Is it because nothing is returned by f_view_2? - //f_view_2(a, tmp); - //return f_view(a); - - Kokkos::deep_copy(a, tmp); - - Kokkos::deep_copy(a, x); - Kokkos::deep_copy(b, x * x + y); - //Kokkos::deep_copy(a, b); - - Kokkos::parallel_for( b.extent(0), KOKKOS_LAMBDA ( const int j0) { - b(j0,0) += 3.53; - }); - - Kokkos::parallel_for( a.extent(0)-1, KOKKOS_LAMBDA ( const int j1) { - a(j1,0) += b(j1+1,0)*6.89 + b(j1,0) + pow(b(j1,1) + a(j1,1) + b(j1+1,0) * b(j1+1,0) * a(j1,2)); - }); - - double sum; - auto a_row_0 = Kokkos::subview( a, Kokkos::make_pair(0, 2), Kokkos::ALL ); - - return f_view(a_row_0); -} \ No newline at end of file diff --git a/kokkos/generated/Derivatives.cpp b/kokkos/generated/Derivatives.cpp deleted file mode 100644 index aab00161d..000000000 --- a/kokkos/generated/Derivatives.cpp +++ /dev/null @@ -1,235 +0,0 @@ -inline void pow_pullback(double a, double _d_y, clad::array_ref _d_a) { - double _t0; - double _t1; - _t1 = a; - _t0 = a; - goto _label0; - _label0: - { - double _r0 = _d_y * _t0; - * _d_a += _r0; - double _r1 = _t1 * _d_y; - * _d_a += _r1; - } -} -void f_view_pullback(Kokkos::View, Kokkos::MemoryTraits<0> > a, typename View, MemoryTraits<0> >::value_type _d_y, clad::array_ref, MemoryTraits<0> > > _d_a) { - typename View, MemoryTraits<0> >::value_type _d_sum = 0; - Kokkos::View, Kokkos::MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); - typename View, MemoryTraits<0> >::value_type _t0; - typename View, MemoryTraits<0> >::value_type _t1; - double _t2; - typename View, MemoryTraits<0> >::value_type sum; - Kokkos::View, Kokkos::MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - _t1 = sum; - _t2 = 9.9999999999999995E-7 * _t1; - _t0 = sum; - goto _label0; - _label0: - { - double _r0 = _d_y * _t0; - double _r1 = _r0 * _t1; - double _r2 = 9.9999999999999995E-7 * _r0; - _d_sum += _r2; - double _r3 = _t2 * _d_y; - _d_sum += _r3; - } - kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); -} -void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { - int _d_N1 = 0; - int _d_N2 = 0; - Kokkos::View _d_a("_d_a", N1); - Kokkos::View _d_b("_d_b", N1); - double _t0; - double _t1; - double _d_tmp = 0; - double _t2; - double _t3; - double _t4; - double _d_sum = 0; - Kokkos::View, Kokkos::MemoryTraits<0> > _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), ALL); - Kokkos::View, Kokkos::MemoryTraits<0> > _t7; - const int N1 = 4; - const int N2 = 4; - Kokkos::View a("a", N1); - Kokkos::View b("b", N1); - _t1 = x; - _t0 = x; - double tmp = _t1 * _t0 + y; - Kokkos::deep_copy(a, tmp); - Kokkos::deep_copy(a, x); - _t2 = x; - _t4 = x; - _t3 = x; - Kokkos::deep_copy(b, x * _t2 + y); - Kokkos::parallel_for(b.extent(0), [=](const int j0) { - b(j0, 0) += 3.5299999999999998; - }); - const Kokkos::View _t5 = b; - const Kokkos::View _t6 = a; - Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 0) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); - }); - double sum; - Kokkos::View, Kokkos::MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - _t7 = a_row_0; - goto _label0; - _label0: - { - f_view_pullback(_t7, 1, &_d_a_row_0); - Kokkos::View, Kokkos::MemoryTraits<0> > _r11 = _d_a_row_0; - } - { - Kokkos::deep_copy(b, _t5); - Kokkos::deep_copy(a, _t6); - Kokkos::parallel_for(a.extent(0) - 1, [=](const int j1) { - { - double _r_d1 = _d_a(j1, 0); - _d_a(j1, 0) += _r_d1; - double _r5 = _r_d1 * 6.8899999999999997; - Kokkos::atomic_add(&_d_b(j1 + 1, 0), _r5); - Kokkos::atomic_add(&_d_b(j1, 0), _r_d1); - double _grad1 = 0.; - pow_pullback(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2), _r_d1, &_grad1); - double _r6 = _grad1; - _d_b(j1, 1) += _r6; - _d_a(j1, 1) += _r6; - double _r7 = _r6 * a(j1, 2); - double _r8 = _r7 * b(j1 + 1, 0); - _d_b(j1 + 1, 0) += _r8; - double _r9 = b(j1 + 1, 0) * _r7; - _d_b(j1 + 1, 0) += _r9; - double _r10 = b(j1 + 1, 0) * b(j1 + 1, 0) * _r6; - _d_a(j1, 2) += _r10; - _d_a(j1, 0) -= _r_d1; - _d_a(j1, 0); - } - }); - } - Kokkos::parallel_for(b.extent(0), [=](const int j0) { - { - double _r_d0 = _d_b(j0, 0); - _d_b(j0, 0) += _r_d0; - _d_b(j0, 0) -= _r_d0; - _d_b(j0, 0); - } - }); - { - double _grad0 = 0.; - kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); - Kokkos::deep_copy(_d_b, 0.); - double _r2 = _grad0; - double _r3 = _r2 * _t3; - * _d_x += _r3; - double _r4 = _t4 * _r2; - * _d_x += _r4; - * _d_y += _r2; - } - { - kokkos_builtin_derivative::parallel_sum(* _d_x, _d_a); - Kokkos::deep_copy(_d_a, 0.); - } - { - kokkos_builtin_derivative::parallel_sum(_d_tmp, _d_a); - Kokkos::deep_copy(_d_a, 0.); - } - { - double _r0 = _d_tmp * _t0; - * _d_x += _r0; - double _r1 = _t1 * _d_tmp; - * _d_x += _r1; - * _d_y += _d_tmp; - } -} -void f_multilevel_grad(Kokkos::View x, clad::array_ref > _d_x) { - typename View::value_type _d_sum = 0; - Kokkos::View _d_y("_d_y", x.extent(0)); - int _d_n_max = 0; - bool _cond0; - int _d_n = 0; - Kokkos::View, Kokkos::MemoryTraits<0> > _d_y_n_rows = Kokkos::subview(_d_y, Kokkos::make_pair(0, n)); - typename View::value_type sum; - Kokkos::View y("y", x.extent(0)); - Kokkos::parallel_for(x.extent(0), [=](const int j0) { - x(j0) = 3 * x(j0); - }); - const Kokkos::View _t0 = x; - Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { - if (j1 != x.extent(0) - 1) - y(j1 + 1) = 2.6000000000000001 * x(j1) * x(j1); - else - y(j1) = 2.6000000000000001 * x(0) * x(0); - }); - const int n_max = 10; - _cond0 = x.extent(0) > n_max; - const int n = _cond0 ? n_max : x.extent(0); - Kokkos::View, Kokkos::MemoryTraits<0> > y_n_rows = Kokkos::subview(y, Kokkos::make_pair(0, n)); - kokkos_builtin_derivative::parallel_sum(sum, y_n_rows); - goto _label0; - _label0: - _d_sum += 1; - kokkos_builtin_derivative::parallel_sum(_d_y_n_rows, _d_sum); - if (_cond0) - _d_n_max += _d_n; - { - Kokkos::deep_copy(x, _t0); - Kokkos::parallel_for(x.extent(0) - 1, [=](const int j1) { - if (j1 != x.extent(0) - 1) { - double _r_d1 = _d_y(j1 + 1); - double _r2 = _r_d1 * x(j1); - double _r3 = _r2 * x(j1); - double _r4 = 2.6000000000000001 * _r2; - (* _d_x)(j1) += _r4; - double _r5 = 2.6000000000000001 * x(j1) * _r_d1; - (* _d_x)(j1) += _r5; - _d_y(j1 + 1) -= _r_d1; - _d_y(j1 + 1); - } else { - double _r_d2 = _d_y(j1); - double _r6 = _r_d2 * x(0); - double _r7 = _r6 * x(0); - double _r8 = 2.6000000000000001 * _r6; - Kokkos::atomic_add(&(* _d_x)(0), _r8); - double _r9 = 2.6000000000000001 * x(0) * _r_d2; - Kokkos::atomic_add(&(* _d_x)(0), _r9); - _d_y(j1) -= _r_d2; - _d_y(j1); - } - }); - } - Kokkos::parallel_for(x.extent(0), [=](const int j0) { - { - double _r_d0 = (* _d_x)(j0); - double _r0 = _r_d0 * x(j0); - double _r1 = 3 * _r_d0; - (* _d_x)(j0) += _r1; - (* _d_x)(j0) -= _r_d0; - (* _d_x)(j0); - } - }); -} -void f_view_grad(Kokkos::View a, clad::array_ref > _d_a) { - typename View::value_type _d_sum = 0; - Kokkos::View, Kokkos::MemoryTraits<0> > _d_a_row_0 = Kokkos::subview((* _d_a), Kokkos::make_pair(0, 2), ALL); - typename View::value_type _t0; - typename View::value_type _t1; - double _t2; - typename View::value_type sum; - Kokkos::View, Kokkos::MemoryTraits<0> > a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), ALL); - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - _t1 = sum; - _t2 = 9.9999999999999995E-7 * _t1; - _t0 = sum; - goto _label0; - _label0: - { - double _r0 = 1 * _t0; - double _r1 = _r0 * _t1; - double _r2 = 9.9999999999999995E-7 * _r0; - _d_sum += _r2; - double _r3 = _t2 * 1; - _d_sum += _r3; - } - kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); -} diff --git a/kokkos/generated/Derivatives.hpp b/kokkos/generated/Derivatives.hpp deleted file mode 100644 index 76f7fc53b..000000000 --- a/kokkos/generated/Derivatives.hpp +++ /dev/null @@ -1,238 +0,0 @@ -KOKKOS_INLINE_FUNCTION void pow_pullback(double a, double _d_y, clad::array_ref _d_a) { - double _t0; - double _t1; - _t1 = a; - _t0 = a; - goto _label0; - _label0: - { - double _r0 = _d_y * _t0; - * _d_a += _r0; - double _r1 = _t1 * _d_y; - * _d_a += _r1; - } -} -template -void f_view_pullback(type_a a, typename type_a::value_type _d_y, type_a _d_a) { - typename type_a::value_type _d_sum = 0; - auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); - typename type_a::value_type _t0; - typename type_a::value_type _t1; - double _t2; - typename type_a::value_type sum; - auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - _t1 = sum; - _t2 = 9.9999999999999995E-7 * _t1; - _t0 = sum; - goto _label0; - _label0: - { - double _r0 = _d_y * _t0; - double _r1 = _r0 * _t1; - double _r2 = 9.9999999999999995E-7 * _r0; - _d_sum += _r2; - double _r3 = _t2 * _d_y; - _d_sum += _r3; - } - kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); -} -void f_grad(double x, double y, clad::array_ref _d_x, clad::array_ref _d_y) { - const int N1 = 4; - int _d_N1 = 0; - int _d_N2 = 0; - Kokkos::View _d_a("_d_a", N1); - Kokkos::View _d_b("_d_b", N1); - double _t0; - double _t1; - double _d_tmp = 0; - double _t2; - double _t3; - double _t4; - double _d_sum = 0; - auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); - Kokkos::View, Kokkos::MemoryTraits<0> > _t7; - const int N2 = 4; - Kokkos::View a("a", N1); - Kokkos::View b("b", N1); - _t1 = x; - _t0 = x; - double tmp = _t1 * _t0 + y; - Kokkos::deep_copy(a, tmp); - Kokkos::deep_copy(a, x); - _t2 = x; - _t4 = x; - _t3 = x; - Kokkos::deep_copy(b, x * _t2 + y); - Kokkos::parallel_for(b.extent(0), KOKKOS_LAMBDA(const int j0) { - b(j0, 0) += 3.5299999999999998; - }); - const Kokkos::View _t5 = b; - const Kokkos::View _t6 = a; - Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - a(j1, 0) += b(j1 + 1, 0) * 6.8899999999999997 + b(j1, 0) + pow(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2)); - }); - double sum; - auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); - _t7 = a_row_0; - goto _label0; - _label0: - { - f_view_pullback(_t7, 1, _d_a_row_0); - Kokkos::View, Kokkos::MemoryTraits<0> > _r11 = _d_a_row_0; - } - { - Kokkos::deep_copy(b, _t5); - Kokkos::deep_copy(a, _t6); - Kokkos::parallel_for(a.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - { - double _r_d1 = _d_a(j1, 0); - _d_a(j1, 0) += _r_d1; - double _r5 = _r_d1 * 6.8899999999999997; - Kokkos::atomic_add(&_d_b(j1 + 1, 0), _r5); - Kokkos::atomic_add(&_d_b(j1, 0), _r_d1); - double _grad1 = 0.; - pow_pullback(b(j1, 1) + a(j1, 1) + b(j1 + 1, 0) * b(j1 + 1, 0) * a(j1, 2), _r_d1, &_grad1); - double _r6 = _grad1; - _d_b(j1, 1) += _r6; - _d_a(j1, 1) += _r6; - double _r7 = _r6 * a(j1, 2); - double _r8 = _r7 * b(j1 + 1, 0); - _d_b(j1 + 1, 0) += _r8; - double _r9 = b(j1 + 1, 0) * _r7; - _d_b(j1 + 1, 0) += _r9; - double _r10 = b(j1 + 1, 0) * b(j1 + 1, 0) * _r6; - _d_a(j1, 2) += _r10; - _d_a(j1, 0) -= _r_d1; - _d_a(j1, 0); - } - }); - } - Kokkos::parallel_for(b.extent(0), KOKKOS_LAMBDA(const int j0) { - { - double _r_d0 = _d_b(j0, 0); - _d_b(j0, 0) += _r_d0; - _d_b(j0, 0) -= _r_d0; - _d_b(j0, 0); - } - }); - { - double _grad0 = 0.; - kokkos_builtin_derivative::parallel_sum(_grad0, _d_b); - Kokkos::deep_copy(_d_b, 0.); - double _r2 = _grad0; - double _r3 = _r2 * _t3; - * _d_x += _r3; - double _r4 = _t4 * _r2; - * _d_x += _r4; - * _d_y += _r2; - } - { - kokkos_builtin_derivative::parallel_sum(* _d_x, _d_a); - Kokkos::deep_copy(_d_a, 0.); - } - { - kokkos_builtin_derivative::parallel_sum(_d_tmp, _d_a); - Kokkos::deep_copy(_d_a, 0.); - } - { - double _r0 = _d_tmp * _t0; - * _d_x += _r0; - double _r1 = _t1 * _d_tmp; - * _d_x += _r1; - * _d_y += _d_tmp; - } -} -template -void f_multilevel_grad(type_x x, type_x _d_x) { - bool _cond0; - const int n_max = 10; - _cond0 = x.extent(0) > n_max; - const int n = _cond0 ? n_max : x.extent(0); - typename type_x::value_type _d_sum = 0; - type_x _d_y("_d_y", x.extent(0)); - int _d_n_max = 0; - int _d_n = 0; - auto _d_y_n_rows = Kokkos::subview(_d_y, Kokkos::make_pair(0, n)); - typename type_x::value_type sum; - type_x y("y", x.extent(0)); - Kokkos::parallel_for(x.extent(0), KOKKOS_LAMBDA(const int j0) { - x(j0) = 3 * x(j0); - }); - const type_x _t0 = x; - Kokkos::parallel_for(x.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - if (j1 != x.extent(0) - 1) - y(j1 + 1) = 2.6000000000000001 * x(j1) * x(j1); - else - y(j1) = 2.6000000000000001 * x(0) * x(0); - }); - auto y_n_rows = Kokkos::subview(y, Kokkos::make_pair(0, n)); - kokkos_builtin_derivative::parallel_sum(sum, y_n_rows); - goto _label0; - _label0: - _d_sum += 1; - kokkos_builtin_derivative::parallel_sum(_d_y_n_rows, _d_sum); - if (_cond0) - _d_n_max += _d_n; - { - Kokkos::deep_copy(x, _t0); - Kokkos::parallel_for(x.extent(0) - 1, KOKKOS_LAMBDA(const int j1) { - if (j1 != x.extent(0) - 1) { - double _r_d1 = _d_y(j1 + 1); - double _r2 = _r_d1 * x(j1); - double _r3 = _r2 * x(j1); - double _r4 = 2.6000000000000001 * _r2; - _d_x(j1) += _r4; - double _r5 = 2.6000000000000001 * x(j1) * _r_d1; - _d_x(j1) += _r5; - _d_y(j1 + 1) -= _r_d1; - _d_y(j1 + 1); - } else { - double _r_d2 = _d_y(j1); - double _r6 = _r_d2 * x(0); - double _r7 = _r6 * x(0); - double _r8 = 2.6000000000000001 * _r6; - Kokkos::atomic_add(&_d_x(0), _r8); - double _r9 = 2.6000000000000001 * x(0) * _r_d2; - Kokkos::atomic_add(&_d_x(0), _r9); - _d_y(j1) -= _r_d2; - _d_y(j1); - } - }); - } - Kokkos::parallel_for(x.extent(0), KOKKOS_LAMBDA(const int j0) { - { - double _r_d0 = _d_x(j0); - double _r0 = _r_d0 * x(j0); - double _r1 = 3 * _r_d0; - _d_x(j0) += _r1; - _d_x(j0) -= _r_d0; - _d_x(j0); - } - }); -} -template -void f_view_grad(type_a a, type_a _d_a) { - typename type_a::value_type _d_sum = 0; - auto _d_a_row_0 = Kokkos::subview(_d_a, Kokkos::make_pair(0, 2), Kokkos::ALL); - typename type_a::value_type _t0; - typename type_a::value_type _t1; - double _t2; - typename type_a::value_type sum; - auto a_row_0 = Kokkos::subview(a, Kokkos::make_pair(0, 2), Kokkos::ALL); - kokkos_builtin_derivative::parallel_sum(sum, a_row_0); - _t1 = sum; - _t2 = 9.9999999999999995E-7 * _t1; - _t0 = sum; - goto _label0; - _label0: - { - double _r0 = 1 * _t0; - double _r1 = _r0 * _t1; - double _r2 = 9.9999999999999995E-7 * _r0; - _d_sum += _r2; - double _r3 = _t2 * 1; - _d_sum += _r3; - } - kokkos_builtin_derivative::parallel_sum(_d_a_row_0, _d_sum); -} diff --git a/kokkos/lambda_reduction.hpp b/kokkos/lambda_reduction.hpp deleted file mode 100644 index e122f2c9e..000000000 --- a/kokkos/lambda_reduction.hpp +++ /dev/null @@ -1,19 +0,0 @@ - -template -typename ViewtypeA::value_type weightedDotProduct_2(ViewtypeA A, Viewtypex x, Viewtypey y) { - // Application: = y^T*A*x - - typename ViewtypeA::value_type result = 0; - - Kokkos::parallel_reduce( A.extent(0), KOKKOS_LAMBDA ( int j, typename ViewtypeA::value_type &update ) { - typename ViewtypeA::value_type temp2 = 0; - - for ( int i = 0; i < A.extent(1); ++i ) { - temp2 += A( j, i ) * x( i ); - } - - update += y( j ) * temp2; - }, result ); - - return result; -} \ No newline at end of file diff --git a/kokkos/lambda_reduction_subview.hpp b/kokkos/lambda_reduction_subview.hpp deleted file mode 100644 index eac7fc95f..000000000 --- a/kokkos/lambda_reduction_subview.hpp +++ /dev/null @@ -1,21 +0,0 @@ - -template -typename ViewtypeA::value_type weightedDotProduct_1(ViewtypeA A, Viewtypex x, Viewtypey y) { - // Application: = y^T*A*x - - typename ViewtypeA::value_type result = 0; - - Kokkos::parallel_reduce( A.extent(0), KOKKOS_LAMBDA ( int j, typename ViewtypeA::value_type &update ) { - typename ViewtypeA::value_type temp2 = 0; - - auto A_row_j = Kokkos::subview( A, j, Kokkos::ALL ); - - for ( int i = 0; i < A.extent(1); ++i ) { - temp2 += A_row_j( i ) * x( i ); - } - - update += y( j ) * temp2; - }, result ); - - return result; -} \ No newline at end of file diff --git a/kokkos/main.cpp b/kokkos/main.cpp deleted file mode 100644 index 085b18c65..000000000 --- a/kokkos/main.cpp +++ /dev/null @@ -1,159 +0,0 @@ -#include "clad/Differentiator/Differentiator.h" -#include -#include "parallel_sum.hpp" -#include "functor_for.hpp" -#include "lambda_reduction.hpp" -#include "lambda_reduction_subview.hpp" -#include - -//#define use_forward_mode - -#ifdef use_generated_file -#include "generated/Derivatives.hpp" -#endif - -template -typename ViewtypeA::value_type solve(ViewtypeA A, typename ViewtypeA::value_type (*objective)(ViewtypeA), CladFunctionType gradient) { - ViewtypeA gradA("gradA", A.extent(0), A.extent(1)); - ViewtypeA tmp("tmp", A.extent(0), A.extent(1)); - - std::vector objective_history; - - int n_iterations = 10; - int n_line_search = 10; - - double epsilon_min = 0.; - double epsilon_tmp = 0.; - double epsilon_max = 1000.; - double epsilon_delta = (epsilon_max-epsilon_min)/n_line_search; - - typename ViewtypeA::value_type obj_min = objective(A); - - objective_history.push_back(obj_min); - - for (int i = 0; i < n_iterations; ++i) { - - gradient.execute(A, &gradA); - - epsilon_min = 0.; - - for (int j = 0; j < n_line_search; ++j) { - epsilon_tmp = epsilon_delta * (j+1); - Kokkos::parallel_for( A.extent(0), KOKKOS_LAMBDA ( int i) { - - for ( int j = 0; j < A.extent(1); ++j ) { - tmp( i, j ) = A( i, j ) - epsilon_tmp * gradA( i, j ); - } - }); - - typename ViewtypeA::value_type obj_tmp = objective(tmp); - - if ( obj_tmp < obj_min) { - obj_min = obj_tmp; - epsilon_min = epsilon_tmp; - } - } - - Kokkos::parallel_for( A.extent(0), KOKKOS_LAMBDA ( int i) { - - for ( int j = 0; j < A.extent(1); ++j ) { - A( i, j ) -= epsilon_min * gradA( i, j ); - } - }); - - objective_history.push_back(obj_min); - } - - - for (int i = 0; i < n_iterations + 1; ++i) { - std::cout << "Objective value " << objective_history[i] << " iteration " << i << std::endl; - } - return obj_min; -} - -int main(int argc, char* argv[]) { - Kokkos::initialize(argc, argv); - { - constexpr int N = 100; - Kokkos::View A("A", N, N); - Kokkos::View dA("dA", N, N); - Kokkos::View x("x", N); - Kokkos::View y("y", N); - - Kokkos::deep_copy(A, 3); - Kokkos::deep_copy(x, 2); - Kokkos::deep_copy(y, 4); - - std::cout << f(3.,4.) << std::endl; - std::cout << weightedDotProduct_1(A, x, y) << std::endl; - std::cout << weightedDotProduct_2(A, x, y) << std::endl; - - auto t0_f_view = std::chrono::high_resolution_clock::now(); - double obj = f_view(A); - auto t1_f_view = std::chrono::high_resolution_clock::now(); - std::cout << obj << std::endl; - - double epsilon = 1e-6; - - double f_pe = f(3.+epsilon,4.); - double f_me = f(3.-epsilon,4.); - double dx_f_FD = (f_pe-f_me) / (2 * epsilon); - - double tolerance = 1e-6; - - std::cout << "dx_f_FD: " << dx_f_FD << std::endl; - - double dx = 0, dy = 0; - double dx_f; - -#ifndef use_generated_file - #ifdef use_forward_mode - auto f_dx_exe = clad::differentiate(f, "x"); - // Any of the two below will generate an "error: Attempted differentiation w.r.t. member 'x' which is not of real type." - //auto weightedDotProduct_1_dx = clad::differentiate(weightedDotProduct_1, "x"); - //auto weightedDotProduct_2_dx = clad::differentiate(weightedDotProduct_2, "x"); - #endif - auto f_grad_exe = clad::gradient(f); - - auto f_multilevel_grad_exe = clad::gradient(f_multilevel>); - auto f_view_grad_exe = clad::gradient(f_view>); - #ifdef use_forward_mode - dx_f = f_dx_exe.execute(3.,4.); - #endif - // After this call, dx and dy will store the derivatives of x and y respectively. - f_grad_exe.execute(3., 4., &dx, &dy); - - auto t0_f_view_grad = std::chrono::high_resolution_clock::now(); - f_view_grad_exe.execute(A, &dA); - auto t1_f_view_grad = std::chrono::high_resolution_clock::now(); - - solve(A, &f_view, f_view_grad_exe); -#else - #ifdef use_forward_mode - dx_f = f_darg0(3.,4.); - #endif - f_grad(3., 4., &dx, &dy); - - auto t0_f_view_grad = std::chrono::high_resolution_clock::now(); - f_view_grad>(A, dA); - auto t1_f_view_grad = std::chrono::high_resolution_clock::now(); -#endif - - double time_f_view = (t1_f_view-t0_f_view).count()*1E-9 ; - double time_f_view_grad = (t1_f_view_grad-t0_f_view_grad).count()*1E-9 ; - std::cout << " f_view took "<< time_f_view <<" second(s)."<< std::endl; - std::cout << " f_view_grad took "<< time_f_view_grad <<" second(s)."<< std::endl; - std::cout << " f_view_grad took "<< time_f_view_grad/time_f_view <<" the wall-clock time of f_view."<< std::endl; - - #ifdef use_forward_mode - std::cout << "dx: " << dx_f << std::endl; - #endif - std::cout << "dx: " << dx << ' ' << "dy: " << dy << std::endl; - #ifdef use_forward_mode - assert(dx==dx_f && "error"); - #endif - assert((std::abs(dx-dx_f_FD)/std::abs(dx_f_FD)) - -namespace kokkos_builtin_derivative { - -/* Things to do: - -- use span_is_contiguous corner case (regardless of the rank): done -- check the span of the thing, do we need more than int32. -- deduce iterate base on layout: done -- If you give me an execution space: non-blocking (in theory) (use an unmaged view if scalar argument): done -- If no execution space: blocking: done -*/ - -// Parallel sum: - -template -struct ViewSum; - -template -struct ViewSum { - - template - static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { - - using policy_type = Kokkos::RangePolicy>; - using value_type = typename Viewtype::value_type; - - value_type sum; - - Kokkos::parallel_reduce( - "ViewSum-1D", - policy_type(space, 0, v.extent(0)), - KOKKOS_LAMBDA ( - const iType& i0, - value_type& update) { - update += v(i0); - }, - sum ); - - result += sum; - } -}; - -template -struct ViewSum { - - template - static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { - - static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; - static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; - using iterate_type = - Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; - using policy_type = - Kokkos::MDRangePolicy>; - using value_type = typename Viewtype::value_type; - - value_type sum; - - Kokkos::parallel_reduce( - "ViewSum-2D", - policy_type(space, {0, 0}, {v.extent(0), v.extent(1)}), - KOKKOS_LAMBDA ( - const iType& i0, - const iType& i1, - value_type& update) { - update += v(i0, i1); - }, - sum ); - - result += sum; - } -}; - -template -struct ViewSum { - - template - static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { - - static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; - static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; - using iterate_type = - Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; - using policy_type = - Kokkos::MDRangePolicy>; - using value_type = typename Viewtype::value_type; - - value_type sum; - - Kokkos::parallel_reduce( - "ViewSum-3D", - policy_type(space, {0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), - KOKKOS_LAMBDA ( - const iType& i0, - const iType& i1, - const iType& i2, - value_type& update) { - update += v(i0, i1, i2); - }, - sum ); - - result += sum; - } -}; - -// Parallel add - -template -struct ViewAdd; - -template -struct ViewAdd { - - template - static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { - - using policy_type = Kokkos::RangePolicy>; - - Kokkos::parallel_for( - "ViewAdd-1D", - policy_type(space, 0, v.extent(0)), - KOKKOS_LAMBDA ( - const iType& i0) { - v(i0) += update; - }); - } -}; - -template -struct ViewAdd { - - template - static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { - - static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; - static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; - using iterate_type = - Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; - using policy_type = - Kokkos::MDRangePolicy>; - - Kokkos::parallel_for( - "ViewAdd-2D", - policy_type(space, {0, 0}, {v.extent(0), v.extent(1)}), - KOKKOS_LAMBDA ( - const iType& i0, - const iType& i1) { - v(i0, i1) += update; - }); - } -}; - -template -struct ViewAdd { - - template - static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { - - static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; - static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; - using iterate_type = - Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; - using policy_type = - Kokkos::MDRangePolicy>; - - Kokkos::parallel_for( - "ViewAdd-3D", - policy_type(space, {0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), - KOKKOS_LAMBDA ( - const iType& i0, - const iType& i1, - const iType& i2) { - v(i0, i1, i2) += update; - }); - } -}; - - -template -void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { - Kokkos::fence("parallel_sum: pre sum fence"); - if (A.span_is_contiguous()) { - - using ViewTypeFlat = Kokkos::View< - typename ViewtypeA::value_type*, Kokkos::LayoutRight, - Kokkos::Device>, - Kokkos::MemoryTraits<0>>; - - ViewTypeFlat A_flat(A.data(), A.size()); - ViewSum::template execute(sum, A_flat); - } - else { - ViewSum::template execute(sum, A); - } - Kokkos::fence("parallel_sum: post sum fence"); -} - -template -void parallel_sum(const ExecSpace& space, typename ViewtypeA::value_type &sum, const ViewtypeA A) { - space.fence("parallel_sum: pre sum fence"); - if (A.span_is_contiguous()) { - - using ViewTypeFlat = Kokkos::View< - typename ViewtypeA::value_type*, Kokkos::LayoutRight, - Kokkos::Device>, - Kokkos::MemoryTraits<0>>; - - ViewTypeFlat A_flat(A.data(), A.size()); - ViewSum::template execute(sum, A_flat, space); - } - else { - ViewSum::template execute(sum, A, space); - } - space.fence("parallel_sum: post sum fence"); -} - -template -void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { - Kokkos::fence("parallel_sum: pre add fence"); - if (A.span_is_contiguous()) { - - using ViewTypeFlat = Kokkos::View< - typename ViewtypeA::value_type*, Kokkos::LayoutRight, - Kokkos::Device>, - Kokkos::MemoryTraits<0>>; - - ViewTypeFlat A_flat(A.data(), A.size()); - ViewAdd::template execute(A_flat, b); - } - else { - ViewAdd::template execute(A, b); - } - Kokkos::fence("parallel_sum: post add fence"); -} - -template -void parallel_sum(const ExecSpace& space, ViewtypeA A, const typename ViewtypeA::value_type b) { - space.fence("parallel_sum: pre add fence"); - if (A.span_is_contiguous()) { - - using ViewTypeFlat = Kokkos::View< - typename ViewtypeA::value_type*, Kokkos::LayoutRight, - Kokkos::Device>, - Kokkos::MemoryTraits<0>>; - - ViewTypeFlat A_flat(A.data(), A.size()); - ViewAdd::template execute(A_flat, b, space); - } - else { - ViewAdd::template execute(A, b, space); - } - space.fence("parallel_sum: post add fence"); -} - -} \ No newline at end of file diff --git a/kokkos/postProcess.py b/kokkos/postProcess.py deleted file mode 100644 index 0f2ae78a3..000000000 --- a/kokkos/postProcess.py +++ /dev/null @@ -1,182 +0,0 @@ -import getopt, sys - -def replaceKokkosInlineFunction(stringIn): - string_old = 'inline' - string_new = 'KOKKOS_INLINE_FUNCTION' - return stringIn.replace(string_old, string_new) - -def replaceKokkosLambda(stringIn): - if stringIn.find('Kokkos::parallel_for') == -1 and stringIn.find('Kokkos::parallel_reduce') == -1: - return stringIn - string_old = '[=]' - string_new = 'KOKKOS_LAMBDA' - return stringIn.replace(string_old, string_new) - -def useAutoInSubview(stringIn): - if stringIn.find('= Kokkos::subview') == -1 and stringIn.find('= Kokkos::create_mirror_view') == -1: - return stringIn - index_eq = stringIn.find('=') - 1 - for index in range(1, index_eq): - if stringIn[index_eq-index] == ' ': - index1 = index_eq-index - break - index0 = 0 - for index in range(0, index_eq): - if stringIn[index] != ' ': - index0 = index - break - stringOut = stringIn[0:index0] + 'auto ' + stringIn[index1+1:-1] + stringIn[-1] - return stringOut - -def useKokkosNamespace(stringIn): - kokkosKeywords = ['ALL', 'View', 'LayoutLeft', 'LayoutRight', 'Device', 'Serial', 'HostSpace', 'MemoryTraits'] - for kokkosKeyword in kokkosKeywords: - stringIn = stringIn.replace(' '+kokkosKeyword, ' Kokkos::'+kokkosKeyword).replace('<'+kokkosKeyword, '': - bracket_lvl += 1 - if linesIn[index0][index_end-index] == '<': - bracket_lvl -= 1 - return linesIn[index0][index_begin:index_end] - - -def swapTypeForTemplate(linesIn, fucntionName, variableName, index0=-1, index1=-1): - if index0 == -1 or index1 == -1: - index0, index1 = getFunctionLineIDs(linesIn, fucntionName) - typeVar = getType(linesIn, fucntionName, variableName) - template = 'type_' + variableName - linesIn[index0] = 'template \n' + linesIn[index0] - - for index in range(index0, index1): - linesIn[index] = linesIn[index].replace(typeVar, template) - - # Get the _d_ names and replace the clad::array_ref by Kokkos::view directly. - derivativeVarNames = [] - while linesIn[index0].find('clad::array_ref<' + template + ' >') != -1: - indexVarName0 = linesIn[index0].find('clad::array_ref<' + template + ' >') + len('clad::array_ref<' + template + ' >') + 1 - for indexVarName in range(indexVarName0, len(linesIn[index0])): - if linesIn[index0][indexVarName] == ',': - indexVarName1 = indexVarName - break - if linesIn[index0][indexVarName] == ')': - indexVarName1 = indexVarName - break - derivativeVarNames.append(linesIn[index0][indexVarName0:indexVarName1]) - linesIn[index0] = linesIn[index0].replace('clad::array_ref<' + template + ' >', template) - for index in range(index0, index1): - for derivativeVarName in derivativeVarNames: - linesIn[index] = linesIn[index].replace('(* ' + derivativeVarName + ')', derivativeVarName) - - for index in range(0, len(linesIn)): - #to be improved! - if linesIn[index].find(fucntionName) != -1 and linesIn[index].find(';') != -1: - linesIn[index] = linesIn[index].replace('&', '') - -def transform(filenameIn, filenameOut): - - fileIn = open(filenameIn, "r") - linesIn = fileIn.readlines() - fileOut = open(filenameOut, "w") - - swapLinesForVariableDecl(linesIn, 'f_grad', 'N1') - swapLinesForVariableDecl(linesIn, 'f_multilevel_grad', 'n_max', 3) - swapLinesForVariableDecl(linesIn, 'f_multilevel_grad', '_cond0') - - for i in range(0, len(linesIn)): - linesIn[i] = replaceKokkosInlineFunction(linesIn[i]) - linesIn[i] = replaceKokkosLambda(linesIn[i]) - linesIn[i] = useAutoInSubview(linesIn[i]) - linesIn[i] = useKokkosNamespace(linesIn[i]) - - swapTypeForTemplate(linesIn, 'f_view_grad', 'a') - swapTypeForTemplate(linesIn, 'f_view_pullback', 'a') - swapTypeForTemplate(linesIn, 'f_multilevel_grad', 'x') - - for line in linesIn: - fileOut.write(line) - fileIn.close() - fileOut.close() - -argumentList = sys.argv[1:] - -options = "hi:o:" - -long_options = ["help", "filenameIn=", "filenameOut="] - - -filenameIn = '' -filenameOut = '' - -try: - arguments, values = getopt.getopt(argumentList, options, long_options) - - for currentArgument, currentValue in arguments: - - if currentArgument in ("-h", "--help"): - print ("Displaying Help") - - elif currentArgument in ("-i", "--filenameIn"): - filenameIn = currentValue - - elif currentArgument in ("-o", "--filenameOut"): - filenameOut = currentValue - -except getopt.error as err: - # output error, and return with an error code - print (str(err)) - -if filenameIn != '' and filenameOut != '' : - transform(filenameIn, filenameOut) -else: - print("Missing arguments") diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index 3ed80ca75..381220f48 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -31,8 +31,6 @@ #include "clad/Differentiator/Compatibility.h" -#include - using namespace clang; namespace clad { @@ -214,14 +212,6 @@ BaseForwardModeVisitor::Derive(const FunctionDecl* FD, derivedFD->setParams(paramsRef); derivedFD->setBody(nullptr); - //AnnotateAttr* A = FD->getAttr(); - //if (A && - // (A->getAnnotation().equals("KOKKOS_INLINE_FUNCTION") || A->getAnnotation().equals("KOKKOS_FUNCTION"))) { - // std::cout << "This is a Kokkos function!" << std::endl; - // //derivedFD->addAttr(A); - // derivedFD->dump(); - // } - // Function body scope beginScope(Scope::FnScope | Scope::DeclScope); m_DerivativeFnScope = getCurrentScope(); @@ -975,7 +965,7 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { auto MCE = dyn_cast(CE); if (utils::IsKokkosView(MCE->getObjectType().getAsString())) { - //std::cout << "Member function called from a Kokkos::View; nothing to do here" << std::endl; + //Member function called from a Kokkos::View; nothing to do here return StmtDiff(Clone(CE)); } } @@ -990,7 +980,6 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { auto SE = baseOriginalE->IgnoreImpCasts(); if (auto DRE = dyn_cast(SE)) { std::string constructedTypeName = QualType::getAsString(DRE->getType().split(), PrintingPolicy{ {} }); - std::cout << constructedTypeName << std::endl; if (utils::IsKokkosView(constructedTypeName)) { isKokkosViewAccess = true; kokkosViewName = DRE->getNameInfo().getName().getAsString (); @@ -1021,7 +1010,6 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { noLoc, ClonedArgs, noLoc) .get(); - //std::cout << " kokkosViewName = " << kokkosViewName << std::endl; return StmtDiff(Call, dCall); } } @@ -1195,14 +1183,6 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { if (BaseForwardModeVisitor::IsDifferentiableType(arg->getType())) { Expr* dArg = argDiff.getExpr_dx(); QualType CallArgTy = CallArgs.back()->getType(); - - std::string error_message = "Type mismatch, we might fail to instantiate a pullback with types " + - QualType::getAsString(CallArgTy.split(), PrintingPolicy{ {} }) + " and " + - QualType::getAsString(dArg->getType().split(), PrintingPolicy{ {} }); - if (!(!dArg || m_Context.hasSameType(CallArgTy, dArg->getType()))) { - std::cout << error_message.c_str() << std::endl; - CE->dump(); - } assert((!dArg || m_Context.hasSameType(CallArgTy, dArg->getType())) && "Type mismatch, we might fail to instantiate a pullback"); (void)CallArgTy; @@ -1935,13 +1915,6 @@ StmtDiff BaseForwardModeVisitor::VisitBreakStmt(const BreakStmt* stmt) { StmtDiff BaseForwardModeVisitor::VisitCXXConstructExpr(const CXXConstructExpr* CE) { llvm::SmallVector clonedArgs, derivedArgs; - //CE->dump (); - //std::string className = CE->getStmtClassName(); - //std::cout << className << std::endl; - //CE->getConstructor ()->dump(); - //std::cout << className << std::endl; - //CE->getType()->dump(); - //std::cout << CE->getType()->getAsString () << std::endl; std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); // Check if we are in a Kokkos View construction. @@ -1956,11 +1929,7 @@ BaseForwardModeVisitor::VisitCXXConstructExpr(const CXXConstructExpr* CE) { compileTimeDims.push_back(std::stoi(&constructedTypeName[i+1])); if (!read && constructedTypeName[i] == ' ') read = true; - } - //std::cout << "runTimeDim = " << runTimeDim << std::endl; - //std::cout << "compileTimeDim = " << compileTimeDims.size() << std::endl; - //for (auto compileTimeDim : compileTimeDims) - // std::cout << " compileTimeDim = " << compileTimeDim << std::endl; + } size_t i = 0; for (auto arg : CE->arguments()) { @@ -2158,7 +2127,8 @@ StmtDiff BaseForwardModeVisitor::VisitValueStmt( std::string name_str("_d_"+ SL->getString().str()); StringRef name(name_str); - Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); + Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, + SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); return {Clone(VS), derivedVS}; } return {Clone(VS), Clone(VS)}; diff --git a/lib/Differentiator/CladUtils.cpp b/lib/Differentiator/CladUtils.cpp index 3af5446f2..458f314ff 100644 --- a/lib/Differentiator/CladUtils.cpp +++ b/lib/Differentiator/CladUtils.cpp @@ -344,7 +344,6 @@ namespace clad { || constructedTypeName.find("class Kokkos::View") == 0 || constructedTypeName.find("const Kokkos::View") == 0 || constructedTypeName.find("const class Kokkos::View") == 0; - //return constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find(" - using namespace clang; namespace clad { @@ -762,7 +760,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, std::string name_str("_d_"+ SL->getString().str()); StringRef name(name_str); - Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); + Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, + SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); return {Clone(VS), derivedVS}; } @@ -950,11 +949,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, LE->getEndLoc(), false); - //std::cout << "forwardLE->dump()" << std::endl; - //forwardLE->dump(); - //std::cout << "reverseLE->dump()" << std::endl; - //reverseLE->dump(); - endScope(); } @@ -1398,6 +1392,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, std::cout << "return value is a view!" << std::endl; } auto* dfdf = m_Pullback; + if (isa(dfdf) || isa(dfdf)) { ExprResult tmp = dfdf; dfdf = m_Sema @@ -1607,9 +1602,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } StmtDiff ReverseModeVisitor::VisitCallExpr(const CallExpr* CE) { - //std::cout << " CE dump start" << std::endl; - //CE->dump(); - //std::cout << " CE dump end" << std::endl; if (isa(CE)) { auto MCE = dyn_cast(CE); @@ -1662,39 +1654,21 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, const Expr* baseOriginalE = OCE->getArg(0); bool isKokkosViewAccess = false; - //std::string kokkosViewName; - - //std::cout << " OCE dump start" << std::endl; - //OCE->dump(); - //std::cout << " OCE dump start" << std::endl; - - //std::cout << " baseOriginalE dump start" << std::endl; - //baseOriginalE->dump(); - //std::cout << " baseOriginalE dump start" << std::endl; if (isa(baseOriginalE)) { - //std::cout << "true 1" << std::endl; auto SE = baseOriginalE->IgnoreImpCasts(); if (auto DRE = dyn_cast(SE)) { - //std::cout << "true 2" << std::endl; if (utils::IsKokkosView(DRE->getType())) { - //std::cout << "true 3" << std::endl; isKokkosViewAccess = true; - //kokkosViewName = DRE->getNameInfo().getName().getAsString (); } } } if (auto DRE = dyn_cast(baseOriginalE)) { - //std::cout << "true 2" << std::endl; - //DRE->getType()->dump(); if (utils::IsKokkosView(DRE->getType())) { - //std::cout << "true 3" << std::endl; isKokkosViewAccess = true; - //kokkosViewName = DRE->getNameInfo().getName().getAsString (); } isKokkosViewAccess = true; } - //std::cout << " isKokkosViewAccess = " << isKokkosViewAccess << std::endl; // Returning the function call and zero derivative if (isKokkosViewAccess) { @@ -1708,8 +1682,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, noLoc, ClonedArgs, noLoc) .get(); - // replace kokkosViewName with "_d_"+kokkosViewName - auto visited = Visit(CE->getArg(0), dfdx()); Expr* dView = visited.getExpr_dx(); @@ -1773,7 +1745,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, auto visitedArg_0 = Visit(CE->getArg(0), dfdx()); auto visitedArg_1 = Visit(CE->getArg(1), dfdx()); - //auto visitedArg_2 = Visit(CE->getArg(2), dfdx()); ClonedArgs.push_back(visitedArg_0.getExpr()); ClonedArgs.push_back(visitedArg_1.getExpr()); @@ -2007,10 +1978,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, llvm::SmallVector ClonedDArgs; for (unsigned i = 0, e = CE->getNumArgs(); i < e; ++i) { - //std::cout << "Start CE->getArg("<dump()" << std::endl; - //CE->getArg(i)->dump(); - //std::cout << "end CE->getArg("<dump()" << std::endl; - auto arg = CE->getArg(i); if (const auto* MTE = dyn_cast(arg)) arg = clad_compat::GetSubExpr(MTE); @@ -2381,7 +2348,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, std::string customPushforward = FD->getNameAsString() + "_pushforward"; auto pushforwardCallArgs = DerivedCallArgs; if (utils::IsKokkosView(DerivedCallArgs.front()->getType())) { - // KL: Is it useful? pushforwardCallArgs.push_back(DerivedCallArgs.front()); } else { diff --git a/lib/Differentiator/VisitorBase.cpp b/lib/Differentiator/VisitorBase.cpp index 0a707451a..d6fbc9a4a 100644 --- a/lib/Differentiator/VisitorBase.cpp +++ b/lib/Differentiator/VisitorBase.cpp @@ -690,9 +690,6 @@ namespace clad { } QualType VisitorBase::GetCladArrayRefOfType(clang::QualType T) { - //KL: needed ? - //if (utils::IsKokkosView(T)) - // return T; return InstantiateTemplate(GetCladArrayRefDecl(), {T}); } From 5d79b2c2b8cf936554af40f908b6b0c28076f872 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 18 Jan 2024 12:08:36 -0700 Subject: [PATCH 63/75] remove warning --- lib/Differentiator/CladUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Differentiator/CladUtils.cpp b/lib/Differentiator/CladUtils.cpp index 458f314ff..9ec43a1ca 100644 --- a/lib/Differentiator/CladUtils.cpp +++ b/lib/Differentiator/CladUtils.cpp @@ -336,7 +336,7 @@ namespace clad { } bool IsKokkosMember(const std::string constructedTypeName) { - return constructedTypeName.find("member_type") != -1; + return constructedTypeName.find("member_type") != (size_t) -1; } bool IsKokkosView(const std::string constructedTypeName) { From 845a1b3d1bd4f1f12c014d529588190d4ec667ef Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 18 Jan 2024 13:08:58 -0700 Subject: [PATCH 64/75] merged master --- lib/Differentiator/BaseForwardModeVisitor.cpp | 4 ---- lib/Differentiator/ReverseModeVisitor.cpp | 11 ++++------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index 381220f48..b0370ceec 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -1182,10 +1182,6 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { CallArgs.push_back(argDiff.getExpr()); if (BaseForwardModeVisitor::IsDifferentiableType(arg->getType())) { Expr* dArg = argDiff.getExpr_dx(); - QualType CallArgTy = CallArgs.back()->getType(); - assert((!dArg || m_Context.hasSameType(CallArgTy, dArg->getType())) && - "Type mismatch, we might fail to instantiate a pullback"); - (void)CallArgTy; // FIXME: What happens when dArg is nullptr? diffArgs.push_back(dArg); } diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 0dd4393af..4d5b565c6 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1387,12 +1387,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, // Initially, df/df = 1. const Expr* value = RS->getRetValue(); QualType type = value->getType(); - - if (utils::IsKokkosView(type.getAsString())) { - std::cout << "return value is a view!" << std::endl; - } auto* dfdf = m_Pullback; - if (isa(dfdf) || isa(dfdf)) { ExprResult tmp = dfdf; dfdf = m_Sema @@ -3881,7 +3876,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return DelayedStoreResult{*this, Ediff, /*isConstant*/ isConst, /*isInsideLoop*/ false, - /*pNeedsUpdate=*/false, + /*pNeedsUpdate=*/ false, /*isInsideParallelRegion*/ false}; } if (isInsideLoop) { @@ -3918,7 +3913,9 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, // Return reference to the declaration instead of original expression. return DelayedStoreResult{*this, StmtDiff{Ref, nullptr, nullptr, Ref}, /*isConstant*/ false, - /*isInsideLoop*/ false, /*pNeedsUpdate=*/true}; + /*isInsideLoop*/ false, + /*pNeedsUpdate=*/ true, + /*isInsideParallelRegion=*/ false}; } ReverseModeVisitor::LoopCounter::LoopCounter(ReverseModeVisitor& RMV) From 14342cbc79622ba075ad2212945b891f75b45084 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 18 Jan 2024 14:45:49 -0700 Subject: [PATCH 65/75] Update the CMakeLists of the unittests --- unittests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 054f42630..86b4f41ae 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -2,7 +2,7 @@ add_custom_target(CladUnitTests) set_target_properties(CladUnitTests PROPERTIES FOLDER "Clad tests") # LLVM builds (not installed llvm) provides gtest. -if (NOT TARGET gtest) +if (NOT TARGET gtest AND CLAD_BUILT_STANDALONE) include(CladGoogleTest) endif() From 2bb7bc9d2e4d9da2096c684548ada93345ee53e4 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 18 Jan 2024 15:58:07 -0700 Subject: [PATCH 66/75] fix a bug --- lib/Differentiator/ReverseModeVisitor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 4d5b565c6..0be3769b3 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1662,7 +1662,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, if (utils::IsKokkosView(DRE->getType())) { isKokkosViewAccess = true; } - isKokkosViewAccess = true; } // Returning the function call and zero derivative From 1a5193b3451ad189e6a0ce43ef7a1f68ad7b3efe Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 18 Jan 2024 19:17:11 -0700 Subject: [PATCH 67/75] start to add unit tests --- lib/Differentiator/BaseForwardModeVisitor.cpp | 2 - lib/Differentiator/ReverseModeVisitor.cpp | 2 +- unittests/Kokkos/CMakeLists.txt | 1 + unittests/Kokkos/parallel_for.cpp | 31 ++ unittests/Kokkos/parallel_sum.hpp | 269 ++++++++++++++++++ unittests/Kokkos/view_access.cpp | 76 +++++ 6 files changed, 378 insertions(+), 3 deletions(-) create mode 100644 unittests/Kokkos/parallel_sum.hpp create mode 100644 unittests/Kokkos/view_access.cpp diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index b0370ceec..83bafbb70 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -1003,8 +1003,6 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) { Expr* dView = Visit(CE->getArg(0)).getExpr_dx(); - dView->dump(); - Expr* dCall = m_Sema .ActOnCallExpr(getCurrentScope(), dView, noLoc, ClonedArgs, noLoc) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 0be3769b3..cb515d30e 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1685,7 +1685,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, .get(); if (dfdx()) { - if (m_KVAV->isAccessThreadSafe(dyn_cast(Call))) { + if (!isInsideParallelRegion || m_KVAV->isAccessThreadSafe(dyn_cast(Call))) { Expr* add_assign = BuildOp(BO_AddAssign, dCall, dfdx()); addToCurrentBlock(add_assign, direction::reverse); } diff --git a/unittests/Kokkos/CMakeLists.txt b/unittests/Kokkos/CMakeLists.txt index be0250cf1..9e01dcddd 100644 --- a/unittests/Kokkos/CMakeLists.txt +++ b/unittests/Kokkos/CMakeLists.txt @@ -1,6 +1,7 @@ add_clad_unittest(KokkosTests parallel_for.cpp main.cpp + view_access.cpp ) # If llvm does not require rtti, kokkos does. diff --git a/unittests/Kokkos/parallel_for.cpp b/unittests/Kokkos/parallel_for.cpp index addade30b..239de4136 100644 --- a/unittests/Kokkos/parallel_for.cpp +++ b/unittests/Kokkos/parallel_for.cpp @@ -4,6 +4,8 @@ #include "clad/Differentiator/Differentiator.h" +#include "parallel_sum.hpp" + struct hello_world_pow2 { double x = 0.; // double result = 0.; @@ -12,6 +14,31 @@ struct hello_world_pow2 { } }; +template +typename ViewtypeX::value_type f_multilevel(ViewtypeX x) { + typename ViewtypeX::value_type sum; + + ViewtypeX y("y", x.extent(0)); + + Kokkos::parallel_for( x.extent(0), KOKKOS_LAMBDA ( const size_t j0) { + x(j0) = 3*x(j0); + }); + + Kokkos::parallel_for( x.extent(0)-1, KOKKOS_LAMBDA ( const size_t j1) { + if (j1 != x.extent(0)-1) + y(j1+1) = 2.6*x(j1)*x(j1); + else + y(j1) = 2.6*x(0)*x(0); + }); + + const int n_max = 10; + const int n = x.extent(0) > n_max ? n_max : x.extent(0); + + auto y_n_rows = Kokkos::subview( y, Kokkos::make_pair(0, n)); + kokkos_builtin_derivative::parallel_sum(sum, y_n_rows); + return sum; +} + TEST(parallel_for, HelloWorldFunctor) { hello_world_pow2 hw; hw.x = 2; @@ -19,3 +46,7 @@ TEST(parallel_for, HelloWorldFunctor) { // EXPECT_EQ(); // FIXME: Add the calls to clad::differentiate/gradient... } + +TEST(parallel_for, multilevelG) { + //auto f_multilevel_grad_exe = clad::gradient(f_multilevel>); +} \ No newline at end of file diff --git a/unittests/Kokkos/parallel_sum.hpp b/unittests/Kokkos/parallel_sum.hpp new file mode 100644 index 000000000..7e87a71e1 --- /dev/null +++ b/unittests/Kokkos/parallel_sum.hpp @@ -0,0 +1,269 @@ +#pragma once + +#include + +namespace kokkos_builtin_derivative { + +// Parallel sum: + +template +struct ViewSum; + +template +struct ViewSum { + + template + static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + + using policy_type = Kokkos::RangePolicy>; + using value_type = typename Viewtype::value_type; + + value_type sum; + + Kokkos::parallel_reduce( + "ViewSum-1D", + policy_type(space, 0, v.extent(0)), + KOKKOS_LAMBDA ( + const iType& i0, + value_type& update) { + update += v(i0); + }, + sum ); + + result += sum; + } +}; + +template +struct ViewSum { + + template + static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + using value_type = typename Viewtype::value_type; + + value_type sum; + + Kokkos::parallel_reduce( + "ViewSum-2D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1, + value_type& update) { + update += v(i0, i1); + }, + sum ); + + result += sum; + } +}; + +template +struct ViewSum { + + template + static void execute(ResultT& result, const Viewtype& v, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + using value_type = typename Viewtype::value_type; + + value_type sum; + + Kokkos::parallel_reduce( + "ViewSum-3D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1, + const iType& i2, + value_type& update) { + update += v(i0, i1, i2); + }, + sum ); + + result += sum; + } +}; + +// Parallel add + +template +struct ViewAdd; + +template +struct ViewAdd { + + template + static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + using policy_type = Kokkos::RangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-1D", + policy_type(space, 0, v.extent(0)), + KOKKOS_LAMBDA ( + const iType& i0) { + v(i0) += update; + }); + } +}; + +template +struct ViewAdd { + + template + static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-2D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1) { + v(i0, i1) += update; + }); + } +}; + +template +struct ViewAdd { + + template + static void execute(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-3D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1, + const iType& i2) { + v(i0, i1, i2) += update; + }); + } +}; + + +template +void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { + Kokkos::fence("parallel_sum: pre sum fence"); + if (A.span_is_contiguous()) { + + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewSum::template execute(sum, A_flat); + } + else { + ViewSum::template execute(sum, A); + } + Kokkos::fence("parallel_sum: post sum fence"); +} + +template +void parallel_sum(const ExecSpace& space, typename ViewtypeA::value_type &sum, const ViewtypeA A) { + space.fence("parallel_sum: pre sum fence"); + if (A.span_is_contiguous()) { + + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewSum::template execute(sum, A_flat, space); + } + else { + ViewSum::template execute(sum, A, space); + } + space.fence("parallel_sum: post sum fence"); +} + +template +void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { + Kokkos::fence("parallel_sum: pre add fence"); + if (A.span_is_contiguous()) { + + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewAdd::template execute(A_flat, b); + } + else { + ViewAdd::template execute(A, b); + } + Kokkos::fence("parallel_sum: post add fence"); +} + +template +void parallel_sum(const ExecSpace& space, ViewtypeA A, const typename ViewtypeA::value_type b) { + space.fence("parallel_sum: pre add fence"); + if (A.span_is_contiguous()) { + + using ViewTypeFlat = Kokkos::View< + typename ViewtypeA::value_type*, Kokkos::LayoutRight, + Kokkos::Device>, + Kokkos::MemoryTraits<0>>; + + ViewTypeFlat A_flat(A.data(), A.size()); + ViewAdd::template execute(A_flat, b, space); + } + else { + ViewAdd::template execute(A, b, space); + } + space.fence("parallel_sum: post add fence"); +} + +} \ No newline at end of file diff --git a/unittests/Kokkos/view_access.cpp b/unittests/Kokkos/view_access.cpp new file mode 100644 index 000000000..f1472b0d7 --- /dev/null +++ b/unittests/Kokkos/view_access.cpp @@ -0,0 +1,76 @@ +#include + +#include "gtest/gtest.h" + +#include "clad/Differentiator/Differentiator.h" + +#include "parallel_sum.hpp" + +template +T finiteDifferenceTangent(std::function func, const T& x, const T& epsilon) { + return (func(x+epsilon)-func(x-epsilon)) / (2 * epsilon); +} + +double f(double x, double y) { + + const int N1 = 4; + + Kokkos::View a("a", N1); + Kokkos::View b("b", N1); + + a(0,0) = x; + b(0,0) = y; + + b(0,0) += a(0,0) * b(0,0); + + return a(0,0) * a(0,0) * b(0,0) + b(0,0); +} + +double f_2(double x, double y) { + + const int N1 = 4; + + Kokkos::View a("a", N1); + Kokkos::View b("b", N1); + + Kokkos::deep_copy(a, 3*x+y); + b(0,0) = x; + //Kokkos::deep_copy(b, a); + + b(0,0) += a(0,0) * b(0,0); + + return a(0,0); +} + +TEST(view_access, test_1) { + EXPECT_NEAR(f(0,1), 1, 1e-8); + EXPECT_NEAR(f(0,2), 2, 1e-8); +} + +TEST(view_access, test_2) { + + double tolerance = 1e-8; + double epsilon = 1e-6; + + auto f_x = clad::differentiate(f, "x"); + + std::function f_tmp = [](double x){ return f(x,4.); }; + double dx_f_FD = finiteDifferenceTangent(f_tmp, 3., epsilon); + + EXPECT_NEAR(f_x.execute(3, 4),dx_f_FD,tolerance*dx_f_FD); + + auto f_2_x = clad::differentiate(f_2, "x"); + + std::function f_2_tmp = [](double x){ return f_2(x,4.); }; + double dx_f_2_FD = finiteDifferenceTangent(f_2_tmp, 3., epsilon); + EXPECT_NEAR(f_2_x.execute(3, 4),dx_f_2_FD,tolerance*dx_f_2_FD); + + auto f_grad_exe = clad::gradient(f); + double dx, dy; + f_grad_exe.execute(3., 4., &dx, &dy); + EXPECT_NEAR(f_x.execute(3, 4),dx,tolerance*dx); + + auto f_2_grad_exe = clad::gradient(f_2); + //f_2_grad_exe.execute(3., 4., &dx, &dy); + //EXPECT_NEAR(f_2_x.execute(3, 4),dx,tolerance*dx); +} From 21af8c1e0f45ef58508c434974a59442df699839 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 22 Jan 2024 08:33:49 -0700 Subject: [PATCH 68/75] towards fixing of the deep_copy test --- lib/Differentiator/ReverseModeVisitor.cpp | 2 +- unittests/Kokkos/parallel_sum.hpp | 90 +++++++++++++++++++++-- unittests/Kokkos/view_access.cpp | 2 +- 3 files changed, 84 insertions(+), 10 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index cb515d30e..3cdaec538 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1759,7 +1759,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedArgs, noLoc).get(); Expr* dCall = - m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgs, noLoc).get(); + m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_builtin_derivative_parallel_sum, noLoc, ClonedDArgs, noLoc).get(); Expr* dCallZero = m_Sema.ActOnCallExpr(getCurrentScope(), kokkos_deep_copy, noLoc, ClonedDArgsZero, noLoc).get(); diff --git a/unittests/Kokkos/parallel_sum.hpp b/unittests/Kokkos/parallel_sum.hpp index 7e87a71e1..c5d028e4f 100644 --- a/unittests/Kokkos/parallel_sum.hpp +++ b/unittests/Kokkos/parallel_sum.hpp @@ -122,6 +122,20 @@ struct ViewAdd { v(i0) += update; }); } + + template + static void executeView(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + using policy_type = Kokkos::RangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-1D", + policy_type(space, 0, v.extent(0)), + KOKKOS_LAMBDA ( + const iType& i0) { + v(i0) += update(i0); + }); + } }; template @@ -148,6 +162,28 @@ struct ViewAdd { v(i0, i1) += update; }); } + + template + static void executeView(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-2D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1) { + v(i0, i1) += update(i0, i1); + }); + } }; template @@ -175,11 +211,35 @@ struct ViewAdd { v(i0, i1, i2) += update; }); } + + template + static void executeView(const Viewtype& v, ResultT& update, const ExecSpace space = ExecSpace()) { + + static const Kokkos::Iterate outer_iteration_pattern = + Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = + Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + using iterate_type = + Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; + using policy_type = + Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "ViewAdd-3D", + policy_type(space, {0, 0}, {v.extent(0), v.extent(1), v.extent(2)}), + KOKKOS_LAMBDA ( + const iType& i0, + const iType& i1, + const iType& i2) { + v(i0, i1, i2) += update(i0, i1, i2); + }); + } }; -template -void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { +template +void parallel_sum(typename Kokkos::ViewTraits::value_type &sum, const Kokkos::View A) { + using ViewtypeA = Kokkos::View; Kokkos::fence("parallel_sum: pre sum fence"); if (A.span_is_contiguous()) { @@ -200,8 +260,9 @@ void parallel_sum(typename ViewtypeA::value_type &sum, const ViewtypeA A) { Kokkos::fence("parallel_sum: post sum fence"); } -template -void parallel_sum(const ExecSpace& space, typename ViewtypeA::value_type &sum, const ViewtypeA A) { +template +void parallel_sum(const ExecSpace& space, typename Kokkos::ViewTraits::value_type &sum, const Kokkos::View A) { + using ViewtypeA = Kokkos::View; space.fence("parallel_sum: pre sum fence"); if (A.span_is_contiguous()) { @@ -222,8 +283,9 @@ void parallel_sum(const ExecSpace& space, typename ViewtypeA::value_type &sum, c space.fence("parallel_sum: post sum fence"); } -template -void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { +template +void parallel_sum(Kokkos::View A, typename Kokkos::ViewTraits::const_value_type b) { + using ViewtypeA = Kokkos::View; Kokkos::fence("parallel_sum: pre add fence"); if (A.span_is_contiguous()) { @@ -244,8 +306,9 @@ void parallel_sum(ViewtypeA A, const typename ViewtypeA::value_type b) { Kokkos::fence("parallel_sum: post add fence"); } -template -void parallel_sum(const ExecSpace& space, ViewtypeA A, const typename ViewtypeA::value_type b) { +template +void parallel_sum(const ExecSpace& space, Kokkos::View A, typename Kokkos::ViewTraits::const_value_type b) { + using ViewtypeA = Kokkos::View; space.fence("parallel_sum: pre add fence"); if (A.span_is_contiguous()) { @@ -266,4 +329,15 @@ void parallel_sum(const ExecSpace& space, ViewtypeA A, const typename ViewtypeA: space.fence("parallel_sum: post add fence"); } +template +void parallel_sum(Kokkos::View A, const Kokkos::View B) { + using ViewtypeA = Kokkos::View; + using ViewtypeA = Kokkos::View; + Kokkos::fence("parallel_sum: pre add fence"); + + ViewAdd::template executeView(A, B); + + Kokkos::fence("parallel_sum: post add fence"); +} + } \ No newline at end of file diff --git a/unittests/Kokkos/view_access.cpp b/unittests/Kokkos/view_access.cpp index f1472b0d7..005de7375 100644 --- a/unittests/Kokkos/view_access.cpp +++ b/unittests/Kokkos/view_access.cpp @@ -35,7 +35,7 @@ double f_2(double x, double y) { Kokkos::deep_copy(a, 3*x+y); b(0,0) = x; - //Kokkos::deep_copy(b, a); + Kokkos::deep_copy(b, a); b(0,0) += a(0,0) * b(0,0); From 0fa0dc09b1530e0c634fb2a58048bb440e8234cf Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 22 Jan 2024 08:50:26 -0700 Subject: [PATCH 69/75] fix kokkos deep_copy test --- unittests/Kokkos/view_access.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/unittests/Kokkos/view_access.cpp b/unittests/Kokkos/view_access.cpp index 005de7375..c037d9cba 100644 --- a/unittests/Kokkos/view_access.cpp +++ b/unittests/Kokkos/view_access.cpp @@ -70,7 +70,8 @@ TEST(view_access, test_2) { f_grad_exe.execute(3., 4., &dx, &dy); EXPECT_NEAR(f_x.execute(3, 4),dx,tolerance*dx); + double dx_2, dy_2; auto f_2_grad_exe = clad::gradient(f_2); - //f_2_grad_exe.execute(3., 4., &dx, &dy); - //EXPECT_NEAR(f_2_x.execute(3, 4),dx,tolerance*dx); + f_2_grad_exe.execute(3., 4., &dx_2, &dy_2); + EXPECT_NEAR(f_2_x.execute(3, 4),dx_2,tolerance*dx_2); } From 8a831e92a3121c53cee046b60eeb9b2f03153736 Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 22 Jan 2024 15:37:26 -0700 Subject: [PATCH 70/75] fix Clad tests --- .../Differentiator/BaseForwardModeVisitor.h | 1 - lib/Differentiator/BaseForwardModeVisitor.cpp | 41 +++++++------------ lib/Differentiator/ReverseModeVisitor.cpp | 30 ++++++-------- 3 files changed, 28 insertions(+), 44 deletions(-) diff --git a/include/clad/Differentiator/BaseForwardModeVisitor.h b/include/clad/Differentiator/BaseForwardModeVisitor.h index 28c81f2e0..375ae88d5 100644 --- a/include/clad/Differentiator/BaseForwardModeVisitor.h +++ b/include/clad/Differentiator/BaseForwardModeVisitor.h @@ -100,7 +100,6 @@ class BaseForwardModeVisitor StmtDiff VisitCXXStaticCastExpr(const clang::CXXStaticCastExpr* CSE); StmtDiff VisitCXXFunctionalCastExpr(const clang::CXXFunctionalCastExpr* FCE); StmtDiff VisitCXXBindTemporaryExpr(const clang::CXXBindTemporaryExpr* BTE); - StmtDiff VisitValueStmt(const clang::ValueStmt* VS); StmtDiff VisitCXXNullPtrLiteralExpr(const clang::CXXNullPtrLiteralExpr* NPL); StmtDiff VisitUnaryExprOrTypeTraitExpr(const clang::UnaryExprOrTypeTraitExpr* UE); diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp index 83bafbb70..acf65f3d9 100644 --- a/lib/Differentiator/BaseForwardModeVisitor.cpp +++ b/lib/Differentiator/BaseForwardModeVisitor.cpp @@ -1345,8 +1345,6 @@ StmtDiff BaseForwardModeVisitor::VisitUnaryOperator(const UnaryOperator* UnOp) { return StmtDiff(op, BuildOp(opKind, diff.getExpr_dx())); } else if (opKind == UnaryOperatorKind::UO_AddrOf) { return StmtDiff(op, BuildOp(opKind, diff.getExpr_dx())); - } else if (opKind == UnaryOperatorKind::UO_Not) { - return StmtDiff(op, BuildOp(opKind, diff.getExpr_dx())); } else { unsupportedOpWarn(UnOp->getEndLoc()); auto zero = @@ -1912,7 +1910,7 @@ BaseForwardModeVisitor::VisitCXXConstructExpr(const CXXConstructExpr* CE) { std::string constructedTypeName = QualType::getAsString(CE->getType().split(), PrintingPolicy{ {} }); // Check if we are in a Kokkos View construction. - if (constructedTypeName.rfind("Kokkos::View", 0) == 0) { + if (utils::IsKokkosView(constructedTypeName)) { size_t runTimeDim = 0; std::vector compileTimeDims; bool read = false; @@ -1929,12 +1927,20 @@ BaseForwardModeVisitor::VisitCXXConstructExpr(const CXXConstructExpr* CE) { for (auto arg : CE->arguments()) { if (i == runTimeDim + 1) break; - auto argDiff = Visit(arg); - clonedArgs.push_back(argDiff.getExpr()); - if (i==0) - derivedArgs.push_back(argDiff.getExpr_dx()); - else - derivedArgs.push_back(argDiff.getExpr()); + clonedArgs.push_back(Clone(arg)); + if(isa(arg)) { + // Prepend the label of the view with "_d_". + // This is a very specific case and not a general derivation of a string. + auto SL = dyn_cast(arg); + std::string name_str("_d_"+ SL->getString().str()); + StringRef name(name_str); + + derivedArgs.push_back(StringLiteral::Create(m_Sema.getASTContext(), name, + SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc())); + } + else { + derivedArgs.push_back(Clone(arg)); + } ++i; } } @@ -2112,23 +2118,6 @@ StmtDiff BaseForwardModeVisitor::VisitCXXBindTemporaryExpr( return BTEDiff; } -StmtDiff BaseForwardModeVisitor::VisitValueStmt( - const clang::ValueStmt* VS) { - // Test if StringLiteral - if (isa(VS)) { - auto SL = dyn_cast(VS); - - std::string name_str("_d_"+ SL->getString().str()); - StringRef name(name_str); - - Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, - SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); - return {Clone(VS), derivedVS}; - } - return {Clone(VS), Clone(VS)}; -} - - StmtDiff BaseForwardModeVisitor::VisitCXXNullPtrLiteralExpr( const clang::CXXNullPtrLiteralExpr* NPL) { return {Clone(NPL), Clone(NPL)}; diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 3cdaec538..88dc8c1a4 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -753,18 +753,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } StmtDiff ReverseModeVisitor::VisitValueStmt(const clang::ValueStmt* VS) { - // Test if StringLiteral - if (isa(VS)) { - auto SL = dyn_cast(VS); - - std::string name_str("_d_"+ SL->getString().str()); - StringRef name(name_str); - - Expr* derivedVS = StringLiteral::Create(m_Sema.getASTContext(), name, - SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc()); - - return {Clone(VS), derivedVS}; - } if (isa(VS)) { auto CBTE = dyn_cast(VS); @@ -3285,11 +3273,19 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, for (auto arg : CE->arguments()) { if (i == runTimeDim + 1) break; - auto argDiff = Visit(arg); - if (i == 0) - clonedArgs.push_back(argDiff.getExpr_dx()); - else - clonedArgs.push_back(argDiff.getExpr()); + if(isa(arg)) { + // Prepend the label of the view with "_d_". + // This is a very specific case and not a general derivation of a string. + auto SL = dyn_cast(arg); + std::string name_str("_d_"+ SL->getString().str()); + StringRef name(name_str); + + clonedArgs.push_back(StringLiteral::Create(m_Sema.getASTContext(), name, + SL->getKind(), SL->isPascal(), SL->getType(), SL->getBeginLoc())); + } + else { + clonedArgs.push_back(Clone(arg)); + } ++i; } From 630538cd43c63c0dd0c039820d591bdbc75d49fd Mon Sep 17 00:00:00 2001 From: kliegeois Date: Mon, 22 Jan 2024 16:39:37 -0700 Subject: [PATCH 71/75] Fix the construction of the dual team policy --- lib/Differentiator/ReverseModeVisitor.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 88dc8c1a4..18c46f75e 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -3329,7 +3329,25 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, assert(false && "Not supported yet!"); } + } else if (utils::IsKokkosTeamPolicy(constructedTypeName)) { + if (isa(VD->getInit()->IgnoreImpCasts())) { + auto CE = dyn_cast(VD->getInit()->IgnoreImpCasts()); + llvm::SmallVector clonedArgs; + for (auto arg : CE->arguments()) { + clonedArgs.push_back(Clone(arg)); + } + VDDerivedInit = + m_Sema.ActOnParenListExpr(noLoc, noLoc, clonedArgs).get(); + + + VDDerived = + BuildVarDecl(VDDerivedType, "_d_" + VD->getNameAsString(), + VDDerivedInit, VD->isDirectInit(), + m_Context.getTrivialTypeSourceInfo(VDDerivedType), + VD->getInitStyle()); + } + } // VDDerivedInit now serves two purposes -- as the initial derivative value // or the size of the derivative array -- depending on the primal type. } else if (const auto* AT = dyn_cast(VD->getType())) { From 0239a5a8b77d90eb28ae0c19affa146d82a98ae6 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Tue, 23 Jan 2024 14:26:56 -0700 Subject: [PATCH 72/75] Don't make a std::vector of const SourceLocation This triggers a static assert in GNU libstdc++ --- include/clad/Differentiator/KokkosViewAccessVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index 6943c494d..a71988f37 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -401,7 +401,7 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm std::vector view_accesses_is_thread_safe; std::vector view_accesses; std::vector view_accesses_RHS; - std::vector view_accesses_location; + std::vector view_accesses_location; }; } // end namespace clad From 1b9dead314363903d962a6e502f1d0eecbc1bb4e Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 15 Feb 2024 11:42:06 -0700 Subject: [PATCH 73/75] cleaned --- lib/Differentiator/ReverseModeVisitor.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index 18c46f75e..abdab7a9f 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -1366,11 +1366,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return StmtDiff(Clone(NPE), Clone(NPE)); } - bool isKokkosView(const std::string constructedTypeName){ - return constructedTypeName.find("Kokkos::View") == 0 || constructedTypeName.find("class Kokkos::View") == 0; - //return constructedTypeName.find("Kokkos::View") != std::string::npos && constructedTypeName.find("getRetValue(); @@ -3350,7 +3345,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, } // VDDerivedInit now serves two purposes -- as the initial derivative value // or the size of the derivative array -- depending on the primal type. - } else if (const auto* AT = dyn_cast(VD->getType())) { + else if (const auto* AT = dyn_cast(VD->getType())) { VDDerivedInit = getArraySizeExpr(AT, m_Context, *this); VDDerived = BuildGlobalVarDecl( VDDerivedType, "_d_" + VD->getNameAsString(), VDDerivedInit, false, From 816cfadb0fe3f6d4550e77d94c5e7124988363bd Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 22 Feb 2024 09:40:46 -0700 Subject: [PATCH 74/75] Add comments --- .../clad/Differentiator/KokkosViewAccessVisitor.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/clad/Differentiator/KokkosViewAccessVisitor.h b/include/clad/Differentiator/KokkosViewAccessVisitor.h index a71988f37..d9850a6ec 100644 --- a/include/clad/Differentiator/KokkosViewAccessVisitor.h +++ b/include/clad/Differentiator/KokkosViewAccessVisitor.h @@ -11,6 +11,16 @@ namespace clad { +/// Determines whether two statement trees are identical regarding +/// operators and symbols. +/// +/// Exceptions: expressions containing macros or functions with possible side +/// effects are never considered identical. +/// Limitations: (t + u) and (u + t) are not considered identical. +/// t*(u + t) and t*u + t*t are not considered identical. +/// +/// function copied from clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp +/// static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stmt1, const clang::Stmt *Stmt2, bool IgnoreSideEffects) { @@ -206,6 +216,11 @@ static bool isIdenticalStmt(const clang::ASTContext &Ctx, const clang::Stmt *Stm } } + /// A visitor for processing the Kokkos View accesses. + /// This visitor is used for two purposes: + /// - Detect if a View access in the reverse pass is thread safe, + /// - Detect if the View needs to be recorded during the forward pass + /// to use the values in the reverse one. class KokkosViewAccessVisitor { public: KokkosViewAccessVisitor (clang::Sema& _semaRef, clang::ASTContext& _m_Context) : From 3a585dd5b1e4f5cdc9de104f886223cefc7bef6e Mon Sep 17 00:00:00 2001 From: kliegeois Date: Thu, 22 Feb 2024 12:39:12 -0700 Subject: [PATCH 75/75] link Kokkos::kokkos --- unittests/Kokkos/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unittests/Kokkos/CMakeLists.txt b/unittests/Kokkos/CMakeLists.txt index 9e01dcddd..cfe2432ab 100644 --- a/unittests/Kokkos/CMakeLists.txt +++ b/unittests/Kokkos/CMakeLists.txt @@ -13,5 +13,6 @@ if (NOT (LLVM_REQUIRES_RTTI OR LLVM_ENABLE_RTTI)) endif() endif() +target_link_libraries(KokkosTests PRIVATE Kokkos::kokkos) target_link_libraries(KokkosTests PUBLIC ${Kokkos_LIBRARIES}) target_include_directories(KokkosTests SYSTEM PRIVATE ${Kokkos_INCLUDE_DIRS})