diff --git a/.docker/environment.yml b/.docker/environment.yml index ab226729..ba68dc78 100644 --- a/.docker/environment.yml +++ b/.docker/environment.yml @@ -12,3 +12,4 @@ dependencies: - gcc - gxx - make + - dill diff --git a/.github/environment.yml b/.github/environment.yml index 1a458d43..e14fe0d1 100644 --- a/.github/environment.yml +++ b/.github/environment.yml @@ -14,4 +14,4 @@ dependencies: - nlopt >= 2.7 - pytorch - cxx-compiler - + - dill diff --git a/.github/workflows/build-bindings.yml b/.github/workflows/build-bindings.yml index 6a1b422a..91337b47 100644 --- a/.github/workflows/build-bindings.yml +++ b/.github/workflows/build-bindings.yml @@ -3,6 +3,7 @@ name: binding-tests on: push: branches: + - release - main pull_request: {} @@ -47,7 +48,7 @@ jobs: - name: Setup Julia run: | - julia -e "using Pkg; Pkg.add([\"CxxWrap\",\"TestReports\"])" + julia -e "using Pkg; Pkg.add([Pkg.PackageSpec(;name=\"CxxWrap\",version=v\"0.14.2\"),Pkg.PackageSpec(;name=\"TestReports\")])" export GITHUB_JULIA_PATH=$(julia -e "println(DEPOT_PATH[1])") echo -n $'[bee5971c-294f-5168-9fcd-9fb3c811d495]\nMParT = \"' >> $GITHUB_JULIA_PATH/artifacts/Overrides.toml echo -n $GITHUB_WORKSPACE >> $GITHUB_JULIA_PATH/artifacts/Overrides.toml diff --git a/.github/workflows/build-doc.yml b/.github/workflows/build-doc.yml index 1214797c..17da50c5 100644 --- a/.github/workflows/build-doc.yml +++ b/.github/workflows/build-doc.yml @@ -4,6 +4,7 @@ on: push: branches: - main + - release jobs: build-docs: diff --git a/.github/workflows/build-external-lib-tests.yml b/.github/workflows/build-external-lib-tests.yml index 584da4ae..e58f2c03 100644 --- a/.github/workflows/build-external-lib-tests.yml +++ b/.github/workflows/build-external-lib-tests.yml @@ -4,6 +4,7 @@ on: push: branches: - main + - release pull_request: {} env: diff --git a/.github/workflows/build-push-docker.yml b/.github/workflows/build-push-docker.yml index 2c784397..336a28b7 100644 --- a/.github/workflows/build-push-docker.yml +++ b/.github/workflows/build-push-docker.yml @@ -4,6 +4,7 @@ on: push: branches: - main + - release jobs: docker: diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml index 8fabb02a..2ba6d1bf 100644 --- a/.github/workflows/build-tests.yml +++ b/.github/workflows/build-tests.yml @@ -4,6 +4,7 @@ on: push: branches: - main + - release pull_request: {} jobs: diff --git a/MParT/ComposedMap.h b/MParT/ComposedMap.h index c51d6160..0b9f55de 100644 --- a/MParT/ComposedMap.h +++ b/MParT/ComposedMap.h @@ -50,12 +50,12 @@ class ComposedMap : public ConditionalMapBase start at index \f$\sum_{j=1}^{k-1} C_j\f$. */ using ConditionalMapBase::SetCoeffs; - void SetCoeffs(Kokkos::View coeffs) override; - void WrapCoeffs(Kokkos::View coeffs) override; + using ConditionalMapBase::WrapCoeffs; + void SetCoeffs(Kokkos::View coeffs) override; + void WrapCoeffs(Kokkos::View coeffs) override; #if defined(MPART_ENABLE_GPU) - void SetCoeffs(Kokkos::View coeffs) override; - void WrapCoeffs(Kokkos::View coeffs) override; + void SetCoeffs(Kokkos::View, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs) override; #endif virtual std::shared_ptr> GetComponent(unsigned int i){ return maps_.at(i);} diff --git a/MParT/Distributions/DensityBase.h b/MParT/Distributions/DensityBase.h index 5ab27dfb..050412d1 100644 --- a/MParT/Distributions/DensityBase.h +++ b/MParT/Distributions/DensityBase.h @@ -47,8 +47,10 @@ class DensityBase { * @param X The points where we want to evaluate the log density. * @return Matrix \f$A\in\mathbb{R}^{m\times n}\f$ containing the log density at each point. */ - template - StridedVector LogDensity(StridedMatrix const &X); + StridedVector LogDensity(StridedMatrix const &X); + #if defined(MPART_ENABLE_GPU) + StridedVector , DeviceSpace, Kokkos::HostSpace> > LogDensity(StridedMatrix , DeviceSpace, Kokkos::HostSpace > > const &X); + #endif /** * @brief Computes the log density at the given points. @@ -66,8 +68,10 @@ class DensityBase { * @param X The points where we want to evaluate the gradient log density. * @return A matrix \f$A\in\mathbb{R}^{m\times n}\f$ containing the gradient of the log density at each point. */ - template - StridedMatrix LogDensityInputGrad(StridedMatrix const &X); + StridedMatrix LogDensityInputGrad(StridedMatrix const &X); + #if defined(MPART_ENABLE_GPU) + StridedMatrix , DeviceSpace, Kokkos::HostSpace> > LogDensityInputGrad(StridedMatrix , DeviceSpace, Kokkos::HostSpace > > const &X); + #endif /** * @brief Returns the input dimension of the density @@ -81,4 +85,4 @@ class DensityBase { }; } -#endif // MPART_DensityBase_H \ No newline at end of file +#endif // MPART_DensityBase_H diff --git a/MParT/Distributions/PushforwardDensity.h b/MParT/Distributions/PushforwardDensity.h index 74537bca..9656af37 100644 --- a/MParT/Distributions/PushforwardDensity.h +++ b/MParT/Distributions/PushforwardDensity.h @@ -46,7 +46,8 @@ class PushforwardDensity: public DensityBase { StridedMatrix mappedPts = map_->Inverse(prefix_null, pts); density_->LogDensityImpl(mappedPts, output); StridedVector logJacobian = map_->LogDeterminant(mappedPts); - Kokkos::parallel_for("Subtract logJac", output.extent(0), KOKKOS_LAMBDA(const unsigned int i){ + Kokkos::RangePolicy::Space> policy{0lu, output.extent(0)}; + Kokkos::parallel_for("Subtract logJac", policy, KOKKOS_LAMBDA(const unsigned int i){ output(i) -= logJacobian(i); }); }; @@ -71,4 +72,4 @@ class PushforwardDensity: public DensityBase { } // namespace mpart -#endif //MPART_PUSHFORWARDDENSITY_H \ No newline at end of file +#endif //MPART_PUSHFORWARDDENSITY_H diff --git a/MParT/MapFactory.h b/MParT/MapFactory.h index d02ec57a..b9ef2f38 100644 --- a/MParT/MapFactory.h +++ b/MParT/MapFactory.h @@ -140,12 +140,13 @@ namespace mpart{ unsigned int inputDim, unsigned int totalOrder, StridedVector centers, MapOptions opts); - template, bool> = true> - std::shared_ptr> CreateSigmoidComponent( + template + std::shared_ptr> CreateSigmoidComponent( unsigned int inputDim, unsigned int totalOrder, Eigen::Ref centers, MapOptions opts) { StridedVector centersVec = ConstVecToKokkos(centers); - return CreateSigmoidComponent(inputDim, totalOrder, centersVec, opts); + Kokkos::View centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centersVec); + return CreateSigmoidComponent(inputDim, totalOrder, centers_d, opts); } template @@ -153,12 +154,13 @@ namespace mpart{ FixedMultiIndexSet mset_offdiag, FixedMultiIndexSet mset_diag, StridedVector centers, MapOptions opts); - template, bool> = true> + template std::shared_ptr> CreateSigmoidComponent( FixedMultiIndexSet mset_offdiag, FixedMultiIndexSet mset_diag, Eigen::Ref centers, MapOptions opts) { StridedVector centersVec = ConstVecToKokkos(centers); - return CreateSigmoidComponent(mset_offdiag, mset_diag, centersVec, opts); + Kokkos::View centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centersVec); + return CreateSigmoidComponent(mset_offdiag, mset_diag, centers_d, opts); } template @@ -180,13 +182,14 @@ namespace mpart{ return CreateSigmoidTriangular(inputDim, outputDim, totalOrder, centersVecs, opts); } - template, bool> = true> - std::shared_ptr> CreateSigmoidTriangular( + template + std::shared_ptr> CreateSigmoidTriangular( unsigned int inputDim, unsigned int outputDim, unsigned int totalOrder, Eigen::Ref const& centers, MapOptions opts ) { StridedMatrix centersMat = ConstRowMatToKokkos(centers); - return CreateSigmoidTriangular(inputDim, outputDim, totalOrder, centersMat, opts); + Kokkos::View centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centersMat); + return CreateSigmoidTriangular(inputDim, outputDim, totalOrder, centers_d, opts); } /** This struct is used to map the options to functions that can create a map component with types corresponding diff --git a/MParT/MonotoneComponent.h b/MParT/MonotoneComponent.h index bbdfd2f0..65097100 100644 --- a/MParT/MonotoneComponent.h +++ b/MParT/MonotoneComponent.h @@ -141,7 +141,7 @@ class MonotoneComponent : public ConditionalMapBase "sens: (" << sensRows << "," << sensCols << "), expected: " << this->outputDim << ", " << ptsCols << "), " << "pts: (" << ptsRows << "," << ptsCols << "), expected: (" << this->inputDim << "," << ptsCols << "), " << "output: (" << outputRows << "," << outputCols << "), expected: (" << expectedOutputRows << "," << ptsCols << ")"; - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } } @@ -262,7 +262,7 @@ class MonotoneComponent : public ConditionalMapBase std::stringstream ss; ss << "EvaluateImpl: output has incorrect number of columns. " << "Expected: " << pts.extent(1) << ", got " << output.extent(0); - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } // Ask the expansion how much memory it would like for its one-point cache @@ -663,7 +663,7 @@ class MonotoneComponent : public ConditionalMapBase << "jacobian: (" << jacRows << "," << jacCols << "), expected: (" << expectJacRows << "," << expectJacCols << "), "; if(expectEvalRows > 0) ss << "evaluations: (" << evalRows << "), expected: (" << expectEvalRows << ")"; - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } } @@ -1082,9 +1082,9 @@ class MonotoneComponent : public ConditionalMapBase ExpansionType expansion; double nugget; - SingleEvaluator(double* workspace_, double* cache_, PointType pt_, CoeffType coeffs_, QuadratureType quad_, ExpansionType expansion_, double nugget_): + KOKKOS_FUNCTION SingleEvaluator(double* workspace_, double* cache_, PointType pt_, CoeffType coeffs_, QuadratureType quad_, ExpansionType expansion_, double nugget_): workspace(workspace_), cache(cache_), pt(pt_), coeffs(coeffs_), quad(quad_), expansion(expansion_), nugget(nugget_) {}; - double operator()(double x) { + KOKKOS_INLINE_FUNCTION double operator()(double x) { return EvaluateSingle(cache, workspace, pt, x, coeffs, quad, expansion, nugget); } }; diff --git a/MParT/MonotoneIntegrand.h b/MParT/MonotoneIntegrand.h index cf382b83..c3481cf8 100644 --- a/MParT/MonotoneIntegrand.h +++ b/MParT/MonotoneIntegrand.h @@ -183,7 +183,7 @@ class MonotoneIntegrand{ // Check for infs or nans if(std::isinf(gf)){ if(failOnNaN) { - ProcAgnosticError::error("MonotoneIntegrand: nan was encountered in value of g(df(...)). Use MonotoneIntegrand::setFailOnNaN for enabling NaN propagation."); + ProcAgnosticError("MonotoneIntegrand: nan was encountered in value of g(df(...)). Use MonotoneIntegrand::setFailOnNaN for enabling NaN propagation."); } else { printf("\nERROR: In MonotoneIntegrand, value of g(df(...)) is inf. The value of df(...) is %0.4f, and the value of f(df(...)) is %0.4f.\n\n", df, gf); } diff --git a/MParT/MultivariateExpansionWorker.h b/MParT/MultivariateExpansionWorker.h index be0a4ca0..d2dde344 100644 --- a/MParT/MultivariateExpansionWorker.h +++ b/MParT/MultivariateExpansionWorker.h @@ -339,7 +339,6 @@ class MultivariateExpansionWorker { const unsigned int numTerms = multiSet_.Size(); double df = 0.0; - unsigned int posInd; for(int wrt=-1; wrt=0){ @@ -538,7 +537,6 @@ class MultivariateExpansionWorker KOKKOS_FUNCTION double GetTermValMixedCoeffDeriv(unsigned int termInd, int wrt, const double* cache, unsigned int posIndex) const { // Compute the value of this term in the expansion double termVal = 1.0; - bool hasDeriv = false; if(multiSet_.nzStarts(termInd)==multiSet_.nzStarts(termInd+1)) return 0.; // Value is zero if constant in last dimension unsigned int end_idx = multiSet_.nzStarts(termInd+1)-1; // last index in loop if(multiSet_.nzDims(end_idx)!=dim_-1) return 0.; // Value is zero if constant in last dimension diff --git a/MParT/ParameterizedFunctionBase.h b/MParT/ParameterizedFunctionBase.h index 94e14383..c8cbebcc 100644 --- a/MParT/ParameterizedFunctionBase.h +++ b/MParT/ParameterizedFunctionBase.h @@ -51,10 +51,8 @@ namespace mpart { @param coeffs A view containing the coefficients to copy. */ virtual void SetCoeffs(Kokkos::View coeffs); + void SetCoeffs(Kokkos::View< double*, MemorySpace> coeffs); - #if defined(MPART_ENABLE_GPU) - void SetCoeffs(Kokkos::View,mpart::DeviceSpace,Kokkos::HostSpace>> coeffs); - #endif /** @brief Wrap the internal coefficient view around another view. @details Performs a shallow copy of the input coefficients to the internally stored coefficients. @@ -62,12 +60,12 @@ namespace mpart { internally stored view. @param coeffs A view containing the coefficients we want to wrap. */ - virtual void WrapCoeffs(Kokkos::View coeffs); + virtual void WrapCoeffs(Kokkos::View coeffs); #if defined(MPART_ENABLE_GPU) - virtual void SetCoeffs(Kokkos::View coeffs); - virtual void WrapCoeffs(Kokkos::View coeffs); - #endif + virtual void SetCoeffs(Kokkos::View, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs); + void SetCoeffs(Kokkos::View< double*, std::conditional_t, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs); + #endif /** SetCoeffs function with conversion from Eigen to Kokkos vector types.*/ virtual void SetCoeffs(Eigen::Ref coeffs); @@ -249,4 +247,4 @@ namespace mpart { }; // class ParameterizedFunctionBase } -#endif \ No newline at end of file +#endif diff --git a/MParT/Quadrature.h b/MParT/Quadrature.h index 81b46fbe..a980de8d 100644 --- a/MParT/Quadrature.h +++ b/MParT/Quadrature.h @@ -9,6 +9,8 @@ #include +#include "MParT/Utilities/KokkosSpaceMappings.h" + #if defined(MPART_HAS_CEREAL) #include #include @@ -353,8 +355,8 @@ struct GetRuleFunctor{ template inline ClenshawCurtisQuadrature::ClenshawCurtisQuadrature(unsigned int numPts, unsigned int maxDim) : QuadratureBase(maxDim,maxDim), pts_("Points", numPts), wts_("Weights", numPts), numPts_(numPts) { - // TODO: Add parallel for loop here with one thread to make sure rule is filled in the correct space - Kokkos::parallel_for(1, GetRuleFunctor(numPts, pts_.data(), wts_.data())); + Kokkos::RangePolicy::Space> policy {0,1}; + Kokkos::parallel_for(policy, GetRuleFunctor(numPts, pts_.data(), wts_.data())); }; template<> @@ -367,8 +369,8 @@ inline ClenshawCurtisQuadrature::ClenshawCurtisQuadrature(uns template inline ClenshawCurtisQuadrature::ClenshawCurtisQuadrature(unsigned int numPts, unsigned int maxDim, double* workspace) : QuadratureBase(maxDim,maxDim,workspace), pts_("Points", numPts), wts_("Weights", numPts), numPts_(numPts) { - // TODO: Add parallel for loop here with one thread to make sure rule is filled in the correct space - Kokkos::parallel_for(1, GetRuleFunctor(numPts, pts_.data(), wts_.data())); + Kokkos::RangePolicy::Space> policy {0,1}; + Kokkos::parallel_for(policy, GetRuleFunctor(numPts, pts_.data(), wts_.data())); }; template<> @@ -1020,9 +1022,9 @@ inline AdaptiveClenshawCurtis::AdaptiveClenshawCurtis(unsigned int fineWts_("Coarse Pts", std::pow(2,level+1)+1) { assert(std::pow(2,level)+1 >=3); - - Kokkos::parallel_for(1, GetRuleFunctor(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data())); - Kokkos::parallel_for(1, GetRuleFunctor(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data())); + Kokkos::RangePolicy::Space> policy {0,1}; + Kokkos::parallel_for(policy, GetRuleFunctor(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data())); + Kokkos::parallel_for(policy, GetRuleFunctor(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data())); }; template<> @@ -1060,9 +1062,9 @@ inline AdaptiveClenshawCurtis::AdaptiveClenshawCurtis(unsigned int fineWts_("Coarse Pts", std::pow(2,level+1)+1) { assert(std::pow(2,level)+1 >=3); - - Kokkos::parallel_for(1, GetRuleFunctor(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data())); - Kokkos::parallel_for(1, GetRuleFunctor(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data())); + Kokkos::RangePolicy::Space> policy {0, 1}; + Kokkos::parallel_for(policy, GetRuleFunctor(std::pow(2,level)+1, coarsePts_.data(), coarseWts_.data())); + Kokkos::parallel_for(policy, GetRuleFunctor(std::pow(2,level+1)+1, finePts_.data(), fineWts_.data())); }; template<> diff --git a/MParT/RectifiedMultivariateExpansion.h b/MParT/RectifiedMultivariateExpansion.h index 83980264..6b369b7b 100644 --- a/MParT/RectifiedMultivariateExpansion.h +++ b/MParT/RectifiedMultivariateExpansion.h @@ -72,7 +72,7 @@ namespace mpart{ // Create a subview containing only the current point auto pt = Kokkos::subview(pts, Kokkos::ALL(), ptInd); - auto pt_off = Kokkos::subview(pt, std::pair(0,pt.size()-1)); + auto pt_off = Kokkos::subview(pt, Kokkos::pair(0,pt.size()-1)); // Get a pointer to the shared memory that Kokkos set up for this team Kokkos::View cache(team_member.thread_scratch(1), cacheSize); @@ -126,11 +126,11 @@ namespace mpart{ // Create a subview containing only the current point auto pt = Kokkos::subview(pts, Kokkos::ALL(), ptInd); - auto pt_off = Kokkos::subview(pts, std::pair(0,pt.size()-1), ptInd); + auto pt_off = Kokkos::subview(pts, Kokkos::pair(0,pt.size()-1), ptInd); // Get a pointer to the shared memory that Kokkos set up for this team - Kokkos::View cache(team_member.thread_scratch(1), cacheSize); - Kokkos::View grad_off(team_member.thread_scratch(1), inDim-1); + Kokkos::View cache(team_member.thread_scratch(1), cacheSize); + Kokkos::View grad_off(team_member.thread_scratch(1), inDim-1); StridedVector grad_out = Kokkos::subview(output, Kokkos::ALL(), ptInd); // Fill in entries in the cache that are independent of x_d @@ -188,7 +188,7 @@ namespace mpart{ // Create a subview containing only the current point auto pt = Kokkos::subview(pts, Kokkos::ALL(), ptInd); - auto pt_off = Kokkos::subview(pt, std::pair(0,pt.extent(0)-1)); + auto pt_off = Kokkos::subview(pt, Kokkos::pair(0,pt.extent(0)-1)); // Get a pointer to the shared memory that Kokkos set up for this team Kokkos::View cache(team_member.thread_scratch(1), cacheSize); @@ -232,7 +232,7 @@ namespace mpart{ unsigned int cacheSize = worker_diag.CacheSize(); // Take logdet of diagonal expansion - auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto functor = KOKKOS_CLASS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { unsigned int ptInd = team_member.league_rank () * team_member.team_size () + team_member.team_rank (); @@ -333,7 +333,7 @@ namespace mpart{ unsigned int cacheSize = worker_diag.CacheSize(); // Take logdet of diagonal expansion - auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto functor = KOKKOS_CLASS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { unsigned int ptInd = team_member.league_rank () * team_member.team_size () + team_member.team_rank (); @@ -386,7 +386,7 @@ namespace mpart{ unsigned int cacheSize = worker_diag.CacheSize(); // Take logdet of diagonal expansion - auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto functor = KOKKOS_CLASS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { unsigned int ptInd = team_member.league_rank () * team_member.team_size () + team_member.team_rank (); @@ -437,9 +437,9 @@ namespace mpart{ CoeffType coeffs; DiagWorker_T worker; - SingleWorkerEvaluator(double* cache_, PointType pt_, CoeffType coeffs_, DiagWorker_T worker_): + KOKKOS_INLINE_FUNCTION SingleWorkerEvaluator(double* cache_, PointType pt_, CoeffType coeffs_, DiagWorker_T worker_): cache(cache_), pt(pt_), coeffs(coeffs_), worker(worker_) {} - double operator()(double x) { + KOKKOS_INLINE_FUNCTION double operator()(double x) { worker.FillCache2(cache, pt, x, DerivativeFlags::None); return worker.Evaluate(cache, coeffs); } diff --git a/MParT/Sigmoid.h b/MParT/Sigmoid.h index 09739d4b..e31ec9a9 100644 --- a/MParT/Sigmoid.h +++ b/MParT/Sigmoid.h @@ -109,7 +109,7 @@ class Sigmoid1d { Kokkos::View widths) : centers_(centers), widths_(widths) { Kokkos::View weights ("Sigmoid weights", centers.extent(0)); - Kokkos::parallel_for(centers.extent(0), KOKKOS_LAMBDA(unsigned int i) { weights(i) = 1.; }); + Kokkos::deep_copy(weights, 1.0); weights_ = weights; Validate(); } @@ -123,12 +123,7 @@ class Sigmoid1d { */ KOKKOS_FUNCTION void EvaluateAll(double* output, int max_order, double input) const { if (order_ < max_order) { - std::stringstream ss; - ss << "Sigmoid basis evaluation order too large.\n"; - ss << "Given order " << max_order << ", "; - ss << "can only evaluate up to order " << order_; - ProcAgnosticError::error( - ss.str().c_str()); + ProcAgnosticError("Sigmoid basis evaluation order too large."); } output[0] = 1.; @@ -167,12 +162,7 @@ class Sigmoid1d { KOKKOS_FUNCTION void EvaluateDerivatives(double* output, double* output_diff, int max_order, double input) const { if (order_ < max_order) { - std::stringstream ss; - ss << "Sigmoid basis evaluation order too large.\n"; - ss << "Given order " << max_order << ", "; - ss << "can only evaluate up to order " << order_; - ProcAgnosticError::error( - ss.str().c_str()); + ProcAgnosticError("Sigmoid derivative evaluation order too large."); } output[0] = 1.; @@ -222,12 +212,7 @@ class Sigmoid1d { double* output_diff2, int max_order, double input) const { if (order_ < max_order) { - std::stringstream ss; - ss << "Sigmoid basis evaluation order too large.\n"; - ss << "Given order " << max_order << ", "; - ss << "can only evaluate up to order " << order_; - ProcAgnosticError::error( - ss.str().c_str()); + ProcAgnosticError("Sigmoid second derivative evaluation order too large"); } output[0] = 1.; @@ -295,14 +280,14 @@ class Sigmoid1d { ss << "centers: " << centers_.extent(0) << ", \n"; ss << "widths: " << widths_.extent(0) << ",\n"; ss << "weights: " << weights_.extent(0) << "\n"; - ProcAgnosticError::error( + ProcAgnosticError< std::invalid_argument>( ss.str().c_str()); } if (centers_.extent(0) < 2) { std::stringstream ss; ss << "Sigmoid: centers/widths/weights too short.\n"; ss << "Length should be of form 2+(1+2+3+...+n) for some order n>=0"; - ProcAgnosticError::error( + ProcAgnosticError< std::invalid_argument>( ss.str().c_str()); } int n_sigmoid_centers = centers_.extent(0) - 2; @@ -315,7 +300,7 @@ class Sigmoid1d { std::stringstream ss; ss << "Incorrect length of centers/widths/weights."; ss << "Length should be of form 2+(1+2+3+...+n) for some order n"; - ProcAgnosticError::error( + ProcAgnosticError< std::invalid_argument>( ss.str().c_str()); } // one added for affine part of this, two added for edge terms diff --git a/MParT/SummarizedMap.h b/MParT/SummarizedMap.h index 0f40964f..06ced30f 100644 --- a/MParT/SummarizedMap.h +++ b/MParT/SummarizedMap.h @@ -44,11 +44,10 @@ where the function \f$s:\mathbb{R}^{N-1}\rightarrow \mathbb{R}^{r}\f$ is a funct virtual ~SummarizedMap() = default; using ConditionalMapBase::SetCoeffs; - void SetCoeffs(Kokkos::View coeffs) override; - void WrapCoeffs(Kokkos::View coeffs) override; + void SetCoeffs(Kokkos::View coeffs) override; + void WrapCoeffs(Kokkos::View coeffs) override; #if defined(MPART_ENABLE_GPU) - void SetCoeffs(Kokkos::View coeffs) override; - void WrapCoeffs(Kokkos::View coeffs) override; + void SetCoeffs(Kokkos::View, mpart::DeviceSpace, Kokkos::HostSpace>> coeffs) override; #endif void SummarizePts(StridedMatrix const& pts, @@ -95,4 +94,4 @@ where the function \f$s:\mathbb{R}^{N-1}\rightarrow \mathbb{R}^{r}\f$ is a funct } -#endif \ No newline at end of file +#endif diff --git a/MParT/UnivariateExpansion.h b/MParT/UnivariateExpansion.h index 5c7c6cd5..c733afcd 100644 --- a/MParT/UnivariateExpansion.h +++ b/MParT/UnivariateExpansion.h @@ -279,11 +279,11 @@ class UnivariateExpansion: public ConditionalMapBase{ template class SingleUnivariateEvaluator { public: - SingleUnivariateEvaluator(double* cache, CoeffType coeff, BasisType basis): + KOKKOS_FUNCTION SingleUnivariateEvaluator(double* cache, CoeffType coeff, BasisType basis): cache_(cache), coeff_(coeff), basisEval_(basis), maxOrderEval_(coeff.extent(0)-1) {}; - double operator()(double x) const { + KOKKOS_INLINE_FUNCTION double operator()(double x) const { basisEval_.EvaluateAll(cache_, maxOrderEval_, x); double result = 0.0; for(int i = 0; i <= maxOrderEval_; i++) { diff --git a/MParT/Utilities/ArrayConversions.h b/MParT/Utilities/ArrayConversions.h index 6205964e..cc9cde94 100644 --- a/MParT/Utilities/ArrayConversions.h +++ b/MParT/Utilities/ArrayConversions.h @@ -220,26 +220,6 @@ namespace mpart{ } - /** - @brief Copies a Kokkos array from device memory to host memory - @details - @param[in] inview A kokkos array in device memory. - @return A kokkos array in host memory. Note that the layout (row-major or col-major) might be different than the default on the Host. The layout will match the device's default layout. - */ - template - typename Kokkos::View::HostMirror ToHost(Kokkos::View const& inview){ - typename Kokkos::View::HostMirror outview = Kokkos::create_mirror_view(inview); - Kokkos::deep_copy (outview, inview); - return outview; - } - - template - StridedMatrix ToHost(StridedMatrix const& inview){ - typename StridedMatrix::HostMirror outview = Kokkos::create_mirror_view(inview); - Kokkos::deep_copy (outview, inview); - return outview; - } - /** @brief Copies a range of elements from a Kokkos array in device to host memory @details @@ -263,24 +243,14 @@ namespace mpart{ Kokkos::View deviceView("Some stuff on the device", N1, N2); Kokkos::View hostView = ToHost(deviceView, 2, Kokkos::All() ); // Similar to python notation: deviceView[2,:] @endcode - - @param[in] inview A kokkos array in device memory. - @param[in] sliceParams One or more parameters defining a Kokkos::subview. See the [Kokkos Subview documentation](https://github.com/kokkos/kokkos/wiki/Subviews#112-how-to-take-a-subview) for more details. - @return A kokkos array in host memory. Note that the layout (row-major or col-major) might be different than the default on the Host. The layout will match the device's default layout. - - @tparam DeviceMemoryType The memory space (e.g., Kokkos::CudaSpace) or the device - @tparam ScalarType The type and dimension of the Kokkos::View (e.g., double*, double**, or int*) - @tparam SliceTypes A variadic parameter pack containing options for constructing a Kokkos::subview of the device view. */ - template - Kokkos::View ToHost(Kokkos::View const& inview, SliceTypes... sliceParams){ - auto subview = Kokkos::subview(inview, sliceParams...); // Construct the subview - typename Kokkos::View::HostMirror outview = Kokkos::create_mirror_view(subview); - Kokkos::deep_copy (outview, subview); + template + typename ViewType::HostMirror ToHost(ViewType const& inview){ + typename ViewType::HostMirror outview = Kokkos::create_mirror_view(inview); + Kokkos::deep_copy (outview, inview); return outview; } - #if defined(MPART_ENABLE_GPU) /** diff --git a/MParT/Utilities/LinearAlgebra.h b/MParT/Utilities/LinearAlgebra.h index ae758c40..aec2d5c1 100644 --- a/MParT/Utilities/LinearAlgebra.h +++ b/MParT/Utilities/LinearAlgebra.h @@ -26,7 +26,8 @@ template void AddInPlace(ViewType1 x, ViewType2 y) { constexpr size_t rank = GetViewRank::Rank; if constexpr(rank == 1) { - Kokkos::parallel_for(x.extent(0), KOKKOS_LAMBDA(const int i){x(i) += y(i);}); + Kokkos::RangePolicy policy(0,x.extent(0)); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){x(i) += y(i);}); } else if (rank == 2) { Kokkos::MDRangePolicy,typename ViewType1::execution_space> policy({0, 0}, {x.extent(0), x.extent(1)}); Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j){x(i, j) += y(i, j);}); @@ -286,4 +287,4 @@ class Cholesky } // namespace mpart -#endif // #ifndef MPART_CUDALINEARALGEBRA_H \ No newline at end of file +#endif // #ifndef MPART_CUDALINEARALGEBRA_H diff --git a/MParT/Utilities/Miscellaneous.h b/MParT/Utilities/Miscellaneous.h index b4e3b290..fb7d7e13 100644 --- a/MParT/Utilities/Miscellaneous.h +++ b/MParT/Utilities/Miscellaneous.h @@ -19,22 +19,17 @@ namespace mpart{ std::string const& key, std::string const& defaultValue); - /** Provides a mechanism for raising exceptions in CPU code where recovery is possible - and assertions in GPU code where exceptions aren't alllowed. - */ - template - struct ProcAgnosticError { - static void error(const char*) { - assert(false); - } - }; - template - struct ProcAgnosticError { - static void error(const char* message) { - throw ErrorType(message); + template + KOKKOS_INLINE_FUNCTION void ProcAgnosticError(const char* msg) { + KOKKOS_IF_ON_DEVICE( + Kokkos::abort(msg); + ); + KOKKOS_IF_ON_HOST( + throw ErrorType(msg); + ); } - }; + } -#endif \ No newline at end of file +#endif diff --git a/MParT/Utilities/RootFinding.h b/MParT/Utilities/RootFinding.h index 9ecdc82f..7149c480 100644 --- a/MParT/Utilities/RootFinding.h +++ b/MParT/Utilities/RootFinding.h @@ -125,7 +125,6 @@ KOKKOS_INLINE_FUNCTION double Find_x_ITP(double xlb, double xub, double yd, doub template KOKKOS_INLINE_FUNCTION double InverseSingleBracket(double yd, FunctorType f, double x0, const double xtol, const double ftol, int& info) { - double stepSize=1.0; const unsigned int maxIts = 10000; // First, we need to find two points that bound the solution. diff --git a/MParT/Utilities/Serialization.h b/MParT/Utilities/Serialization.h index 5b889f2d..8c945e10 100644 --- a/MParT/Utilities/Serialization.h +++ b/MParT/Utilities/Serialization.h @@ -17,16 +17,17 @@ namespace cereal { template - std::enable_if_t, Archive>::value && (std::is_same_v || ...), void> save( + std::enable_if_t, Archive>::value, void> save( Archive &ar, Kokkos::View const &vec) { - Kokkos::View vec_h = vec; + Kokkos::View vec_d = vec; static_assert(!(std::is_same_v || ...), "LayoutStride not supported"); std::string name = vec.label(); ar(name); - unsigned int sz = vec_h.extent(0); + unsigned int sz = vec_d.extent(0); ar(sz); + auto vec_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), vec_d); if(sz>0){ ar(binary_data(vec_h.data(), sz * sizeof(std::remove_cv_t))); @@ -34,7 +35,7 @@ namespace cereal { } template - std::enable_if_t, Archive>::value && (std::is_same_v || ...), void> load( + std::enable_if_t, Archive>::value, void> load( Archive &ar, Kokkos::View &vec) { static_assert(!(std::is_same_v || ...), "LayoutStride not supported"); @@ -42,12 +43,16 @@ namespace cereal { ar(name); unsigned int sz; ar(sz); - Kokkos::View vec_h (name, sz); + + Kokkos::View vec_d (name, sz); + auto vec_h = Kokkos::create_mirror_view(Kokkos::HostSpace(), vec_d); if(sz>0){ ar(binary_data(vec_h.data(), sz * sizeof(ScalarType))); } - vec = std::move(vec_h); + Kokkos::deep_copy(vec_d, vec_h); + vec = std::move(vec_d); + } template diff --git a/bindings/julia/src/ParameterizedFunctionBase.cpp b/bindings/julia/src/ParameterizedFunctionBase.cpp index bae78cf7..1f38988e 100644 --- a/bindings/julia/src/ParameterizedFunctionBase.cpp +++ b/bindings/julia/src/ParameterizedFunctionBase.cpp @@ -8,7 +8,10 @@ void mpart::binding::ParameterizedFunctionBaseWrapper(jlcxx::Module &mod) { // ParameterizedFunctionBase mod.add_type>("ParameterizedFunctionBase") .method("CoeffMap" , [](ParameterizedFunctionBase &pfb){ return KokkosToJulia(pfb.Coeffs()); }) - .method("SetCoeffs", [](ParameterizedFunctionBase &pfb, jlcxx::ArrayRef v){ pfb.SetCoeffs(JuliaToKokkos(v)); }) + .method("SetCoeffs", [](ParameterizedFunctionBase &pfb, jlcxx::ArrayRef v){ + Kokkos::View ConstCoeffs = JuliaToKokkos(v); + pfb.SetCoeffs(ConstCoeffs); + }) .method("numCoeffs", [](ParameterizedFunctionBase &pfb) { return pfb.numCoeffs; }) .method("inputDim" , [](ParameterizedFunctionBase &pfb) { return pfb.inputDim; }) .method("outputDim", [](ParameterizedFunctionBase &pfb) { return pfb.outputDim; }) @@ -76,4 +79,4 @@ void mpart::binding::ParameterizedFunctionBaseWrapper(jlcxx::Module &mod) { std::cerr << "DeserializeMap: MParT was not compiled with Cereal support. Operation incomplete." << std::endl; #endif // MPART_HAS_CEREAL }); -} \ No newline at end of file +} diff --git a/bindings/python/package/torch.py b/bindings/python/package/torch.py index 166a5ecc..6882c618 100644 --- a/bindings/python/package/torch.py +++ b/bindings/python/package/torch.py @@ -1,124 +1,13 @@ import torch - -def ExtractTorchTensorData(tensor): - """ Extracts the pointer, shape, and stride from a pytorch tensor and returns a tuple - that can be passed to MParT functions that have been overloaded to accept - (double*, std::tuple, std::tuple) instead of a Kokkos::View. - - Arguments: - ------------ - tensor: pytorch.Tensor - The pytorch tensor we want to eventually wrap with a Kokkos view. +from .torch_helpers import ExtractTorchTensorData, MpartTorchAutograd - Returns: - ------------ - Tuple[int, Tuple[int,int], Tuple[int,int]] - A python tuple that contains all information needed to construct a Kokkos::View. - After casting to c++ types using pybind, this output can be passed to the - mpart::ConstructViewFromPointer function. - """ - - # Make sure the tensor has double data type - if tensor.dtype != torch.float64: - raise ValueError(f'Currently only tensors with float64 datatype can be converted. Current dtype is {tensor.dtype}') - - if len(tensor.shape)==1: - return tensor.data_ptr(), tensor.shape[0], tensor.stride()[0] - elif len(tensor.shape)==2: - return tensor.data_ptr(), tuple(tensor.shape), tuple(tensor.stride()) - else: - raise ValueError(f'Currently only 1d and 2d tensors can be converted.') - - -class MpartTorchAutograd(torch.autograd.Function): - - @staticmethod - def forward(ctx, input, coeffs, f, return_logdet): - ctx.save_for_backward(input, coeffs) - ctx.f = f - - coeffs_dbl = None - if coeffs is not None: - coeffs_dbl = coeffs.double() - f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl)) - input_dbl = input.double() - - output = torch.zeros(f.outputDim, input.shape[1], dtype=torch.double) - f.EvaluateImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(output)) - - if return_logdet: - logdet = torch.zeros(input.shape[1], dtype=torch.double) - f.LogDeterminantImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(logdet)) - return output.type(input.dtype), logdet.type(input.dtype) - else: - return output.type(input.dtype) - - @staticmethod - def backward(ctx, output_sens, logdet_sens=None): - input, coeffs = ctx.saved_tensors - f = ctx.f - - coeffs_dbl = None - if coeffs is not None: - coeffs_dbl = coeffs.double() - f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl)) - input_dbl = input.double() - output_sens_dbl = output_sens.double() - - logdet_sens_dbl = None - if logdet_sens is not None: - logdet_sens_dbl = logdet_sens.double() - - # Get the gradient wrt input - grad = None - if input.requires_grad: - grad = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double) - - f.GradientImpl(ExtractTorchTensorData(input_dbl), - ExtractTorchTensorData(output_sens_dbl), - ExtractTorchTensorData(grad)) - - if logdet_sens is not None: - grad2 = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double) - - f.LogDeterminantInputGradImpl(ExtractTorchTensorData(input_dbl), - ExtractTorchTensorData(grad2)) - grad += grad2*logdet_sens_dbl[None,:] - - coeff_grad = None - if coeffs is not None: - if coeffs.requires_grad: - coeff_grad = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double) - f.CoeffGradImpl(ExtractTorchTensorData(input_dbl), - ExtractTorchTensorData(output_sens_dbl), - ExtractTorchTensorData(coeff_grad)) - - coeff_grad = coeff_grad.sum(axis=1) # pytorch expects total gradient not per-sample gradient - - if logdet_sens is not None: - grad2 = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double) - - f.LogDeterminantCoeffGradImpl(ExtractTorchTensorData(input_dbl), - ExtractTorchTensorData(grad2)) - - coeff_grad += torch.sum(grad2*logdet_sens[None,:],axis=1) - - if coeff_grad is not None: - coeff_grad = coeff_grad.type(input.dtype) - - if grad is not None: - grad = grad.type(input.dtype) - - return grad, coeff_grad, None, None - - class TorchParameterizedFunctionBase(torch.nn.Module): """ Defines a wrapper around the MParT ParameterizedFunctionBase class that can be used with pytorch. """ - def __init__(self, f, store_coeffs=True, dtype=torch.double): + def __init__(self, f=None, store_coeffs=True, dtype=torch.double): super().__init__() self.f = f @@ -129,7 +18,7 @@ def __init__(self, f, store_coeffs=True, dtype=torch.double): self.coeffs = torch.nn.Parameter(coeff_tensor) else: self.coeffs = None - + def forward(self, x, coeffs=None): if coeffs is None: @@ -148,7 +37,7 @@ class TorchConditionalMapBase(torch.nn.Module): This can be done either in the constructor or afterwards. """ - def __init__(self, f, store_coeffs=True, return_logdet=False, dtype=torch.double): + def __init__(self, f=None, store_coeffs=True, return_logdet=False, dtype=torch.double): super().__init__() self.return_logdet = return_logdet @@ -159,7 +48,7 @@ def __init__(self, f, store_coeffs=True, return_logdet=False, dtype=torch.double self.coeffs = torch.nn.Parameter(coeff_tensor) else: self.coeffs = None - + def forward(self, x, coeffs=None): if coeffs is None: diff --git a/bindings/python/package/torch_helpers.py b/bindings/python/package/torch_helpers.py new file mode 100644 index 00000000..8fe5cfbf --- /dev/null +++ b/bindings/python/package/torch_helpers.py @@ -0,0 +1,115 @@ +import torch + +def ExtractTorchTensorData(tensor): + """ Extracts the pointer, shape, and stride from a pytorch tensor and returns a tuple + that can be passed to MParT functions that have been overloaded to accept + (double*, std::tuple, std::tuple) instead of a Kokkos::View. + + Arguments: + ------------ + tensor: pytorch.Tensor + The pytorch tensor we want to eventually wrap with a Kokkos view. + + Returns: + ------------ + Tuple[int, Tuple[int,int], Tuple[int,int]] + A python tuple that contains all information needed to construct a Kokkos::View. + After casting to c++ types using pybind, this output can be passed to the + mpart::ConstructViewFromPointer function. + """ + + # Make sure the tensor has double data type + if tensor.dtype != torch.float64: + raise ValueError(f'Currently only tensors with float64 datatype can be converted. Current dtype is {tensor.dtype}') + + if len(tensor.shape)==1: + return tensor.data_ptr(), tensor.shape[0], tensor.stride()[0] + elif len(tensor.shape)==2: + return tensor.data_ptr(), tuple(tensor.shape), tuple(tensor.stride()) + else: + raise ValueError(f'Currently only 1d and 2d tensors can be converted.') + + +class MpartTorchAutograd(torch.autograd.Function): + + def __reduce__(self): + return (self.__class__, (None,)) + + @staticmethod + def forward(ctx, input, coeffs, f, return_logdet): + ctx.save_for_backward(input, coeffs) + ctx.f = f + + coeffs_dbl = None + if coeffs is not None: + coeffs_dbl = coeffs.double() + f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl)) + input_dbl = input.double() + + output = torch.zeros(f.outputDim, input.shape[1], dtype=torch.double) + f.EvaluateImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(output)) + + if return_logdet: + logdet = torch.zeros(input.shape[1], dtype=torch.double) + f.LogDeterminantImpl(ExtractTorchTensorData(input_dbl), ExtractTorchTensorData(logdet)) + return output.type(input.dtype), logdet.type(input.dtype) + else: + return output.type(input.dtype) + + @staticmethod + def backward(ctx, output_sens, logdet_sens=None): + input, coeffs = ctx.saved_tensors + f = ctx.f + + coeffs_dbl = None + if coeffs is not None: + coeffs_dbl = coeffs.double() + f.WrapCoeffs(ExtractTorchTensorData(coeffs_dbl)) + input_dbl = input.double() + output_sens_dbl = output_sens.double() + + logdet_sens_dbl = None + if logdet_sens is not None: + logdet_sens_dbl = logdet_sens.double() + + # Get the gradient wrt input + grad = None + if input.requires_grad: + grad = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double) + + f.GradientImpl(ExtractTorchTensorData(input_dbl), + ExtractTorchTensorData(output_sens_dbl), + ExtractTorchTensorData(grad)) + + if logdet_sens is not None: + grad2 = torch.zeros(f.inputDim, input.shape[1], dtype=torch.double) + + f.LogDeterminantInputGradImpl(ExtractTorchTensorData(input_dbl), + ExtractTorchTensorData(grad2)) + grad += grad2*logdet_sens_dbl[None,:] + + coeff_grad = None + if coeffs is not None: + if coeffs.requires_grad: + coeff_grad = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double) + f.CoeffGradImpl(ExtractTorchTensorData(input_dbl), + ExtractTorchTensorData(output_sens_dbl), + ExtractTorchTensorData(coeff_grad)) + + coeff_grad = coeff_grad.sum(axis=1) # pytorch expects total gradient not per-sample gradient + + if logdet_sens is not None: + grad2 = torch.zeros(f.numCoeffs, input.shape[1], dtype=torch.double) + + f.LogDeterminantCoeffGradImpl(ExtractTorchTensorData(input_dbl), + ExtractTorchTensorData(grad2)) + + coeff_grad += torch.sum(grad2*logdet_sens[None,:],axis=1) + + if coeff_grad is not None: + coeff_grad = coeff_grad.type(input.dtype) + + if grad is not None: + grad = grad.type(input.dtype) + + return grad, coeff_grad, None, None \ No newline at end of file diff --git a/bindings/python/src/CommonPybindUtilities.cpp b/bindings/python/src/CommonPybindUtilities.cpp index 0da023d1..4b221948 100644 --- a/bindings/python/src/CommonPybindUtilities.cpp +++ b/bindings/python/src/CommonPybindUtilities.cpp @@ -29,5 +29,5 @@ void mpart::binding::Initialize(py::dict opts) { void mpart::binding::CommonUtilitiesWrapper(py::module &m) { m.def("Initialize", py::overload_cast( &mpart::binding::Initialize )); - m.def("Concurrency", &Kokkos::DefaultExecutionSpace::concurrency); + m.def("Concurrency", [](){return Kokkos::DefaultExecutionSpace::concurrency();}); } diff --git a/bindings/python/src/ConditionalMapBase.cpp b/bindings/python/src/ConditionalMapBase.cpp index bf74aa1b..cacbf5b4 100644 --- a/bindings/python/src/ConditionalMapBase.cpp +++ b/bindings/python/src/ConditionalMapBase.cpp @@ -44,7 +44,7 @@ void mpart::binding::ConditionalMapBaseWrapper(py::module &m) .def("GetBaseFunction", &ConditionalMapBase::GetBaseFunction) #if defined(MPART_HAS_CEREAL) .def(py::pickle( - [](std::shared_ptr> const& ptr) { // __getstate__ + [](std::shared_ptr> const& ptr) { // __getstate__ std::stringstream ss; ptr->Save(ss); return py::bytes(ss.str()); @@ -54,12 +54,12 @@ void mpart::binding::ConditionalMapBaseWrapper(py::module &m) std::stringstream ss; ss.str(input); - auto ptr = std::dynamic_pointer_cast>(ParameterizedFunctionBase::Load(ss)); + auto ptr = std::dynamic_pointer_cast>(ParameterizedFunctionBase::Load(ss)); return ptr; } )) #endif - ; + ; } diff --git a/bindings/python/src/MapObjective.cpp b/bindings/python/src/MapObjective.cpp index 7d5e2825..5a1f59f1 100644 --- a/bindings/python/src/MapObjective.cpp +++ b/bindings/python/src/MapObjective.cpp @@ -30,15 +30,15 @@ void mpart::binding::MapObjectiveWrapper(py::module &m) { py::class_, MapObjective, std::shared_ptr>>(m, t2Name.c_str()); m.def(mName.c_str(), [](Eigen::Ref &train, unsigned int dim){ - StridedMatrix trainView = MatToKokkos(train); + StridedMatrix trainView = ConstColMatToKokkos(train); Kokkos::View storeTrain ("Training data store", trainView.extent(0), trainView.extent(1)); Kokkos::deep_copy(storeTrain, trainView); trainView = storeTrain; return ObjectiveFactory::CreateGaussianKLObjective(trainView, dim); }, "train"_a, "dim"_a = 0) .def(mName.c_str(), [](Eigen::Ref &trainEig, Eigen::Ref &testEig, unsigned int dim){ - StridedMatrix trainView = MatToKokkos(trainEig); - StridedMatrix testView = MatToKokkos(testEig); + StridedMatrix trainView = ConstColMatToKokkos(trainEig); + StridedMatrix testView = ConstColMatToKokkos(testEig); Kokkos::View storeTrain ("Training data store", trainView.extent(0), trainView.extent(1)); Kokkos::View storeTest ("Testing data store", testView.extent(0), testView.extent(1)); Kokkos::deep_copy(storeTrain, trainView); diff --git a/bindings/python/src/MultiIndex.cpp b/bindings/python/src/MultiIndex.cpp index ff34b6db..6e533c38 100644 --- a/bindings/python/src/MultiIndex.cpp +++ b/bindings/python/src/MultiIndex.cpp @@ -24,6 +24,37 @@ namespace py = pybind11; using namespace mpart::binding; +template +using Matrix_Map_T = Eigen::Map, 0, Eigen::Stride>; + +mpart::MultiIndexSet MultiIndexSet_PyBuffer(py::buffer x){ + constexpr bool rowMajor = Eigen::Matrix::Flags & Eigen::RowMajorBit; + + py::buffer_info info = x.request(); + + // Check for int32, int64 + bool is_int32 = info.format == py::format_descriptor::format(); + bool is_int64 = info.format == "l"; // This is based on a pybind bug; numpy int64 buffer is l, not q + if (!(is_int32 || is_int64)) + throw std::runtime_error("Incompatible format: expected an array of either int32 or int64!"); + + if (info.ndim != 2) + throw std::runtime_error("Expected array with ndims = 2"); + + int stride_size = is_int32 ? sizeof(int32_t) : sizeof(int64_t); + Eigen::Stride strides( + info.strides[rowMajor ? 0 : 1] / (py::ssize_t)stride_size, + info.strides[rowMajor ? 1 : 0] / (py::ssize_t)stride_size + ); + + if(is_int64) { // Is int64 + Matrix_Map_T map_64 (static_cast(info.ptr), info.shape[0], info.shape[1], strides); + return mpart::MultiIndexSet {map_64.cast()}; + } else { // Is int32 + Matrix_Map_T map (static_cast(info.ptr), info.shape[0], info.shape[1], strides); + return mpart::MultiIndexSet {map}; + } +} void mpart::binding::MultiIndexWrapper(py::module &m) { @@ -112,6 +143,7 @@ void mpart::binding::MultiIndexWrapper(py::module &m) // MultiIndexSet py::class_>(m, "MultiIndexSet") .def(py::init()) + .def(py::init>(&MultiIndexSet_PyBuffer)) .def(py::init const&>()) .def("fix", &MultiIndexSet::Fix) .def("__len__", &MultiIndexSet::Length, "Retrieves the length of _each_ multiindex within this set (i.e. the dimension of the input)") diff --git a/bindings/python/src/ParameterizedFunctionBase.cpp b/bindings/python/src/ParameterizedFunctionBase.cpp index 3fe63c70..ba40b64d 100644 --- a/bindings/python/src/ParameterizedFunctionBase.cpp +++ b/bindings/python/src/ParameterizedFunctionBase.cpp @@ -93,7 +93,7 @@ void mpart::binding::ParameterizedFunctionBaseWrapper(py::mo // ParameterizedFunctionBase py::class_, std::shared_ptr>>(m, "dParameterizedFunctionBase") .def("CoeffMap", [](const ParameterizedFunctionBase &f) { - Kokkos::View host_coeffs = ToHost( f.Coeffs() ); + Kokkos::View host_coeffs = ToHost( f.Coeffs() ); return Eigen::VectorXd(Eigen::Map(host_coeffs.data(), host_coeffs.size())); }) .def("SetCoeffs", py::overload_cast>(&ParameterizedFunctionBase::SetCoeffs)) diff --git a/bindings/python/src/Serialization.cpp b/bindings/python/src/Serialization.cpp index ee1c251a..12924bf6 100644 --- a/bindings/python/src/Serialization.cpp +++ b/bindings/python/src/Serialization.cpp @@ -16,8 +16,8 @@ namespace py = pybind11; using namespace mpart; using namespace mpart::binding; -template<> -void mpart::binding::DeserializeWrapper(py::module &m) +template +void mpart::binding::DeserializeWrapper(py::module &m) { m.def("DeserializeMap", [](std::string const &filename) { @@ -25,8 +25,13 @@ void mpart::binding::DeserializeWrapper(py::module &m) cereal::BinaryInputArchive archive(is); unsigned int inputDim, outputDim, numCoeffs; archive(inputDim, outputDim, numCoeffs); - Kokkos::View coeffs ("Map coeffs", numCoeffs); + Kokkos::View coeffs ("Map coeffs", numCoeffs); load(archive, coeffs); return std::make_tuple(inputDim, outputDim, CopyKokkosToVec(coeffs)); }); -} \ No newline at end of file +} + +template void mpart::binding::DeserializeWrapper(py::module&); +#if defined(MPART_ENABLE_GPU) +template void mpart::binding::DeserializeWrapper(py::module&); +#endif diff --git a/bindings/python/src/TrainMapAdaptive.cpp b/bindings/python/src/TrainMapAdaptive.cpp index cbf209ca..3a23a291 100644 --- a/bindings/python/src/TrainMapAdaptive.cpp +++ b/bindings/python/src/TrainMapAdaptive.cpp @@ -58,5 +58,6 @@ void mpart::binding::TrainMapAdaptiveWrapper(py::module &m) { template void mpart::binding::TrainMapAdaptiveWrapper(py::module&); #if defined(MPART_ENABLE_GPU) -template void mpart::binding::TrainMapAdaptiveWrapper(py::module&); +// TODO: Implement TrainMap for GPU +// template void mpart::binding::TrainMapAdaptiveWrapper(py::module&); #endif // MPART_ENABLE_GPU \ No newline at end of file diff --git a/bindings/python/src/TriangularMap.cpp b/bindings/python/src/TriangularMap.cpp index 578a85fc..1091f476 100644 --- a/bindings/python/src/TriangularMap.cpp +++ b/bindings/python/src/TriangularMap.cpp @@ -20,9 +20,9 @@ void mpart::binding::TriangularMapWrapper(py::module &m) py::class_, ConditionalMapBase, ParameterizedFunctionBase, std::shared_ptr>>(m, tName.c_str()) .def(py::init>>, bool>(), py::arg("comps"), py::arg("moveCoeffs") = false) .def("GetComponent", &TriangularMap::GetComponent) - #if defined(MPART_HAS_CEREAL) +#if defined(MPART_HAS_CEREAL) .def(py::pickle( - [](std::shared_ptr> const& ptr) { // __getstate__ + [](std::shared_ptr> const& ptr) { // __getstate__ std::stringstream ss; ptr->Save(ss); return py::bytes(ss.str()); @@ -32,12 +32,12 @@ void mpart::binding::TriangularMapWrapper(py::module &m) std::stringstream ss; ss.str(input); - auto ptr = std::dynamic_pointer_cast>(ParameterizedFunctionBase::Load(ss)); + auto ptr = std::dynamic_pointer_cast>(ParameterizedFunctionBase::Load(ss)); return ptr; } )) - #endif - ; +#endif + ; } diff --git a/bindings/python/src/Wrapper.cpp b/bindings/python/src/Wrapper.cpp index eee93e63..cbb58659 100644 --- a/bindings/python/src/Wrapper.cpp +++ b/bindings/python/src/Wrapper.cpp @@ -40,8 +40,9 @@ PYBIND11_MODULE(pympart, m) { MapFactoryWrapper(m); AffineMapWrapperDevice(m); AffineFunctionWrapperDevice(m); - IdentityMapWrapper(m); - SerializeWrapper(m); + IdentityMapWrapper(m); +#if defined(MPART_HAS_CEREAL) DeserializeWrapper(m); #endif +#endif } \ No newline at end of file diff --git a/bindings/python/tests/test_MultiIndexSet.py b/bindings/python/tests/test_MultiIndexSet.py index a90d659e..2c4272db 100644 --- a/bindings/python/tests/test_MultiIndexSet.py +++ b/bindings/python/tests/test_MultiIndexSet.py @@ -12,6 +12,14 @@ msetTensorProduct = mpart.MultiIndexSet.CreateTensorProduct(dim,power,noneLim) msetTotalOrder = mpart.MultiIndexSet.CreateTotalOrder(dim,power,noneLim) +def test_create(): + mset_one = mpart.MultiIndexSet([[2]]) + assert mset_one.Size() == 1 + assert len(mset_one) == 1 + mset_one = mpart.MultiIndexSet(np.array([[2]])) + assert mset_one.Size() == 1 + assert len(mset_one) == 1 + def test_max_degrees(): assert np.all(msetFromArray.MaxOrders() == [2,1]) diff --git a/bindings/python/tests/test_TorchWrapper.py b/bindings/python/tests/test_TorchWrapper.py index 70dbc4d5..5c28a1a8 100644 --- a/bindings/python/tests/test_TorchWrapper.py +++ b/bindings/python/tests/test_TorchWrapper.py @@ -7,6 +7,7 @@ import mpart as mt import numpy as np +import dill if haveTorch: @@ -81,6 +82,21 @@ def test_AutogradCoeffs(): loss.backward() assert tmap2.coeffs.grad is not None + def test_AutogradCoeffAsInput(): + + opts = mt.MapOptions() + tmap = mt.CreateTriangular(dim,dim,3,opts) # Simple third order map + + tmap2 = tmap.torch(store_coeffs=False) + + x = torch.randn(numSamps, dim, dtype=torch.double) + coeffs = torch.randn(tmap.numCoeffs, dtype=torch.double) + + y = tmap2(x,coeffs) + assert y.shape[0] == numSamps + assert y.shape[1] == dim + assert not y.isnan().any() + def test_TorchMethod(): opts = mt.MapOptions() tmap = mt.CreateTriangular(dim,dim,3,opts) # Simple third order map @@ -96,20 +112,20 @@ def test_TorchMethod(): assert np.all(y.detach().numpy() == tmap.Evaluate(x.T.detach().numpy()).T) assert np.all(logdet.detach().numpy() == tmap.LogDeterminant(x.T.detach().numpy())) - def test_AutogradCoeffAsInput(): - + def test_TorchPickle(): opts = mt.MapOptions() tmap = mt.CreateTriangular(dim,dim,3,opts) # Simple third order map - tmap2 = tmap.torch(store_coeffs=False) - x = torch.randn(numSamps, dim, dtype=torch.double) - coeffs = torch.randn(tmap.numCoeffs, dtype=torch.double) + tmap2 = tmap.torch(store_coeffs=True) + y = tmap2.forward(x) + - y = tmap2(x,coeffs) - assert y.shape[0] == numSamps - assert y.shape[1] == dim - assert not y.isnan().any() + map_bytes = dill.dumps(tmap2, dill.HIGHEST_PROTOCOL) + tmap3 = dill.loads(map_bytes) + + y2 = tmap3.forward(x) + assert (y2-y).abs().max() < 1e-8 if __name__=='__main__': @@ -118,4 +134,5 @@ def test_AutogradCoeffAsInput(): test_Autograd() test_AutogradCoeffs() test_TorchMethod() + test_TorchPickle() test_AutogradCoeffAsInput() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 123404c0..56a8c3ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ license={file="LICENSE.txt"} readme="README.md" requires-python = ">=3.7" description="A Monotone Parameterization Toolkit" -version="2.2.1" +version="2.2.2" keywords=["Measure Transport", "Monotone", "Transport Map", "Isotonic Regression", "Triangular", "Knothe-Rosenblatt"] [project.urls] diff --git a/src/AffineMap.cpp b/src/AffineMap.cpp index 1e4b7baa..a23c7201 100644 --- a/src/AffineMap.cpp +++ b/src/AffineMap.cpp @@ -56,7 +56,8 @@ template void AffineMap::LogDeterminantImpl(StridedMatrix const& pts, StridedVector output) { - Kokkos::RangePolicy::Space> policy(0,output.size()); + unsigned int N = output.size(); + Kokkos::RangePolicy::Space> policy{0u, N}; Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& i) { output(i) = logDet_; diff --git a/src/ComposedMap.cpp b/src/ComposedMap.cpp index 503b100c..a681ffc4 100644 --- a/src/ComposedMap.cpp +++ b/src/ComposedMap.cpp @@ -178,7 +178,7 @@ ComposedMap::ComposedMap(std::vector -void ComposedMap::SetCoeffs(Kokkos::View coeffs) +void ComposedMap::SetCoeffs(Kokkos::View coeffs) { // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable ConditionalMapBase::SetCoeffs(coeffs); @@ -195,7 +195,7 @@ void ComposedMap::SetCoeffs(Kokkos::View -void ComposedMap::WrapCoeffs(Kokkos::View coeffs) +void ComposedMap::WrapCoeffs(Kokkos::View coeffs) { // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable ConditionalMapBase::WrapCoeffs(coeffs); @@ -212,19 +212,36 @@ void ComposedMap::WrapCoeffs(Kokkos::View -void ComposedMap::SetCoeffs(Kokkos::View coeffs) +template<> +void ComposedMap::SetCoeffs(Kokkos::View coeffs) { // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable - ConditionalMapBase::SetCoeffs(coeffs); + ConditionalMapBase::SetCoeffs(coeffs); // Now create subviews for each of the maps unsigned int cumNumCoeffs = 0; for(unsigned int i=0; inumCoeffs <= this->savedCoeffs.size()); - maps_.at(i)->savedCoeffs = Kokkos::subview(this->savedCoeffs, - std::make_pair(cumNumCoeffs, cumNumCoeffs+maps_.at(i)->numCoeffs)); + maps_.at(i)->WrapCoeffs(Kokkos::subview(this->savedCoeffs, + std::make_pair(cumNumCoeffs, cumNumCoeffs+maps_.at(i)->numCoeffs))); + cumNumCoeffs += maps_.at(i)->numCoeffs; + } +} + +template<> +void ComposedMap::SetCoeffs(Kokkos::View coeffs) +{ + // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable + ConditionalMapBase::SetCoeffs(coeffs); + + // Now create subviews for each of the maps + unsigned int cumNumCoeffs = 0; + for(unsigned int i=0; inumCoeffs <= this->savedCoeffs.size()); + + maps_.at(i)->WrapCoeffs(Kokkos::subview(this->savedCoeffs, + std::make_pair(cumNumCoeffs, cumNumCoeffs+maps_.at(i)->numCoeffs))); cumNumCoeffs += maps_.at(i)->numCoeffs; } } @@ -471,4 +488,4 @@ void ComposedMap::LogDeterminantInputGradImpl(StridedMatrix; #if defined(MPART_ENABLE_GPU) template class mpart::ComposedMap; -#endif \ No newline at end of file +#endif diff --git a/src/ConditionalMapBase.cpp b/src/ConditionalMapBase.cpp index ff09ddc0..7b2a092a 100644 --- a/src/ConditionalMapBase.cpp +++ b/src/ConditionalMapBase.cpp @@ -250,12 +250,13 @@ StridedMatrix ConditionalMapBase return output; } + template<> template<> StridedMatrix ConditionalMapBase::LogDeterminantInputGrad(StridedMatrix const& pts) { // Copy the points to the device space - StridedMatrix pts_device = ToDevice(pts); + StridedMatrix pts_device = ToDevice(pts); // Evaluate on the device space StridedMatrix evals_device = this->LogDeterminantInputGrad(pts_device); @@ -275,7 +276,7 @@ StridedMatrix ConditionalMapBase: StridedMatrix evals_host = this->LogDeterminantInputGrad(pts_host); // Copy back to the device - return ToDevice(evals_host); + return ToDevice(evals_host); } @@ -296,4 +297,4 @@ Eigen::RowMatrixXd ConditionalMapBase::LogDeterminantInputGr template class mpart::ConditionalMapBase; #if defined(MPART_ENABLE_GPU) template class mpart::ConditionalMapBase; -#endif \ No newline at end of file +#endif diff --git a/src/Distributions/DensityBase.cpp b/src/Distributions/DensityBase.cpp index c313732c..e12daf05 100644 --- a/src/Distributions/DensityBase.cpp +++ b/src/Distributions/DensityBase.cpp @@ -1,49 +1,102 @@ #include "MParT/Distributions/DensityBase.h" +#include "MParT/Utilities/ArrayConversions.h" using namespace mpart; -template<> -Eigen::VectorXd DensityBase::LogDensity(Eigen::Ref const &pts) { +template +Eigen::VectorXd DensityBase::LogDensity(Eigen::Ref const &pts) { // Allocate output Eigen::VectorXd output(pts.cols()); - StridedMatrix ptsView = ConstRowMatToKokkos(pts); - StridedVector outView = VecToKokkos(output); + StridedVector outView_h = VecToKokkos(output); + StridedMatrix ptsView_d = ConstRowMatToKokkos(pts); + StridedVector outView_d; + if constexpr(std::is_same_v) outView_d = outView_h; + else outView_d = Kokkos::View {"Outview device", ptsView_d.extent(1)}; // Call the LogDensity function - LogDensityImpl(ptsView, outView); - + LogDensityImpl(ptsView_d, outView_d); + Kokkos::deep_copy(outView_h, outView_d); return output; } -template<> -Eigen::RowMatrixXd DensityBase::LogDensityInputGrad(Eigen::Ref const &pts) { +template +Eigen::RowMatrixXd DensityBase::LogDensityInputGrad(Eigen::Ref const &pts) { // Allocate output Eigen::RowMatrixXd output(pts.rows(), pts.cols()); - StridedMatrix ptsView = ConstRowMatToKokkos(pts); - StridedMatrix outView = MatToKokkos(output); + StridedMatrix outView_h = MatToKokkos(output); + StridedMatrix ptsView_d = ConstRowMatToKokkos(pts); + StridedMatrix outView_d; + if constexpr(std::is_same_v) outView_d = outView_h; + else outView_d = MatToKokkos(output); // TODO: Could be optimized // Call the LogDensity function - LogDensityInputGradImpl(ptsView, outView); - + LogDensityInputGradImpl(ptsView_d, outView_d); + Kokkos::deep_copy(outView_h, outView_d); return output; } -template<> -template<> -StridedVector DensityBase::LogDensity(StridedMatrix const &X) { +template +StridedVector DensityBase::LogDensity(StridedMatrix const &X) { // Allocate output - Kokkos::View output("output", X.extent(1)); + Kokkos::View output("output", X.extent(1)); // Call the LogDensity function LogDensityImpl(X, output); + return output; +} + +template +StridedMatrix DensityBase::LogDensityInputGrad(StridedMatrix const &X) { + // Allocate output + Kokkos::View output("output", X.extent(0), X.extent(1)); + // Call the LogDensity function + LogDensityInputGradImpl(X, output); return output; } +#if defined(MPART_ENABLE_GPU) + template<> +StridedVector DensityBase::LogDensity(StridedMatrix const &X) { + StridedMatrix X_d = ToDevice(X); + // Allocate output + Kokkos::View output_d("output", X.extent(1)); + // Call the LogDensity function + LogDensityImpl(X_d, output_d); + return ToHost(output_d); +} + template<> -StridedMatrix DensityBase::LogDensityInputGrad(StridedMatrix const &X) { +StridedVector DensityBase::LogDensity(StridedMatrix const &X) { + StridedMatrix X_h = ToHost(X); // Allocate output - Kokkos::View output("output", X.extent(0), X.extent(1)); + Kokkos::View output_h("output", X.extent(1)); // Call the LogDensity function - LogDensityInputGradImpl(X, output); + LogDensityImpl(X_h, output_h); + return ToDevice(output_h); +} - return output; -} \ No newline at end of file +template<> +StridedMatrix DensityBase::LogDensityInputGrad(StridedMatrix const &X) { + StridedMatrix X_d = ToDevice(X); + // Allocate output + Kokkos::View output_d("output", X.extent(0), X.extent(1)); + // Call the LogDensityInputGrad function + LogDensityInputGradImpl(X_d, output_d); + return ToHost(output_d); +} + +template<> +StridedMatrix DensityBase::LogDensityInputGrad(StridedMatrix const &X) { + StridedMatrix X_h = ToHost(X); + // Allocate output + Kokkos::View output_h("output", X.extent(0), X.extent(1)); + // Call the LogDensityInputGrad function + LogDensityInputGradImpl(X_h, output_h); + return ToDevice(output_h); +} + +#endif + +template class mpart::DensityBase; +#if defined(MPART_ENABLE_GPU) + template class mpart::DensityBase; +#endif \ No newline at end of file diff --git a/src/Distributions/GaussianSamplerDensity.cpp b/src/Distributions/GaussianSamplerDensity.cpp index 9d991842..22d05196 100644 --- a/src/Distributions/GaussianSamplerDensity.cpp +++ b/src/Distributions/GaussianSamplerDensity.cpp @@ -21,21 +21,21 @@ GaussianSamplerDensity::GaussianSamplerDensity(unsigned int dim): S template void GaussianSamplerDensity::LogDensityImpl(StridedMatrix const &pts, StridedVector output) { // Compute the log density - int M = pts.extent(0); - int N = pts.extent(1); + unsigned int M = pts.extent(0); + unsigned int N = pts.extent(1); if(M != dim_) { throw std::runtime_error("GaussianSamplerDensity::LogDensityImpl: The number of rows in pts must match the dimension of the distribution."); } - Kokkos::MDRangePolicy, typename MemoryToExecution::Space> policy({{0, 0}}, {{N, M}}); + Kokkos::MDRangePolicy, typename MemoryToExecution::Space> policy {{0u, 0u}, {N, M}}; Kokkos::View diff ("diff", M, N); if(mean_.extent(0) == 0){ - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) { + Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) { diff(i,j) = pts(i,j); }); } else { - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) { + Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) { diff(i,j) = pts(i,j) - mean_(i); }); } @@ -44,7 +44,8 @@ void GaussianSamplerDensity::LogDensityImpl(StridedMatrix::Space> policy1d{0u, N}; + Kokkos::parallel_for(policy1d, KOKKOS_CLASS_LAMBDA(const int& j){ output(j) = -0.5*( M*logtau_ + logDetCov_ ); for(int d=0; d::LogDensityInputGradImpl(StridedMatrix< Kokkos::MDRangePolicy, typename MemoryToExecution::Space> policy({{0, 0}}, {{N, M}}); if(mean_.extent(0) == 0){ - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) { + Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) { output(i,j) = -pts(i,j); }); } else { - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int& j, const int& i) { + Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& j, const int& i) { output(i,j) = -pts(i,j) + mean_(i); }); } @@ -94,7 +95,7 @@ void GaussianSamplerDensity::SampleImpl(StridedMatrix::SampleImpl(StridedMatrix::SampleImpl(StridedMatrix output = output_; // Sample from the standard normal - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int j, const int i) { + Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int j, const int i) { GeneratorType rgen = rand_pool.get_state(); output(i,j) = rgen.normal(); rand_pool.free_state(rgen); @@ -126,7 +127,7 @@ void GaussianSamplerDensity::SampleImpl(StridedMatrix::SampleImpl(StridedMatrix; #ifdef MPART_ENABLE_GPU template struct mpart::GaussianSamplerDensity; -#endif \ No newline at end of file +#endif diff --git a/src/InnerMarginalAffineMap.cpp b/src/InnerMarginalAffineMap.cpp index d20e9993..ea225f64 100644 --- a/src/InnerMarginalAffineMap.cpp +++ b/src/InnerMarginalAffineMap.cpp @@ -7,6 +7,17 @@ using namespace mpart; +template +double CalculateLogDet(StridedVector scale) { + double logDet_ = 0.; + unsigned int N = scale.size(); + Kokkos::RangePolicy::Space> policy {0, N}; + Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(const int i, double& ldet){ + ldet += Kokkos::log(scale(i)); + }, logDet_); + return logDet_; +} + template InnerMarginalAffineMap::InnerMarginalAffineMap(StridedVector scale, StridedVector shift, @@ -20,14 +31,11 @@ InnerMarginalAffineMap::InnerMarginalAffineMap(StridedVector::error("InnerMarginalAffineMap: scale and shift must have the same size"); + ProcAgnosticError("InnerMarginalAffineMap: scale and shift must have the same size"); if (scale_.size() != map->inputDim) - ProcAgnosticError::error("InnerMarginalAffineMap: scale and shift must have the same size as the input dimension of the map"); + ProcAgnosticError("InnerMarginalAffineMap: scale and shift must have the same size as the input dimension of the map"); - logDet_ = 0.; - Kokkos::parallel_reduce("InnerMarginalAffineMap logdet", scale.extent(0), KOKKOS_LAMBDA(const int&i, double& ldet){ - ldet += Kokkos::log(scale_(i)); - }, logDet_); + logDet_ = CalculateLogDet(scale); this->SetCoeffs(map->Coeffs()); if (moveCoeffs) { map->WrapCoeffs(this->savedCoeffs); @@ -38,14 +46,16 @@ template void InnerMarginalAffineMap::LogDeterminantImpl(StridedMatrix const& pts, StridedVector output) { - int n1 = pts.extent(0), n2 = pts.extent(1); + unsigned int n1 = pts.extent(0), n2 = pts.extent(1); Kokkos::View tmp("tmp", n1, n2); Kokkos::MDRangePolicy, typename MemoryToExecution::Space> policy({0, 0}, {n1, n2}); Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(const int& i, const int& j) { tmp(i,j) = pts(i,j)*scale_(i) + shift_(i); }); map_->LogDeterminantImpl(tmp, output); - Kokkos::parallel_for("InnerMarginalAffineMap LogDeterminant", output.size(), KOKKOS_CLASS_LAMBDA(const int& i) { + unsigned int N = output.size(); + Kokkos::RangePolicy::Space> policy1d {0, N}; + Kokkos::parallel_for("InnerMarginalAffineMap LogDeterminant", policy1d, KOKKOS_CLASS_LAMBDA(const int& i) { output(i) += logDet_; }); } @@ -55,7 +65,7 @@ void InnerMarginalAffineMap::InverseImpl(StridedMatrix const& r, StridedMatrix output) { - int x1_n1 = this->inputDim - this->outputDim, x1_n2 = x1.extent(1); + unsigned int x1_n1 = this->inputDim - this->outputDim, x1_n2 = x1.extent(1); Kokkos::View x1_tmp; if(x1_n1 > 0) { Kokkos::View x1_t("x1_tmp", x1_n1, x1_n2); diff --git a/src/MapFactory.cpp b/src/MapFactory.cpp index d810933f..1382148f 100644 --- a/src/MapFactory.cpp +++ b/src/MapFactory.cpp @@ -53,7 +53,7 @@ std::shared_ptr> mpart::MapFactory::CreateSingle if(activeInd == 1){ // special case if activeInd = 1, map is of form [T_1; Id] // Bottom identity map - std::shared_ptr> botIdMap = std::make_shared>(dim, dim-activeInd); + std::shared_ptr> botIdMap = std::make_shared>(dim, dim-activeInd); // fill a vector of components with identity, active component, identity std::vector>> blocks(2); @@ -67,7 +67,7 @@ std::shared_ptr> mpart::MapFactory::CreateSingle } else if (activeInd == dim){ // special case if activeInd = dim, map is of form [Id; T_d] // Top identity map - std::shared_ptr> topIdMap = std::make_shared>(activeInd-1, activeInd-1); + std::shared_ptr> topIdMap = std::make_shared>(activeInd-1, activeInd-1); // fill a vector of components with identity, active component, identity std::vector>> blocks(2); @@ -80,10 +80,10 @@ std::shared_ptr> mpart::MapFactory::CreateSingle else{ // general case, map is of form [Id; T_i; Id] // Top identity map - std::shared_ptr> topIdMap = std::make_shared>(activeInd-1, activeInd-1); + std::shared_ptr> topIdMap = std::make_shared>(activeInd-1, activeInd-1); // Bottom identity map - std::shared_ptr> botIdMap = std::make_shared>(dim, dim-activeInd); + std::shared_ptr> botIdMap = std::make_shared>(dim, dim-activeInd); // fill a vector of components with identity, active component, identity std::vector>> blocks(3); @@ -95,8 +95,8 @@ std::shared_ptr> mpart::MapFactory::CreateSingle output = std::make_shared>(blocks); } - - output->SetCoeffs(Kokkos::View("Component Coefficients", output->numCoeffs)); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", output->numCoeffs); + output->SetCoeffs(coeffs); return output; } @@ -118,7 +118,9 @@ std::shared_ptr> mpart::MapFactory::CreateTriang comps.at(i) = CreateComponent(mset.ToDevice(), options); } auto output = std::make_shared>(comps); - output->SetCoeffs(Kokkos::View("Component Coefficients", output->numCoeffs)); + + Kokkos::View coeffs = Kokkos::View("Component Coefficients", output->numCoeffs); + output->SetCoeffs(coeffs); return output; } @@ -160,7 +162,8 @@ std::shared_ptr> mpart::MapFactory::Creat } if(output){ - output->SetCoeffs(Kokkos::View("Component Coefficients", output->numCoeffs)); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", output->numCoeffs); + output->SetCoeffs(coeffs); return output; } @@ -184,10 +187,12 @@ Sigmoid1d CreateSigmoid( std::stringstream ss; ss << "Incorrect length of centers, " << centers.size() << ".\n"; ss << "Length should be of form 2+(1+2+3+...+n) for some order n"; - ProcAgnosticError::error( + ProcAgnosticError( ss.str().c_str()); } - Kokkos::parallel_for(max_order+2, KOKKOS_LAMBDA(unsigned int i) { + unsigned int N = max_order+2; + Kokkos::RangePolicy::Space> policy{0, N}; + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(unsigned int i) { if (i == max_order) { widths(0) = edgeShape; weights(0) = 1./edgeShape; @@ -233,7 +238,7 @@ std::shared_ptr> CreateSigmoidExpansionTemplate( ss << "Mismatched input dimensions for offdiag and diag multiindex sets\n" << "offdiag: " << mset_offdiag.Length() << "\n" << "diag: " << mset_diag.Length(); - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } using Sigmoid_T = Sigmoid1d; using DiagBasisEval_T = BasisEvaluator, Rectifier>; @@ -270,10 +275,11 @@ std::shared_ptr> CreateSigmoidExpansionTemplate( mset_offdiag, mset, centers, edgeWidth); } MultiIndexSet mset = MultiIndexSet::CreateTotalOrder(inputDim, totalOrder, MultiIndexLimiter::NonzeroDiag()); - FixedMultiIndexSet fmset_diag = mset.Fix(true).ToDevice(); - FixedMultiIndexSet fmset_offdiag {inputDim-1, totalOrder}; + FixedMultiIndexSet fmset_diag_d = mset.Fix(true).ToDevice(); + FixedMultiIndexSet fmset_offdiag {inputDim-1, totalOrder}; + FixedMultiIndexSet fmset_offdiag_d = fmset_offdiag.ToDevice(); return CreateSigmoidExpansionTemplate( - fmset_offdiag, fmset_diag, centers, edgeWidth); + fmset_offdiag_d, fmset_diag_d, centers, edgeWidth); } template @@ -283,25 +289,25 @@ void HandleSigmoidComponentErrors(MapOptions const& opts) { std::string basisString = MapOptions::btypes[static_cast(opts.basisType)]; std::stringstream ss; ss << "Unsupported basis type for sigmoid expansion: " << basisString; - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } if(opts.posFuncType != PosFuncTypes::Exp && opts.posFuncType != PosFuncTypes::SoftPlus) { std::string posString = MapOptions::pftypes[static_cast(opts.posFuncType)]; std::stringstream ss; ss << "Unsupported positive function type for sigmoid expansion: " << posString; - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } if(opts.edgeType != EdgeTypes::SoftPlus) { std::string edgeString = MapOptions::etypes[static_cast(opts.edgeType)]; std::stringstream ss; ss << "Unsupported edge type for sigmoid expansion: " << edgeString; - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } if(opts.sigmoidType != SigmoidTypes::Logistic) { std::string sigmoidString = MapOptions::stypes[static_cast(opts.sigmoidType)]; std::stringstream ss; ss << "Unsupported sigmoid type for sigmoid expansion: " << sigmoidString; - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } } @@ -346,7 +352,7 @@ std::shared_ptr> MapFactory::CreateSigmoidTriang if(outputDim > inputDim) { std::stringstream ss; ss << "CreateSigmoidTriangular: Output dimension " << outputDim << " cannot be greater than input dimension " << inputDim; - ProcAgnosticError::error(ss.str().c_str()); + ProcAgnosticError(ss.str().c_str()); } std::vector> > comps(outputDim); for(int i = 0; i < outputDim; i++) { @@ -375,4 +381,4 @@ template std::shared_ptr> mpart::MapFactor template std::shared_ptr> mpart::MapFactory::CreateSigmoidComponent(unsigned int, unsigned int, StridedVector, MapOptions); template std::shared_ptr> mpart::MapFactory::CreateSigmoidComponent(FixedMultiIndexSet, FixedMultiIndexSet, StridedVector, MapOptions); template std::shared_ptr> mpart::MapFactory::CreateSigmoidTriangular(unsigned int, unsigned int, unsigned int, std::vector> const&, MapOptions); -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl1.cpp b/src/MapFactoryImpl1.cpp index 883d7163..dc6ed1d6 100644 --- a/src/MapFactoryImpl1.cpp +++ b/src/MapFactoryImpl1.cpp @@ -26,7 +26,8 @@ std::shared_ptr> CreateComponentImpl_Phys_ACC(Fi output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); return output; } @@ -42,8 +43,8 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, Adaptiv REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory1) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl10.cpp b/src/MapFactoryImpl10.cpp index 89caf963..c7e2e8a8 100644 --- a/src/MapFactoryImpl10.cpp +++ b/src/MapFactoryImpl10.cpp @@ -26,7 +26,9 @@ std::shared_ptr> CreateComponentImpl_LinPhys_ACC output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -42,7 +44,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory10) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl11.cpp b/src/MapFactoryImpl11.cpp index c82bb35d..c4e0299d 100644 --- a/src/MapFactoryImpl11.cpp +++ b/src/MapFactoryImpl11.cpp @@ -24,7 +24,9 @@ std::shared_ptr> CreateComponentImpl_LinPhys_CC( output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -40,7 +42,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory11) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl12.cpp b/src/MapFactoryImpl12.cpp index 03edb83d..8723ce92 100644 --- a/src/MapFactoryImpl12.cpp +++ b/src/MapFactoryImpl12.cpp @@ -23,7 +23,9 @@ std::shared_ptr> CreateComponentImpl_LinPhys_AS( output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, AdaptiveSimpson, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, AdaptiveSimpson, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory12) #endif diff --git a/src/MapFactoryImpl13.cpp b/src/MapFactoryImpl13.cpp index 793e7728..757d9fb4 100644 --- a/src/MapFactoryImpl13.cpp +++ b/src/MapFactoryImpl13.cpp @@ -25,7 +25,9 @@ std::shared_ptr> CreateComponentImpl_LinProb_ACC output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -41,7 +43,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory13) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl14.cpp b/src/MapFactoryImpl14.cpp index d58723c2..794b70b9 100644 --- a/src/MapFactoryImpl14.cpp +++ b/src/MapFactoryImpl14.cpp @@ -23,7 +23,9 @@ std::shared_ptr> CreateComponentImpl_LinProb_CC( output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory14) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl15.cpp b/src/MapFactoryImpl15.cpp index d2e43efd..f5ae8513 100644 --- a/src/MapFactoryImpl15.cpp +++ b/src/MapFactoryImpl15.cpp @@ -23,7 +23,9 @@ std::shared_ptr> CreateComponentImpl_LinProb_AS( output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, AdaptiveSimpson, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, AdaptiveSimpson, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory15) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl16.cpp b/src/MapFactoryImpl16.cpp index ad34fd4e..40f9c109 100644 --- a/src/MapFactoryImpl16.cpp +++ b/src/MapFactoryImpl16.cpp @@ -23,15 +23,17 @@ std::shared_ptr> CreateComponentImpl_LinHF_AS(Fi output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } static auto reg_host_linhf_as_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS)); static auto reg_host_linhf_as_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS)); #if defined(MPART_ENABLE_GPU) - static auto reg_device_linhf_as_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS)); - static auto reg_device_linhf_as_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS)); + static auto reg_device_linhf_as_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS)); + static auto reg_device_linhf_as_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveSimpson), CreateComponentImpl_LinHF_AS)); #endif #if defined(MPART_HAS_CEREAL) @@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, AdaptiveSimpson, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, AdaptiveSimpson, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory16) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl17.cpp b/src/MapFactoryImpl17.cpp index bb3d979c..f14fae97 100644 --- a/src/MapFactoryImpl17.cpp +++ b/src/MapFactoryImpl17.cpp @@ -23,15 +23,17 @@ std::shared_ptr> CreateComponentImpl_LinHF_CC(Fi output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } static auto reg_host_linhf_cc_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC)); static auto reg_host_linhf_cc_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC)); #if defined(MPART_ENABLE_GPU) - static auto reg_device_linhf_cc_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC)); - static auto reg_device_linhf_cc_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC)); + static auto reg_device_linhf_cc_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC)); + static auto reg_device_linhf_cc_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::ClenshawCurtis), CreateComponentImpl_LinHF_CC)); #endif #if defined(MPART_HAS_CEREAL) @@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory17) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl18.cpp b/src/MapFactoryImpl18.cpp index e98007a4..f135404a 100644 --- a/src/MapFactoryImpl18.cpp +++ b/src/MapFactoryImpl18.cpp @@ -24,15 +24,17 @@ std::shared_ptr> CreateComponentImpl_LinHF_ACC(F output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } static auto reg_host_linhf_acc_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC)); static auto reg_host_linhf_acc_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC)); #if defined(MPART_ENABLE_GPU) - static auto reg_device_linhf_acc_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC)); - static auto reg_device_linhf_acc_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC)); + static auto reg_device_linhf_acc_exp = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::Exp, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC)); + static auto reg_device_linhf_acc_splus = mpart::MapFactory::CompFactoryImpl::GetFactoryMap()->insert(std::make_pair(std::make_tuple(BasisTypes::HermiteFunctions, true, PosFuncTypes::SoftPlus, QuadTypes::AdaptiveClenshawCurtis), CreateComponentImpl_LinHF_ACC)); #endif #if defined(MPART_HAS_CEREAL) @@ -40,7 +42,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, LinearizedBasis, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory18) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl2.cpp b/src/MapFactoryImpl2.cpp index b6b23fb6..67ebc2bc 100644 --- a/src/MapFactoryImpl2.cpp +++ b/src/MapFactoryImpl2.cpp @@ -22,7 +22,8 @@ std::shared_ptr> CreateComponentImpl_Phys_CC(Fix output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); return output; } @@ -38,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, Clensha REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory2) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl3.cpp b/src/MapFactoryImpl3.cpp index 7bac8fb4..b7b19289 100644 --- a/src/MapFactoryImpl3.cpp +++ b/src/MapFactoryImpl3.cpp @@ -21,7 +21,9 @@ std::shared_ptr> CreateComponentImpl_Phys_AS(Fix output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, Adaptiv REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Exp, AdaptiveSimpson, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, Softplus, AdaptiveSimpson, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, PhysicistHermite, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory3) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl4.cpp b/src/MapFactoryImpl4.cpp index eabfb21a..e1f8edc7 100644 --- a/src/MapFactoryImpl4.cpp +++ b/src/MapFactoryImpl4.cpp @@ -22,8 +22,10 @@ std::shared_ptr> CreateComponentImpl_Prob_ACC(Fi std::shared_ptr> output; output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); + + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); return output; } @@ -39,7 +41,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, Adapt REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory4) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl5.cpp b/src/MapFactoryImpl5.cpp index 2eaa8113..4bc9a03a 100644 --- a/src/MapFactoryImpl5.cpp +++ b/src/MapFactoryImpl5.cpp @@ -21,7 +21,9 @@ std::shared_ptr> CreateComponentImpl_Prob_CC(Fix output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, Clens REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory5) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl6.cpp b/src/MapFactoryImpl6.cpp index 63348840..0e51786e 100644 --- a/src/MapFactoryImpl6.cpp +++ b/src/MapFactoryImpl6.cpp @@ -21,7 +21,9 @@ std::shared_ptr> CreateComponentImpl_Prob_AS(Fix output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -38,7 +40,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, Adapt REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Exp, AdaptiveSimpson, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, Softplus, AdaptiveSimpson, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, ProbabilistHermite, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory6) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl7.cpp b/src/MapFactoryImpl7.cpp index 93463d21..6a29d336 100644 --- a/src/MapFactoryImpl7.cpp +++ b/src/MapFactoryImpl7.cpp @@ -23,7 +23,9 @@ std::shared_ptr> CreateComponentImpl_HF_ACC(Fixe output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -40,7 +42,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, Adaptive REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveClenshawCurtis, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, AdaptiveClenshawCurtis, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Softplus, AdaptiveClenshawCurtis, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveClenshawCurtis, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory7) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl8.cpp b/src/MapFactoryImpl8.cpp index 69ab2725..04a91cc5 100644 --- a/src/MapFactoryImpl8.cpp +++ b/src/MapFactoryImpl8.cpp @@ -21,7 +21,9 @@ std::shared_ptr> CreateComponentImpl_HF_CC(Fixed output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, Clenshaw REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, ClenshawCurtisQuadrature, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, ClenshawCurtisQuadrature, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Softplus, ClenshawCurtisQuadrature, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, ClenshawCurtisQuadrature, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory8) -#endif \ No newline at end of file +#endif diff --git a/src/MapFactoryImpl9.cpp b/src/MapFactoryImpl9.cpp index 12d7316a..11313bfc 100644 --- a/src/MapFactoryImpl9.cpp +++ b/src/MapFactoryImpl9.cpp @@ -21,7 +21,9 @@ std::shared_ptr> CreateComponentImpl_HF_AS(Fixed output = std::make_shared>(expansion, quad, opts.contDeriv, opts.nugget); - output->SetCoeffs(Kokkos::View("Component Coefficients", mset.Size())); + Kokkos::View coeffs = Kokkos::View("Component Coefficients", mset.Size()); + output->SetCoeffs(coeffs); + return output; } @@ -37,7 +39,7 @@ REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, Adaptive REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveSimpson, Kokkos::HostSpace) #if defined(MPART_ENABLE_GPU) REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Exp, AdaptiveSimpson, mpart::DeviceSpace) -REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, Softplus, AdaptiveSimpson, mpart::DeviceSpace) +REGISTER_MONO_COMP(BasisHomogeneity::Homogeneous, HermiteFunction, SoftPlus, AdaptiveSimpson, mpart::DeviceSpace) #endif CEREAL_REGISTER_DYNAMIC_INIT(mpartInitMapFactory9) -#endif \ No newline at end of file +#endif diff --git a/src/MapObjective.cpp b/src/MapObjective.cpp index d46f1737..50431688 100644 --- a/src/MapObjective.cpp +++ b/src/MapObjective.cpp @@ -1,10 +1,11 @@ #include "MParT/MapObjective.h" + using namespace mpart; template double MapObjective::operator()(unsigned int n, const double* coeffs, double* grad, std::shared_ptr> map) { - StridedVector coeffView = ToConstKokkos(coeffs, n); + Kokkos::View coeffView = ToConstKokkos(coeffs, n); StridedVector gradView = ToKokkos(grad, n); map->SetCoeffs(coeffView); return ObjectivePlusCoeffGradImpl(train_, gradView, map); @@ -51,14 +52,16 @@ std::shared_ptr> ObjectiveFactory::CreateGaussianKLObj template double KLObjective::ObjectivePlusCoeffGradImpl(StridedMatrix data, StridedVector grad, std::shared_ptr> map) const { + using ExecSpace = typename MemoryToExecution::Space; unsigned int N_samps = data.extent(1); unsigned int grad_dim = grad.extent(0); PullbackDensity pullback {map, density_}; StridedVector densityX = pullback.LogDensity(data); StridedMatrix densityGradX = pullback.LogDensityCoeffGrad(data); double sumDensity = 0.; - - Kokkos::parallel_reduce ("Sum Negative Log Likelihood", N_samps, KOKKOS_LAMBDA (const int i, double &sum) { + + Kokkos::RangePolicy::Space> policy(0,N_samps); + Kokkos::parallel_reduce ("Sum Negative Log Likelihood", policy, KOKKOS_LAMBDA (const int i, double &sum) { sum -= densityX(i); }, sumDensity); @@ -66,21 +69,22 @@ double KLObjective::ObjectivePlusCoeffGradImpl(StridedMatrix> policy(grad_dim, Kokkos::AUTO()); - + Kokkos::TeamPolicy policy(grad_dim, Kokkos::AUTO()); + using team_handle = typename Kokkos::TeamPolicy::member_type; Kokkos::parallel_for(policy, - KOKKOS_LAMBDA(auto& tag, auto&& teamMember){ + KOKKOS_LAMBDA(const team_handle& teamMember){ int row = teamMember.league_rank(); double thisRowSum = 0.0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps), - KOKKOS_LAMBDA(int col, double& innerUpdate){ - innerUpdate += scale*densityGradX(row,col); - }, + [&] (const int& col, double& innerUpdate) { + innerUpdate += scale*densityGradX(row,col); + }, thisRowSum); grad(row) = thisRowSum; } ); + Kokkos::fence(); } return sumDensity/N_samps; } @@ -91,7 +95,8 @@ double KLObjective::ObjectiveImpl(StridedMatrix pullback {map, density_}; StridedVector densityX = pullback.LogDensity(data); double sumDensity = 0.; - Kokkos::parallel_reduce ("Sum Negative Log Likelihood", N_samps, KOKKOS_LAMBDA (const int i, double &sum) { + Kokkos::RangePolicy::Space> policy(0,N_samps); + Kokkos::parallel_reduce ("Sum Negative Log Likelihood", policy, KOKKOS_LAMBDA (const int i, double &sum) { sum -= densityX(i); }, sumDensity); return sumDensity/N_samps; @@ -99,22 +104,28 @@ double KLObjective::ObjectiveImpl(StridedMatrix void KLObjective::CoeffGradImpl(StridedMatrix data, StridedVector grad, std::shared_ptr> map) const { + using ExecSpace = typename MemoryToExecution::Space; unsigned int N_samps = data.extent(1); unsigned int grad_dim = grad.extent(0); PullbackDensity pullback {map, density_}; StridedMatrix densityGradX = pullback.LogDensityCoeffGrad(data); double scale = -1.0/((double) N_samps); - Kokkos::parallel_for("grad", Kokkos::TeamPolicy>(grad_dim, Kokkos::AUTO()), - KOKKOS_LAMBDA(auto t, typename Kokkos::TeamPolicy>::member_type teamMember){ + Kokkos::TeamPolicy policy(grad_dim, Kokkos::AUTO()); + using team_handle = typename Kokkos::TeamPolicy::member_type; + Kokkos::parallel_for(policy, + KOKKOS_LAMBDA(const team_handle& teamMember){ int row = teamMember.league_rank(); double thisRowSum = 0.0; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps), KOKKOS_LAMBDA(int col, double& innerUpdate){ - innerUpdate += scale*densityGradX(row,col); - }, thisRowSum); + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps), + [&] (const int& col, double& innerUpdate) { + innerUpdate += scale*densityGradX(row,col); + }, + thisRowSum); grad(row) = thisRowSum; } ); + Kokkos::fence(); } // Explicit template instantiation @@ -127,4 +138,4 @@ template std::shared_ptr> mpart::ObjectiveFactor template class mpart::KLObjective; template std::shared_ptr> mpart::ObjectiveFactory::CreateGaussianKLObjective(StridedMatrix, unsigned int); template std::shared_ptr> mpart::ObjectiveFactory::CreateGaussianKLObjective(StridedMatrix, StridedMatrix, unsigned int); -#endif \ No newline at end of file +#endif diff --git a/src/MultiIndices/FixedMultiIndexSet.cpp b/src/MultiIndices/FixedMultiIndexSet.cpp index a96cb3c1..9a27a184 100644 --- a/src/MultiIndices/FixedMultiIndexSet.cpp +++ b/src/MultiIndices/FixedMultiIndexSet.cpp @@ -2,6 +2,9 @@ #include "MParT/MultiIndices/MultiIndex.h" #include "MParT/MultiIndices/MultiIndexSet.h" #include "MParT/Utilities/ArrayConversions.h" +#include "MParT/Utilities/KokkosSpaceMappings.h" +#include "MParT/Utilities/GPUtils.h" + #include using namespace mpart; @@ -126,8 +129,10 @@ void FixedMultiIndexSet::SetupTerms() unsigned int numTerms = nzOrders.extent(0) / dim; nzStarts = Kokkos::View("Start of a Multiindex", numTerms+1); - Kokkos::parallel_for(numTerms, StartSetter(nzStarts,dim)); - Kokkos::parallel_for(dim*numTerms, DimSetter(nzDims,dim)); + Kokkos::RangePolicy::Space> policy(0, numTerms); + Kokkos::RangePolicy::Space> policyDims(0, dim*numTerms); + Kokkos::parallel_for(policy, StartSetter(nzStarts,dim)); + Kokkos::parallel_for(policyDims, DimSetter(nzDims,dim)); } template<> void FixedMultiIndexSet::SetupTerms() @@ -153,9 +158,11 @@ template void FixedMultiIndexSet::CalculateMaxDegrees() { maxDegrees = Kokkos::View("Maximum degrees", dim); - - Kokkos::parallel_for(dim, MaxDegreeInitializer(maxDegrees)); - Kokkos::parallel_for(nzOrders.extent(0), MaxDegreeSetter(maxDegrees, nzDims, nzOrders, dim)); + + Kokkos::RangePolicy::Space> DimPolicy(0, dim); + Kokkos::RangePolicy::Space> NZPolicy(0, nzOrders.extent(0)); + Kokkos::parallel_for(DimPolicy, MaxDegreeInitializer(maxDegrees)); + Kokkos::parallel_for(NZPolicy, MaxDegreeSetter(maxDegrees, nzDims, nzOrders, dim)); } template<> @@ -186,8 +193,11 @@ FixedMultiIndexSet::FixedMultiIndexSet(unsigned int nzDims(nzDims), nzOrders(nzOrders) { + DimensionSorter dimSort {nzStarts, nzDims, nzOrders}; // Sort so that nzDims increases for each multiindex - Kokkos::parallel_for(nzStarts.extent(0)-1, DimensionSorter(nzStarts, nzDims, nzOrders)); + unsigned int N_midxs = nzStarts.extent(0) - 1; + Kokkos::RangePolicy::Space> policy{0lu, N_midxs}; + Kokkos::parallel_for(policy, dimSort); CalculateMaxDegrees(); } @@ -456,18 +466,18 @@ FixedMultiIndexSet FixedMultiIndexSet::ToD #if defined(MPART_ENABLE_GPU) template<> template<> - FixedMultiIndexSet FixedMultiIndexSet::ToDevice() + FixedMultiIndexSet FixedMultiIndexSet::ToDevice() { - auto deviceStarts = mpart::ToDevice(nzStarts); - auto deviceDims = mpart::ToDevice(nzDims); - auto deviceOrders = mpart::ToDevice(nzOrders); - FixedMultiIndexSet output(dim, deviceStarts, deviceDims, deviceOrders); + auto deviceStarts = mpart::ToDevice(nzStarts); + auto deviceDims = mpart::ToDevice(nzDims); + auto deviceOrders = mpart::ToDevice(nzOrders); + FixedMultiIndexSet output(dim, deviceStarts, deviceDims, deviceOrders); return output; } template<> template<> - FixedMultiIndexSet FixedMultiIndexSet::ToDevice() + FixedMultiIndexSet FixedMultiIndexSet::ToDevice() { assert(false); return FixedMultiIndexSet(0,0); @@ -475,10 +485,9 @@ FixedMultiIndexSet FixedMultiIndexSet::ToD template<> template<> - FixedMultiIndexSet FixedMultiIndexSet::ToDevice() + FixedMultiIndexSet FixedMultiIndexSet::ToDevice() { - assert(false); - return FixedMultiIndexSet(0,0); + return *this; } #endif @@ -487,7 +496,7 @@ FixedMultiIndexSet FixedMultiIndexSet::ToD // Explicit template instantiation #if defined(MPART_ENABLE_GPU) template class mpart::FixedMultiIndexSet; - template class mpart::FixedMultiIndexSet; + template class mpart::FixedMultiIndexSet; #else template class mpart::FixedMultiIndexSet; #endif diff --git a/src/ParameterizedFunctionBase.cpp b/src/ParameterizedFunctionBase.cpp index 8887ccf6..f38b4ff9 100644 --- a/src/ParameterizedFunctionBase.cpp +++ b/src/ParameterizedFunctionBase.cpp @@ -181,31 +181,42 @@ Eigen::RowMatrixXd ParameterizedFunctionBase::Gradient(Eigen #endif - - -template -void ParameterizedFunctionBase::SetCoeffs(Kokkos::View coeffs){ - +template +void SetCoeffsInternal(unsigned int numCoeffs, + Kokkos::View& coeffsDest, + Kokkos::View coeffsSrc) { // If coefficients already exist, make sure the sizes match - if(this->savedCoeffs.is_allocated()){ - if(coeffs.size() != numCoeffs){ + if(coeffsDest.is_allocated()){ + if(coeffsSrc.size() != numCoeffs){ std::stringstream msg; - msg << "Error in ParameterizedFunctionBase::SetCoeffs. Expected coefficient vector with size " << numCoeffs << ", but new coefficients have size " << coeffs.size() << "."; + msg << "Error in ParameterizedFunctionBase::SetCoeffs. Expected coefficient vector with size " << numCoeffs << ", but new coefficients have size " << coeffsSrc.size() << "."; throw std::invalid_argument(msg.str()); } - if(this->savedCoeffs.size() != numCoeffs) - Kokkos::resize(this->savedCoeffs, numCoeffs); + if(coeffsDest.size() != numCoeffs) + Kokkos::resize(coeffsDest, numCoeffs); }else{ - - this->savedCoeffs = Kokkos::View("ParameterizedFunctionBase Coefficients", coeffs.size()); + coeffsDest = Kokkos::View("ParameterizedFunctionBase Coefficients", numCoeffs); } - Kokkos::deep_copy(this->savedCoeffs, coeffs); + Kokkos::deep_copy(coeffsDest, coeffsSrc); +} + +template +void ParameterizedFunctionBase::SetCoeffs(Kokkos::View coeffs){ + SetCoeffsInternal(this->numCoeffs, this->savedCoeffs, coeffs); +} + +template +void ParameterizedFunctionBase::SetCoeffs(Kokkos::View coeffs){ + Kokkos::View ConstCoeffs = coeffs; + SetCoeffs(ConstCoeffs); } + + template -void ParameterizedFunctionBase::WrapCoeffs(Kokkos::View coeffs){ +void ParameterizedFunctionBase::WrapCoeffs(Kokkos::View coeffs){ if(coeffs.size() != numCoeffs){ std::stringstream msg; @@ -213,37 +224,43 @@ void ParameterizedFunctionBase::WrapCoeffs(Kokkos::ViewsavedCoeffs = coeffs; + } #if defined(MPART_ENABLE_GPU) - template<> void ParameterizedFunctionBase::SetCoeffs(Kokkos::View coeffs) { - // Copy the coefficients to the device - Kokkos::View coeffs_device = ToDevice(coeffs); - this->SetCoeffs(coeffs_device); + SetCoeffsInternal(this->numCoeffs, this->savedCoeffs, coeffs); +} +template<> +void ParameterizedFunctionBase::SetCoeffs(Kokkos::View coeffs) +{ + SetCoeffsInternal(this->numCoeffs, this->savedCoeffs, coeffs); } - -template -void ParameterizedFunctionBase::WrapCoeffs(Kokkos::View coeffs) +template<> +void ParameterizedFunctionBase::SetCoeffs(Kokkos::View coeffs) { + Kokkos::View ConstCoeffs = coeffs; + SetCoeffs(ConstCoeffs); +} +template<> +void ParameterizedFunctionBase::SetCoeffs(Kokkos::View coeffs) +{ + Kokkos::View ConstCoeffs = coeffs; + SetCoeffs(ConstCoeffs); +} + - if(coeffs.size() != numCoeffs){ - std::stringstream msg; - msg << "Error in ParameterizedFunctionBase::WrapCoeffs. Expected coefficient vector with size " << numCoeffs << ", but new coefficients have size " << coeffs.size() << "."; - throw std::invalid_argument(msg.str()); - } - this->savedCoeffs = coeffs; -} #endif template void ParameterizedFunctionBase::SetCoeffs(Eigen::Ref coeffs) { - SetCoeffs(Kokkos::View(VecToKokkos(coeffs))); + Kokkos::View inputCoeffs = VecToKokkos(coeffs); + SetCoeffs(inputCoeffs); } template @@ -254,7 +271,7 @@ void ParameterizedFunctionBase::WrapCoeffs(Eigen::Ref void ParameterizedFunctionBase::WrapCoeffs(Eigen::Ref coeffs) { - WrapCoeffs(Kokkos::View(VecToKokkos(coeffs))); + WrapCoeffs(Kokkos::View(VecToKokkos(coeffs))); } template<> @@ -372,4 +389,4 @@ template class mpart::ParameterizedFunctionBase; } template class mpart::ParameterizedFunctionBase; -#endif \ No newline at end of file +#endif diff --git a/src/SummarizedMap.cpp b/src/SummarizedMap.cpp index 05bf6322..3bfcf9c9 100644 --- a/src/SummarizedMap.cpp +++ b/src/SummarizedMap.cpp @@ -27,7 +27,7 @@ SummarizedMap::SummarizedMap(std::shared_ptr -void SummarizedMap::SetCoeffs(Kokkos::View coeffs) +void SummarizedMap::SetCoeffs(Kokkos::View coeffs) { // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable ConditionalMapBase::SetCoeffs(coeffs); @@ -35,7 +35,7 @@ void SummarizedMap::SetCoeffs(Kokkos::View -void SummarizedMap::WrapCoeffs(Kokkos::View coeffs) +void SummarizedMap::WrapCoeffs(Kokkos::View coeffs) { // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable @@ -45,23 +45,24 @@ void SummarizedMap::WrapCoeffs(Kokkos::View -void SummarizedMap::SetCoeffs(Kokkos::View coeffs) +template<> +void SummarizedMap::SetCoeffs(Kokkos::View coeffs) { // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable - ConditionalMapBase::SetCoeffs(coeffs); - comp_->WrapCoeffs(coeffs); + ConditionalMapBase::SetCoeffs(coeffs); + comp_->WrapCoeffs(this->savedCoeffs); } +template<> +void SummarizedMap::SetCoeffs(Kokkos::View coeffs) -template -void SummarizedMap::WrapCoeffs(Kokkos::View coeffs) { - // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable - ConditionalMapBase::WrapCoeffs(coeffs); - comp_->WrapCoeffs(coeffs); + // First, call the ConditionalMapBase version of this function to copy the view into the savedCoeffs member variable + ConditionalMapBase::SetCoeffs(coeffs); + comp_->WrapCoeffs(this->savedCoeffs); + } #endif @@ -240,4 +241,4 @@ void SummarizedMap::LogDeterminantInputGradImpl(StridedMatrix; #if defined(MPART_ENABLE_GPU) template class mpart::SummarizedMap; -#endif \ No newline at end of file +#endif diff --git a/src/TrainMap.cpp b/src/TrainMap.cpp index db177a0f..af74a58b 100644 --- a/src/TrainMap.cpp +++ b/src/TrainMap.cpp @@ -63,10 +63,12 @@ double mpart::TrainMap(std::shared_ptr> ma std::cout << "TrainMap: Initializing map coeffs to 1." << std::endl; } Kokkos::View coeffs ("Default coeffs", map->numCoeffs); - Kokkos::parallel_for("Setting default coeff val", map->numCoeffs, KOKKOS_LAMBDA(const unsigned int i){ + Kokkos::RangePolicy::Space> policy(0,map->numCoeffs); + Kokkos::parallel_for("Setting default coeff val", policy, KOKKOS_LAMBDA(const unsigned int i){ coeffs(i) = 1.; }); - map->SetCoeffs(coeffs); + Kokkos::View constCoeffs = coeffs; + map->SetCoeffs(constCoeffs); } nlopt::opt opt = SetupOptimization(map->numCoeffs, options); @@ -83,7 +85,7 @@ double mpart::TrainMap(std::shared_ptr> ma nlopt::result res = opt.optimize(mapCoeffsStd, error); // Set the coefficients using SetCoeffs - Kokkos::View mapCoeffsView = VecToKokkos(mapCoeffsStd); + Kokkos::View mapCoeffsView = VecToKokkos(mapCoeffsStd); map->SetCoeffs(mapCoeffsView); // Print a warning if something goes wrong with NLOpt diff --git a/src/Utilities/LinearAlgebra.cpp b/src/Utilities/LinearAlgebra.cpp index 157cf9ab..9543ab96 100644 --- a/src/Utilities/LinearAlgebra.cpp +++ b/src/Utilities/LinearAlgebra.cpp @@ -295,7 +295,8 @@ double PartialPivLU::determinant() const { assert(isComputed); double det = 1.0; - Kokkos::parallel_reduce(LU_.extent(0), KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) { + Kokkos::RangePolicy::Space> policy(0, LU_.extent(0)); + Kokkos::parallel_reduce(policy, KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) { lprod *= LU_(i,i); },Kokkos::Prod(det)); @@ -473,7 +474,8 @@ double Cholesky::determinant() const { assert(isComputed); double det = 1.0; - Kokkos::parallel_reduce(LLT_.extent(0), KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) { + Kokkos::RangePolicy::Space> policy(0, LLT_.extent(0)); + Kokkos::parallel_reduce(policy, KOKKOS_CLASS_LAMBDA (const int& i, double& lprod) { lprod *= LLT_(i,i); },Kokkos::Prod(det)); return det*det; @@ -485,4 +487,4 @@ template struct mpart::Cholesky; #endif template struct mpart::PartialPivLU; -template struct mpart::Cholesky; \ No newline at end of file +template struct mpart::Cholesky; diff --git a/tests/Distributions/Test_DensityBase.cpp b/tests/Distributions/Test_DensityBase.cpp index 977966e4..94c8e680 100644 --- a/tests/Distributions/Test_DensityBase.cpp +++ b/tests/Distributions/Test_DensityBase.cpp @@ -27,10 +27,7 @@ TEST_CASE( "Testing Custom Uniform Density", "[UniformDensity]" ) { SECTION("LogDensityInputGradImpl") { Kokkos::View output ("output", 2, N_pts); - Kokkos::parallel_for( "initialize output", N_pts, KOKKOS_LAMBDA (const int& j) { - output(0,j) = -3.; - output(1,j) = -3.; - }); + Kokkos::deep_copy(output, -3.); density->LogDensityInputGradImpl(pts, output); for(unsigned int j = 0; j < N_pts; ++j) { REQUIRE(output(0,j) == Approx(0.)); diff --git a/tests/Distributions/Test_Distributions_Common.h b/tests/Distributions/Test_Distributions_Common.h index eb98ba1d..36d40e38 100644 --- a/tests/Distributions/Test_Distributions_Common.h +++ b/tests/Distributions/Test_Distributions_Common.h @@ -23,7 +23,7 @@ UniformSampler(int dim, double scale = std::exp(1.)): SampleGenerator output) { Kokkos::MDRangePolicy,typename MemoryToExecution::Space> policy({0, 0}, {output.extent(0), output.extent(1)}); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i, int j) { + Kokkos::parallel_for(policy, KOKKOS_CLASS_LAMBDA(int i, int j) { auto rgen = this->rand_pool.get_state(); output(i,j) = scale_*rgen.drand(); this->rand_pool.free_state(rgen); @@ -42,7 +42,8 @@ UniformDensity(int dim): DensityBase(dim) {} void LogDensityImpl(StridedMatrix const &pts, StridedVector output) override { double euler = std::exp(1.0); unsigned int N = pts.extent(1); - Kokkos::parallel_for( "uniform log density", N, KOKKOS_LAMBDA (const int& j) { + Kokkos::RangePolicy::Space> policy(0, N); + Kokkos::parallel_for( "uniform log density", policy, KOKKOS_CLASS_LAMBDA (const int& j) { bool in_bounds1 = (pts(0, j) >= 0.0) && (pts(0, j) <= euler); bool in_bounds2 = (pts(1, j) >= 0.0) && (pts(1, j) <= euler); output(j) = in_bounds1 && in_bounds2 ? -2 : -std::numeric_limits::infinity(); @@ -51,7 +52,8 @@ void LogDensityImpl(StridedMatrix const &pts, Strided void LogDensityInputGradImpl(StridedMatrix const &pts, StridedMatrix output) override { unsigned int N = pts.extent(1); - Kokkos::parallel_for( "uniform grad log density", N, KOKKOS_LAMBDA (const int& j) { + Kokkos::RangePolicy::Space> policy(0, N); + Kokkos::parallel_for( "uniform grad log density", policy, KOKKOS_CLASS_LAMBDA (const int& j) { output(0,j) = 0.; output(1,j) = 0.; }); diff --git a/tests/Distributions/Test_GaussianDistribution.cpp b/tests/Distributions/Test_GaussianDistribution.cpp index e3baf804..a9bd5b75 100644 --- a/tests/Distributions/Test_GaussianDistribution.cpp +++ b/tests/Distributions/Test_GaussianDistribution.cpp @@ -3,6 +3,7 @@ #include "MParT/Utilities/ArrayConversions.h" #include "MParT/Distributions/GaussianSamplerDensity.h" #include "Test_Distributions_Common.h" +#include using namespace mpart; using namespace Catch; @@ -15,8 +16,10 @@ void TestGaussianLogPDF(StridedMatrix samples, Stride double offset = -0.9189385332046728*dim; // -log(2*pi)*dim/2 offset -= 0.5*logdet_cov; + + Kokkos::RangePolicy::Space> policy {0, N_samp}; // Calculate difference of samples_pdf and the true pdf in place - Kokkos::parallel_for(N_samp, KOKKOS_LAMBDA(const int i) { + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i) { double norm = 0.; for(int j = 0; j < dim; j++) { norm += samples(j, i)*samples(j, i); @@ -66,7 +69,8 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") { dist->SampleImpl(samples); dist->LogDensityImpl(samples, samples_pdf); dist->LogDensityInputGradImpl(samples, samples_gradpdf); - Kokkos::parallel_for(dim, KOKKOS_LAMBDA(const int i) { + Kokkos::RangePolicy::Space> policy {0, dim}; + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i) { for(int j = 0; j < N_samp; j++) { samples(i, j) -= 1.0; } @@ -76,7 +80,7 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") { } Kokkos::View covar("covar", dim, dim); - auto policy = Kokkos::MDRangePolicy>({0, 0}, {dim, dim}); + Kokkos::MDRangePolicy::Space, Kokkos::Rank<2>> policy ({0, 0}, {dim, dim}); Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j) { covar(i, j) = ((double) (i == j))*covar_diag_val; }); @@ -90,7 +94,7 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") { dist->SampleImpl(samples); dist->LogDensityImpl(samples, samples_pdf); dist->LogDensityInputGradImpl(samples, samples_gradpdf); - policy = Kokkos::MDRangePolicy>({0, 0}, {dim, N_samp}); + policy = Kokkos::MDRangePolicy::Space, Kokkos::Rank<2>>({0, 0}, {dim, N_samp}); Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j) { samples(i, j) /= std::sqrt(covar_diag_val); }); @@ -107,7 +111,7 @@ TEST_CASE( "Testing Gaussian Distribution", "[GaussianDist]") { dist->SampleImpl(samples); dist->LogDensityImpl(samples, samples_pdf); dist->LogDensityInputGradImpl(samples, samples_gradpdf); - policy = Kokkos::MDRangePolicy>({0, 0}, {dim, N_samp}); + policy = Kokkos::MDRangePolicy::Space, Kokkos::Rank<2>>({0, 0}, {dim, N_samp}); Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i, const int j) { samples(i, j) -= 1.0; samples(i, j) /= std::sqrt(covar_diag_val); diff --git a/tests/Distributions/Test_SampleGenerator.cpp b/tests/Distributions/Test_SampleGenerator.cpp index 36931920..58abf895 100644 --- a/tests/Distributions/Test_SampleGenerator.cpp +++ b/tests/Distributions/Test_SampleGenerator.cpp @@ -1,5 +1,8 @@ #include "Test_Distributions_Common.h" +#include +#include + TEST_CASE( "Testing SampleGenerator", "[SampleGenerator]") { // Sample 1000 points // Check empirical CDF against uniform CDF diff --git a/tests/Distributions/Test_TransportDensity.cpp b/tests/Distributions/Test_TransportDensity.cpp index 4a85abb5..5091f34f 100644 --- a/tests/Distributions/Test_TransportDensity.cpp +++ b/tests/Distributions/Test_TransportDensity.cpp @@ -119,7 +119,9 @@ TEST_CASE( "Testing Pullback/Pushforward density", "[PullbackPushforwardDensity] for(int i = 0; i < map->numCoeffs; i++) { map->Coeffs()(i) += fdstep; auto logDensityPerturb = pullback.LogDensity(pullbackSamples); - Kokkos::parallel_for("Test LogDensityCoeffGrad", N_samp, KOKKOS_LAMBDA(const int j) { + Kokkos::RangePolicy::Space> policy(0,N_samp); + + Kokkos::parallel_for("Test LogDensityCoeffGrad", policy, KOKKOS_LAMBDA(const int j) { logDensityPerturb(j) -= samplesDensity(j); logDensityPerturb(j) /= fdstep; logDensityPerturb(j) -= coeffGrad(i, j); @@ -131,4 +133,4 @@ TEST_CASE( "Testing Pullback/Pushforward density", "[PullbackPushforwardDensity] map->Coeffs()(i) = (double) (i + 1); } } -} \ No newline at end of file +} diff --git a/tests/Distributions/Test_TransportSampler.cpp b/tests/Distributions/Test_TransportSampler.cpp index 38ddf498..d1a48996 100644 --- a/tests/Distributions/Test_TransportSampler.cpp +++ b/tests/Distributions/Test_TransportSampler.cpp @@ -11,6 +11,8 @@ using namespace mpart; using namespace Catch; +using MemorySpace = Kokkos::HostSpace; + TEST_CASE( "Testing Pullback/Pushforward sampling", "[PullbackPushforwardSampler]") { unsigned int dim = 2; unsigned int N_samp = 10000; @@ -41,7 +43,7 @@ TEST_CASE( "Testing Pullback/Pushforward sampling", "[PullbackPushforwardSampler auto pullbackSamples = pullback.Sample(N_samp); // Calculate the pullback and pushforward density error - auto policy = Kokkos::MDRangePolicy>({0, 0}, {N_samp, dim}); + auto policy = Kokkos::MDRangePolicy, typename MemoryToExecution::Space>({0, 0}, {N_samp, dim}); Kokkos::parallel_for("Normalize Pullback Samples", policy, KOKKOS_LAMBDA(const int j, const int i) { pullbackSamples(i,j) *= diag_el; pullbackSamples(i,j) += mean; @@ -58,7 +60,7 @@ TEST_CASE( "Testing Pullback/Pushforward sampling", "[PullbackPushforwardSampler auto pushforwardSamples = pushforward.Sample(N_samp); // Calculate the pullback and pushforward density error - auto policy = Kokkos::MDRangePolicy>({0, 0}, {N_samp, dim}); + auto policy = Kokkos::MDRangePolicy, typename MemoryToExecution::Space>({0, 0}, {N_samp, dim}); Kokkos::parallel_for("Normalize Samples", policy, KOKKOS_LAMBDA(const int j, const int i) { pushforwardSamples(i,j) -= mean; pushforwardSamples(i,j) /= diag_el; diff --git a/tests/MultiIndices/Test_MultiIndexSet.cpp b/tests/MultiIndices/Test_MultiIndexSet.cpp index e393ad2d..7f7ffd9c 100644 --- a/tests/MultiIndices/Test_MultiIndexSet.cpp +++ b/tests/MultiIndices/Test_MultiIndexSet.cpp @@ -3,6 +3,7 @@ #include "MParT/MultiIndices/FixedMultiIndexSet.h" #include "MParT/MultiIndices/MultiIndexSet.h" +#include "MParT/Utilities/GPUtils.h" using namespace mpart; @@ -24,10 +25,12 @@ TEST_CASE( "Testing the FixedMultiIndexSet class", "[FixedMultiIndexSet]" ) { FixedMultiIndexSet multiSet_fixed = multiSet_original.Fix(true); MultiIndexSet multiSet_reconstructed = multiSet_fixed.Unfix(); REQUIRE(multiSet_original.Size() == multiSet_reconstructed.Size()); - REQUIRE(multiSet_original.MaxOrders() == multiSet_reconstructed.MaxOrders()); + bool same_max_orders = multiSet_original.MaxOrders() == multiSet_reconstructed.MaxOrders(); + REQUIRE(same_max_orders); std::vector diagonal_idxs_ref = multiSet_reconstructed.NonzeroDiagonalEntries(); std::vector diagonal_idxs = multiSet_fixed.NonzeroDiagonalEntries(); - REQUIRE(diagonal_idxs_ref == diagonal_idxs); + bool diag_idxs_pass = diagonal_idxs_ref == diagonal_idxs; + REQUIRE(diag_idxs_pass); } TEST_CASE( "Testing dimension sorting in the FixedMultiIndexSet class", "[FixedMultiIndexSetSorting]" ) { @@ -50,7 +53,7 @@ TEST_CASE( "Testing dimension sorting in the FixedMultiIndexSet class", "[FixedM nzDims(3) = 1; nzOrders(3)=2; // The 2 in [1,2] FixedMultiIndexSet mset(dim, nzStarts, nzDims, nzOrders); - + CHECK(mset.nzDims(3)>mset.nzDims(2)); CHECK(mset.nzOrders(3)==2); CHECK(mset.nzOrders(2)==1); @@ -61,12 +64,12 @@ TEST_CASE( "Testing dimension sorting in the FixedMultiIndexSet class", "[FixedM nzDims(1) = 1; nzOrders(1)=1; // [0,1] nzDims(2) = 1; nzOrders(2)=2; // The 2 in [1,2] nzDims(3) = 0; nzOrders(3)=1; // The 1 in [1,2]. Internal to the FixedMultiIndexSet, this should come before 2. - + FixedMultiIndexSet mset2(dim, nzStarts, nzDims, nzOrders); - CHECK(mset.nzDims(3)>mset.nzDims(2)); - CHECK(mset.nzOrders(3)==2); - CHECK(mset.nzOrders(2)==1); + CHECK(mset2.nzDims(3)>mset2.nzDims(2)); + CHECK(mset2.nzOrders(3)==2); + CHECK(mset2.nzOrders(2)==1); } @@ -111,7 +114,7 @@ TEST_CASE( "Testing the FixedMultiIndexSet class with anisotropic degrees", "[An } -#if defined(KOKKOS_ENABLE_CUDA ) || defined(KOKKOS_ENABLE_SYCL) +#if defined( MPART_ENABLE_GPU) TEST_CASE( "Testing the FixedMultiIndexSet class copy to device", "[FixedMultiIndexSet]" ) { @@ -120,7 +123,7 @@ TEST_CASE( "Testing the FixedMultiIndexSet class copy to device", "[FixedMultiIn FixedMultiIndexSet mset(dim,maxOrder); - FixedMultiIndexSet deviceSet = mset.ToDevice(); + FixedMultiIndexSet deviceSet = mset.ToDevice(); } #endif @@ -506,8 +509,10 @@ TEST_CASE("Testing the MultiIndexSet class", "[MultiIndexSet]" ) { std::sort(inds.begin(), inds.end()); std::sort(inds_fixed.begin(), inds_fixed.end()); std::sort(inds_fixed2.begin(), inds_fixed2.end()); - REQUIRE( inds == inds_fixed ); - REQUIRE( inds == inds_fixed2 ); + bool inds_same_fixed = inds == inds_fixed; + REQUIRE( inds_same_fixed ); + bool inds_same_fixed2 = inds == inds_fixed2; + REQUIRE( inds_same_fixed2 ); MultiIndexSet full_set = MultiIndexSet::CreateTotalOrder(dim, maxOrder, MultiIndexLimiter::NonzeroDiag()); inds = full_set.NonzeroDiagonalEntries(); REQUIRE( inds.size() == full_set.Size() ); diff --git a/tests/Test_BasisEvaluator.cpp b/tests/Test_BasisEvaluator.cpp index 77226fea..ac7daed1 100644 --- a/tests/Test_BasisEvaluator.cpp +++ b/tests/Test_BasisEvaluator.cpp @@ -5,13 +5,13 @@ using namespace mpart; using namespace Catch; struct TestEvaluators { - virtual void EvaluateAll(double* output, int max_order, double input) const { + KOKKOS_INLINE_FUNCTION virtual void EvaluateAll(double* output, int max_order, double input) const { assert(false); }; - virtual void EvaluateDerivatives(double* output, double* output_diff, int max_order, double input) const { + KOKKOS_INLINE_FUNCTION virtual void EvaluateDerivatives(double* output, double* output_diff, int max_order, double input) const { assert(false); }; - virtual void EvaluateSecondDerivatives(double* output, double* output_diff, double* output_diff2, int max_order, double input) const { + KOKKOS_INLINE_FUNCTION virtual void EvaluateSecondDerivatives(double* output, double* output_diff, double* output_diff2, int max_order, double input) const { assert(false); }; virtual ~TestEvaluators() = default; @@ -145,4 +145,4 @@ TEST_CASE( "Testing basis evaluators", "[BasisEvaluators]") { } } } -} \ No newline at end of file +} diff --git a/tests/Test_InnerMarginalAffineMap.cpp b/tests/Test_InnerMarginalAffineMap.cpp index 3dfebf2f..a41b504f 100644 --- a/tests/Test_InnerMarginalAffineMap.cpp +++ b/tests/Test_InnerMarginalAffineMap.cpp @@ -225,7 +225,6 @@ TEST_CASE( "Test InnerMarginalAffineMap", "[InnerMarginalAffineMap]") { SECTION("MoveCoeffs") { unsigned int maxOrder = 3; unsigned int outputDim = 2; - unsigned int numPts = 20; auto trimap = MapFactory::CreateTriangular(inputDim, outputDim, maxOrder, MapOptions()); // Penalize high order k diff --git a/tests/Test_LinearAlgebra.cpp b/tests/Test_LinearAlgebra.cpp index d51ded4f..effbd00d 100644 --- a/tests/Test_LinearAlgebra.cpp +++ b/tests/Test_LinearAlgebra.cpp @@ -150,7 +150,7 @@ TEST_CASE( "Testing LU Factorization", "LinearAlgebra_LU" ) { B(1,0) = 2.; B(1,1) = 5.; B(2,0) = 3.; B(2,1) = 6.; - auto constA = ToConstKokkos(A.data(), 3, 3); + Kokkos::View constA = A; auto eigA = ConstKokkosToMat(constA); auto eigB = KokkosToMat(B); unsigned int nrows = eigA.rows(); @@ -197,7 +197,7 @@ TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) { B(1,0) = 2.; B(1,1) = 5.; B(2,0) = 3.; B(2,1) = 6.; - auto constA = ToConstKokkos(A.data(), 3, 3); + Kokkos::View constA = A; auto eigA = ConstKokkosToMat(constA); auto eigB = KokkosToMat(B); unsigned int nrows = eigA.rows(); @@ -364,7 +364,7 @@ TEST_CASE( "Testing LU Factorization on Device", "LinearAlgebra_LUDevice" ) { B(1,0) = 2.; B(1,1) = 5.; B(2,0) = 3.; B(2,1) = 6.; - auto constA = ToConstKokkos(A.data(), 3, 3); + Kokkos::View constA = A; auto constA_d = ToDevice(constA); auto B_d = ToDevice(B); auto eigA = ConstKokkosToMat(constA); @@ -390,7 +390,7 @@ TEST_CASE( "Testing LU Factorization on Device", "LinearAlgebra_LUDevice" ) { } SECTION("Solve LU out of place") { PartialPivLU Alu_d(constA_d); - auto C_d = Alu.solve(B_d); + auto C_d = Alu_d.solve(B_d); auto C_h = ToHost(C_d); for(unsigned int j=0; j Alu_d(constA_d); + PartialPivLU Alu_d(constA_d); CHECK(Alu_d.determinant() == Approx(eigA.determinant()).epsilon(1e-14).margin(1e-14)); } } -TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) { +TEST_CASE( "Testing Cholesky Factorization on Device", "LinearAlgebra_Cholesky_Deivice" ) { Kokkos::View A("A", 3, 3); Kokkos::View B_h("B", 3, 2); A(0,0) = 36.; A(0,1) = 18.; A(0,2) = 6.; @@ -415,21 +415,21 @@ TEST_CASE( "Testing Cholesky Factorization", "LinearAlgebra_Cholesky" ) { B_h(1,0) = 2.; B_h(1,1) = 5.; B_h(2,0) = 3.; B_h(2,1) = 6.; - auto constA_h = ToConstKokkos(A.data(), 3, 3); + Kokkos::View constA_h = A; auto constA_d = ToDevice(constA_h); - auto B_d = ToDevice(B); + auto B_d = ToDevice(B_h); auto eigA = ConstKokkosToMat(constA_h); - auto eigB = KokkosToMat(B); + auto eigB = KokkosToMat(B_h); Eigen::MatrixXd eigX = eigA.llt().solve(eigB); Eigen::MatrixXd eigY = eigA.llt().matrixL().solve(eigB); - SECTION("Compute Cholesky") { - Cholesky Achol_d(constA_d); + SECTION("Compute Cholesky on Device") { + Cholesky Achol_d(constA_d); } - SECTION("Solve Cholesky inplace") { + SECTION("Solve Cholesky inplace on Device") { Kokkos::View C_d("C", 3, 2); Kokkos::deep_copy(C_d, B_d); - Cholesky Achol_d(constA_d); + Cholesky Achol_d(constA_d); Achol_d.solveInPlace(C_d); auto C_h = ToHost(C_d); for(unsigned int j=0; j C_d("C", 3, 2); Kokkos::deep_copy(C_d, B_d); - Cholesky Achol_d(constA_d); + Cholesky Achol_d(constA_d); Achol_d.solveInPlaceL(C_d); auto C_h = ToHost(C_d); for(unsigned int j=0; j Achol_d(constA_d); - auto C_d = Achol.solve(B_d); + SECTION("Solve Cholesky out of place on Device") { + Cholesky Achol_d(constA_d); + auto C_d = Achol_d.solve(B_d); auto C_h = ToHost(C_d); for(unsigned int j=0; j Achol_d(constA_d); + SECTION("Compute Determinant on Device") { + Cholesky Achol_d(constA_d); CHECK(Achol_d.determinant() == Approx(eigA.determinant()).epsilon(1e-14).margin(1e-14)); } } -#endif \ No newline at end of file +#endif diff --git a/tests/Test_MapFactory.cpp b/tests/Test_MapFactory.cpp index 02bd7bfc..33a16387 100644 --- a/tests/Test_MapFactory.cpp +++ b/tests/Test_MapFactory.cpp @@ -5,6 +5,7 @@ #include "MParT/TriangularMap.h" #include "MParT/MultiIndices/MultiIndexSet.h" #include "MParT/MultiIndices/FixedMultiIndexSet.h" +#include "MParT/Utilities/KokkosSpaceMappings.h" #include #include @@ -225,7 +226,7 @@ TEST_CASE( "Testing factory method for Sigmoid Component", "[MapFactorySigmoidCo REQUIRE(func != nullptr); unsigned int numPts = 100; Kokkos::View pts("Points", inputDim, numPts); - Kokkos::MDRangePolicy> policy({0,0}, {inputDim, numPts}); + Kokkos::MDRangePolicy, typename MemoryToExecution::Space> policy({0,0}, {inputDim, numPts}); Kokkos::parallel_for("Fill points", policy, KOKKOS_LAMBDA(const int i, const int j){ pts(i,j) = double(j*i)/double((inputDim)*(numPts-1)); }); @@ -237,7 +238,8 @@ TEST_CASE( "Testing factory method for Sigmoid Component", "[MapFactorySigmoidCo CHECK(eval.extent(0)==1); SECTION("Check Gradient") { Kokkos::View sens ( "Sensitivities", 1, numPts); - Kokkos::parallel_for("fill sensitivities", numPts, KOKKOS_LAMBDA(const int i){ + Kokkos::RangePolicy::Space> policy_pts {0, numPts}; + Kokkos::parallel_for("fill sensitivities", policy_pts, KOKKOS_LAMBDA(const int i){ sens(0,i) = 1.0; }); Kokkos::View grad = func->Gradient(pts, sens); @@ -245,7 +247,7 @@ TEST_CASE( "Testing factory method for Sigmoid Component", "[MapFactorySigmoidCo CHECK(grad.extent(1)==numPts); double fd_step = 1e-6; for(int i = 0; i < inputDim; i++){ - Kokkos::parallel_for(numPts, KOKKOS_LAMBDA(const int j){ + Kokkos::parallel_for(policy_pts, KOKKOS_LAMBDA(const int j){ if(i > 0) pts(i-1,j) -= fd_step; pts(i,j) += fd_step; }); diff --git a/tests/Test_MapObjective.cpp b/tests/Test_MapObjective.cpp index e878c5d8..a89f61ac 100644 --- a/tests/Test_MapObjective.cpp +++ b/tests/Test_MapObjective.cpp @@ -46,9 +46,7 @@ TEST_CASE( "Test KLMapObjective", "[KLMapObjective]") { const double coeff_def = 1.; auto map = MapFactory::CreateTriangular(dim, dim, 2); - Kokkos::parallel_for("Fill coeffs", map->numCoeffs, KOKKOS_LAMBDA(const unsigned int i){ - map->Coeffs()(i) = coeff_def; - }); + Kokkos::deep_copy(map->Coeffs(), coeff_def); SECTION("CoeffGradImpl"){ double fd_step = 1e-6; diff --git a/tests/Test_MonotoneComponent.cpp b/tests/Test_MonotoneComponent.cpp index 4cd31f77..87700998 100644 --- a/tests/Test_MonotoneComponent.cpp +++ b/tests/Test_MonotoneComponent.cpp @@ -263,12 +263,11 @@ TEST_CASE( "MonotoneIntegrand2d", "[MonotoneIntegrand2d]") { } SECTION("Integrand Input Gradient") { - double fdStep = 1e-5; MonotoneIntegrand>, Exp, Kokkos::View,Kokkos::View> integrand(&cache[0], expansion, pt, coeffs, DerivativeFlags::Input, 0.0); // Evaluate the expansion - double df, d2f; + double df; Kokkos::View fval("Integrand", dim+1); Kokkos::View coeffGrad("Coefficient Gradient", dim); @@ -872,7 +871,8 @@ TEST_CASE("Testing MonotoneComponent CoeffGrad and LogDeterminantCoeffGrad", "[M SECTION("DiagonalCoeffIndices") { std::vector indices = comp.DiagonalCoeffIndices(); std::vector indices_ref = expansion.NonzeroDiagonalEntries(); - REQUIRE(indices == indices_ref); + bool same_indices = indices == indices_ref; + REQUIRE(same_indices); } } @@ -911,7 +911,8 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") { SECTION("Integrand Only") { Kokkos::View dres("result", 1); - Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){ + Kokkos::RangePolicy::Space> policy(0,1); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){ MonotoneIntegrand integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::None, 0.0); integrand(0.5, &dres(0)); }); @@ -924,7 +925,8 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") { SECTION("Integrand Derivative") { Kokkos::View dres("result", 2); - Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){ + Kokkos::RangePolicy::Space> policy(0,1); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){ MonotoneIntegrand integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::Diagonal, 0.0); integrand(0.5, dres.data()); }); @@ -940,7 +942,8 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") { Kokkos::View dres_fd("result_fd", hset.Size()); Kokkos::View testVal("integrand", 1+hset.Size()); - Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){ + Kokkos::RangePolicy::Space> policy(0,1); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){ MonotoneIntegrand integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::Parameters, 0.0); MonotoneIntegrand integrand2(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::None, 0.0); @@ -974,8 +977,9 @@ TEST_CASE( "MonotoneIntegrand1d on device", "[MonotoneIntegrandDevice]") { Kokkos::View dres_fd("result_fd", hset.Size()); Kokkos::View testVal("integrand", 1+hset.Size()); Kokkos::View workspace("workspace", hset.Size()); - - Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){ + + Kokkos::RangePolicy::Space> policy(0,1); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){ MonotoneIntegrand integrand(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::MixedCoeff, 0.0, workspace); MonotoneIntegrand integrand2(dcache.data(), expansion, dpt, dcoeffs, DerivativeFlags::Diagonal, 0.0); @@ -1042,7 +1046,8 @@ TEST_CASE( "Testing MonotoneComponent::EvaluateSingle on Device", "[MonotoneComp Kokkos::View dres("Device Evaluation", 1); // Run the fill cache funciton, using a parallel_for loop to ensure it's run on the device - Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i){ + Kokkos::RangePolicy::Space> policy(0,1); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){ dexpansion.FillCache1(dcache.data(), dpt, DerivativeFlags::None); dres(0) = MonotoneComponent::EvaluateSingle(dcache.data(), workspace.data(), dpt, dpt(dim-1), dcoeffs, quad, dexpansion); }); @@ -1145,4 +1150,4 @@ TEST_CASE( "Testing 1d monotone component evaluation on device", "[MonotoneCompo } -#endif \ No newline at end of file +#endif diff --git a/tests/Test_MultivariateExpansion.cpp b/tests/Test_MultivariateExpansion.cpp index b4856493..04ed1da9 100644 --- a/tests/Test_MultivariateExpansion.cpp +++ b/tests/Test_MultivariateExpansion.cpp @@ -24,7 +24,8 @@ TEST_CASE( "Testing multivariate expansion", "[MultivariateExpansion]") { CHECK(func.numCoeffs == (mset.Size()*outDim)); std::vector nonzeroDiagTerms_ref = mset.NonzeroDiagonalEntries(); std::vector diagCoeffs = func.DiagonalCoeffIndices(); - REQUIRE(nonzeroDiagTerms_ref == diagCoeffs); + + REQUIRE((nonzeroDiagTerms_ref == diagCoeffs)); Kokkos::View coeffs("coefficients", func.numCoeffs); for(unsigned int i=0; i::Space> policy(0,1); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i){ dexpansion.FillCache1(dcache.data(), dpt, DerivativeFlags::None); dexpansion.FillCache2(dcache.data(), dpt, 0.5 * dpt(dim-1), DerivativeFlags::None); }); @@ -266,4 +267,4 @@ TEST_CASE( "Testing multivariate expansion on device", "[MultivariateExpansionWo } } -#endif \ No newline at end of file +#endif diff --git a/tests/Test_OrthogonalPolynomials.cpp b/tests/Test_OrthogonalPolynomials.cpp index 10a2d32b..2e8d7e73 100644 --- a/tests/Test_OrthogonalPolynomials.cpp +++ b/tests/Test_OrthogonalPolynomials.cpp @@ -251,9 +251,9 @@ TEST_CASE( "Device Hermite polynomial evaluation", "[PhysicistHermiteDevice]" ) auto xs_device = ToDevice(xs_host); Kokkos::View ys_device("evals", xs.size()); - + Kokkos::RangePolicy policy(0,xs.size()); for(unsigned int p=0; p<10; ++p){ - Kokkos::parallel_for(xs.size(), KOKKOS_LAMBDA(const size_t ind){ + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const size_t ind){ ys_device(ind) = poly.Evaluate(p, xs_device(ind)); }); @@ -267,4 +267,4 @@ TEST_CASE( "Device Hermite polynomial evaluation", "[PhysicistHermiteDevice]" ) } -#endif \ No newline at end of file +#endif diff --git a/tests/Test_PositiveBijectors.cpp b/tests/Test_PositiveBijectors.cpp index 10f1b372..ed535398 100644 --- a/tests/Test_PositiveBijectors.cpp +++ b/tests/Test_PositiveBijectors.cpp @@ -4,6 +4,7 @@ #include "MParT/PositiveBijectors.h" #include "MParT/Utilities/ArrayConversions.h" +#include "MParT/Utilities/KokkosSpaceMappings.h" using namespace mpart; using namespace Catch; @@ -50,10 +51,11 @@ TEST_CASE( "Testing soft plus function on device.", "[SofPlusDevice]" ) { auto xs_device = ToDevice(xs_host); - Kokkos::View ys_device("ys_device", xs_host.extent(0)); - Kokkos::View deriv_device("deriv_device", xs_host.extent(0)); - - Kokkos::parallel_for(xs_host.size(), KOKKOS_LAMBDA(const size_t ind){ + unsigned int N_p = xs_host.extent(0); + Kokkos::View ys_device("ys_device", N_p); + Kokkos::View deriv_device("deriv_device", N_p); + Kokkos::RangePolicy::Space> policy {0u, N_p}; + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const size_t ind){ ys_device(ind) = SoftPlus::Evaluate(xs_device(ind)); deriv_device(ind) = SoftPlus::Derivative(xs_device(ind)); }); diff --git a/tests/Test_Quadrature.cpp b/tests/Test_Quadrature.cpp index a27473c2..2f5d0265 100644 --- a/tests/Test_Quadrature.cpp +++ b/tests/Test_Quadrature.cpp @@ -333,7 +333,6 @@ TEST_CASE( "Testing CC Quadrature on device", "[ClenshawCurtisDevice]" ) { unsigned int numRepeats = 320; // Set tolerance for tests - double testTol = 1e-8; double lb = 0; double ub = 1.0; @@ -383,7 +382,6 @@ TEST_CASE( "Testing Adaptive Simpson Quadrature on device", "[AdaptiveSimpsonDev double absTol = 1e-6; // Set tolerance for tests - double testTol = 1e-4; double lb = 0; double ub = 1.0; @@ -432,7 +430,6 @@ TEST_CASE( "Testing Adaptive Clenshaw Curtis on device", "[AdaptiveCCDevice]" ) unsigned int order = 8; // Set tolerance for tests - double testTol = 1e-4; double lb = 0; double ub = 1.0; diff --git a/tests/Test_RectifiedMultivariateExpansion.cpp b/tests/Test_RectifiedMultivariateExpansion.cpp index ec1651d6..a045e008 100644 --- a/tests/Test_RectifiedMultivariateExpansion.cpp +++ b/tests/Test_RectifiedMultivariateExpansion.cpp @@ -288,7 +288,6 @@ TEMPLATE_TEST_CASE("Multiple Sigmoid RectifiedMultivariateExpansion","[multi_sig // Set up sigmoids const int num_sigmoids = 3; - const int order = num_sigmoids+1+2; const int param_length = 2 + num_sigmoids*(num_sigmoids+1)/2; Kokkos::View center("Sigmoid Center", param_length); Kokkos::View width("Sigmoid Width", param_length); diff --git a/tests/Test_RootFinding.cpp b/tests/Test_RootFinding.cpp index e9a68cb1..c811da6d 100644 --- a/tests/Test_RootFinding.cpp +++ b/tests/Test_RootFinding.cpp @@ -70,7 +70,7 @@ TEST_CASE( "RootFindingUtils", "[RootFindingUtils]") { CHECK(info==0); } SECTION("Test Inverse Flat") { - double xd = 0.5, yd = 0.0; + double yd = 0.0; double x0 = 0.0, xtol = 1e-5, ftol = 1e-5; auto f = [](double x){return -1.0;}; int info = 0; diff --git a/tests/Test_Serialization.cpp b/tests/Test_Serialization.cpp index d3ad618b..26d6c5a6 100644 --- a/tests/Test_Serialization.cpp +++ b/tests/Test_Serialization.cpp @@ -289,7 +289,6 @@ TEST_CASE("Test serialization of multivariate expansion worker.", "[Serializatio TEST_CASE("Test serialization of monotone component.", "[Serialization]"){ - double testTol = 1e-7; unsigned int maxDegree = 2; unsigned int dim = 1; diff --git a/tests/Test_Sigmoid.cpp b/tests/Test_Sigmoid.cpp index 80c88866..f0beee4e 100644 --- a/tests/Test_Sigmoid.cpp +++ b/tests/Test_Sigmoid.cpp @@ -1,89 +1,116 @@ #include #include "MParT/Sigmoid.h" +#include using namespace mpart; using namespace Catch; using namespace Catch::Matchers; -using MemorySpace = Kokkos::HostSpace; void CheckNearZero(double calc, double ref, double delta=1e-12, double tol=1e-12) { if(ref == 0.) REQUIRE_THAT(calc, WithinAbs(0., tol)); else REQUIRE_THAT(calc, WithinRel(ref, delta)); } -template +template void TestSigmoidGradients(Function Sigmoid, unsigned int N_grad_points, double fd_delta) { Kokkos::View gradPts("Gradient points", N_grad_points); Kokkos::View gradPts_plus_delta("Gradient points plus delta", N_grad_points); - Kokkos::parallel_for(N_grad_points, KOKKOS_LAMBDA(unsigned int point_index) { + Kokkos::RangePolicy::Space> policy {0u, N_grad_points}; + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(unsigned int point_index) { double gradPt = 3.0*(-1.0 + 2*((double) point_index)/((double) N_grad_points-1)); gradPts(point_index) = gradPt; gradPts_plus_delta(point_index) = gradPt + fd_delta; }); int max_order = Sigmoid.GetOrder(); // Create output array for each possible evaluation - double output[max_order+1]; - double output_pos_fd[max_order+1]; - double output_deriv[max_order+1]; - double output_deriv_pos_fd[max_order+1]; - double output_2deriv[max_order+1]; - double output_diff[max_order+1]; - double output_diff_pos_fd[max_order+1]; - double output_diff_2deriv[max_order+1]; - double output_diff2[max_order+1]; + Kokkos::View output_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_pos_fd_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_deriv_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_deriv_pos_fd_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_2deriv_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_diff_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_diff_pos_fd_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_diff_2deriv_d ("Output", N_grad_points*(max_order+1)); + Kokkos::View output_diff2_d ("Output", N_grad_points*(max_order+1)); + Kokkos::parallel_for(policy, KOKKOS_LAMBDA(unsigned int i) { + unsigned int offset = i*(max_order+1); + Sigmoid.EvaluateAll(output_d.data() + offset, max_order, gradPts(i)); + Sigmoid.EvaluateAll(output_pos_fd_d.data() + offset, max_order, gradPts_plus_delta(i)); + Sigmoid.EvaluateDerivatives(output_deriv_d.data() + offset, output_diff_d.data() + offset, max_order, gradPts(i)); + Sigmoid.EvaluateDerivatives(output_deriv_pos_fd_d.data() + offset, output_diff_pos_fd_d.data() + offset, max_order, gradPts_plus_delta(i)); + Sigmoid.EvaluateSecondDerivatives(output_2deriv_d.data() + offset, output_diff_2deriv_d.data() + offset, output_diff2_d.data() + offset, max_order, gradPts(i)); + }); + Kokkos::fence(); + Kokkos::View output = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_d); + Kokkos::View output_pos_fd = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_pos_fd_d); + Kokkos::View output_deriv = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_deriv_d); + Kokkos::View output_deriv_pos_fd = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_deriv_pos_fd_d); + Kokkos::View output_2deriv = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_2deriv_d); + Kokkos::View output_diff = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff_d); + Kokkos::View output_diff_pos_fd = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff_pos_fd_d); + Kokkos::View output_diff_2deriv = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff_2deriv_d); + Kokkos::View output_diff2 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_diff2_d); + for(int i = 0; i < N_grad_points; i++) { - Sigmoid.EvaluateAll(output, max_order, gradPts(i)); - Sigmoid.EvaluateAll(output_pos_fd, max_order, gradPts_plus_delta(i)); - Sigmoid.EvaluateDerivatives(output_deriv, output_diff, max_order, gradPts(i)); - Sigmoid.EvaluateDerivatives(output_deriv_pos_fd, output_diff_pos_fd, max_order, gradPts_plus_delta(i)); - Sigmoid.EvaluateSecondDerivatives(output_2deriv, output_diff_2deriv, output_diff2, max_order, gradPts(i)); for(int j = 0; j <= max_order; j++) { - double fd_diff = (output_pos_fd[j]-output[j])/fd_delta; - double fd_diff2 = (output_diff_pos_fd[j] - output_diff[j])/fd_delta; - CheckNearZero(output_deriv[j], output[j], 1e-12); - CheckNearZero(output_2deriv[j], output[j], 1e-12); - CheckNearZero(output_deriv_pos_fd[j], output_pos_fd[j], 1e-12); - CheckNearZero(output_diff[j], fd_diff, sqrt(fd_delta)); - CheckNearZero(output_diff_2deriv[j], fd_diff, sqrt(fd_delta)); - CheckNearZero(output_diff2[j], fd_diff2, sqrt(fd_delta)); + double fd_diff = (output_pos_fd(j)-output(j))/fd_delta; + double fd_diff2 = (output_diff_pos_fd(j) - output_diff(j))/fd_delta; + CheckNearZero(output_deriv(j), output(j), 1e-12); + CheckNearZero(output_2deriv(j), output(j), 1e-12); + CheckNearZero(output_deriv_pos_fd(j), output_pos_fd(j), 1e-12); + CheckNearZero(output_diff(j), fd_diff, sqrt(fd_delta)); + CheckNearZero(output_diff_2deriv(j), fd_diff, sqrt(fd_delta)); + CheckNearZero(output_diff2(j), fd_diff2, sqrt(fd_delta)); } } } -TEMPLATE_TEST_CASE("Sigmoid1d","[sigmoid1d]", SigmoidTypeSpace::Logistic) { +using TestType1 = std::pair< SigmoidTypeSpace::Logistic, Kokkos::HostSpace>; + +#if defined(MPART_ENABLE_GPU) +using TestType2 = std::pair< SigmoidTypeSpace::Logistic, DeviceSpace>; +#else +using TestType2 = std::pair< SigmoidTypeSpace::Logistic, std::false_type>; +#endif + +TEMPLATE_TEST_CASE("Sigmoid1d","[sigmoid1d]", TestType1, TestType2) { +if (!std::is_same_v) { // Don't worry about testing the host twice + + using SigmoidType = typename TestType::first_type; + using MemorySpace = std::conditional_t, Kokkos::HostSpace, typename TestType::second_type>; + using ExecutionSpace = typename MemoryToExecution::Space; SECTION("Initialization") { Kokkos::View centers("Sigmoid Centers", 2); Kokkos::View widths("Sigmoid Widths", 3); Kokkos::View weights("Sigmoid weights", 2); - CHECK_THROWS_AS((Sigmoid1d(centers, widths, weights)), std::invalid_argument); - CHECK_THROWS_AS((Sigmoid1d(centers, widths)), std::invalid_argument); + CHECK_THROWS_AS((Sigmoid1d(centers, widths, weights)), std::invalid_argument); + CHECK_THROWS_AS((Sigmoid1d(centers, widths)), std::invalid_argument); int N_wrong = 2+1+2+3+5; centers = Kokkos::View("Sigmoid Centers", N_wrong); widths = Kokkos::View("Sigmoid widths", N_wrong); weights = Kokkos::View("Sigmoid weights", N_wrong); - CHECK_THROWS_AS((Sigmoid1d(centers, widths, weights)), std::invalid_argument); - CHECK_THROWS_AS((Sigmoid1d(centers, widths)), std::invalid_argument); + CHECK_THROWS_AS((Sigmoid1d(centers, widths, weights)), std::invalid_argument); + CHECK_THROWS_AS((Sigmoid1d(centers, widths)), std::invalid_argument); int N_wrong_arr[4] = {0, 1, 2+(1+3)}; for(int N_wrong : N_wrong_arr) { centers = Kokkos::View("Sigmoid Centers", N_wrong); widths = Kokkos::View("Sigmoid widths", N_wrong); weights = Kokkos::View("Sigmoid weights", N_wrong); - CHECK_THROWS_AS((Sigmoid1d(centers, widths, weights)), std::invalid_argument); - CHECK_THROWS_AS((Sigmoid1d(centers, widths)), std::invalid_argument); + CHECK_THROWS_AS((Sigmoid1d(centers, widths, weights)), std::invalid_argument); + CHECK_THROWS_AS((Sigmoid1d(centers, widths)), std::invalid_argument); } centers = Kokkos::View("Sigmoid Centers", 2); widths = Kokkos::View("Sigmoid widths", 2); - Sigmoid1d Sigmoid = Sigmoid1d(centers, widths); + Sigmoid1d Sigmoid {centers, widths}; CHECK(Sigmoid.GetOrder() == 1 + 2); // Affine + 2 edge terms centers = Kokkos::View("Sigmoid Centers", 3); widths = Kokkos::View("Sigmoid widths", 3); - Sigmoid = Sigmoid1d(centers, widths); + Sigmoid = Sigmoid1d(centers, widths); CHECK(Sigmoid.GetOrder() == 1 + 2 + 1); // Affine + 2 edge terms + 1 sigmoid } + Kokkos::fence(); - unsigned int N_grad_points = 100; - double fd_delta = 1e-6; const double support_bound = 100.; SECTION("Single Sigmoid") { @@ -93,98 +120,131 @@ TEMPLATE_TEST_CASE("Sigmoid1d","[sigmoid1d]", SigmoidTypeSpace::Logistic) { Kokkos::View center("Sigmoid Center", param_length); Kokkos::View width("Sigmoid Width", param_length); Kokkos::View weight("Sigmoid Weight", param_length); - for(int i = 0; i < param_length; i++) { - center(i) = 0; width(i) = 1; weight(i) = 1.; - } - Sigmoid1d Sigmoid (center, width, weight); + Kokkos::deep_copy(center, 0.); + Kokkos::deep_copy(width, 1.); + Kokkos::deep_copy(weight, 1.); + + Sigmoid1d Sigmoid (center, width, weight); // Ensure the sigmoid f is monotone over a grid, f(-infty) = 0, f(+infty) = 1, f(0) = 0.5 const int num_pts_grid = 100; const double grid_bdry = 5.; - double eval_pts[num_pts_grid+3]; - eval_pts[0] = -support_bound; eval_pts[1] = 0.; eval_pts[2] = support_bound; - for(int p = 0; p < num_pts_grid; p++) eval_pts[p+3] = -grid_bdry + p*2*grid_bdry/(num_pts_grid-1); + + Kokkos::View eval_pts_d {"Grid points device", num_pts_grid+3}; + Kokkos::RangePolicy pts_policy {0, num_pts_grid}; + Kokkos::parallel_for(pts_policy, KOKKOS_LAMBDA(unsigned int p){ + if(p == 0) { + eval_pts_d(0) = -support_bound; + eval_pts_d(1) = 0.; + eval_pts_d(2) = support_bound; + } + eval_pts_d(p+3) = -grid_bdry + p*2*grid_bdry/(num_pts_grid-1); + }); + auto eval_pts = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), eval_pts_d); + Kokkos::fence(); + double expect_output[(order+1)*3] = { - 1., eval_pts[0], eval_pts[0], 0., 0., - 1., eval_pts[1], -std::log(2), std::log(2), 0.5, - 1., eval_pts[2], 0., eval_pts[2], 1.}; - double output[(order+1)*(num_pts_grid+3)]; - + 1., -support_bound, -support_bound, 0., 0.0, + 1., 0., -std::log(2), std::log(2), 0.5, + 1., support_bound, 0., support_bound, 1.0}; + + Kokkos::View output_d {"Output storage device", (order+1)*(num_pts_grid+3)}; + Kokkos::RangePolicy eval_policy {0, num_pts_grid+3}; + Kokkos::parallel_for(eval_policy, KOKKOS_LAMBDA(unsigned int j){ + Sigmoid.EvaluateAll(output_d.data() + j*(order+1), order, eval_pts_d(j)); + }); + Kokkos::fence(); + Kokkos::View output = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_d); + int j = 0; for(; j < 3; j++) { - Sigmoid.EvaluateAll(output+j*(order+1), order, eval_pts[j]); for(int i = 0; i < (order+1); i++) { int idx = j*(order+1)+i; - REQUIRE_THAT(output[idx], WithinRel(expect_output[idx], 1e-12)); + REQUIRE_THAT(output(idx), WithinRel(expect_output[idx], 1e-12)); } } + double prev = 0.; for(; j < num_pts_grid + 3; j++) { - Sigmoid.EvaluateAll(output+j*(order+1), order, eval_pts[j]); - CHECK(output[j*(order+1) ] == 1.); - CHECK(output[j*(order+1)+1] == eval_pts[j]); - double next = output[j*(order+1)+3]; + CHECK(output(j*(order+1) ) == 1.); + CHECK(output(j*(order+1)+1) == eval_pts(j)); + double next = output(j*(order+1)+3); CHECK(next > prev); prev = next; } - TestSigmoidGradients(Sigmoid, 100, 1e-7); + TestSigmoidGradients(Sigmoid, 100, 1e-7); } SECTION("Multiple Sigmoids") { const int num_sigmoids = 3; const int order = num_sigmoids+1+2; const int param_length = 2 + num_sigmoids*(num_sigmoids+1)/2; - Kokkos::View center("Sigmoid Center", param_length); - Kokkos::View width("Sigmoid Width", param_length); - Kokkos::View weight("Sigmoid Weight", param_length); + Kokkos::View centers("Sigmoid centers", param_length); + Kokkos::View widths("Sigmoid widths", param_length); + Kokkos::View weights("Sigmoid weights", param_length); double edge_bound = 3.; - center(0) = -edge_bound; width(0) = 2*edge_bound/10; weight(0) = 1.; - center(1) = edge_bound; width(1) = 2*edge_bound/10; weight(1) = 1.; + centers(0) = -edge_bound; widths(0) = 2*edge_bound/10; weights(0) = 1.; + centers(1) = edge_bound; widths(1) = 2*edge_bound/10; weights(1) = 1.; int param_idx = 2; for(int curr_order = 1; curr_order <= num_sigmoids; curr_order++) { for(int i = 0; i < curr_order; i++) { - center(param_idx) = 4*(-(curr_order-1)/2 + i); - width(param_idx) = 1/((double)i+1); - weight(param_idx) = 1./curr_order; + centers(param_idx) = 4*(-(curr_order-1)/2 + i); + widths(param_idx) = 1/((double)i+1); + weights(param_idx) = 1./curr_order; param_idx++; } } - Sigmoid1d Sigmoid (center, width, weight); + auto centers_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), centers); + auto widths_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), widths); + auto weights_d = Kokkos::create_mirror_view_and_copy(MemorySpace(), weights); + Sigmoid1d Sigmoid {centers_d, widths_d, weights_d}; // Ensure the sigmoid f is monotone over a grid, f(-infty) = 0, f(+infty) = 1 const int num_pts_grid = 100; const double grid_bdry = 5.; - double eval_pts[num_pts_grid]; - for(int p = 0; p < num_pts_grid; p++) { - eval_pts[p] = -grid_bdry + 2*p*grid_bdry/(num_pts_grid-1); - } - double output[order+1]; - Sigmoid.EvaluateAll(output, order, -support_bound); - CHECK(output[0] == 1.); - CHECK(output[1] == -support_bound); - CHECK(output[2] == -(support_bound-edge_bound)*width(0)); - CHECK(output[3] == 0.); - for(int i = 4; i <= order; i++) { - REQUIRE_THAT(output[i], WithinAbs(0., 1e-12)); + Kokkos::View eval_pts_d {"Grid points", num_pts_grid+2}; + Kokkos::parallel_for(Kokkos::RangePolicy(0, num_pts_grid), KOKKOS_LAMBDA(int p) { + if(p == 0) { + eval_pts_d(0) = -support_bound; + eval_pts_d(1) = support_bound; + } + eval_pts_d(p+2) = -grid_bdry + 2*p*grid_bdry/(num_pts_grid-1); + }); + auto eval_pts = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), eval_pts_d); + Kokkos::View output_d {"Output storage device", (order+1)*(num_pts_grid+2)}; + Kokkos::parallel_for(Kokkos::RangePolicy(0, num_pts_grid+2), KOKKOS_LAMBDA(int j) { + Sigmoid.EvaluateAll(output_d.data() + j*(order+1), order, eval_pts_d(j)); + }); + Kokkos::fence(); + auto output = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output_d); + + double expect_output[5*2] = { + 1., -support_bound, -(support_bound-edge_bound)*widths(0), 0.0, 0., + 1., support_bound, 0., (support_bound-edge_bound)*widths(1), 1. + }; + + for(int i = 0; i < 2; i++) { + for(int j = 0; j < 4; j++) { + CHECK(output(i*(order+1)+j) == expect_output[i*5 + j]); + } + for(int j = 4; j <= order; j++) { + REQUIRE_THAT(output(i*(order+1)+j), WithinAbs(expect_output[i*5+4], 1e-12)); + } } - Sigmoid.EvaluateAll(output, order, support_bound); - CHECK(output[0] == 1.); - CHECK(output[1] == support_bound); - CHECK(output[2] == 0.); - CHECK(output[3] == (support_bound-edge_bound)*width(1)); - for(int i = 4; i <= order; i++) REQUIRE_THAT(output[i], WithinAbs(1., 1e-12)); + unsigned int output_start_idx = 2*(order+1); double prev[order+1] = {0.}; // set prev for left edge term to negative infty prev[2] = -2*support_bound; for(int j = 0; j < num_pts_grid; j++) { - Sigmoid.EvaluateAll(output, order, eval_pts[j]); - CHECK(output[0] == 1.); - CHECK(output[1] == eval_pts[j]); - for(int curr_sigmoid = 2; curr_sigmoid <= order; curr_sigmoid++) { - CHECK(output[curr_sigmoid] > prev[curr_sigmoid]); - prev[curr_sigmoid] = output[curr_sigmoid]; + CHECK(output(output_start_idx + 0) == 1.); // Constant + CHECK(output(output_start_idx + 1) == eval_pts(j+2)); // Linear + for(int curr_sigmoid = 2; curr_sigmoid <= order; curr_sigmoid++) { // edge+sigmoids + double out_val = output(output_start_idx + curr_sigmoid); + CHECK(out_val > prev[curr_sigmoid]); + prev[curr_sigmoid] = out_val; } + output_start_idx += order+1; } - TestSigmoidGradients(Sigmoid, 100, 1e-7); + TestSigmoidGradients(Sigmoid, 100, 1e-7); } -} \ No newline at end of file +}} \ No newline at end of file diff --git a/tests/Test_SummarizedMap.cpp b/tests/Test_SummarizedMap.cpp index bd5e111d..17e963ec 100644 --- a/tests/Test_SummarizedMap.cpp +++ b/tests/Test_SummarizedMap.cpp @@ -41,7 +41,6 @@ TEST_CASE( "SummarizedMap", "[SummarizedMap_MonotoneComponent]" ) { sumMap->SetCoeffs(coeffs); // Now make sure that the coefficients of each block were set - unsigned int cumCoeffInd = 0; for(unsigned int i=0; inumCoeffs; ++i){ CHECK(sumMap->Coeffs()(i) == 0.1*(i+1)); // Values of coefficients should be correct CHECK(sumMap->Coeffs()(i) == comp->Coeffs()(i)); // Values of coefficients should be equal to those of comp diff --git a/tests/Test_TrainMap.cpp b/tests/Test_TrainMap.cpp index a175a745..57e9c142 100644 --- a/tests/Test_TrainMap.cpp +++ b/tests/Test_TrainMap.cpp @@ -3,6 +3,7 @@ #include "MParT/MapFactory.h" #include "MParT/MapObjective.h" #include "MParT/TrainMap.h" +#include "MParT/Utilities/KokkosSpaceMappings.h" #include "MParT/Distributions/GaussianSamplerDensity.h" // For testing the normality of the pushforward @@ -11,17 +12,19 @@ using namespace mpart; using namespace Catch; +using MemorySpace = Kokkos::HostSpace; + TEST_CASE("Test_TrainMap", "[TrainMap]") { unsigned int seed = 42; unsigned int dim = 2; unsigned int numPts = 5000; unsigned int testPts = numPts / 5; - auto sampler = std::make_shared>(3); + auto sampler = std::make_shared>(3); sampler->SetSeed(seed); auto samples = sampler->Sample(numPts); Kokkos::View targetSamples("targetSamples", 3, numPts); - double max = 0; - Kokkos::parallel_for("Banana", numPts, KOKKOS_LAMBDA(const unsigned int i) { + Kokkos::RangePolicy::Space> policy {0u, numPts}; + Kokkos::parallel_for("Banana", policy, KOKKOS_LAMBDA(const unsigned int i) { targetSamples(0,i) = samples(0,i); targetSamples(1,i) = samples(1,i); targetSamples(2,i) = samples(2,i) + samples(1,i)*samples(1,i); diff --git a/tests/Test_TrainMapAdaptive.cpp b/tests/Test_TrainMapAdaptive.cpp index b3c54acd..f3a75867 100644 --- a/tests/Test_TrainMapAdaptive.cpp +++ b/tests/Test_TrainMapAdaptive.cpp @@ -10,6 +10,7 @@ using namespace Catch; #include "MParT/Utilities/LinearAlgebra.h" #include "Distributions/Test_Distributions_Common.h" +using MemorySpace = Kokkos::HostSpace; void NormalizeSamples(StridedMatrix mat) { using MemorySpace = Kokkos::HostSpace; @@ -154,7 +155,8 @@ TEST_CASE("Adaptive Transport Map","[ATM]") { // } SECTION("TraditionalBananaOneComp") { Kokkos::View targetSamples("targetSamples", 2, numPts); - Kokkos::parallel_for("Intializing targetSamples", numPts, KOKKOS_LAMBDA(const unsigned int i){ + Kokkos::RangePolicy::Space> policy {0u, numPts}; + Kokkos::parallel_for("Intializing targetSamples", policy, KOKKOS_LAMBDA(const unsigned int i){ targetSamples(0,i) = samples(0,i); targetSamples(1,i) = samples(1,i) + samples(0,i)*samples(0,i); }); diff --git a/tests/Test_TriangularMap.cpp b/tests/Test_TriangularMap.cpp index 2e2b6940..3bb2c18d 100644 --- a/tests/Test_TriangularMap.cpp +++ b/tests/Test_TriangularMap.cpp @@ -583,7 +583,6 @@ TEST_CASE( "Testing TriangularMap made using CreateSingleEntryMap", "[Triangular triMap->SetCoeffs(coeffs); // Now make sure that the coefficients of each block were set - unsigned int cumCoeffInd = 0; for(unsigned int i=0; inumCoeffs; ++i){ CHECK(comp->Coeffs()(i) == triMap->Coeffs()(i)); // Values of coefficients should be equal CHECK(&comp->Coeffs()(i) == &triMap->Coeffs()(i)); // Memory location should also be the same (no copy) @@ -604,9 +603,6 @@ TEST_CASE( "Testing TriangularMap made using CreateSingleEntryMap", "[Triangular SECTION("Evaluation"){ - unsigned int start = 0; - - auto inTop = Kokkos::subview(in, std::make_pair(0, int(activeInd-1)), Kokkos::ALL()); auto inTopAndMid = Kokkos::subview(in, std::make_pair(0, int(activeInd)), Kokkos::ALL()); auto inBot = Kokkos::subview(in, std::make_pair(int(activeInd), int(dim)), Kokkos::ALL()); diff --git a/tests/Test_UnivariateExpansion.cpp b/tests/Test_UnivariateExpansion.cpp index 6e401632..f3772352 100644 --- a/tests/Test_UnivariateExpansion.cpp +++ b/tests/Test_UnivariateExpansion.cpp @@ -88,7 +88,6 @@ TEST_CASE("UnivariateExpansion") { StridedMatrix logDetGrad = expansion.LogDeterminantInputGrad(points); REQUIRE(logDetGrad.extent(0) == 1); REQUIRE(logDetGrad.extent(1) == numPts); - unsigned int counter = 0; for(int i = 0; i < numPts; i++) { double df = 0.; double d2f = 0.;