introduced RegularizedSVD, fPCA takes a generic solver (the previous …

…implementation is equivalent to discharge the solution on one of the RegularizedSVD solvers), PowerIteration no more stores a pointer to the model (was useless), better model and regression type erasure, use standardized RegressionViews in gcv and edfs strategies, updated tests
fdaPDE · Jan 28, 2024 · b2e51cb · b2e51cb
1 parent aa697d2
commit b2e51cb
Show file tree

Hide file tree

Showing 13 changed files with 364 additions and 382 deletions.
diff --git a/fdaPDE/core b/fdaPDE/core
diff --git a/fdaPDE/models/functional/center.h b/fdaPDE/models/functional/center.h
@@ -27,14 +27,23 @@ struct CenterReturnType {
     DMatrix<double> mean;     // the mean field \mu
 };
 
+  // need to handle case with missing data
+  // introudce a mean function which just produces the (smooth) mean field (no centering)
+  // then we can speak about weighted average field
+
 // functional centering of the data matrix X
 template <typename SmootherType_, typename CalibrationType_>
 CenterReturnType center(const DMatrix<double>& X, SmootherType_&& smoother, CalibrationType_&& calibration) {
     BlockFrame<double, int> df;
     df.insert<double>(OBSERVATIONS_BLK, X.colwise().sum().transpose() / X.rows());
     smoother.set_data(df);
     // set optimal lambda according to choosen calibration strategy
-    smoother.set_lambda(calibration.fit(smoother));
+    if constexpr (std::is_base_of<Eigen::MatrixBase<CalibrationType_>, CalibrationType_>::value) {   // fixed \lambda
+        fdapde_assert(calibration.cols() == 1 && calibration.rows() == SmootherType_::n_lambda);
+        smoother.set_lambda(calibration);   // center with fixed \lambda
+    } else {
+        smoother.set_lambda(calibration.fit(smoother));
+    }
     smoother.init();
     smoother.solve();
     // compute mean matrix and return

diff --git a/fdaPDE/models/functional/fpca.h b/fdaPDE/models/functional/fpca.h
@@ -27,172 +27,53 @@
 using fdapde::calibration::Calibration;
 #include "functional_base.h"
 #include "power_iteration.h"
-#include "rsvd.h"
+#include "regularized_svd.h"
 
 namespace fdapde {
 namespace models {
 
-// FPCA (Functional Principal Components Analysis) model signature
-template <typename RegularizationType, typename SolutionPolicy> class FPCA;
-
-// implementation of FPCA for sequential approach, see e.g.
-// Lila, E., Aston, J.A.D., Sangalli, L.M. (2016), Smooth Principal Component Analysis over two-dimensional manifolds
-// with an application to Neuroimaging, Annals of Applied Statistics, 10 (4), 1854-1879.
+// Functional Principal Components Analysis
 template <typename RegularizationType_>
-class FPCA<RegularizationType_, sequential> :
-    public FunctionalBase<FPCA<RegularizationType_, sequential>, RegularizationType_> {
-   public:
-    using RegularizationType = std::decay_t<RegularizationType_>;
-    using This = FPCA<RegularizationType, sequential>;
-    using Base = FunctionalBase<This, RegularizationType>;
-    IMPORT_MODEL_SYMBOLS;
-    using Base::lambda;         // vector of smoothing parameters
-    using Base::n_basis;        // number of basis functions
-    using Base::n_stat_units;   // number of statistical units
-    using Base::X;              // n_stat_units \times n_locs data matrix
-
-    // constructors
-    FPCA() = default;
-    fdapde_enable_constructor_if(is_space_only, This) FPCA(const pde_ptr& pde, Sampling s, Calibration c) :
-        Base(pde, s), calibration_(c) {};
-    fdapde_enable_constructor_if(is_space_time_separable, This)
-      FPCA(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s, Calibration c) :
-        Base(space_penalty, time_penalty, s), calibration_(c) {};
-
-    void init_model() { return; };
-    void solve() {
-        // preallocate space
-        loadings_.resize(n_basis(), n_pc_);
-        fitted_loadings_.resize(n_locs(), n_pc_);
-        scores_.resize(n_stat_units(), n_pc_);
-        DMatrix<double> X_ = X();   // copy original data to avoid side effects
-
-        // first guess of PCs set to a multivariate PCA (SVD)
-        Eigen::JacobiSVD<DMatrix<double>> svd(X_, Eigen::ComputeThinU | Eigen::ComputeThinV);
-        PowerIteration<This> solver(this, tolerance_, max_iter_);   // power iteration solver
-        solver.set_seed(seed_);
-        solver.init();
-
-        // sequential extraction of principal components
-        for (std::size_t i = 0; i < n_pc_; i++) {
-            DVector<double> f0 = svd.matrixV().col(i);
-            switch (calibration_) {
-            case Calibration::off: {
-                // find vectors s,f minimizing \norm_F{Y - s^T*f}^2 + (s^T*s)*P(f) fixed \lambda
-                solver.compute(X_, lambda(), f0);
-            } break;
-            case Calibration::gcv: {
-                // select \lambda minimizing the GCV index
-                ScalarField<Dynamic> gcv([&solver, &X_, &f0](const DVector<double>& lambda) -> double {
-                    solver.compute(X_, lambda, f0);
-                    return solver.gcv();   // return GCV index at convergence
-                });
-                solver.compute(X_, core::Grid<Dynamic> {}.optimize(gcv, lambda_grid_), f0);
-            } break;
-            case Calibration::kcv: {
-                auto cv_score = [&solver, &X_, &f0](
-                                  const DVector<double>& lambda, const core::BinaryVector<Dynamic>& train_set,
-                                  const core::BinaryVector<Dynamic>& test_set) -> double {
-                    solver.compute(train_set.blk_repeat(1, X_.cols()).select(X_), lambda, f0);   // fit on train set
-                    // reconstruction error on test set: \norm{X_test * (I - fn*fn^\top/J)}_F/n_test, with
-                    // J = \norm{f_n}_2^2 + f^\top*P(\lambda)*f (PS: the division of f^\top*P(\lambda)*f by
-                    // \norm{f}_{L^2} is necessary to obtain J as expected)
-                    return (test_set.blk_repeat(1, X_.cols()).select(X_) *
-                            (DMatrix<double>::Identity(X_.cols(), X_.cols()) -
-                             solver.fn() * solver.fn().transpose() /
-                               (solver.fn().squaredNorm() + solver.ftPf(lambda) / solver.f_squaredNorm())))
-                             .squaredNorm() /
-                           test_set.count() * X_.cols();
-                };
-                solver.compute(X_, calibration::KCV {n_folds_, seed_}.fit(*this, lambda_grid_, cv_score), f0);
-            } break;
-            }
-            // store results
-            loadings_.col(i) = solver.f();
-            fitted_loadings_.col(i) = solver.fn();   // \frac{\Psi * f}{\norm{f}_{L^2}}
-            scores_.col(i) = solver.s() * solver.f_norm();
-            X_ -= scores_.col(i) * fitted_loadings_.col(i).transpose();   // X <- X - s*f_n^\top (deflation step)
-        }
-        return;
-    }
-
-    // getters
-    const DMatrix<double>& fitted_loadings() const { return fitted_loadings_; }
-    const DMatrix<double>& loadings() const { return loadings_; }
-    const DMatrix<double>& scores() const { return scores_; }
-    // setters
-    void set_npc(std::size_t n_pc) { n_pc_ = n_pc; }
-    void set_tolerance(double tolerance) { tolerance_ = tolerance; }
-    void set_max_iter(std::size_t max_iter) { max_iter_ = max_iter; }
-    void set_seed(std::size_t seed) { seed_ = seed; }
-    void set_lambda(const std::vector<DVector<double>>& lambda_grid) {
-        fdapde_assert(calibration_ != Calibration::off);
-        lambda_grid_ = lambda_grid;
-    }
-    void set_nfolds(std::size_t n_folds) {
-        fdapde_assert(calibration_ == Calibration::kcv);
-        n_folds_ = n_folds;
-    }
+class FPCA : public FunctionalBase<FPCA<RegularizationType_>, RegularizationType_> {
    private:
-    std::size_t n_pc_ = 3;       // number of principal components
-    Calibration calibration_;    // PC function's smoothing parameter selection strategy
-    std::size_t n_folds_ = 10;   // for a kcv calibration strategy, the number of folds
-    std::vector<DVector<double>> lambda_grid_;
-    // power iteration parameters
-    double tolerance_ = 1e-6;     // relative tolerance between Jnew and Jold, used as stopping criterion
-    std::size_t max_iter_ = 20;   // maximum number of allowed iterations
-    int seed_ = fdapde::random_seed;
-
-    // problem solution
-    DMatrix<double> loadings_;          // PC functions' expansion coefficients
-    DMatrix<double> fitted_loadings_;   // evaluation of the PC functions at data locations
-    DMatrix<double> scores_;
-};
+    std::size_t n_pc_ = 3;   // number of principal components
 
-// implementation of FPCA for monolithic approach, see e.g.
-template <typename RegularizationType_>
-class FPCA<RegularizationType_, monolithic> :
-    public FunctionalBase<FPCA<RegularizationType_, monolithic>, RegularizationType_> {
+    struct SolverType__ {   // type erased solver strategy
+        using This_ = FPCA<RegularizationType_>;
+        template <typename T> using fn_ptrs = mem_fn_ptrs<&T::template compute<This_>, &T::loadings, &T::scores>;
+        void compute(const DMatrix<double>& X, std::size_t rank, This_& model) {
+            invoke<void, 0>(*this, X, rank, model);
+        }
+        decltype(auto) loadings() const { return invoke<const DMatrix<double>&, 1>(*this); }
+        decltype(auto) scores()   const { return invoke<const DMatrix<double>&, 2>(*this); }
+    };
+    using SolverType = fdapde::erase<heap_storage, SolverType__>;
+    SolverType solver_;
    public:
     using RegularizationType = std::decay_t<RegularizationType_>;
-    using This = FPCA<RegularizationType, monolithic>;
+    using This = FPCA<RegularizationType>;
     using Base = FunctionalBase<This, RegularizationType>;
     IMPORT_MODEL_SYMBOLS;
-    using Base::lambda;
-    using Base::X;
+    using Base::X;              // n_stat_units \times n_locs data matrix
 
     // constructors
     FPCA() = default;
-    fdapde_enable_constructor_if(is_space_only, This) FPCA(const pde_ptr& pde, Sampling s) : Base(pde, s) {};
+    fdapde_enable_constructor_if(is_space_only, This) FPCA(const pde_ptr& pde, Sampling s, SolverType solver) :
+        Base(pde, s), solver_(solver) {};
     fdapde_enable_constructor_if(is_space_time_separable, This)
-      FPCA(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s) :
-        Base(space_penalty, time_penalty, s) {};
-
-    void init_model() { return; };
-    void solve() {   // monolithic approach via Regularized SVD
-        RSVD<This> solver(this);
-        solver.compute(X(), lambda(), n_pc_);
-        // store results
-	loadings_ = solver.W();
-        fitted_loadings_ = Psi(not_nan()) * solver.W();   // evaluation of L^2 normalized PC functions at data locations
-        scores_ = solver.H().array().rowwise() * solver.w_norm().transpose().array();
-        return;
-    }
+      FPCA(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s, SolverType solver) :
+        Base(space_penalty, time_penalty, s), solver_(solver) {};
 
+    void init_model() { fdapde_assert(bool(solver_) == true); };   // check solver is assigned
+    void solve() { solver_.compute(X(), n_pc_, *this); }
     // getters
-    const DMatrix<double>& fitted_loadings() const { return fitted_loadings_; }
-    const DMatrix<double>& loadings() const { return loadings_; }
-    const DMatrix<double>& scores() const { return scores_; }
+    const DMatrix<double>& loadings() const { return solver_.loadings(); }
+    const DMatrix<double>& scores() const { return solver_.scores(); }
+    // setters
     void set_npc(std::size_t n_pc) { n_pc_ = n_pc; }
-   private:
-    std::size_t n_pc_ = 3;   // number of principal components
-    // problem solution
-    DMatrix<double> loadings_;          // PC functions' expansion coefficients
-    DMatrix<double> fitted_loadings_;   // evaluation of the PC functions at data locations
-    DMatrix<double> scores_;
+    void set_solver(SolverType solver) { solver_ = solver; }
 };
 
-
 }   // namespace models
 }   // namespace fdapde
 

diff --git a/fdaPDE/models/functional/functional_base.h b/fdaPDE/models/functional/functional_base.h
@@ -49,7 +49,7 @@ class FunctionalBase : public select_regularization_base<Model, RegularizationTy
       FunctionalBase(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s) :
         Base(space_penalty, time_penalty), SamplingBase<Model>(s) {};
 
-  // getters
+    // getters
     const DMatrix<double>& X() const { return df_.template get<double>(OBSERVATIONS_BLK); }   // observation matrix X
     std::size_t n_stat_units() const { return X().rows(); }
     std::size_t n_obs() const { return X().size(); };

diff --git a/fdaPDE/models/functional/power_iteration.h b/fdaPDE/models/functional/power_iteration.h
@@ -36,42 +36,40 @@ template <typename Model_> class PowerIteration {
     using SolverType = typename std::conditional<
       is_space_only<Model>::value, SRPDE, STRPDE<typename Model::RegularizationType, fdapde::monolithic>>::type;
     static constexpr int n_lambda = Model::n_lambda;
-    Model* m_;
     GCV gcv_;   // GCV functor
     // algorithm's parameters
     double tolerance_ = 1e-6;     // treshold on |Jnew - Jold| used as stopping criterion
     std::size_t max_iter_ = 20;   // maximum number of iterations before forced stop
     std::size_t k_ = 0;           // iteration index
     SolverType solver_;           // internal solver
-    int seed_;
+    int seed_ = fdapde::random_seed;
+    int n_mc_samples_ = 100;
 
     DVector<double> s_;    // estimated score vector
     DVector<double> fn_;   // field evaluation at data location \frac{\Psi*f}{\norm{f}_{L^2}}
     DVector<double> f_;    // field basis expansion at convergence
     DVector<double> g_;    // PDE misfit at convergence
     double f_norm_;        // L^2 norm of estimated field at converegence
+
    public:
     // constructors
     PowerIteration() = default;
-    PowerIteration(Model* m, double tolerance, std::size_t max_iter, int seed) :
-        m_(m), tolerance_(tolerance), max_iter_(max_iter),
-        seed_((seed == fdapde::random_seed) ? std::random_device()() : seed) {};
-    PowerIteration(Model* m, double tolerance, std::size_t max_iter) :
-        PowerIteration(m, tolerance, max_iter, fdapde::random_seed) {};
-
-    // initializes internal smoothing solver
-    void init() {
-        if constexpr (is_space_only<Model>::value) {   // space-only solver
-            solver_ = SolverType(m_->pde(), m_->sampling());
-        } else {   // space-time separable solver
-            solver_ = SolverType(m_->pde(), m_->time_pde(), m_->sampling());
-            solver_.set_temporal_locations(m_->time_locs());
+    PowerIteration(const Model& m, double tolerance, std::size_t max_iter, int seed) :
+        tolerance_(tolerance), max_iter_(max_iter),
+        seed_((seed == fdapde::random_seed) ? std::random_device()() : seed) {
+        // initialize internal smoothing solver
+        if constexpr (is_space_only<SolverType>::value) { solver_ = SolverType(m.pde(), m.sampling()); }
+        else {
+            solver_ = SolverType(m.pde(), m.time_pde(), m.sampling());
+            solver_.set_temporal_locations(m.time_locs());
         }
-        solver_.set_spatial_locations(m_->locs());
-        // initialize GCV functor
-        gcv_ = solver_.template gcv<StochasticEDF>(100, seed_);
-    }
+        solver_.set_spatial_locations(m.locs());
+        return;
+    };
+    template <typename ModelType> PowerIteration(const ModelType& m, double tolerance, std::size_t max_iter) :
+        PowerIteration(m, tolerance, max_iter, fdapde::random_seed) {};
 
+    void init() { gcv_ = solver_.template gcv<StochasticEDF>(n_mc_samples_, seed_); }
     // executes the power itration algorithm on data X and smoothing parameter \lambda, starting from f0
     void compute(const DMatrix<double>& X, const SVector<n_lambda>& lambda, const DVector<double>& f0) {
         // initialization
@@ -83,16 +81,16 @@ template <typename Model_> class PowerIteration {
         double Jnew = 1;
         fn_ = f0;   // set starting point
         while (!almost_equal(Jnew, Jold, tolerance_) && k_ < max_iter_) {
-            // compute score vector s as \frac{X*fn}{\norm(X*fn)}
+            // s = \frac{X*fn}{\norm(X*fn)}
             s_ = X * fn_;
             s_ = s_ / s_.norm();
-            // compute loadings by solving a proper smoothing problem
+            // compute loadings (solve smoothing problem)
             solver_.data().template insert<double>(OBSERVATIONS_BLK, X.transpose() * s_);   // X^\top*s
             solver_.solve();
             // prepare for next iteration
             k_++;
             fn_ = solver_.fitted();   // \Psi*f
-            // update value of discretized functional \norm{X - s*f_n^\top}_F + f^\top*P(\lambda)*f
+            // update value of discretized functional: \norm{X - s*f_n^\top}_F + f^\top*P(\lambda)*f
             Jold = Jnew;
             Jnew = (X - s_ * fn_.transpose()).squaredNorm() + solver_.ftPf(lambda);
         }
+1 −1		fdaPDE/utils/traits.h
+9 −7		fdaPDE/utils/type_erasure.h
+32 −0		test/src/type_erasure_test.cpp