Skip to content

Commit

Permalink
introduced RegularizedSVD, fPCA takes a generic solver (the previous …
Browse files Browse the repository at this point in the history
…implementation is equivalent to discharge the solution on one of the RegularizedSVD solvers), PowerIteration no more stores a pointer to the model (was useless), better model and regression type erasure, use standardized RegressionViews in gcv and edfs strategies, updated tests
  • Loading branch information
AlePalu committed Jan 28, 2024
1 parent aa697d2 commit b2e51cb
Show file tree
Hide file tree
Showing 13 changed files with 364 additions and 382 deletions.
2 changes: 1 addition & 1 deletion fdaPDE/core
11 changes: 10 additions & 1 deletion fdaPDE/models/functional/center.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,23 @@ struct CenterReturnType {
DMatrix<double> mean; // the mean field \mu
};

// need to handle case with missing data
// introudce a mean function which just produces the (smooth) mean field (no centering)
// then we can speak about weighted average field

// functional centering of the data matrix X
template <typename SmootherType_, typename CalibrationType_>
CenterReturnType center(const DMatrix<double>& X, SmootherType_&& smoother, CalibrationType_&& calibration) {
BlockFrame<double, int> df;
df.insert<double>(OBSERVATIONS_BLK, X.colwise().sum().transpose() / X.rows());
smoother.set_data(df);
// set optimal lambda according to choosen calibration strategy
smoother.set_lambda(calibration.fit(smoother));
if constexpr (std::is_base_of<Eigen::MatrixBase<CalibrationType_>, CalibrationType_>::value) { // fixed \lambda
fdapde_assert(calibration.cols() == 1 && calibration.rows() == SmootherType_::n_lambda);
smoother.set_lambda(calibration); // center with fixed \lambda
} else {
smoother.set_lambda(calibration.fit(smoother));
}
smoother.init();
smoother.solve();
// compute mean matrix and return
Expand Down
173 changes: 27 additions & 146 deletions fdaPDE/models/functional/fpca.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,172 +27,53 @@
using fdapde::calibration::Calibration;
#include "functional_base.h"
#include "power_iteration.h"
#include "rsvd.h"
#include "regularized_svd.h"

namespace fdapde {
namespace models {

// FPCA (Functional Principal Components Analysis) model signature
template <typename RegularizationType, typename SolutionPolicy> class FPCA;

// implementation of FPCA for sequential approach, see e.g.
// Lila, E., Aston, J.A.D., Sangalli, L.M. (2016), Smooth Principal Component Analysis over two-dimensional manifolds
// with an application to Neuroimaging, Annals of Applied Statistics, 10 (4), 1854-1879.
// Functional Principal Components Analysis
template <typename RegularizationType_>
class FPCA<RegularizationType_, sequential> :
public FunctionalBase<FPCA<RegularizationType_, sequential>, RegularizationType_> {
public:
using RegularizationType = std::decay_t<RegularizationType_>;
using This = FPCA<RegularizationType, sequential>;
using Base = FunctionalBase<This, RegularizationType>;
IMPORT_MODEL_SYMBOLS;
using Base::lambda; // vector of smoothing parameters
using Base::n_basis; // number of basis functions
using Base::n_stat_units; // number of statistical units
using Base::X; // n_stat_units \times n_locs data matrix

// constructors
FPCA() = default;
fdapde_enable_constructor_if(is_space_only, This) FPCA(const pde_ptr& pde, Sampling s, Calibration c) :
Base(pde, s), calibration_(c) {};
fdapde_enable_constructor_if(is_space_time_separable, This)
FPCA(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s, Calibration c) :
Base(space_penalty, time_penalty, s), calibration_(c) {};

void init_model() { return; };
void solve() {
// preallocate space
loadings_.resize(n_basis(), n_pc_);
fitted_loadings_.resize(n_locs(), n_pc_);
scores_.resize(n_stat_units(), n_pc_);
DMatrix<double> X_ = X(); // copy original data to avoid side effects

// first guess of PCs set to a multivariate PCA (SVD)
Eigen::JacobiSVD<DMatrix<double>> svd(X_, Eigen::ComputeThinU | Eigen::ComputeThinV);
PowerIteration<This> solver(this, tolerance_, max_iter_); // power iteration solver
solver.set_seed(seed_);
solver.init();

// sequential extraction of principal components
for (std::size_t i = 0; i < n_pc_; i++) {
DVector<double> f0 = svd.matrixV().col(i);
switch (calibration_) {
case Calibration::off: {
// find vectors s,f minimizing \norm_F{Y - s^T*f}^2 + (s^T*s)*P(f) fixed \lambda
solver.compute(X_, lambda(), f0);
} break;
case Calibration::gcv: {
// select \lambda minimizing the GCV index
ScalarField<Dynamic> gcv([&solver, &X_, &f0](const DVector<double>& lambda) -> double {
solver.compute(X_, lambda, f0);
return solver.gcv(); // return GCV index at convergence
});
solver.compute(X_, core::Grid<Dynamic> {}.optimize(gcv, lambda_grid_), f0);
} break;
case Calibration::kcv: {
auto cv_score = [&solver, &X_, &f0](
const DVector<double>& lambda, const core::BinaryVector<Dynamic>& train_set,
const core::BinaryVector<Dynamic>& test_set) -> double {
solver.compute(train_set.blk_repeat(1, X_.cols()).select(X_), lambda, f0); // fit on train set
// reconstruction error on test set: \norm{X_test * (I - fn*fn^\top/J)}_F/n_test, with
// J = \norm{f_n}_2^2 + f^\top*P(\lambda)*f (PS: the division of f^\top*P(\lambda)*f by
// \norm{f}_{L^2} is necessary to obtain J as expected)
return (test_set.blk_repeat(1, X_.cols()).select(X_) *
(DMatrix<double>::Identity(X_.cols(), X_.cols()) -
solver.fn() * solver.fn().transpose() /
(solver.fn().squaredNorm() + solver.ftPf(lambda) / solver.f_squaredNorm())))
.squaredNorm() /
test_set.count() * X_.cols();
};
solver.compute(X_, calibration::KCV {n_folds_, seed_}.fit(*this, lambda_grid_, cv_score), f0);
} break;
}
// store results
loadings_.col(i) = solver.f();
fitted_loadings_.col(i) = solver.fn(); // \frac{\Psi * f}{\norm{f}_{L^2}}
scores_.col(i) = solver.s() * solver.f_norm();
X_ -= scores_.col(i) * fitted_loadings_.col(i).transpose(); // X <- X - s*f_n^\top (deflation step)
}
return;
}

// getters
const DMatrix<double>& fitted_loadings() const { return fitted_loadings_; }
const DMatrix<double>& loadings() const { return loadings_; }
const DMatrix<double>& scores() const { return scores_; }
// setters
void set_npc(std::size_t n_pc) { n_pc_ = n_pc; }
void set_tolerance(double tolerance) { tolerance_ = tolerance; }
void set_max_iter(std::size_t max_iter) { max_iter_ = max_iter; }
void set_seed(std::size_t seed) { seed_ = seed; }
void set_lambda(const std::vector<DVector<double>>& lambda_grid) {
fdapde_assert(calibration_ != Calibration::off);
lambda_grid_ = lambda_grid;
}
void set_nfolds(std::size_t n_folds) {
fdapde_assert(calibration_ == Calibration::kcv);
n_folds_ = n_folds;
}
class FPCA : public FunctionalBase<FPCA<RegularizationType_>, RegularizationType_> {
private:
std::size_t n_pc_ = 3; // number of principal components
Calibration calibration_; // PC function's smoothing parameter selection strategy
std::size_t n_folds_ = 10; // for a kcv calibration strategy, the number of folds
std::vector<DVector<double>> lambda_grid_;
// power iteration parameters
double tolerance_ = 1e-6; // relative tolerance between Jnew and Jold, used as stopping criterion
std::size_t max_iter_ = 20; // maximum number of allowed iterations
int seed_ = fdapde::random_seed;

// problem solution
DMatrix<double> loadings_; // PC functions' expansion coefficients
DMatrix<double> fitted_loadings_; // evaluation of the PC functions at data locations
DMatrix<double> scores_;
};
std::size_t n_pc_ = 3; // number of principal components

// implementation of FPCA for monolithic approach, see e.g.
template <typename RegularizationType_>
class FPCA<RegularizationType_, monolithic> :
public FunctionalBase<FPCA<RegularizationType_, monolithic>, RegularizationType_> {
struct SolverType__ { // type erased solver strategy
using This_ = FPCA<RegularizationType_>;
template <typename T> using fn_ptrs = mem_fn_ptrs<&T::template compute<This_>, &T::loadings, &T::scores>;
void compute(const DMatrix<double>& X, std::size_t rank, This_& model) {
invoke<void, 0>(*this, X, rank, model);
}
decltype(auto) loadings() const { return invoke<const DMatrix<double>&, 1>(*this); }
decltype(auto) scores() const { return invoke<const DMatrix<double>&, 2>(*this); }
};
using SolverType = fdapde::erase<heap_storage, SolverType__>;
SolverType solver_;
public:
using RegularizationType = std::decay_t<RegularizationType_>;
using This = FPCA<RegularizationType, monolithic>;
using This = FPCA<RegularizationType>;
using Base = FunctionalBase<This, RegularizationType>;
IMPORT_MODEL_SYMBOLS;
using Base::lambda;
using Base::X;
using Base::X; // n_stat_units \times n_locs data matrix

// constructors
FPCA() = default;
fdapde_enable_constructor_if(is_space_only, This) FPCA(const pde_ptr& pde, Sampling s) : Base(pde, s) {};
fdapde_enable_constructor_if(is_space_only, This) FPCA(const pde_ptr& pde, Sampling s, SolverType solver) :
Base(pde, s), solver_(solver) {};
fdapde_enable_constructor_if(is_space_time_separable, This)
FPCA(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s) :
Base(space_penalty, time_penalty, s) {};

void init_model() { return; };
void solve() { // monolithic approach via Regularized SVD
RSVD<This> solver(this);
solver.compute(X(), lambda(), n_pc_);
// store results
loadings_ = solver.W();
fitted_loadings_ = Psi(not_nan()) * solver.W(); // evaluation of L^2 normalized PC functions at data locations
scores_ = solver.H().array().rowwise() * solver.w_norm().transpose().array();
return;
}
FPCA(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s, SolverType solver) :
Base(space_penalty, time_penalty, s), solver_(solver) {};

void init_model() { fdapde_assert(bool(solver_) == true); }; // check solver is assigned
void solve() { solver_.compute(X(), n_pc_, *this); }
// getters
const DMatrix<double>& fitted_loadings() const { return fitted_loadings_; }
const DMatrix<double>& loadings() const { return loadings_; }
const DMatrix<double>& scores() const { return scores_; }
const DMatrix<double>& loadings() const { return solver_.loadings(); }
const DMatrix<double>& scores() const { return solver_.scores(); }
// setters
void set_npc(std::size_t n_pc) { n_pc_ = n_pc; }
private:
std::size_t n_pc_ = 3; // number of principal components
// problem solution
DMatrix<double> loadings_; // PC functions' expansion coefficients
DMatrix<double> fitted_loadings_; // evaluation of the PC functions at data locations
DMatrix<double> scores_;
void set_solver(SolverType solver) { solver_ = solver; }
};


} // namespace models
} // namespace fdapde

Expand Down
2 changes: 1 addition & 1 deletion fdaPDE/models/functional/functional_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class FunctionalBase : public select_regularization_base<Model, RegularizationTy
FunctionalBase(const pde_ptr& space_penalty, const pde_ptr& time_penalty, Sampling s) :
Base(space_penalty, time_penalty), SamplingBase<Model>(s) {};

// getters
// getters
const DMatrix<double>& X() const { return df_.template get<double>(OBSERVATIONS_BLK); } // observation matrix X
std::size_t n_stat_units() const { return X().rows(); }
std::size_t n_obs() const { return X().size(); };
Expand Down
42 changes: 20 additions & 22 deletions fdaPDE/models/functional/power_iteration.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,42 +36,40 @@ template <typename Model_> class PowerIteration {
using SolverType = typename std::conditional<
is_space_only<Model>::value, SRPDE, STRPDE<typename Model::RegularizationType, fdapde::monolithic>>::type;
static constexpr int n_lambda = Model::n_lambda;
Model* m_;
GCV gcv_; // GCV functor
// algorithm's parameters
double tolerance_ = 1e-6; // treshold on |Jnew - Jold| used as stopping criterion
std::size_t max_iter_ = 20; // maximum number of iterations before forced stop
std::size_t k_ = 0; // iteration index
SolverType solver_; // internal solver
int seed_;
int seed_ = fdapde::random_seed;
int n_mc_samples_ = 100;

DVector<double> s_; // estimated score vector
DVector<double> fn_; // field evaluation at data location \frac{\Psi*f}{\norm{f}_{L^2}}
DVector<double> f_; // field basis expansion at convergence
DVector<double> g_; // PDE misfit at convergence
double f_norm_; // L^2 norm of estimated field at converegence

public:
// constructors
PowerIteration() = default;
PowerIteration(Model* m, double tolerance, std::size_t max_iter, int seed) :
m_(m), tolerance_(tolerance), max_iter_(max_iter),
seed_((seed == fdapde::random_seed) ? std::random_device()() : seed) {};
PowerIteration(Model* m, double tolerance, std::size_t max_iter) :
PowerIteration(m, tolerance, max_iter, fdapde::random_seed) {};

// initializes internal smoothing solver
void init() {
if constexpr (is_space_only<Model>::value) { // space-only solver
solver_ = SolverType(m_->pde(), m_->sampling());
} else { // space-time separable solver
solver_ = SolverType(m_->pde(), m_->time_pde(), m_->sampling());
solver_.set_temporal_locations(m_->time_locs());
PowerIteration(const Model& m, double tolerance, std::size_t max_iter, int seed) :
tolerance_(tolerance), max_iter_(max_iter),
seed_((seed == fdapde::random_seed) ? std::random_device()() : seed) {
// initialize internal smoothing solver
if constexpr (is_space_only<SolverType>::value) { solver_ = SolverType(m.pde(), m.sampling()); }
else {
solver_ = SolverType(m.pde(), m.time_pde(), m.sampling());
solver_.set_temporal_locations(m.time_locs());
}
solver_.set_spatial_locations(m_->locs());
// initialize GCV functor
gcv_ = solver_.template gcv<StochasticEDF>(100, seed_);
}
solver_.set_spatial_locations(m.locs());
return;
};
template <typename ModelType> PowerIteration(const ModelType& m, double tolerance, std::size_t max_iter) :
PowerIteration(m, tolerance, max_iter, fdapde::random_seed) {};

void init() { gcv_ = solver_.template gcv<StochasticEDF>(n_mc_samples_, seed_); }
// executes the power itration algorithm on data X and smoothing parameter \lambda, starting from f0
void compute(const DMatrix<double>& X, const SVector<n_lambda>& lambda, const DVector<double>& f0) {
// initialization
Expand All @@ -83,16 +81,16 @@ template <typename Model_> class PowerIteration {
double Jnew = 1;
fn_ = f0; // set starting point
while (!almost_equal(Jnew, Jold, tolerance_) && k_ < max_iter_) {
// compute score vector s as \frac{X*fn}{\norm(X*fn)}
// s = \frac{X*fn}{\norm(X*fn)}
s_ = X * fn_;
s_ = s_ / s_.norm();
// compute loadings by solving a proper smoothing problem
// compute loadings (solve smoothing problem)
solver_.data().template insert<double>(OBSERVATIONS_BLK, X.transpose() * s_); // X^\top*s
solver_.solve();
// prepare for next iteration
k_++;
fn_ = solver_.fitted(); // \Psi*f
// update value of discretized functional \norm{X - s*f_n^\top}_F + f^\top*P(\lambda)*f
// update value of discretized functional: \norm{X - s*f_n^\top}_F + f^\top*P(\lambda)*f
Jold = Jnew;
Jnew = (X - s_ * fn_.transpose()).squaredNorm() + solver_.ftPf(lambda);
}
Expand Down
Loading

0 comments on commit b2e51cb

Please sign in to comment.