-
Notifications
You must be signed in to change notification settings - Fork 0
/
ortho.cpp
441 lines (386 loc) · 14.4 KB
/
ortho.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
#include "ortho.h"
#include <Eigen/Dense>
#include <iostream>
// basic tool functions -- linear algebra routines
// ortho
/* contains:
* ortho(QR)
* ortho(Cholesky)
* b_ortho
* ortho_against_y
* b_ortho_against_y
*/
#ifdef USE_QR
/**
* @brief Orthonormalize a set of m vectors (of size n) using QR decomposition.
*
* First compute the QR decomposition of the input matrix
* U = Q * (R)
* (0)
* to get R,
* and then solve the upper triangular system U_{ortho}R = U.
* We get U_{ortho} = UR^{-1} = Q, and Q is an orthogonal matrix.
*
* @param n Size of the vectors (number of rows in u and w).
* @param m Number of vectors to orthogonalize (number of columns in u and w).
* @param u Input matrix of vectors, size of n x m ,to be orthogonalized (will be overwritten with Q).
*/
void ortho(int n, int m, Eigen::MatrixXd &u)
{
// direct approach, using QR
// Thin QR Factorization!
/*Eigen::ColPivHouseholderQR performs
a rank-revealing QR decomposition of a matrix A into matrices P, Q and R
such that AP=QR*/
Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(u);
if (qr.info() == Eigen::Success){
Eigen::MatrixXd Qfull = qr.matrixQ();
u.topLeftCorner(n,m) = Qfull.topLeftCorner(n,m);
}
else
std::cerr << "QR decomposition was not successful." << std::endl;
}
#elif defined(USE_THIN_QR)
// thinQR
void ortho(int n, int m, Eigen::MatrixXd &u)
{
// direct approach, using QR
// Thin QR Factorization! U=Q1R1, see Matrix Computations Page 237
/* Eigen::HouseholderQR performs
a QR decomposition of a matrix A into matrices Q and R
such thatA=QR
*/
Eigen::HouseholderQR<Eigen::MatrixXd> qr(u);
// Eigen::MatrixXd thinQ(u); // same shape as u
u.setIdentity();
u = qr.householderQ() * u; // now u=Q
}
#elif defined(USE_MGS)
// modified Gram-Schmidt for thinQR
// void ortho(int n, int m, Eigen::MatrixXd &A) {
// // int n = A.rows(); int m = A.cols();
// Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(n, m); // 初始化正交矩阵Q
// // 处理第一列
// Eigen::VectorXd A1 = A.col(0);
// Eigen::VectorXd q0 = A1.normalized();
// Q.col(0) = q0;
// for (int i = 1; i < m-1; ++i) {
// // 减去与前面向量的投影
// for (int j = i+1; j < m; ++j) {
// double proj = A.col(i).dot(Q.col(j));
// A.col(i) = A.col(i) - proj * Q.col(j);
// }
// // 检查是否正交化失败(向量是否为零向量)
// if (A.col(i).norm() < 1e-6) {
// // 如果是零向量,直接赋值
// Q.col(i) = A.col(i);
// } else {
// // 归一化
// Q.col(i) = A.col(i).normalized();
// }
// }
// A=Q;
// }
void ortho(int n, int m, Eigen::MatrixXd &x) {
assert(x.rows() == n);assert(x.cols() == m);
// 用于存储正交化后的向量集合
Eigen::MatrixXd Q=x;
Eigen::MatrixXd R(m, m);
for (int k = 0; k < m; ++k) {
double r_kk = x.col(k).norm();
if (r_kk < 1e-10) {
std::cerr << "Zero vector encountered in Modified Gram-Schmidt process." << std::endl;
std::cout << "Q'Q = " << Q.transpose()*Q << std::endl;
return;
}
R(k,k)=r_kk;
// x.col(k) = x.col(k) / r_kk;
// Q.col(k) = x.col(k);
Q.col(k) = x.col(k) / r_kk;
// std::cout << "orthonormalized q("<<k<<") =\n" << Q.col(k) << std::endl;
for (int j = k+1; j < m; ++j) {
// 投影x的第j列到Q的第k列上,并减去这个投影
#ifdef DEBUG_ORTHO
std::cout << "project col(" << k << ") to col(" << j << ")=\n"<< Q.col(j) << std::endl;
#endif
// std::cout << "<qj, qi> qi =\n"<<(q.dot(Q.col(j))) * Q.col(j)<< std::endl;
double r_kj = Q.col(k).dot(x.col(j));
// std::cout << "qk dot qj ="<< r_kj << std::endl;
R(k,j)=r_kj;
x.col(j) = x.col(j) -Q.col(k) * r_kj;
// std::cout << "now q(" << k << ")=\n" << q << std::endl;
}
}
x = Q;
#ifdef DEBUG_ORTHO
std::cout << "Q =\n" << Q << std::endl;
std::cout << "R =\n" << R << std::endl;
std::cout << "Q*R =\n" << Q * R << std::endl;
std::cout << "Q'Q = " << Q.transpose()*Q << std::endl;
#endif
}
#else // USE_QR and USE_THIN_QR not defined, use cholesky
/* cholesky ortho */
void ortho(int n, int m, Eigen::MatrixXd &x)
// void ortho_cho(int n, int m, Eigen::MatrixXd &x)
{
// direct approach, using Cholesky
// overlap = x'x
Eigen::MatrixXd overlap = x.transpose() * x;
// overlap = LL^T
Eigen::LLT<Eigen::MatrixXd> llt(overlap);
if (llt.info() == Eigen::Success)
{
Eigen::MatrixXd Lt = llt.matrixL().transpose();
Lt = Lt.inverse();
x = x * Lt;
}
else
std::cerr << "Cholesky decomposition failed." << std::endl;
}
#endif // USE_QR
void ortho_qr_two_phase(int n, int m, Eigen::MatrixXd &x){
// ortho using QR, two-phase
// 1. QR decomposition, get R
/*
* First compute the QR decomposition of the input matrix
* U = Q * (R)
* (0)
*/
Eigen::MatrixXd overlap = x.transpose() * x;
Eigen::HouseholderQR<Eigen::MatrixXd> qr(overlap);
Eigen::MatrixXd R = qr.matrixQR().triangularView<Eigen::Upper>();
// 2. Solve X_ortho * R = X to get X_ortho = XR^{-1} = Q
x = x * R.inverse();
}
/**
* @brief Performs b-orthogonalization of the given matrices `u`, given `bu`.
*
* b-orthogonalize m vectors of lenght n using the Cholesky decomposition
* of the overlap matrix u^tbu
*
* @param n The number of rows in the matrices `u` and `bu`.
* @param m The number of columns in the matrices `u` and `bu`.
* @param u The input matrix `u` to be b-orthonormalized.
* @param bu The input matrix `bu` to be b-orthonormalized.
*
* @note This function modifies the input matrices `u` and `bu` in-place.
*
* works only if u is already orthonormal.
* overlap matrix u^tbu matrix can be very ill-conditioned
*
*/
void b_ortho(int n, int m, Eigen::MatrixXd& u, Eigen::MatrixXd& bu) {
// 局部变量
Eigen::MatrixXd ubu(m, m);
Eigen::VectorXd sigma(m);
Eigen::MatrixXd u_svd(m, m), vt_svd(m, m), temp(n, m);
double tol_svd = 1.0e-5;
bool use_svd = false; //true;
// 计算重叠矩阵
ubu = u.transpose() * bu;
// ubu = (u.transpose() * bu).eval();
// std::cout << "overlap ubu = \n" << ubu << std::endl;
if (use_svd) {
// 使用SVD进行b-正交化
Eigen::JacobiSVD<Eigen::MatrixXd> svd(ubu, Eigen::ComputeThinU | Eigen::ComputeThinV);
sigma = svd.singularValues();
u_svd = svd.matrixU();
vt_svd = svd.matrixV().transpose();
// 计算sigma的逆平方根
for (int i = 0; i < sigma.size(); ++i) {
if (sigma(i) > tol_svd) sigma(i) = 1 / sqrt(sigma(i));
else sigma(i) = 0;
}
ubu = Eigen::MatrixXd::Zero(m, m);
for (int i = 0; i < m; ++i) {
ubu += sigma(i) * vt_svd.col(i) * u_svd.row(i).transpose();
}
// 应用变换
temp = u * ubu;
u = temp;
temp = bu * ubu;
bu = temp;
} else {
// 使用Cholesky分解
// ubu = LL^T
Eigen::LLT<Eigen::MatrixXd> llt(ubu);
if (llt.info() == Eigen::Success) {
// 计算u * L^-T 和 bu * L^-T
Eigen::MatrixXd Lt = llt.matrixL().transpose();
// std::cout << "L^T = \n" << Lt << std::endl;
Lt = Lt.inverse();
// std::cout << "L^-T = \n" << Lt << std::endl;
// std::cout << "u = \n" << u << std::endl;
// std::cout << "bu = \n" << bu << std::endl;
u = u * Lt;
bu = bu * Lt;
// std::cout << "u = \n" << u << std::endl;
// std::cout << "bu = \n" << bu << std::endl;
} else {
std::cerr << "Cholesky decomposition failed." << std::endl;
}
}
}
/**
* Orthogonalizes block vector `x` against a given \b orthonormal set `y`
* and orthonormalizes `x`. If `y` is not orthonormal, extra computation
* is needed inside this function.
*
* `y(n, m)` is a given block vector, assumed to be orthonormal
* `x(n, k)` is the block vector to be orthogonalized
* first against x and then internally.
*
* if y is not orthonormal, we will do by solving (Y'Y)y_coeff = Y'X
* X = X - Y(Y'Y)^{-1}Y'X = X - Y * y_coeff
*
* @param n The number of rows in `x` and `y`.
* @param m The number of columns in `y`.
* @param k The number of columns in `x`.
* @param x The matrix to be orthogonalized. size of n x k.
* @param y The matrix to orthogonalize against. size of n x m. assumed to be orthonormal
*/
void ortho_against_y(int n, int m, int k, Eigen::MatrixXd& x, const Eigen::MatrixXd& y){
#ifdef DEBUG_LOBPCG
// std::cout << "----- start ortho against y -----" << std::endl;
#endif
// orthogonalize x(n, k) against y(n, m)
double tol_ortho = 1.0e-10;//1.0e-13;
// first check input y is orthonormal
bool is_y_orthonormal = true;
Eigen::MatrixXd yby = y.transpose() * y;
Eigen::MatrixXd ybx(m,m);
double diag_norm = yby.diagonal().array().square().sum();
double out_norm = (yby.array().square()).sum() - diag_norm;
if (!( (std::abs(diag_norm - m) <= tol_ortho) && (std::abs(out_norm) <= tol_ortho) )){
std::cout << "Input y is not orthonormal! continue by Solving Y^TY" << std::endl;
#ifdef DEBUG_ORTHO
std::cout << "y = \n" << y << std::endl << "yby = \n" << yby << std::endl;
std::cerr << "diag norm:" << diag_norm << " | and out norm:" <<out_norm << std::endl;
#endif
// return;
is_y_orthonormal = false;
}
// start with initial orthogonalization
ortho(n, k, x);
// X = X - Y * y_coeff
// Y'BY y_coeff = Y'BX
// X = X - Y(Y'Y)^{-1}Y'X
Eigen::MatrixXd y_coeff = Eigen::MatrixXd::Identity(m, k); // to store Y'X or (Y'Y)^{-1}Y'X
// since y will not change, we will compute Cholesky factor of yby only once
Eigen::LLT<Eigen::MatrixXd> factYBY(yby);
if(!is_y_orthonormal) factYBY.compute(yby); //only solve Y'Y y_coeff = Y'X if y'y is not I
// do ortho while overlap norm > tol_ortho
double norm_overlap = 10.0; // big init value
const int ITER_MAX = 10;//10;
int iter_cnt = ITER_MAX; // max 10 iters
while(norm_overlap >= tol_ortho){
#ifdef DEBUG_LOBPCG
// std::cout <<"--- "<< ITER_MAX - iter_cnt << "th iter" <<" ---" << std::endl;
#endif
// orthogonalize x against y, X = X - Y(Y'Y)^{-1}Y'X
// X = X - Y * y_coeff
// Y'Y y_coeff = Y'X
ybx = y.transpose() * x;
if(!is_y_orthonormal){
y_coeff = factYBY.solve(ybx);
x = x - y * y_coeff; // (Y'Y)^{-1}Y'X
}else{
x = x - y * ybx; // Y'X
}
// compute overlap = Y^TX, size(m, k)
Eigen::MatrixXd overlap;
// ortho_qr(n, k, x);
ortho(n, k, x);
// compute overlap norm ||y^T x|| after x orthonormalization
overlap = y.transpose() * x;
norm_overlap = overlap.norm();
// 判定条件:norm_overlap >= tol_ortho
#ifdef DEBUG_LOBPCG
// std::cout << " norm = " << norm_overlap << std::endl;
#endif
--iter_cnt;
if(iter_cnt < 0){ // to many ortho iterations, exit
std::cerr << "Too many iterations in ortho_against_x. Failed to reach tolerance." << std::endl;
break;
}
}
#ifdef DEBUG_ORTHO
std::cout << "end after " << ITER_MAX - iter_cnt << " iterations" << std::endl;
#endif
#ifdef DEBUG_LOBPCG
// std::cout << "----- end ortho against y -----" << std::endl;
#endif
}
/**
* B-Orthogonalizes the matrix `x` against the matrix `y` given y and by.
*
* b-orthogonalize x(n, k) against y(n, m)
*
* @param n The number of rows in `x` and `y`.
* @param m The number of columns in `y`.
* @param k The number of columns in `x`.
* @param x The matrix `x` to be b-orthogonalized.
* @param y The matrix `y` used for b-orthogonalization.
* @param by The matrix `by` used for b-orthonormalization.
*/
void b_ortho_against_y(int n, int m, int k, Eigen::MatrixXd& x, const Eigen::MatrixXd& y, const Eigen::MatrixXd& by){
// b-orthogonalize x(n, k) against y(n, m)
#ifdef DEBUG_LOBPCG
// std::cout << "----- start b-ortho against y -----" << std::endl;
#endif
double tol_ortho = 1.0e-10;//1.0e-13;
// first check input y is b-orthonormal
Eigen::MatrixXd yby = by.transpose() * y;
Eigen::MatrixXd ybx(m,m);
bool is_y_orthonormal = true;
double diag_norm = yby.diagonal().array().square().sum();
double out_norm = (yby.array().square()).sum() - diag_norm;
if (!( (std::abs(diag_norm - m) <= tol_ortho) && (std::abs(out_norm) <= tol_ortho) )){
std::cout << "Input y is not b-orthonormal!" << std::endl;
// std::cerr << "diag norm:" << diag_norm << " | and out norm:" <<out_norm << std::endl;
// return;
std::cout << "continue by Solving Y^TBY" << std::endl;
is_y_orthonormal = false;
}
// X = X - Y * y_coeff
// Y'BY y_coeff = Y'BX
// X = X - Y(Y'BY)^{-1} Y'BX
Eigen::LLT<Eigen::MatrixXd> factYBY(yby);
Eigen::MatrixXd y_coeff = Eigen::MatrixXd::Identity(m, k);
// since y will not change, we will compute Cholesky factor of yby only once
if(!is_y_orthonormal) factYBY.compute(yby);
// do ortho while overlap norm > tol_ortho
double norm_overlap = 10.0; // big init value
const int ITER_MAX = 10;//10;
int iter_cnt = ITER_MAX; // max 10 iters
while(norm_overlap >= tol_ortho){
#ifdef DEBUG_LOBPCG
// std::cout <<"--- "<< ITER_MAX - iter_cnt << "th iter" <<" ---" << std::endl;
#endif
// orthogonalize x against y, X = X - Y(Y'BY)^{-1}(YB)'X
// X = X - Y * y_coeff
// Y'BY y_coeff = Y'BX
// X = X - Y(Y'BY)^{-1}(YB)'X
ybx = by.transpose() * x;
if(!is_y_orthonormal){
y_coeff = factYBY.solve(ybx);
x = x - y * y_coeff; // (Y'BY)^{-1}Y'X
}else{
x = x - y * ybx; // Y'BX
}
// compute overlap = (BY)^TX, size(m, k)
Eigen::MatrixXd overlap;
ortho(n, k, x);
overlap = by.transpose() * x;
norm_overlap = overlap.norm();
--iter_cnt;
if(iter_cnt < 0){ // to many ortho iterations, exit
std::cerr << "Too many iterations in ortho_against_x. Failed to reach tolerance." << std::endl;
break;
}
}
#ifdef DEBUG_LOBPCG
// std::cout << "----- end b-ortho against y -----" << std::endl;
#endif
}