SymmetricEigensolver.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802
  1. // David Eberly, Geometric Tools, Redmond WA 98052
  2. // Copyright (c) 1998-2020
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // https://www.boost.org/LICENSE_1_0.txt
  5. // https://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
  6. // Version: 4.0.2019.08.13
  7. #pragma once
  8. #include <Mathematics/Math.h>
  9. #include <Mathematics/RangeIteration.h>
  10. #include <algorithm>
  11. #include <cstring>
  12. #include <vector>
  13. // The SymmetricEigensolver class is an implementation of Algorithm 8.2.3
  14. // (Symmetric QR Algorithm) described in "Matrix Computations, 2nd edition"
  15. // by G. H. Golub and C. F. Van Loan, The Johns Hopkins University Press,
  16. // Baltimore MD, Fourth Printing 1993. Algorithm 8.2.1 (Householder
  17. // Tridiagonalization) is used to reduce matrix A to tridiagonal T.
  18. // Algorithm 8.2.2 (Implicit Symmetric QR Step with Wilkinson Shift) is
  19. // used for the iterative reduction from tridiagonal to diagonal. If A is
  20. // the original matrix, D is the diagonal matrix of eigenvalues, and Q is
  21. // the orthogonal matrix of eigenvectors, then theoretically Q^T*A*Q = D.
  22. // Numerically, we have errors E = Q^T*A*Q - D. Algorithm 8.2.3 mentions
  23. // that one expects |E| is approximately u*|A|, where |M| denotes the
  24. // Frobenius norm of M and where u is the unit roundoff for the
  25. // floating-point arithmetic: 2^{-23} for 'float', which is FLT_EPSILON
  26. // = 1.192092896e-7f, and 2^{-52} for'double', which is DBL_EPSILON
  27. // = 2.2204460492503131e-16.
  28. //
  29. // The condition |a(i,i+1)| <= epsilon*(|a(i,i) + a(i+1,i+1)|) used to
  30. // determine when the reduction decouples to smaller problems is implemented
  31. // as: sum = |a(i,i)| + |a(i+1,i+1)|; sum + |a(i,i+1)| == sum. The idea is
  32. // that the superdiagonal term is small relative to its diagonal neighbors,
  33. // and so it is effectively zero. The unit tests have shown that this
  34. // interpretation of decoupling is effective.
  35. //
  36. // The authors suggest that once you have the tridiagonal matrix, a practical
  37. // implementation will store the diagonal and superdiagonal entries in linear
  38. // arrays, ignoring the theoretically zero values not in the 3-band. This is
  39. // good for cache coherence. The authors also suggest storing the Householder
  40. // vectors in the lower-triangular portion of the matrix to save memory. The
  41. // implementation uses both suggestions.
  42. //
  43. // For matrices with randomly generated values in [0,1], the unit tests
  44. // produce the following information for N-by-N matrices.
  45. //
  46. // N |A| |E| |E|/|A| iterations
  47. // -------------------------------------------
  48. // 2 1.2332 5.5511e-17 4.5011e-17 1
  49. // 3 2.0024 1.1818e-15 5.9020e-16 5
  50. // 4 2.8708 9.9287e-16 3.4585e-16 7
  51. // 5 2.9040 2.5958e-15 8.9388e-16 9
  52. // 6 4.0427 2.2434e-15 5.5493e-16 12
  53. // 7 5.0276 3.2456e-15 6.4555e-16 15
  54. // 8 5.4468 6.5626e-15 1.2048e-15 15
  55. // 9 6.1510 4.0317e-15 6.5545e-16 17
  56. // 10 6.7523 4.9334e-15 7.3062e-16 21
  57. // 11 7.1322 7.1347e-15 1.0003e-15 22
  58. // 12 7.8324 5.6106e-15 7.1633e-16 24
  59. // 13 8.1073 5.1366e-15 6.3357e-16 25
  60. // 14 8.6257 8.3496e-15 9.6798e-16 29
  61. // 15 9.2603 6.9526e-15 7.5080e-16 31
  62. // 16 9.9853 6.5807e-15 6.5904e-16 32
  63. // 17 10.5388 8.0931e-15 7.6793e-16 35
  64. // 18 11.2377 1.1149e-14 9.9218e-16 38
  65. // 19 11.7105 1.0711e-14 9.1470e-16 42
  66. // 20 12.2227 1.7723e-14 1.4500e-15 42
  67. // 21 12.7411 1.2515e-14 9.8231e-16 47
  68. // 22 13.4462 1.2645e-14 9.4046e-16 50
  69. // 23 13.9541 1.2899e-14 9.2444e-16 47
  70. // 24 14.3298 1.6337e-14 1.1400e-15 53
  71. // 25 14.8050 1.4760e-14 9.9701e-16 54
  72. // 26 15.3914 1.7076e-14 1.1094e-15 57
  73. // 27 15.8430 1.9714e-14 1.2443e-15 60
  74. // 28 16.4771 1.7148e-14 1.0407e-15 60
  75. // 29 16.9909 1.7309e-14 1.0187e-15 60
  76. // 30 17.4456 2.1453e-14 1.2297e-15 64
  77. // 31 17.9909 2.2069e-14 1.2267e-15 68
  78. //
  79. // The eigenvalues and |E|/|A| values were compared to those generated by
  80. // Mathematica Version 9.0; Wolfram Research, Inc., Champaign IL, 2012.
  81. // The results were all comparable with eigenvalues agreeing to a large
  82. // number of decimal places.
  83. //
  84. // Timing on an Intel (R) Core (TM) i7-3930K CPU @ 3.20 GHZ (in seconds):
  85. //
  86. // N |E|/|A| iters tridiag QR evecs evec[N] comperr
  87. // --------------------------------------------------------------
  88. // 512 4.4149e-15 1017 0.180 0.005 1.151 0.848 2.166
  89. // 1024 6.1691e-15 1990 1.775 0.031 11.894 12.759 21.179
  90. // 2048 8.5108e-15 3849 16.592 0.107 119.744 116.56 212.227
  91. //
  92. // where iters is the number of QR steps taken, tridiag is the computation
  93. // of the Householder reflection vectors, evecs is the composition of
  94. // Householder reflections and Givens rotations to obtain the matrix of
  95. // eigenvectors, evec[N] is N calls to get the eigenvectors separately, and
  96. // comperr is the computation E = Q^T*A*Q - D. The construction of the full
  97. // eigenvector matrix is, of course, quite expensive. If you need only a
  98. // small number of eigenvectors, use function GetEigenvector(int,Real*).
  99. namespace WwiseGTE
  100. {
  101. template <typename Real>
  102. class SymmetricEigensolver
  103. {
  104. public:
  105. // The solver processes NxN symmetric matrices, where N > 1 ('size'
  106. // is N) and the matrix is stored in row-major order. The maximum
  107. // number of iterations ('maxIterations') must be specified for the
  108. // reduction of a tridiagonal matrix to a diagonal matrix. The goal
  109. // is to compute NxN orthogonal Q and NxN diagonal D for which
  110. // Q^T*A*Q = D.
  111. SymmetricEigensolver(int size, unsigned int maxIterations)
  112. :
  113. mSize(0),
  114. mMaxIterations(0),
  115. mEigenvectorMatrixType(-1)
  116. {
  117. if (size > 1 && maxIterations > 0)
  118. {
  119. mSize = size;
  120. mMaxIterations = maxIterations;
  121. mMatrix.resize(size * size);
  122. mDiagonal.resize(size);
  123. mSuperdiagonal.resize(size - 1);
  124. mGivens.reserve(maxIterations * (size - 1));
  125. mPermutation.resize(size);
  126. mVisited.resize(size);
  127. mPVector.resize(size);
  128. mVVector.resize(size);
  129. mWVector.resize(size);
  130. }
  131. }
  132. // A copy of the NxN symmetric input is made internally. The order of
  133. // the eigenvalues is specified by sortType: -1 (decreasing), 0 (no
  134. // sorting), or +1 (increasing). When sorted, the eigenvectors are
  135. // ordered accordingly. The return value is the number of iterations
  136. // consumed when convergence occurred, 0xFFFFFFFF when convergence did
  137. // not occur, or 0 when N <= 1 was passed to the constructor.
  138. unsigned int Solve(Real const* input, int sortType)
  139. {
  140. mEigenvectorMatrixType = -1;
  141. if (mSize > 0)
  142. {
  143. std::copy(input, input + mSize * mSize, mMatrix.begin());
  144. Tridiagonalize();
  145. mGivens.clear();
  146. for (unsigned int j = 0; j < mMaxIterations; ++j)
  147. {
  148. int imin = -1, imax = -1;
  149. for (int i = mSize - 2; i >= 0; --i)
  150. {
  151. // When a01 is much smaller than its diagonal
  152. // neighbors, it is effectively zero.
  153. Real a00 = mDiagonal[i];
  154. Real a01 = mSuperdiagonal[i];
  155. Real a11 = mDiagonal[i + 1];
  156. Real sum = std::fabs(a00) + std::fabs(a11);
  157. if (sum + std::fabs(a01) != sum)
  158. {
  159. if (imax == -1)
  160. {
  161. imax = i;
  162. }
  163. imin = i;
  164. }
  165. else
  166. {
  167. // The superdiagonal term is effectively zero
  168. // compared to the neighboring diagonal terms.
  169. if (imin >= 0)
  170. {
  171. break;
  172. }
  173. }
  174. }
  175. if (imax == -1)
  176. {
  177. // The algorithm has converged.
  178. ComputePermutation(sortType);
  179. return j;
  180. }
  181. // Process the lower-right-most unreduced tridiagonal
  182. // block.
  183. DoQRImplicitShift(imin, imax);
  184. }
  185. return 0xFFFFFFFF;
  186. }
  187. else
  188. {
  189. return 0;
  190. }
  191. }
  192. // Get the eigenvalues of the matrix passed to Solve(...). The input
  193. // 'eigenvalues' must have N elements.
  194. void GetEigenvalues(Real* eigenvalues) const
  195. {
  196. if (eigenvalues && mSize > 0)
  197. {
  198. if (mPermutation[0] >= 0)
  199. {
  200. // Sorting was requested.
  201. for (int i = 0; i < mSize; ++i)
  202. {
  203. int p = mPermutation[i];
  204. eigenvalues[i] = mDiagonal[p];
  205. }
  206. }
  207. else
  208. {
  209. // Sorting was not requested.
  210. size_t numBytes = mSize * sizeof(Real);
  211. std::memcpy(eigenvalues, &mDiagonal[0], numBytes);
  212. }
  213. }
  214. }
  215. // Accumulate the Householder reflections and Givens rotations to
  216. // produce the orthogonal matrix Q for which Q^T*A*Q = D. The input
  217. // 'eigenvectors' must have N*N elements. The array is filled in as
  218. // if the eigenvector matrix is stored in row-major order. The i-th
  219. // eigenvector is
  220. // (eigenvectors[i+size*0], ... eigenvectors[i+size*(size - 1)])
  221. // which is the i-th column of 'eigenvectors' as an NxN matrix stored
  222. // in row-major order.
  223. void GetEigenvectors(Real* eigenvectors) const
  224. {
  225. mEigenvectorMatrixType = -1;
  226. if (eigenvectors && mSize > 0)
  227. {
  228. // Start with the identity matrix.
  229. std::fill(eigenvectors, eigenvectors + mSize * mSize, (Real)0);
  230. for (int d = 0; d < mSize; ++d)
  231. {
  232. eigenvectors[d + mSize * d] = (Real)1;
  233. }
  234. // Multiply the Householder reflections using backward
  235. // accumulation.
  236. int r, c;
  237. for (int i = mSize - 3, rmin = i + 1; i >= 0; --i, --rmin)
  238. {
  239. // Copy the v vector and 2/Dot(v,v) from the matrix.
  240. Real const* column = &mMatrix[i];
  241. Real twoinvvdv = column[mSize * (i + 1)];
  242. for (r = 0; r < i + 1; ++r)
  243. {
  244. mVVector[r] = (Real)0;
  245. }
  246. mVVector[r] = (Real)1;
  247. for (++r; r < mSize; ++r)
  248. {
  249. mVVector[r] = column[mSize * r];
  250. }
  251. // Compute the w vector.
  252. for (r = 0; r < mSize; ++r)
  253. {
  254. mWVector[r] = (Real)0;
  255. for (c = rmin; c < mSize; ++c)
  256. {
  257. mWVector[r] += mVVector[c] * eigenvectors[r + mSize * c];
  258. }
  259. mWVector[r] *= twoinvvdv;
  260. }
  261. // Update the matrix, Q <- Q - v*w^T.
  262. for (r = rmin; r < mSize; ++r)
  263. {
  264. for (c = 0; c < mSize; ++c)
  265. {
  266. eigenvectors[c + mSize * r] -= mVVector[r] * mWVector[c];
  267. }
  268. }
  269. }
  270. // Multiply the Givens rotations.
  271. for (auto const& givens : mGivens)
  272. {
  273. for (r = 0; r < mSize; ++r)
  274. {
  275. int j = givens.index + mSize * r;
  276. Real& q0 = eigenvectors[j];
  277. Real& q1 = eigenvectors[j + 1];
  278. Real prd0 = givens.cs * q0 - givens.sn * q1;
  279. Real prd1 = givens.sn * q0 + givens.cs * q1;
  280. q0 = prd0;
  281. q1 = prd1;
  282. }
  283. }
  284. // The number of Householder reflections is H = mSize - 2. If
  285. // H is even, the product of Householder reflections is a
  286. // rotation; otherwise, H is odd and the product is a
  287. // reflection. The number of Givens rotations does not
  288. // influence the type of the product of Householder
  289. // reflections.
  290. mEigenvectorMatrixType = 1 - (mSize & 1);
  291. if (mPermutation[0] >= 0)
  292. {
  293. // Sorting was requested.
  294. std::fill(mVisited.begin(), mVisited.end(), 0);
  295. for (int i = 0; i < mSize; ++i)
  296. {
  297. if (mVisited[i] == 0 && mPermutation[i] != i)
  298. {
  299. // The item starts a cycle with 2 or more
  300. // elements.
  301. int start = i, current = i, j, next;
  302. for (j = 0; j < mSize; ++j)
  303. {
  304. mPVector[j] = eigenvectors[i + mSize * j];
  305. }
  306. while ((next = mPermutation[current]) != start)
  307. {
  308. mEigenvectorMatrixType = 1 - mEigenvectorMatrixType;
  309. mVisited[current] = 1;
  310. for (j = 0; j < mSize; ++j)
  311. {
  312. eigenvectors[current + mSize * j] =
  313. eigenvectors[next + mSize * j];
  314. }
  315. current = next;
  316. }
  317. mVisited[current] = 1;
  318. for (j = 0; j < mSize; ++j)
  319. {
  320. eigenvectors[current + mSize * j] = mPVector[j];
  321. }
  322. }
  323. }
  324. }
  325. }
  326. }
  327. // The eigenvector matrix is a rotation (return +1) or a reflection
  328. // (return 0). If the input 'size' to the constructor is 0 or the
  329. // input 'eigenvectors' to GetEigenvectors is null, the returned value
  330. // is -1.
  331. inline int GetEigenvectorMatrixType() const
  332. {
  333. return mEigenvectorMatrixType;
  334. }
  335. // Compute a single eigenvector, which amounts to computing column c
  336. // of matrix Q. The reflections and rotations are applied
  337. // incrementally. This is useful when you want only a small number of
  338. // the eigenvectors.
  339. void GetEigenvector(int c, Real* eigenvector) const
  340. {
  341. if (0 <= c && c < mSize)
  342. {
  343. // y = H*x, then x and y are swapped for the next H
  344. Real* x = eigenvector;
  345. Real* y = &mPVector[0];
  346. // Start with the Euclidean basis vector.
  347. std::memset(x, 0, mSize * sizeof(Real));
  348. if (mPermutation[0] >= 0)
  349. {
  350. // Sorting was requested.
  351. x[mPermutation[c]] = (Real)1;
  352. }
  353. else
  354. {
  355. x[c] = (Real)1;
  356. }
  357. // Apply the Givens rotations.
  358. for (auto const& givens : WwiseGTE::reverse(mGivens))
  359. {
  360. Real& xr = x[givens.index];
  361. Real& xrp1 = x[givens.index + 1];
  362. Real tmp0 = givens.cs * xr + givens.sn * xrp1;
  363. Real tmp1 = -givens.sn * xr + givens.cs * xrp1;
  364. xr = tmp0;
  365. xrp1 = tmp1;
  366. }
  367. // Apply the Householder reflections.
  368. for (int i = mSize - 3; i >= 0; --i)
  369. {
  370. // Get the Householder vector v.
  371. Real const* column = &mMatrix[i];
  372. Real twoinvvdv = column[mSize * (i + 1)];
  373. int r;
  374. for (r = 0; r < i + 1; ++r)
  375. {
  376. y[r] = x[r];
  377. }
  378. // Compute s = Dot(x,v) * 2/v^T*v.
  379. Real s = x[r]; // r = i+1, v[i+1] = 1
  380. for (int j = r + 1; j < mSize; ++j)
  381. {
  382. s += x[j] * column[mSize * j];
  383. }
  384. s *= twoinvvdv;
  385. y[r] = x[r] - s; // v[i+1] = 1
  386. // Compute the remaining components of y.
  387. for (++r; r < mSize; ++r)
  388. {
  389. y[r] = x[r] - s * column[mSize * r];
  390. }
  391. std::swap(x, y);
  392. }
  393. // The final product is stored in x.
  394. if (x != eigenvector)
  395. {
  396. size_t numBytes = mSize * sizeof(Real);
  397. std::memcpy(eigenvector, x, numBytes);
  398. }
  399. }
  400. }
  401. Real GetEigenvalue(int c) const
  402. {
  403. if (mSize > 0)
  404. {
  405. if (mPermutation[0] >= 0)
  406. {
  407. // Sorting was requested.
  408. return mDiagonal[mPermutation[c]];
  409. }
  410. else
  411. {
  412. // Sorting was not requested.
  413. return mDiagonal[c];
  414. }
  415. }
  416. else
  417. {
  418. return std::numeric_limits<Real>::max();
  419. }
  420. }
  421. private:
  422. // Tridiagonalize using Householder reflections. On input, mMatrix is
  423. // a copy of the input matrix. On output, the upper-triangular part
  424. // of mMatrix including the diagonal stores the tridiagonalization.
  425. // The lower-triangular part contains 2/Dot(v,v) that are used in
  426. // computing eigenvectors and the part below the subdiagonal stores
  427. // the essential parts of the Householder vectors v (the elements of
  428. // v after the leading 1-valued component).
  429. void Tridiagonalize()
  430. {
  431. int r, c;
  432. for (int i = 0, ip1 = 1; i < mSize - 2; ++i, ++ip1)
  433. {
  434. // Compute the Householder vector. Read the initial vector
  435. // from the row of the matrix.
  436. Real length = (Real)0;
  437. for (r = 0; r < ip1; ++r)
  438. {
  439. mVVector[r] = (Real)0;
  440. }
  441. for (r = ip1; r < mSize; ++r)
  442. {
  443. Real vr = mMatrix[r + mSize * i];
  444. mVVector[r] = vr;
  445. length += vr * vr;
  446. }
  447. Real vdv = (Real)1;
  448. length = std::sqrt(length);
  449. if (length > (Real)0)
  450. {
  451. Real& v1 = mVVector[ip1];
  452. Real sgn = (v1 >= (Real)0 ? (Real)1 : (Real)-1);
  453. Real invDenom = ((Real)1) / (v1 + sgn * length);
  454. v1 = (Real)1;
  455. for (r = ip1 + 1; r < mSize; ++r)
  456. {
  457. Real& vr = mVVector[r];
  458. vr *= invDenom;
  459. vdv += vr * vr;
  460. }
  461. }
  462. // Compute the rank-1 offsets v*w^T and w*v^T.
  463. Real invvdv = (Real)1 / vdv;
  464. Real twoinvvdv = invvdv * (Real)2;
  465. Real pdvtvdv = (Real)0;
  466. for (r = i; r < mSize; ++r)
  467. {
  468. mPVector[r] = (Real)0;
  469. for (c = i; c < r; ++c)
  470. {
  471. mPVector[r] += mMatrix[r + mSize * c] * mVVector[c];
  472. }
  473. for (/**/; c < mSize; ++c)
  474. {
  475. mPVector[r] += mMatrix[c + mSize * r] * mVVector[c];
  476. }
  477. mPVector[r] *= twoinvvdv;
  478. pdvtvdv += mPVector[r] * mVVector[r];
  479. }
  480. pdvtvdv *= invvdv;
  481. for (r = i; r < mSize; ++r)
  482. {
  483. mWVector[r] = mPVector[r] - pdvtvdv * mVVector[r];
  484. }
  485. // Update the input matrix.
  486. for (r = i; r < mSize; ++r)
  487. {
  488. Real vr = mVVector[r];
  489. Real wr = mWVector[r];
  490. Real offset = vr * wr * (Real)2;
  491. mMatrix[r + mSize * r] -= offset;
  492. for (c = r + 1; c < mSize; ++c)
  493. {
  494. offset = vr * mWVector[c] + wr * mVVector[c];
  495. mMatrix[c + mSize * r] -= offset;
  496. }
  497. }
  498. // Copy the vector to column i of the matrix. The 0-valued
  499. // components at indices 0 through i are not stored. The
  500. // 1-valued component at index i+1 is also not stored;
  501. // instead, the quantity 2/Dot(v,v) is stored for use in
  502. // eigenvector construction. That construction must take
  503. // into account the implied components that are not stored.
  504. mMatrix[i + mSize * ip1] = twoinvvdv;
  505. for (r = ip1 + 1; r < mSize; ++r)
  506. {
  507. mMatrix[i + mSize * r] = mVVector[r];
  508. }
  509. }
  510. // Copy the diagonal and subdiagonal entries for cache coherence
  511. // in the QR iterations.
  512. int k, ksup = mSize - 1, index = 0, delta = mSize + 1;
  513. for (k = 0; k < ksup; ++k, index += delta)
  514. {
  515. mDiagonal[k] = mMatrix[index];
  516. mSuperdiagonal[k] = mMatrix[index + 1];
  517. }
  518. mDiagonal[k] = mMatrix[index];
  519. }
  520. // A helper for generating Givens rotation sine and cosine robustly.
  521. void GetSinCos(Real x, Real y, Real& cs, Real& sn)
  522. {
  523. // Solves sn*x + cs*y = 0 robustly.
  524. Real tau;
  525. if (y != (Real)0)
  526. {
  527. if (std::fabs(y) > std::fabs(x))
  528. {
  529. tau = -x / y;
  530. sn = (Real)1 / std::sqrt((Real)1 + tau * tau);
  531. cs = sn * tau;
  532. }
  533. else
  534. {
  535. tau = -y / x;
  536. cs = (Real)1 / std::sqrt((Real)1 + tau * tau);
  537. sn = cs * tau;
  538. }
  539. }
  540. else
  541. {
  542. cs = (Real)1;
  543. sn = (Real)0;
  544. }
  545. }
  546. // The QR step with implicit shift. Generally, the initial T is
  547. // unreduced tridiagonal (all subdiagonal entries are nonzero). If a
  548. // QR step causes a superdiagonal entry to become zero, the matrix
  549. // decouples into a block diagonal matrix with two tridiagonal blocks.
  550. // These blocks can be reduced independently of each other, which
  551. // allows for parallelization of the algorithm. The inputs imin and
  552. // imax identify the subblock of T to be processed. That block has
  553. // upper-left element T(imin,imin) and lower-right element
  554. // T(imax,imax).
  555. void DoQRImplicitShift(int imin, int imax)
  556. {
  557. // The implicit shift. Compute the eigenvalue u of the
  558. // lower-right 2x2 block that is closer to a11.
  559. Real a00 = mDiagonal[imax];
  560. Real a01 = mSuperdiagonal[imax];
  561. Real a11 = mDiagonal[imax + 1];
  562. Real dif = (a00 - a11) * (Real)0.5;
  563. Real sgn = (dif >= (Real)0 ? (Real)1 : (Real)-1);
  564. Real a01sqr = a01 * a01;
  565. Real u = a11 - a01sqr / (dif + sgn * std::sqrt(dif * dif + a01sqr));
  566. Real x = mDiagonal[imin] - u;
  567. Real y = mSuperdiagonal[imin];
  568. Real a12, a22, a23, tmp11, tmp12, tmp21, tmp22, cs, sn;
  569. Real a02 = (Real)0;
  570. int i0 = imin - 1, i1 = imin, i2 = imin + 1;
  571. for (/**/; i1 <= imax; ++i0, ++i1, ++i2)
  572. {
  573. // Compute the Givens rotation and save it for use in
  574. // computing the eigenvectors.
  575. GetSinCos(x, y, cs, sn);
  576. mGivens.push_back(GivensRotation(i1, cs, sn));
  577. // Update the tridiagonal matrix. This amounts to updating a
  578. // 4x4 subblock,
  579. // b00 b01 b02 b03
  580. // b01 b11 b12 b13
  581. // b02 b12 b22 b23
  582. // b03 b13 b23 b33
  583. // The four corners (b00, b03, b33) do not change values. The
  584. // The interior block {{b11,b12},{b12,b22}} is updated on each
  585. // pass. For the first pass, the b0c values are out of range,
  586. // so only the values (b13, b23) change. For the last pass,
  587. // the br3 values are out of range, so only the values
  588. // (b01, b02) change. For passes between first and last, the
  589. // values (b01, b02, b13, b23) change.
  590. if (i1 > imin)
  591. {
  592. mSuperdiagonal[i0] = cs * mSuperdiagonal[i0] - sn * a02;
  593. }
  594. a11 = mDiagonal[i1];
  595. a12 = mSuperdiagonal[i1];
  596. a22 = mDiagonal[i2];
  597. tmp11 = cs * a11 - sn * a12;
  598. tmp12 = cs * a12 - sn * a22;
  599. tmp21 = sn * a11 + cs * a12;
  600. tmp22 = sn * a12 + cs * a22;
  601. mDiagonal[i1] = cs * tmp11 - sn * tmp12;
  602. mSuperdiagonal[i1] = sn * tmp11 + cs * tmp12;
  603. mDiagonal[i2] = sn * tmp21 + cs * tmp22;
  604. if (i1 < imax)
  605. {
  606. a23 = mSuperdiagonal[i2];
  607. a02 = -sn * a23;
  608. mSuperdiagonal[i2] = cs * a23;
  609. // Update the parameters for the next Givens rotation.
  610. x = mSuperdiagonal[i1];
  611. y = a02;
  612. }
  613. }
  614. }
  615. // Sort the eigenvalues and compute the corresponding permutation of
  616. // the indices of the array storing the eigenvalues. The permutation
  617. // is used for reordering the eigenvalues and eigenvectors in the
  618. // calls to GetEigenvalues(...) and GetEigenvectors(...).
  619. void ComputePermutation(int sortType)
  620. {
  621. // The number of Householder reflections is H = mSize - 2. If H
  622. // is even, the product of Householder reflections is a rotation;
  623. // otherwise, H is odd and the product is a reflection. The
  624. // number of Givens rotations does not influence the type of the
  625. // product of Householder reflections.
  626. mEigenvectorMatrixType = 1 - (mSize & 1);
  627. if (sortType == 0)
  628. {
  629. // Set a flag for GetEigenvalues() and GetEigenvectors() to
  630. // know that sorted output was not requested.
  631. mPermutation[0] = -1;
  632. return;
  633. }
  634. // Compute the permutation induced by sorting. Initially, we
  635. // start with the identity permutation I = (0,1,...,N-1).
  636. struct SortItem
  637. {
  638. Real eigenvalue;
  639. int index;
  640. };
  641. std::vector<SortItem> items(mSize);
  642. int i;
  643. for (i = 0; i < mSize; ++i)
  644. {
  645. items[i].eigenvalue = mDiagonal[i];
  646. items[i].index = i;
  647. }
  648. if (sortType > 0)
  649. {
  650. std::sort(items.begin(), items.end(),
  651. [](SortItem const& item0, SortItem const& item1)
  652. {
  653. return item0.eigenvalue < item1.eigenvalue;
  654. }
  655. );
  656. }
  657. else
  658. {
  659. std::sort(items.begin(), items.end(),
  660. [](SortItem const& item0, SortItem const& item1)
  661. {
  662. return item0.eigenvalue > item1.eigenvalue;
  663. }
  664. );
  665. }
  666. i = 0;
  667. for (auto const& item : items)
  668. {
  669. mPermutation[i++] = item.index;
  670. }
  671. // GetEigenvectors() has nontrivial code for computing the
  672. // orthogonal Q from the reflections and rotations. To avoid
  673. // complicating the code further when sorting is requested, Q is
  674. // computed as in the unsorted case. We then need to swap columns
  675. // of Q to be consistent with the sorting of the eigenvalues. To
  676. // minimize copying due to column swaps, we use permutation P.
  677. // The minimum number of transpositions to obtain P from I is N
  678. // minus the number of cycles of P. Each cycle is reordered with
  679. // a minimum number of transpositions; that is, the eigenitems are
  680. // cyclically swapped, leading to a minimum amount of copying.
  681. // For/ example, if there is a cycle i0 -> i1 -> i2 -> i3, then
  682. // the copying is
  683. // save = eigenitem[i0];
  684. // eigenitem[i1] = eigenitem[i2];
  685. // eigenitem[i2] = eigenitem[i3];
  686. // eigenitem[i3] = save;
  687. }
  688. // The number N of rows and columns of the matrices to be processed.
  689. int mSize;
  690. // The maximum number of iterations for reducing the tridiagonal
  691. // matrix to a diagonal matrix.
  692. unsigned int mMaxIterations;
  693. // The internal copy of a matrix passed to the solver. See the
  694. // comments about function Tridiagonalize() about what is stored in
  695. // the matrix.
  696. std::vector<Real> mMatrix; // NxN elements
  697. // After the initial tridiagonalization by Householder reflections, we
  698. // no longer need the full mMatrix. Copy the diagonal and
  699. // superdiagonal entries to linear arrays in order to be cache
  700. // friendly.
  701. std::vector<Real> mDiagonal; // N elements
  702. std::vector<Real> mSuperdiagonal; // N-1 elements
  703. // The Givens rotations used to reduce the initial tridiagonal matrix
  704. // to a diagonal matrix. A rotation is the identity with the
  705. // following replacement entries: R(index,index) = cs,
  706. // R(index,index+1) = sn, R(index+1,index) = -sn and
  707. // R(index+1,index+1) = cs. If N is the matrix size and K is the
  708. // maximum number of iterations, the maximum number of Givens
  709. // rotations is K*(N-1). The maximum amount of memory is allocated
  710. // to store these.
  711. struct GivensRotation
  712. {
  713. // No default initialization for fast creation of std::vector
  714. // of objects of this type.
  715. GivensRotation() = default;
  716. GivensRotation(int inIndex, Real inCs, Real inSn)
  717. :
  718. index(inIndex),
  719. cs(inCs),
  720. sn(inSn)
  721. {
  722. }
  723. int index;
  724. Real cs, sn;
  725. };
  726. std::vector<GivensRotation> mGivens; // K*(N-1) elements
  727. // When sorting is requested, the permutation associated with the sort
  728. // is stored in mPermutation. When sorting is not requested,
  729. // mPermutation[0] is set to -1. mVisited is used for finding cycles
  730. // in the permutation. mEigenvectorMatrixType is +1 if GetEigenvectors
  731. // returns a rotation matrix, 0 if GetEigenvectors returns a
  732. // reflection matrix or -1 if an input to the constructor or to
  733. // GetEigenvectors is invalid.
  734. std::vector<int> mPermutation; // N elements
  735. mutable std::vector<int> mVisited; // N elements
  736. mutable int mEigenvectorMatrixType;
  737. // Temporary storage to compute Householder reflections and to
  738. // support sorting of eigenvectors.
  739. mutable std::vector<Real> mPVector; // N elements
  740. mutable std::vector<Real> mVVector; // N elements
  741. mutable std::vector<Real> mWVector; // N elements
  742. };
  743. }