SingularValueDecomposition.h 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101
  1. // David Eberly, Geometric Tools, Redmond WA 98052
  2. // Copyright (c) 1998-2020
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // https://www.boost.org/LICENSE_1_0.txt
  5. // https://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
  6. // Version: 4.0.2019.08.13
  7. #pragma once
  8. #include <Mathematics/Math.h>
  9. #include <Mathematics/RangeIteration.h>
  10. #include <algorithm>
  11. #include <cstring>
  12. #include <vector>
  13. // The SingularValueDecomposition class is an implementation of Algorithm
  14. // 8.3.2 (The SVD Algorithm) described in "Matrix Computations, 2nd
  15. // edition" by G. H. Golub and Charles F. Van Loan, The Johns Hopkins
  16. // Press, Baltimore MD, Fourth Printing 1993. Algorithm 5.4.2 (Householder
  17. // Bidiagonalization) is used to reduce A to bidiagonal B. Algorithm 8.3.1
  18. // (Golub-Kahan SVD Step) is used for the iterative reduction from bidiagonal
  19. // to diagonal. If A is the original matrix, S is the matrix whose diagonal
  20. // entries are the singular values, and U and V are corresponding matrices,
  21. // then theoretically U^T*A*V = S. Numerically, we have errors
  22. // E = U^T*A*V - S. Algorithm 8.3.2 mentions that one expects |E| is
  23. // approximately u*|A|, where |M| denotes the Frobenius norm of M and where
  24. // u is the unit roundoff for the floating-point arithmetic: 2^{-23} for
  25. // 'float', which is FLT_EPSILON = 1.192092896e-7f, and 2^{-52} for'double',
  26. // which is DBL_EPSILON = 2.2204460492503131e-16.
  27. //
  28. // The condition |a(i,i+1)| <= epsilon*(|a(i,i) + a(i+1,i+1)|) used to
  29. // determine when the reduction decouples to smaller problems is implemented
  30. // as: sum = |a(i,i)| + |a(i+1,i+1)|; sum + |a(i,i+1)| == sum. The idea is
  31. // that the superdiagonal term is small relative to its diagonal neighbors,
  32. // and so it is effectively zero. The unit tests have shown that this
  33. // interpretation of decoupling is effective.
  34. //
  35. // The condition |a(i,i)| <= epsilon*|B| used to determine when the
  36. // reduction decouples (with a zero singular value) is implemented using
  37. // the Frobenius norm of B and epsilon = multiplier*u, where for now the
  38. // multiplier is hard-coded in Solve(...) as 8.
  39. //
  40. // The authors suggest that once you have the bidiagonal matrix, a practical
  41. // implementation will store the diagonal and superdiagonal entries in linear
  42. // arrays, ignoring the theoretically zero values not in the 2-band. This is
  43. // good for cache coherence, and we have used the suggestion. The essential
  44. // parts of the Householder u-vectors are stored in the lower-triangular
  45. // portion of the matrix and the essential parts of the Householder v-vectors
  46. // are stored in the upper-triangular portion of the matrix. To avoid having
  47. // to recompute 2/Dot(u,u) and 2/Dot(v,v) when constructing orthogonal U and
  48. // V, we store these quantities in additional memory during bidiagonalization.
  49. //
  50. // For matrices with randomly generated values in [0,1], the unit tests
  51. // produce the following information for N-by-N matrices.
  52. //
  53. // N |A| |E| |E|/|A| iterations
  54. // -------------------------------------------
  55. // 2 1.4831 4.1540e-16 2.8007e-16 1
  56. // 3 2.1065 3.5024e-16 1.6626e-16 4
  57. // 4 2.4979 7.4605e-16 2.9867e-16 6
  58. // 5 3.6591 1.8305e-15 5.0025e-16 9
  59. // 6 4.0572 2.0571e-15 5.0702e-16 10
  60. // 7 4.7745 2.9057e-15 6.0859e-16 12
  61. // 8 5.1964 2.7958e-15 5.3803e-16 13
  62. // 9 5.7599 3.3128e-15 5.7514e-16 16
  63. // 10 6.2700 3.7209e-15 5.9344e-16 16
  64. // 11 6.8220 5.0580e-15 7.4142e-16 18
  65. // 12 7.4540 5.2493e-15 7.0422e-16 21
  66. // 13 8.1225 5.6043e-15 6.8997e-16 24
  67. // 14 8.5883 5.8553e-15 6.8177e-16 26
  68. // 15 9.1337 6.9663e-15 7.6270e-16 27
  69. // 16 9.7884 9.1358e-15 9.3333e-16 29
  70. // 17 10.2407 8.2715e-15 8.0771e-16 34
  71. // 18 10.7147 8.9748e-15 8.3761e-16 33
  72. // 19 11.1887 1.0094e-14 9.0220e-16 32
  73. // 20 11.7739 9.7000e-15 8.2386e-16 35
  74. // 21 12.2822 1.1217e-14 9.1329e-16 36
  75. // 22 12.7649 1.1071e-14 8.6732e-16 37
  76. // 23 13.3366 1.1271e-14 8.4513e-16 41
  77. // 24 13.8505 1.2806e-14 9.2460e-16 43
  78. // 25 14.4332 1.3081e-14 9.0637e-16 43
  79. // 26 14.9301 1.4882e-14 9.9680e-16 46
  80. // 27 15.5214 1.5571e-14 1.0032e-15 48
  81. // 28 16.1029 1.7553e-14 1.0900e-15 49
  82. // 29 16.6407 1.6219e-14 9.7467e-16 53
  83. // 30 17.1891 1.8560e-14 1.0797e-15 55
  84. // 31 17.7773 1.8522e-14 1.0419e-15 56
  85. //
  86. // The singularvalues and |E|/|A| values were compared to those generated by
  87. // Mathematica Version 9.0; Wolfram Research, Inc., Champaign IL, 2012.
  88. // The results were all comparable with singular values agreeing to a large
  89. // number of decimal places.
  90. //
  91. // Timing on an Intel (R) Core (TM) i7-3930K CPU @ 3.20 GHZ (in seconds)
  92. // for NxN matrices:
  93. //
  94. // N |E|/|A| iters bidiag QR U-and-V comperr
  95. // -------------------------------------------------------
  96. // 512 3.8632e-15 848 0.341 0.016 1.844 2.203
  97. // 1024 5.6456e-15 1654 4.279 0.032 18.765 20.844
  98. // 2048 7.5499e-15 3250 40.421 0.141 186.607 213.216
  99. //
  100. // where iters is the number of QR steps taken, bidiag is the computation
  101. // of the Householder reflection vectors, U-and-V is the composition of
  102. // Householder reflections and Givens rotations to obtain the orthogonal
  103. // matrices of the decomposigion, and comperr is the computation E =
  104. // U^T*A*V - S.
  105. namespace WwiseGTE
  106. {
  107. template <typename Real>
  108. class SingularValueDecomposition
  109. {
  110. public:
  111. // The solver processes MxN symmetric matrices, where M >= N > 1
  112. // ('numRows' is M and 'numCols' is N) and the matrix is stored in
  113. // row-major order. The maximum number of iterations
  114. // ('maxIterations') must be specified for the reduction of a
  115. // bidiagonal matrix to a diagonal matrix. The goal is to compute
  116. // MxM orthogonal U, NxN orthogonal V and MxN matrix S for which
  117. // U^T*A*V = S. The only nonzero entries of S are on the diagonal;
  118. // the diagonal entries are the singular values of the original
  119. // matrix.
  120. SingularValueDecomposition(int numRows, int numCols, unsigned int maxIterations)
  121. :
  122. mNumRows(0),
  123. mNumCols(0),
  124. mMaxIterations(0)
  125. {
  126. if (numCols > 1 && numRows >= numCols && maxIterations > 0)
  127. {
  128. mNumRows = numRows;
  129. mNumCols = numCols;
  130. mMaxIterations = maxIterations;
  131. mMatrix.resize(numRows * numCols);
  132. mDiagonal.resize(numCols);
  133. mSuperdiagonal.resize(numCols - 1);
  134. mRGivens.reserve(maxIterations * (numCols - 1));
  135. mLGivens.reserve(maxIterations * (numCols - 1));
  136. mFixupDiagonal.resize(numCols);
  137. mPermutation.resize(numCols);
  138. mVisited.resize(numCols);
  139. mTwoInvUTU.resize(numCols);
  140. mTwoInvVTV.resize(numCols - 2);
  141. mUVector.resize(numRows);
  142. mVVector.resize(numCols);
  143. mWVector.resize(numRows);
  144. }
  145. }
  146. // A copy of the MxN input is made internally. The order of the
  147. // singular values is specified by sortType: -1 (decreasing),
  148. // 0 (no sorting), or +1 (increasing). When sorted, the columns of
  149. // the orthogonal matrices are ordered accordingly. The return value
  150. // is the number of iterations consumed when convergence occurred,
  151. // 0xFFFFFFFF when convergence did not occur or 0 when N <= 1 or
  152. // M < N was passed to the constructor.
  153. unsigned int Solve(Real const* input, int sortType)
  154. {
  155. if (mNumRows > 0)
  156. {
  157. int numElements = mNumRows * mNumCols;
  158. std::copy(input, input + numElements, mMatrix.begin());
  159. Bidiagonalize();
  160. // Compute 'threshold = multiplier*epsilon*|B|' as the
  161. // threshold for diagonal entries effectively zero; that is,
  162. // |d| <= |threshold| implies that d is (effectively) zero.
  163. // TODO: Allow the caller to pass 'multiplier' to the
  164. // constructor.
  165. //
  166. // We will use the L2-norm |B|, which is the length of the
  167. // elements of B treated as an NM-tuple. The following code
  168. // avoids overflow when accumulating the squares of the
  169. // elements when those elements are large.
  170. Real maxAbsComp = std::fabs(input[0]);
  171. for (int i = 1; i < numElements; ++i)
  172. {
  173. Real absComp = std::fabs(input[i]);
  174. if (absComp > maxAbsComp)
  175. {
  176. maxAbsComp = absComp;
  177. }
  178. }
  179. Real norm = (Real)0;
  180. if (maxAbsComp > (Real)0)
  181. {
  182. Real invMaxAbsComp = ((Real)1) / maxAbsComp;
  183. for (int i = 0; i < numElements; ++i)
  184. {
  185. Real ratio = input[i] * invMaxAbsComp;
  186. norm += ratio * ratio;
  187. }
  188. norm = maxAbsComp * std::sqrt(norm);
  189. }
  190. Real const multiplier = (Real)8; // TODO: Expose to caller.
  191. Real const epsilon = std::numeric_limits<Real>::epsilon();
  192. Real const threshold = multiplier * epsilon * norm;
  193. mRGivens.clear();
  194. mLGivens.clear();
  195. for (unsigned int j = 0; j < mMaxIterations; ++j)
  196. {
  197. int imin = -1, imax = -1;
  198. for (int i = mNumCols - 2; i >= 0; --i)
  199. {
  200. // When a01 is much smaller than its diagonal
  201. // neighbors, it is effectively zero.
  202. Real a00 = mDiagonal[i];
  203. Real a01 = mSuperdiagonal[i];
  204. Real a11 = mDiagonal[i + 1];
  205. Real sum = std::fabs(a00) + std::fabs(a11);
  206. if (sum + std::fabs(a01) != sum)
  207. {
  208. if (imax == -1)
  209. {
  210. imax = i;
  211. }
  212. imin = i;
  213. }
  214. else
  215. {
  216. // The superdiagonal term is effectively zero
  217. // compared to the neighboring diagonal terms.
  218. if (imin >= 0)
  219. {
  220. break;
  221. }
  222. }
  223. }
  224. if (imax == -1)
  225. {
  226. // The algorithm has converged.
  227. EnsureNonnegativeDiagonal();
  228. ComputePermutation(sortType);
  229. return j;
  230. }
  231. // We need to test diagonal entries of B for zero. For
  232. // each zero diagonal entry, zero the superdiagonal.
  233. if (DiagonalEntriesNonzero(imin, imax, threshold))
  234. {
  235. // Process the lower-right-most unreduced bidiagonal
  236. // block.
  237. DoGolubKahanStep(imin, imax);
  238. }
  239. }
  240. return 0xFFFFFFFF;
  241. }
  242. else
  243. {
  244. return 0;
  245. }
  246. }
  247. // Get the singular values of the matrix passed to Solve(...). The
  248. // input 'singularValues' must have N elements.
  249. void GetSingularValues(Real* singularValues) const
  250. {
  251. if (singularValues && mNumCols > 0)
  252. {
  253. if (mPermutation[0] >= 0)
  254. {
  255. // Sorting was requested.
  256. for (int i = 0; i < mNumCols; ++i)
  257. {
  258. int p = mPermutation[i];
  259. singularValues[i] = mDiagonal[p];
  260. }
  261. }
  262. else
  263. {
  264. // Sorting was not requested.
  265. size_t numBytes = mNumCols * sizeof(Real);
  266. std::memcpy(singularValues, &mDiagonal[0], numBytes);
  267. }
  268. }
  269. }
  270. // Accumulate the Householder reflections, the Givens rotations, and
  271. // the diagonal fix-up matrix to compute the orthogonal matrices U and
  272. // V for which U^T*A*V = S. The input uMatrix must be MxM and the
  273. // input vMatrix must be NxN, both stored in row-major order.
  274. void GetU(Real* uMatrix) const
  275. {
  276. if (!uMatrix || mNumCols == 0)
  277. {
  278. // Invalid input or the constructor failed.
  279. return;
  280. }
  281. // Start with the identity matrix.
  282. std::fill(uMatrix, uMatrix + mNumRows * mNumRows, (Real)0);
  283. for (int d = 0; d < mNumRows; ++d)
  284. {
  285. uMatrix[d + mNumRows * d] = (Real)1;
  286. }
  287. // Multiply the Householder reflections using backward
  288. // accumulation.
  289. int r, c;
  290. for (int i0 = mNumCols - 1, i1 = i0 + 1; i0 >= 0; --i0, --i1)
  291. {
  292. // Copy the u vector and 2/Dot(u,u) from the matrix.
  293. Real twoinvudu = mTwoInvUTU[i0];
  294. Real const* column = &mMatrix[i0];
  295. mUVector[i0] = (Real)1;
  296. for (r = i1; r < mNumRows; ++r)
  297. {
  298. mUVector[r] = column[mNumCols * r];
  299. }
  300. // Compute the w vector.
  301. mWVector[i0] = twoinvudu;
  302. for (r = i1; r < mNumRows; ++r)
  303. {
  304. mWVector[r] = (Real)0;
  305. for (c = i1; c < mNumRows; ++c)
  306. {
  307. mWVector[r] += mUVector[c] * uMatrix[r + mNumRows * c];
  308. }
  309. mWVector[r] *= twoinvudu;
  310. }
  311. // Update the matrix, U <- U - u*w^T.
  312. for (r = i0; r < mNumRows; ++r)
  313. {
  314. for (c = i0; c < mNumRows; ++c)
  315. {
  316. uMatrix[c + mNumRows * r] -= mUVector[r] * mWVector[c];
  317. }
  318. }
  319. }
  320. // Multiply the Givens rotations.
  321. for (auto const& givens : mLGivens)
  322. {
  323. int j0 = givens.index0;
  324. int j1 = givens.index1;
  325. for (r = 0; r < mNumRows; ++r, j0 += mNumRows, j1 += mNumRows)
  326. {
  327. Real& q0 = uMatrix[j0];
  328. Real& q1 = uMatrix[j1];
  329. Real prd0 = givens.cs * q0 - givens.sn * q1;
  330. Real prd1 = givens.sn * q0 + givens.cs * q1;
  331. q0 = prd0;
  332. q1 = prd1;
  333. }
  334. }
  335. if (mPermutation[0] >= 0)
  336. {
  337. // Sorting was requested.
  338. std::fill(mVisited.begin(), mVisited.end(), 0);
  339. for (c = 0; c < mNumCols; ++c)
  340. {
  341. if (mVisited[c] == 0 && mPermutation[c] != c)
  342. {
  343. // The item starts a cycle with 2 or more elements.
  344. int start = c, current = c, next;
  345. for (r = 0; r < mNumRows; ++r)
  346. {
  347. mWVector[r] = uMatrix[c + mNumRows * r];
  348. }
  349. while ((next = mPermutation[current]) != start)
  350. {
  351. mVisited[current] = 1;
  352. for (r = 0; r < mNumRows; ++r)
  353. {
  354. uMatrix[current + mNumRows * r] =
  355. uMatrix[next + mNumRows * r];
  356. }
  357. current = next;
  358. }
  359. mVisited[current] = 1;
  360. for (r = 0; r < mNumRows; ++r)
  361. {
  362. uMatrix[current + mNumRows * r] = mWVector[r];
  363. }
  364. }
  365. }
  366. }
  367. }
  368. void GetV(Real* vMatrix) const
  369. {
  370. if (!vMatrix || mNumCols == 0)
  371. {
  372. // Invalid input or the constructor failed.
  373. return;
  374. }
  375. // Start with the identity matrix.
  376. std::fill(vMatrix, vMatrix + mNumCols * mNumCols, (Real)0);
  377. for (int d = 0; d < mNumCols; ++d)
  378. {
  379. vMatrix[d + mNumCols * d] = (Real)1;
  380. }
  381. // Multiply the Householder reflections using backward accumulation.
  382. int i0 = mNumCols - 3;
  383. int i1 = i0 + 1;
  384. int i2 = i0 + 2;
  385. int r, c;
  386. for (/**/; i0 >= 0; --i0, --i1, --i2)
  387. {
  388. // Copy the v vector and 2/Dot(v,v) from the matrix.
  389. Real twoinvvdv = mTwoInvVTV[i0];
  390. Real const* row = &mMatrix[mNumCols * i0];
  391. mVVector[i1] = (Real)1;
  392. for (r = i2; r < mNumCols; ++r)
  393. {
  394. mVVector[r] = row[r];
  395. }
  396. // Compute the w vector.
  397. mWVector[i1] = twoinvvdv;
  398. for (r = i2; r < mNumCols; ++r)
  399. {
  400. mWVector[r] = (Real)0;
  401. for (c = i2; c < mNumCols; ++c)
  402. {
  403. mWVector[r] += mVVector[c] * vMatrix[r + mNumCols * c];
  404. }
  405. mWVector[r] *= twoinvvdv;
  406. }
  407. // Update the matrix, V <- V - v*w^T.
  408. for (r = i1; r < mNumCols; ++r)
  409. {
  410. for (c = i1; c < mNumCols; ++c)
  411. {
  412. vMatrix[c + mNumCols * r] -= mVVector[r] * mWVector[c];
  413. }
  414. }
  415. }
  416. // Multiply the Givens rotations.
  417. for (auto const& givens : mRGivens)
  418. {
  419. int j0 = givens.index0;
  420. int j1 = givens.index1;
  421. for (c = 0; c < mNumCols; ++c, j0 += mNumCols, j1 += mNumCols)
  422. {
  423. Real& q0 = vMatrix[j0];
  424. Real& q1 = vMatrix[j1];
  425. Real prd0 = givens.cs * q0 - givens.sn * q1;
  426. Real prd1 = givens.sn * q0 + givens.cs * q1;
  427. q0 = prd0;
  428. q1 = prd1;
  429. }
  430. }
  431. // Fix-up the diagonal.
  432. for (r = 0; r < mNumCols; ++r)
  433. {
  434. for (c = 0; c < mNumCols; ++c)
  435. {
  436. vMatrix[c + mNumCols * r] *= mFixupDiagonal[c];
  437. }
  438. }
  439. if (mPermutation[0] >= 0)
  440. {
  441. // Sorting was requested.
  442. std::fill(mVisited.begin(), mVisited.end(), 0);
  443. for (c = 0; c < mNumCols; ++c)
  444. {
  445. if (mVisited[c] == 0 && mPermutation[c] != c)
  446. {
  447. // The item starts a cycle with 2 or more elements.
  448. int start = c, current = c, next;
  449. for (r = 0; r < mNumCols; ++r)
  450. {
  451. mWVector[r] = vMatrix[c + mNumCols * r];
  452. }
  453. while ((next = mPermutation[current]) != start)
  454. {
  455. mVisited[current] = 1;
  456. for (r = 0; r < mNumCols; ++r)
  457. {
  458. vMatrix[current + mNumCols * r] =
  459. vMatrix[next + mNumCols * r];
  460. }
  461. current = next;
  462. }
  463. mVisited[current] = 1;
  464. for (r = 0; r < mNumCols; ++r)
  465. {
  466. vMatrix[current + mNumCols * r] = mWVector[r];
  467. }
  468. }
  469. }
  470. }
  471. }
  472. // Compute a single column of U or V. The reflections and rotations
  473. // are applied incrementally. This is useful when you want only a
  474. // small number of the singular values or vectors.
  475. void GetUColumn(int index, Real* uColumn) const
  476. {
  477. if (0 <= index && index < mNumRows)
  478. {
  479. // y = H*x, then x and y are swapped for the next H
  480. Real* x = uColumn;
  481. Real* y = &mWVector[0];
  482. // Start with the Euclidean basis vector.
  483. std::memset(x, 0, mNumRows * sizeof(Real));
  484. if (index < mNumCols && mPermutation[0] >= 0)
  485. {
  486. // Sorting was requested.
  487. x[mPermutation[index]] = (Real)1;
  488. }
  489. else
  490. {
  491. x[index] = (Real)1;
  492. }
  493. // Apply the Givens rotations.
  494. for (auto const& givens : WwiseGTE::reverse(mLGivens))
  495. {
  496. Real& xr0 = x[givens.index0];
  497. Real& xr1 = x[givens.index1];
  498. Real tmp0 = givens.cs * xr0 + givens.sn * xr1;
  499. Real tmp1 = -givens.sn * xr0 + givens.cs * xr1;
  500. xr0 = tmp0;
  501. xr1 = tmp1;
  502. }
  503. // Apply the Householder reflections.
  504. for (int c = mNumCols - 1; c >= 0; --c)
  505. {
  506. // Get the Householder vector u.
  507. int r;
  508. for (r = 0; r < c; ++r)
  509. {
  510. y[r] = x[r];
  511. }
  512. // Compute s = Dot(x,u) * 2/u^T*u.
  513. Real s = x[r]; // r = c, u[r] = 1
  514. for (int j = r + 1; j < mNumRows; ++j)
  515. {
  516. s += x[j] * mMatrix[c + mNumCols * j];
  517. }
  518. s *= mTwoInvUTU[c];
  519. // r = c, y[r] = x[r]*u[r] - s = x[r] - s because u[r] = 1
  520. y[r] = x[r] - s;
  521. // Compute the remaining components of y.
  522. for (++r; r < mNumRows; ++r)
  523. {
  524. y[r] = x[r] - s * mMatrix[c + mNumCols * r];
  525. }
  526. std::swap(x, y);
  527. }
  528. // The final product is stored in x.
  529. if (x != uColumn)
  530. {
  531. size_t numBytes = mNumRows * sizeof(Real);
  532. std::memcpy(uColumn, x, numBytes);
  533. }
  534. }
  535. }
  536. void GetVColumn(int index, Real* vColumn) const
  537. {
  538. if (0 <= index && index < mNumCols)
  539. {
  540. // y = H*x, then x and y are swapped for the next H
  541. Real* x = vColumn;
  542. Real* y = &mWVector[0];
  543. // Start with the Euclidean basis vector.
  544. std::memset(x, 0, mNumCols * sizeof(Real));
  545. if (mPermutation[0] >= 0)
  546. {
  547. // Sorting was requested.
  548. int p = mPermutation[index];
  549. x[p] = mFixupDiagonal[p];
  550. }
  551. else
  552. {
  553. x[index] = mFixupDiagonal[index];
  554. }
  555. // Apply the Givens rotations.
  556. for (auto const& givens : WwiseGTE::reverse(mRGivens))
  557. {
  558. Real& xr0 = x[givens.index0];
  559. Real& xr1 = x[givens.index1];
  560. Real tmp0 = givens.cs * xr0 + givens.sn * xr1;
  561. Real tmp1 = -givens.sn * xr0 + givens.cs * xr1;
  562. xr0 = tmp0;
  563. xr1 = tmp1;
  564. }
  565. // Apply the Householder reflections.
  566. for (int r = mNumCols - 3; r >= 0; --r)
  567. {
  568. // Get the Householder vector v.
  569. int c;
  570. for (c = 0; c < r + 1; ++c)
  571. {
  572. y[c] = x[c];
  573. }
  574. // Compute s = Dot(x,v) * 2/v^T*v.
  575. Real s = x[c]; // c = r+1, v[c] = 1
  576. for (int j = c + 1; j < mNumCols; ++j)
  577. {
  578. s += x[j] * mMatrix[j + mNumCols * r];
  579. }
  580. s *= mTwoInvVTV[r];
  581. // c = r+1, y[c] = x[c]*v[c] - s = x[c] - s
  582. // because v[c] = 1
  583. y[c] = x[c] - s;
  584. // Compute the remaining components of y.
  585. for (++c; c < mNumCols; ++c)
  586. {
  587. y[c] = x[c] - s * mMatrix[c + mNumCols * r];
  588. }
  589. std::swap(x, y);
  590. }
  591. // The final product is stored in x.
  592. if (x != vColumn)
  593. {
  594. size_t numBytes = mNumCols * sizeof(Real);
  595. std::memcpy(vColumn, x, numBytes);
  596. }
  597. }
  598. }
  599. Real GetSingularValue(int index) const
  600. {
  601. if (0 <= index && index < mNumCols)
  602. {
  603. if (mPermutation[0] >= 0)
  604. {
  605. // Sorting was requested.
  606. return mDiagonal[mPermutation[index]];
  607. }
  608. else
  609. {
  610. // Sorting was not requested.
  611. return mDiagonal[index];
  612. }
  613. }
  614. else
  615. {
  616. return (Real)0;
  617. }
  618. }
  619. private:
  620. // Bidiagonalize using Householder reflections. On input, mMatrix is
  621. // a copy of the input matrix and has one extra row. On output, the
  622. // diagonal and superdiagonal contain the bidiagonalized results. The
  623. // lower-triangular portion stores the essential parts of the
  624. // Householder u vectors (the elements of u after the leading 1-valued
  625. // component) and the upper-triangular portion stores the essential
  626. // parts of the Householder v vectors. To avoid recomputing
  627. // 2/Dot(u,u) and 2/Dot(v,v), these quantities are stored in
  628. // mTwoInvUTU and mTwoInvVTV.
  629. void Bidiagonalize()
  630. {
  631. int r, c;
  632. for (int i = 0, ip1 = 1; i < mNumCols; ++i, ++ip1)
  633. {
  634. // Compute the U-Householder vector.
  635. Real length = (Real)0;
  636. for (r = i; r < mNumRows; ++r)
  637. {
  638. Real ur = mMatrix[i + mNumCols * r];
  639. mUVector[r] = ur;
  640. length += ur * ur;
  641. }
  642. Real udu = (Real)1;
  643. length = std::sqrt(length);
  644. if (length > (Real)0)
  645. {
  646. Real& u1 = mUVector[i];
  647. Real sgn = (u1 >= (Real)0 ? (Real)1 : (Real)-1);
  648. Real invDenom = (Real)1 / (u1 + sgn * length);
  649. u1 = (Real)1;
  650. for (r = ip1; r < mNumRows; ++r)
  651. {
  652. Real& ur = mUVector[r];
  653. ur *= invDenom;
  654. udu += ur * ur;
  655. }
  656. }
  657. // Compute the rank-1 offset u*w^T.
  658. Real invudu = (Real)1 / udu;
  659. Real twoinvudu = invudu * (Real)2;
  660. for (c = i; c < mNumCols; ++c)
  661. {
  662. mWVector[c] = (Real)0;
  663. for (r = i; r < mNumRows; ++r)
  664. {
  665. mWVector[c] += mMatrix[c + mNumCols * r] * mUVector[r];
  666. }
  667. mWVector[c] *= twoinvudu;
  668. }
  669. // Update the input matrix.
  670. for (r = i; r < mNumRows; ++r)
  671. {
  672. for (c = i; c < mNumCols; ++c)
  673. {
  674. mMatrix[c + mNumCols * r] -= mUVector[r] * mWVector[c];
  675. }
  676. }
  677. if (i < mNumCols - 2)
  678. {
  679. // Compute the V-Householder vectors.
  680. length = (Real)0;
  681. for (c = ip1; c < mNumCols; ++c)
  682. {
  683. Real vc = mMatrix[c + mNumCols * i];
  684. mVVector[c] = vc;
  685. length += vc * vc;
  686. }
  687. Real vdv = (Real)1;
  688. length = std::sqrt(length);
  689. if (length > (Real)0)
  690. {
  691. Real& v1 = mVVector[ip1];
  692. Real sgn = (v1 >= (Real)0 ? (Real)1 : (Real)-1);
  693. Real invDenom = (Real)1 / (v1 + sgn * length);
  694. v1 = (Real)1;
  695. for (c = ip1 + 1; c < mNumCols; ++c)
  696. {
  697. Real& vc = mVVector[c];
  698. vc *= invDenom;
  699. vdv += vc * vc;
  700. }
  701. }
  702. // Compute the rank-1 offset w*v^T.
  703. Real invvdv = (Real)1 / vdv;
  704. Real twoinvvdv = invvdv * (Real)2;
  705. for (r = i; r < mNumRows; ++r)
  706. {
  707. mWVector[r] = (Real)0;
  708. for (c = ip1; c < mNumCols; ++c)
  709. {
  710. mWVector[r] += mMatrix[c + mNumCols * r] * mVVector[c];
  711. }
  712. mWVector[r] *= twoinvvdv;
  713. }
  714. // Update the input matrix.
  715. for (r = i; r < mNumRows; ++r)
  716. {
  717. for (c = ip1; c < mNumCols; ++c)
  718. {
  719. mMatrix[c + mNumCols * r] -= mWVector[r] * mVVector[c];
  720. }
  721. }
  722. mTwoInvVTV[i] = twoinvvdv;
  723. for (c = i + 2; c < mNumCols; ++c)
  724. {
  725. mMatrix[c + mNumCols * i] = mVVector[c];
  726. }
  727. }
  728. mTwoInvUTU[i] = twoinvudu;
  729. for (r = ip1; r < mNumRows; ++r)
  730. {
  731. mMatrix[i + mNumCols * r] = mUVector[r];
  732. }
  733. }
  734. // Copy the diagonal and subdiagonal for cache coherence in the
  735. // Golub-Kahan iterations.
  736. int k, ksup = mNumCols - 1, index = 0, delta = mNumCols + 1;
  737. for (k = 0; k < ksup; ++k, index += delta)
  738. {
  739. mDiagonal[k] = mMatrix[index];
  740. mSuperdiagonal[k] = mMatrix[index + 1];
  741. }
  742. mDiagonal[k] = mMatrix[index];
  743. }
  744. // A helper for generating Givens rotation sine and cosine robustly.
  745. void GetSinCos(Real x, Real y, Real& cs, Real& sn)
  746. {
  747. // Solves sn*x + cs*y = 0 robustly.
  748. Real tau;
  749. if (y != (Real)0)
  750. {
  751. if (std::fabs(y) > std::fabs(x))
  752. {
  753. tau = -x / y;
  754. sn = (Real)1 / std::sqrt((Real)1 + tau * tau);
  755. cs = sn * tau;
  756. }
  757. else
  758. {
  759. tau = -y / x;
  760. cs = (Real)1 / std::sqrt((Real)1 + tau * tau);
  761. sn = cs * tau;
  762. }
  763. }
  764. else
  765. {
  766. cs = (Real)1;
  767. sn = (Real)0;
  768. }
  769. }
  770. // Test for (effectively) zero-valued diagonal entries through all
  771. // but the last. For each such entry, the B matrix decouples. Perform
  772. // that decoupling. If there are no zero-valued entries, then the
  773. // Golub-Kahan step must be performed.
  774. bool DiagonalEntriesNonzero(int imin, int imax, Real threshold)
  775. {
  776. for (int i = imin; i <= imax; ++i)
  777. {
  778. if (std::fabs(mDiagonal[i]) <= threshold)
  779. {
  780. // Use planar rotations to case the superdiagonal entry
  781. // out of the matrix, thus producing a row of zeros.
  782. Real x, z, cs, sn;
  783. Real y = mSuperdiagonal[i];
  784. mSuperdiagonal[i] = (Real)0;
  785. for (int j = i + 1; j <= imax + 1; ++j)
  786. {
  787. x = mDiagonal[j];
  788. GetSinCos(x, y, cs, sn);
  789. mLGivens.push_back(GivensRotation(i, j, cs, sn));
  790. mDiagonal[j] = cs * x - sn * y;
  791. if (j <= imax)
  792. {
  793. z = mSuperdiagonal[j];
  794. mSuperdiagonal[j] = cs * z;
  795. y = sn * z;
  796. }
  797. }
  798. return false;
  799. }
  800. }
  801. return true;
  802. }
  803. // This is Algorithm 8.3.1 in "Matrix Computations, 2nd edition" by
  804. // G. H. Golub and C. F. Van Loan.
  805. void DoGolubKahanStep(int imin, int imax)
  806. {
  807. // The implicit shift. Compute the eigenvalue u of the
  808. // lower-right 2x2 block of A = B^T*B that is closer to b11.
  809. Real f0 = (imax >= (Real)1 ? mSuperdiagonal[imax - 1] : (Real)0);
  810. Real d1 = mDiagonal[imax];
  811. Real f1 = mSuperdiagonal[imax];
  812. Real d2 = mDiagonal[imax + 1];
  813. Real a00 = d1 * d1 + f0 * f0;
  814. Real a01 = d1 * f1;
  815. Real a11 = d2 * d2 + f1 * f1;
  816. Real dif = (a00 - a11) * (Real)0.5;
  817. Real sgn = (dif >= (Real)0 ? (Real)1 : (Real)-1);
  818. Real a01sqr = a01 * a01;
  819. Real u = a11 - a01sqr / (dif + sgn * std::sqrt(dif * dif + a01sqr));
  820. Real x = mDiagonal[imin] * mDiagonal[imin] - u;
  821. Real y = mDiagonal[imin] * mSuperdiagonal[imin];
  822. Real a12, a21, a22, a23, cs, sn;
  823. Real a02 = (Real)0;
  824. int i0 = imin - 1, i1 = imin, i2 = imin + 1;
  825. for (/**/; i1 <= imax; ++i0, ++i1, ++i2)
  826. {
  827. // Compute the Givens rotation G and save it for use in
  828. // computing V in U^T*A*V = S.
  829. GetSinCos(x, y, cs, sn);
  830. mRGivens.push_back(GivensRotation(i1, i2, cs, sn));
  831. // Update B0 = B*G.
  832. if (i1 > imin)
  833. {
  834. mSuperdiagonal[i0] = cs * mSuperdiagonal[i0] - sn * a02;
  835. }
  836. a11 = mDiagonal[i1];
  837. a12 = mSuperdiagonal[i1];
  838. a22 = mDiagonal[i2];
  839. mDiagonal[i1] = cs * a11 - sn * a12;
  840. mSuperdiagonal[i1] = sn * a11 + cs * a12;
  841. mDiagonal[i2] = cs * a22;
  842. a21 = -sn * a22;
  843. // Update the parameters for the next Givens rotations.
  844. x = mDiagonal[i1];
  845. y = a21;
  846. // Compute the Givens rotation G and save it for use in
  847. // computing U in U^T*A*V = S.
  848. GetSinCos(x, y, cs, sn);
  849. mLGivens.push_back(GivensRotation(i1, i2, cs, sn));
  850. // Update B1 = G^T*B0.
  851. a11 = mDiagonal[i1];
  852. a12 = mSuperdiagonal[i1];
  853. a22 = mDiagonal[i2];
  854. mDiagonal[i1] = cs * a11 - sn * a21;
  855. mSuperdiagonal[i1] = cs * a12 - sn * a22;
  856. mDiagonal[i2] = sn * a12 + cs * a22;
  857. if (i1 < imax)
  858. {
  859. a23 = mSuperdiagonal[i2];
  860. a02 = -sn * a23;
  861. mSuperdiagonal[i2] = cs * a23;
  862. // Update the parameters for the next Givens rotations.
  863. x = mSuperdiagonal[i1];
  864. y = a02;
  865. }
  866. }
  867. }
  868. // The diagonal entries are not guaranteed to be nonnegative during
  869. // the construction. After convergence to a diagonal matrix S, test
  870. // for negative entries and build a diagonal matrix that reverses the
  871. // sign on the S-entry.
  872. void EnsureNonnegativeDiagonal()
  873. {
  874. for (int i = 0; i < mNumCols; ++i)
  875. {
  876. if (mDiagonal[i] >= (Real)0)
  877. {
  878. mFixupDiagonal[i] = (Real)1;
  879. }
  880. else
  881. {
  882. mDiagonal[i] = -mDiagonal[i];
  883. mFixupDiagonal[i] = (Real)-1;
  884. }
  885. }
  886. }
  887. // Sort the singular values and compute the corresponding permutation
  888. // of the indices of the array storing the singular values. The
  889. // permutation is used for reordering the singular values and the
  890. // corresponding columns of the orthogonal matrix in the calls to
  891. // GetSingularValues(...) and GetOrthogonalMatrices(...).
  892. void ComputePermutation(int sortType)
  893. {
  894. if (sortType == 0)
  895. {
  896. // Set a flag for GetSingularValues() and
  897. // GetOrthogonalMatrices() to know that sorted output was not
  898. // requested.
  899. mPermutation[0] = -1;
  900. return;
  901. }
  902. // Compute the permutation induced by sorting. Initially, we
  903. // start with the identity permutation I = (0,1,...,N-1).
  904. struct SortItem
  905. {
  906. Real singularValue;
  907. int index;
  908. };
  909. std::vector<SortItem> items(mNumCols);
  910. int i;
  911. for (i = 0; i < mNumCols; ++i)
  912. {
  913. items[i].singularValue = mDiagonal[i];
  914. items[i].index = i;
  915. }
  916. if (sortType > 0)
  917. {
  918. std::sort(items.begin(), items.end(),
  919. [](SortItem const& item0, SortItem const& item1)
  920. {
  921. return item0.singularValue < item1.singularValue;
  922. }
  923. );
  924. }
  925. else
  926. {
  927. std::sort(items.begin(), items.end(),
  928. [](SortItem const& item0, SortItem const& item1)
  929. {
  930. return item0.singularValue > item1.singularValue;
  931. }
  932. );
  933. }
  934. i = 0;
  935. for (auto const& item : items)
  936. {
  937. mPermutation[i++] = item.index;
  938. }
  939. // GetOrthogonalMatrices() has nontrivial code for computing the
  940. // orthogonal U and V from the reflections and rotations. To
  941. // avoid complicating the code further when sorting is requested,
  942. // U and V are computed as in the unsorted case. We then need to
  943. // swap columns of U and V to be consistent with the sorting of
  944. // the singular values. To minimize copying due to column swaps,
  945. // we use permutation P. The minimum number of transpositions to
  946. // obtain P from I is N minus the number of cycles of P. Each
  947. // cycle is reordered with a minimum number of transpositions;
  948. // that is, the singular items are cyclically swapped, leading to
  949. // a minimum amount of copying. For example, if there is a
  950. // cycle i0 -> i1 -> i2 -> i3, then the copying is
  951. // save = singularitem[i0];
  952. // singularitem[i1] = singularitem[i2];
  953. // singularitem[i2] = singularitem[i3];
  954. // singularitem[i3] = save;
  955. }
  956. // The number rows and columns of the matrices to be processed.
  957. int mNumRows, mNumCols;
  958. // The maximum number of iterations for reducing the bidiagonal matrix
  959. // to a diagonal matrix.
  960. unsigned int mMaxIterations;
  961. // The internal copy of a matrix passed to the solver. See the
  962. // comments about function Bidiagonalize() about what is stored in the
  963. // matrix.
  964. std::vector<Real> mMatrix; // MxN elements
  965. // After the initial bidiagonalization by Householder reflections, we
  966. // no longer need the full mMatrix. Copy the diagonal and
  967. // superdiagonal entries to linear arrays in order to be cache
  968. // friendly.
  969. std::vector<Real> mDiagonal; // N elements
  970. std::vector<Real> mSuperdiagonal; // N-1 elements
  971. // The Givens rotations used to reduce the initial bidiagonal matrix
  972. // to a diagonal matrix. A rotation is the identity with the following
  973. // replacement entries: R(index0,index0) = cs, R(index0,index1) = sn,
  974. // R(index1,index0) = -sn, and R(index1,index1) = cs. If N is the
  975. // number of matrix columns and K is the maximum number of iterations,
  976. // the maximum number of right or left Givens rotations is K*(N-1).
  977. // The maximum amount of memory is allocated to store these. However,
  978. // we also potentially need left rotations to decouple the matrix when
  979. // diagonal terms are zero. Worst case is a number of matrices
  980. // quadratic in N, so for now we just use std::vector<Rotation> whose
  981. // initial capacity is K*(N-1).
  982. struct GivensRotation
  983. {
  984. // No default initialization for fast creation of std::vector of
  985. // objects of this type.
  986. GivensRotation() = default;
  987. GivensRotation(int inIndex0, int inIndex1, Real inCs, Real inSn)
  988. :
  989. index0(inIndex0),
  990. index1(inIndex1),
  991. cs(inCs),
  992. sn(inSn)
  993. {
  994. }
  995. int index0, index1;
  996. Real cs, sn;
  997. };
  998. std::vector<GivensRotation> mRGivens;
  999. std::vector<GivensRotation> mLGivens;
  1000. // The diagonal matrix that is used to convert S-entries to
  1001. // nonnegative.
  1002. std::vector<Real> mFixupDiagonal; // N elements
  1003. // When sorting is requested, the permutation associated with the sort
  1004. // is stored in mPermutation. When sorting is not requested,
  1005. // mPermutation[0] is set to -1. mVisited is used for finding cycles
  1006. // in the permutation.
  1007. std::vector<int> mPermutation; // N elements
  1008. mutable std::vector<int> mVisited; // N elements
  1009. // Temporary storage to compute Householder reflections and to support
  1010. // sorting of columns of the orthogonal matrices.
  1011. std::vector<Real> mTwoInvUTU; // N elements
  1012. std::vector<Real> mTwoInvVTV; // N-2 elements
  1013. mutable std::vector<Real> mUVector; // M elements
  1014. mutable std::vector<Real> mVVector; // N elements
  1015. mutable std::vector<Real> mWVector; // max(M,N) elements
  1016. };
  1017. }