APConversion.h 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. // David Eberly, Geometric Tools, Redmond WA 98052
  2. // Copyright (c) 1998-2020
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // https://www.boost.org/LICENSE_1_0.txt
  5. // https://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
  6. // Version: 4.0.2019.09.26
  7. #pragma once
  8. #include <Mathematics/ArbitraryPrecision.h>
  9. #include <Mathematics/QFNumber.h>
  10. #include <cfenv>
  11. // The conversion functions here are used to obtain arbitrary-precision
  12. // approximations to rational numbers and to quadratic field numbers.
  13. // The arbitrary-precision arithmetic is described in
  14. // https://www.geometrictools.com/Documentation/ArbitraryPrecision.pdf
  15. // The quadratic field numbers and conversions are described in
  16. // https://www.geometrictools.com/Documentation/QuadraticFields.pdf
  17. namespace WwiseGTE
  18. {
  19. template <typename Rational>
  20. class APConversion
  21. {
  22. public:
  23. using QFN1 = QFNumber<Rational, 1>;
  24. using QFN2 = QFNumber<Rational, 2>;
  25. // Construction and destruction.
  26. APConversion(int32_t precision, uint32_t maxIterations)
  27. :
  28. mZero(0),
  29. mOne(1),
  30. mThree(3),
  31. mFive(5),
  32. mPrecision(precision),
  33. mMaxIterations(maxIterations),
  34. mThreshold(std::ldexp(mOne, -mPrecision))
  35. {
  36. LogAssert(precision > 0, "Invalid precision.");
  37. LogAssert(maxIterations > 0, "Invalid maximum iterations.");
  38. }
  39. ~APConversion()
  40. {
  41. }
  42. // Member access.
  43. void SetPrecision(int32_t precision)
  44. {
  45. LogAssert(precision > 0, "Invalid precision.");
  46. mPrecision = precision;
  47. mThreshold = std::ldexp(mOne, -mPrecision);
  48. }
  49. void SetMaxIterations(uint32_t maxIterations)
  50. {
  51. LogAssert(maxIterations > 0, "Invalid maximum iterations.");
  52. mMaxIterations = maxIterations;
  53. }
  54. inline int32_t GetPrecision() const
  55. {
  56. return mPrecision;
  57. }
  58. inline uint32_t GetMaxIterations() const
  59. {
  60. return mMaxIterations;
  61. }
  62. // Disallow copying and moving.
  63. APConversion(APConversion const&) = delete;
  64. APConversion(APConversion&&) = delete;
  65. APConversion& operator=(APConversion const&) = delete;
  66. APConversion& operator=(APConversion&&) = delete;
  67. // The input a^2 is rational, but a itself is usually irrational,
  68. // although a rational value is allowed. Compute a bounding interval
  69. // for the root, aMin <= a <= aMax, where the endpoints are both
  70. // within the specified precision.
  71. uint32_t EstimateSqrt(Rational const& aSqr, Rational& aMin, Rational& aMax)
  72. {
  73. // Factor a^2 = r^2 * 2^e, where r^2 in [1/2,1). Compute s^2 and
  74. // the exponent used to generate the estimate of sqrt(a^2).
  75. Rational sSqr;
  76. int exponentA;
  77. PreprocessSqr(aSqr, sSqr, exponentA);
  78. // Use the FPU to estimate s = sqrt(sSqr) to 53-bit precision with
  79. // rounding up. Multiply by the appropriate exponent to obtain
  80. // upper bound aMax > a.
  81. aMax = GetMaxOfSqrt(sSqr, exponentA);
  82. // Compute a lower bound aMin < a.
  83. aMin = aSqr / aMax;
  84. // Compute Newton iterates until convergence. The estimate closest
  85. // to a is aMin with aMin <= a <= aMax and a - aMin <= aMax - a.
  86. uint32_t iterate;
  87. for (iterate = 1; iterate <= mMaxIterations; ++iterate)
  88. {
  89. if (aMax - aMin < mThreshold)
  90. {
  91. break;
  92. }
  93. // Compute the average aMax = (aMin + aMax) / 2. Round up
  94. // to twice the precision to avoid quadratic growth in the
  95. // number of bits and to ensure that aMin can increase.
  96. aMax = std::ldexp(aMin + aMax, -1);
  97. Convert(aMax, 2 * mPrecision, FE_UPWARD, aMax);
  98. aMin = aSqr / aMax;
  99. }
  100. return iterate;
  101. }
  102. // Compute an estimate of the root when you do not need a bounding
  103. // interval.
  104. uint32_t EstimateSqrt(Rational const& aSqr, Rational& a)
  105. {
  106. // Compute a bounding interval aMin <= a <= aMax.
  107. Rational aMin, aMax;
  108. uint32_t numIterates = EstimateSqrt(aSqr, aMin, aMax);
  109. // Use the average of the interval endpoints as the estimate.
  110. a = std::ldexp(aMin + aMax, -1);
  111. return numIterates;
  112. }
  113. uint32_t EstimateApB(Rational const& aSqr, Rational const& bSqr,
  114. Rational& tMin, Rational& tMax)
  115. {
  116. // Factor a^2 = r^2 * 2^e, where r^2 in [1/2,1). Compute u^2 and
  117. // the exponent used to generate the estimate of sqrt(a^2).
  118. Rational uSqr;
  119. int32_t exponentA;
  120. PreprocessSqr(aSqr, uSqr, exponentA);
  121. // Factor b^2 = s^2 * 2^e, where s^2 in [1/2,1). Compute v^2 and
  122. // the exponent used to generate the estimate of sqrt(b^2).
  123. Rational vSqr;
  124. int32_t exponentB;
  125. PreprocessSqr(bSqr, vSqr, exponentB);
  126. // Use the FPU to estimate u = sqrt(u^2) and v = sqrt(v^2) to
  127. // 53 bits of precision with rounding up. Multiply by the
  128. // appropriate exponents to obtain upper bounds aMax > a and
  129. // bMax > b. This ensures tMax = aMax + bMax > a + b.
  130. Rational aMax = GetMaxOfSqrt(uSqr, exponentA);
  131. Rational bMax = GetMaxOfSqrt(vSqr, exponentB);
  132. tMax = aMax + bMax;
  133. // Compute a lower bound tMin < a + b.
  134. Rational a2pb2 = aSqr + bSqr;
  135. Rational a2mb2 = aSqr - bSqr;
  136. Rational a2mb2Sqr = a2mb2 * a2mb2;
  137. Rational tMaxSqr = tMax * tMax;
  138. tMin = (a2pb2 * tMaxSqr - a2mb2Sqr) / (tMax * (tMaxSqr - a2pb2));
  139. // Compute Newton iterates until convergence. The estimate closest
  140. // to a + b is tMin with tMin < a + b < tMax and
  141. // (a + b) - tMin < tMax - (a + b).
  142. uint32_t iterate;
  143. for (iterate = 1; iterate <= mMaxIterations; ++iterate)
  144. {
  145. if (tMax - tMin < mThreshold)
  146. {
  147. break;
  148. }
  149. // Compute the weighted average tMax = (3*tMin + tMax) / 4.
  150. // Round up to twice the precision to avoid quadratic growth
  151. // in the number of bits and to ensure that tMin can increase.
  152. tMax = std::ldexp(mThree * tMax + tMin, -2);
  153. Convert(tMax, 2 * mPrecision, FE_UPWARD, tMax);
  154. tMaxSqr = tMax * tMax;
  155. tMin = (a2pb2 * tMaxSqr - a2mb2Sqr) / (tMax * (tMaxSqr - a2pb2));
  156. }
  157. return iterate;
  158. }
  159. uint32_t EstimateAmB(Rational const& aSqr, Rational const& bSqr,
  160. Rational& tMin, Rational& tMax)
  161. {
  162. // The return value of the function.
  163. uint32_t iterate = 0;
  164. // Compute various quantities that are used later in the code.
  165. Rational a2tb2 = aSqr * bSqr; // a^2 * b^2
  166. Rational a2pb2 = aSqr + bSqr; // a^2 + b^2
  167. Rational a2mb2 = aSqr - bSqr; // a^2 - b^2
  168. Rational a2mb2Sqr = a2mb2 * a2mb2; // (a^2 - b^2)^2
  169. Rational twoa2pb2 = std::ldexp(a2pb2, 1); // 2 * (a^2 + b^2)
  170. // Factor a^2 = r^2 * 2^e, where r^2 in [1/2,1). Compute u^2 and
  171. // the exponent used to generate the estimate of sqrt(a^2).
  172. Rational uSqr;
  173. int32_t exponentA;
  174. PreprocessSqr(aSqr, uSqr, exponentA);
  175. // Factor b^2 = s^2 * 2^e, where s^2 in [1/2,1). Compute v^2 and
  176. // the exponent used to generate the estimate of sqrt(b^2).
  177. Rational vSqr;
  178. int32_t exponentB;
  179. PreprocessSqr(bSqr, vSqr, exponentB);
  180. // Compute the sign of f''(a-b)/8 = a^2 - 3*a*b + b^2. It can be
  181. // shown that Sign(a^2-3*a*b+b^2) = Sign(a^4-7*a^2*b^2+b^4) =
  182. // Sign((a^2-b^2)^2-5*a^2*b^2).
  183. Rational signSecDer = a2mb2Sqr - mFive * a2tb2;
  184. // Local variables shared by the two main blocks of code.
  185. Rational aMin, aMax, bMin, bMax, tMinSqr, tMaxSqr, tMid, tMidSqr, f;
  186. if (signSecDer > mZero)
  187. {
  188. // Choose an initial guess tMin < a-b. Use the FPU to
  189. // estimate u = sqrt(u^2) and v = sqrt(v^2) to 53 bits of
  190. // precision with specified rounding. Multiply by the
  191. // appropriate exponents to obtain tMin = aMin - bMax < a-b.
  192. aMin = GetMinOfSqrt(uSqr, exponentA);
  193. bMax = GetMaxOfSqrt(vSqr, exponentB);
  194. tMin = aMin - bMax;
  195. // When a-b is nearly zero, it is possible the lower bound is
  196. // negative. Clamp tMin to zero to stay on the nonnegative
  197. // t-axis where the f"-positive basin is.
  198. if (tMin < mZero)
  199. {
  200. tMin = mZero;
  201. }
  202. // Test whether tMin is in the positive f"(t) basin containing
  203. // a-b. If it is not, compute a tMin that is in the basis. The
  204. // sign test is applied to f"(t)/4 = 3*t^2 - (a^2+b^2).
  205. tMinSqr = tMin * tMin;
  206. signSecDer = mThree * tMinSqr - a2pb2;
  207. if (signSecDer < mZero)
  208. {
  209. // The initial guess satisfies f"(tMin) < 0. Compute an
  210. // upper bound tMax > a-b and bisect [tMin,tMax] until
  211. // either the t-value is an estimate to a-b within the
  212. // specified precision or until f"(t) >= 0 and f(t) >= 0.
  213. // In the latter case, continue on to Newton's method,
  214. // which is then guaranteed to converge.
  215. aMax = GetMaxOfSqrt(uSqr, exponentA);
  216. bMin = GetMinOfSqrt(vSqr, exponentB);
  217. tMax = aMax - bMin;
  218. for (iterate = 1; iterate <= mMaxIterations; ++iterate)
  219. {
  220. if (tMax - tMin < mThreshold)
  221. {
  222. return iterate;
  223. }
  224. tMid = std::ldexp(tMin + tMax, -1);
  225. tMidSqr = tMid * tMid;
  226. signSecDer = mThree * tMidSqr - a2pb2;
  227. if (signSecDer >= mZero)
  228. {
  229. f = tMidSqr * (tMidSqr - twoa2pb2) + a2mb2Sqr;
  230. if (f >= mZero)
  231. {
  232. tMin = tMid;
  233. tMinSqr = tMidSqr;
  234. break;
  235. }
  236. else
  237. {
  238. // Round up to twice the precision to avoid
  239. // quadratic growth in the number of bits.
  240. tMax = tMid;
  241. Convert(tMax, 2 * mPrecision, FE_UPWARD, tMax);
  242. }
  243. }
  244. else
  245. {
  246. // Round down to twice the precision to avoid
  247. // quadratic growth in the number of bits.
  248. tMin = tMid;
  249. Convert(tMin, 2 * mPrecision, FE_DOWNWARD, tMin);
  250. }
  251. }
  252. }
  253. // Compute an upper bound tMax > a-b.
  254. tMax = (a2pb2 * tMinSqr - a2mb2Sqr) / (tMin * (tMinSqr - a2pb2));
  255. // Compute Newton iterates until convergence. The estimate
  256. // closest to a-b is tMax with tMin < a-b < tMax and
  257. // tMax - (a-b) < (a-b) - tMin.
  258. for (iterate = 1; iterate <= mMaxIterations; ++iterate)
  259. {
  260. if (tMax - tMin < mThreshold)
  261. {
  262. break;
  263. }
  264. // Compute the weighted average tMin = (3*tMin+tMax)/4.
  265. // Round down to twice the precision to avoid quadratic
  266. // growth in the number of bits and to ensure that tMax
  267. // can decrease.
  268. tMin = std::ldexp(mThree * tMin + tMax, -2);
  269. Convert(tMin, 2 * mPrecision, FE_DOWNWARD, tMin);
  270. tMinSqr = tMin * tMin;
  271. tMax = (a2pb2 * tMinSqr - a2mb2Sqr) / (tMin * (tMinSqr - a2pb2));
  272. }
  273. return iterate;
  274. }
  275. if (signSecDer < mZero)
  276. {
  277. // Choose an initial guess tMax > a-b. Use the FPU to
  278. // estimate u = sqrt(u^2) and v = sqrt(v^2) to 53 bits of
  279. // precision with specified rounding. Multiply by the
  280. // appropriate exponents to obtain tMax = aMax - bMin > a-b.
  281. aMax = GetMaxOfSqrt(uSqr, exponentA);
  282. bMin = GetMinOfSqrt(vSqr, exponentB);
  283. tMax = aMax - bMin;
  284. // Test whether tMax is in the negative f"(t) basin containing
  285. // a-b. If it is not, compute a tMax that is in the basis. The
  286. // sign test is applied to f"(t)/4 = 3*t^2 - (a^2+b^2).
  287. tMaxSqr = tMax * tMax;
  288. signSecDer = mThree * tMaxSqr - a2pb2;
  289. if (signSecDer > mZero)
  290. {
  291. // The initial guess satisfies f"(tMax) > 0. Compute a
  292. // lower bound tMin < a-b and bisect [tMin,tMax] until
  293. // either the t-value is an estimate to a-b within the
  294. // specified precision or until f"(t) <= 0 and f(t) <= 0.
  295. // In the latter case, continue on to Newton's method,
  296. // which is then guaranteed to converge.
  297. aMin = GetMinOfSqrt(uSqr, exponentA);
  298. bMax = GetMaxOfSqrt(vSqr, exponentB);
  299. tMin = aMin - bMax;
  300. for (iterate = 1; iterate <= mMaxIterations; ++iterate)
  301. {
  302. if (tMax - tMin < mThreshold)
  303. {
  304. return iterate;
  305. }
  306. tMid = std::ldexp(tMin + tMax, -1);
  307. tMidSqr = tMid * tMid;
  308. signSecDer = mThree * tMidSqr - a2pb2;
  309. if (signSecDer <= mZero)
  310. {
  311. f = tMidSqr * (tMidSqr - twoa2pb2) + a2mb2Sqr;
  312. if (f <= mZero)
  313. {
  314. tMax = tMid;
  315. tMaxSqr = tMidSqr;
  316. break;
  317. }
  318. else
  319. {
  320. // Round down to twice the precision to avoid
  321. // quadratic growth in the number of bits.
  322. tMin = tMid;
  323. Convert(tMin, 2 * mPrecision, FE_DOWNWARD, tMin);
  324. }
  325. }
  326. else
  327. {
  328. // Round up to twice the precision to avoid
  329. // quadratic growth in the number of bits.
  330. tMax = tMid;
  331. Convert(tMax, 2 * mPrecision, FE_UPWARD, tMax);
  332. }
  333. }
  334. }
  335. // Compute a lower bound tMin < a-b.
  336. tMin = (a2pb2 * tMaxSqr - a2mb2Sqr) / (tMax * (tMaxSqr - a2pb2));
  337. // Compute Newton iterates until convergence. The estimate
  338. // closest to a-b is tMin with tMin < a - b < tMax and
  339. // (a-b) - tMin < tMax - (a-b).
  340. for (iterate = 1; iterate <= mMaxIterations; ++iterate)
  341. {
  342. if (tMax - tMin < mThreshold)
  343. {
  344. break;
  345. }
  346. // Compute the weighted average tMax = (3*tMax+tMin)/4.
  347. // Round up to twice the precision to avoid quadratic
  348. // growth in the number of bits and to ensure that tMin
  349. // can increase.
  350. tMax = std::ldexp(mThree * tMax + tMin, -2);
  351. Convert(tMax, 2 * mPrecision, FE_UPWARD, tMax);
  352. tMaxSqr = tMax * tMax;
  353. tMin = (a2pb2 * tMaxSqr - a2mb2Sqr) / (tMax * (tMaxSqr - a2pb2));
  354. }
  355. return iterate;
  356. }
  357. // The sign of the second derivative is Sign(a^4-7*a^2*b^2+b^4)
  358. // and cannot be zero. Define rational r = a^2/b^2 so that
  359. // a^4-7*a^2*b^2+b^4 = 0. This implies r^2 - 7*r^2 + 1 = 0. The
  360. // irrational roots are r = (7 +- sqrt(45))/2, which is a
  361. // contradiction.
  362. LogError("This second derivative cannot be zero at a-b.");
  363. }
  364. // Compute a bounding interval for the root, qMin <= q <= qMax, where
  365. // the endpoints are both within the specified precision.
  366. uint32_t Estimate(QFN1 const& q, Rational& qMin, Rational& qMax)
  367. {
  368. Rational const& x = q.x[0];
  369. Rational const& y = q.x[1];
  370. Rational const& d = q.d;
  371. uint32_t numIterates;
  372. if (d != mZero && y != mZero)
  373. {
  374. Rational aSqr = y * y * d;
  375. numIterates = EstimateSqrt(aSqr, qMin, qMax);
  376. if (y > mZero)
  377. {
  378. qMin = x + qMin;
  379. qMax = x + qMax;
  380. }
  381. else
  382. {
  383. Rational diff = x - qMax;
  384. qMax = x - qMin;
  385. qMin = diff;
  386. }
  387. }
  388. else
  389. {
  390. numIterates = 0;
  391. qMin = x;
  392. qMax = x;
  393. }
  394. return numIterates;
  395. }
  396. // Compute an estimate of the root when you do not need a bounding
  397. // interval.
  398. uint32_t Estimate(QFN1 const& q, Rational& qEstimate)
  399. {
  400. // Compute a bounding interval qMin <= q <= qMax.
  401. Rational qMin, qMax;
  402. uint32_t numIterates = Estimate(q, qMin, qMax);
  403. // Use the average of the interval endpoints as the estimate.
  404. qEstimate = std::ldexp(qMin + qMax, -1);
  405. return numIterates;
  406. }
  407. private:
  408. void PreprocessSqr(Rational const& aSqr, Rational& rSqr, int& exponentA)
  409. {
  410. // Factor a^2 = r^2 * 2^e, where r^2 in [1/2,1).
  411. int32_t exponentASqr;
  412. rSqr = std::frexp(aSqr, &exponentASqr);
  413. if (exponentASqr & 1) // odd exponent
  414. {
  415. // a = sqrt(2*r^2) * 2^{(e-1)/2}
  416. exponentA = (exponentASqr - 1) / 2;
  417. rSqr = std::ldexp(rSqr, 1); // = 2*rSqr
  418. // rSqr in [1,2)
  419. }
  420. else // even exponent
  421. {
  422. // a = sqrt(r^2) * 2^{e/2}
  423. exponentA = exponentASqr / 2;
  424. // rSqr in [1/2,1)
  425. }
  426. }
  427. Rational GetMinOfSqrt(Rational const& rSqr, int exponent)
  428. {
  429. double lowerRSqr;
  430. Convert(rSqr, FE_DOWNWARD, lowerRSqr);
  431. int saveRoundingMode = std::fegetround();
  432. std::fesetround(FE_DOWNWARD);
  433. Rational aMin = std::sqrt(lowerRSqr);
  434. std::fesetround(saveRoundingMode);
  435. aMin = std::ldexp(aMin, exponent);
  436. return aMin;
  437. }
  438. Rational GetMaxOfSqrt(Rational const& rSqr, int exponent)
  439. {
  440. double upperRSqr;
  441. Convert(rSqr, FE_UPWARD, upperRSqr);
  442. int saveRoundingMode = std::fegetround();
  443. std::fesetround(FE_UPWARD);
  444. Rational aMax = std::sqrt(upperRSqr);
  445. std::fesetround(saveRoundingMode);
  446. aMax = std::ldexp(aMax, exponent);
  447. return aMax;
  448. }
  449. Rational const mZero, mOne, mThree, mFive;
  450. int32_t mPrecision;
  451. uint32_t mMaxIterations;
  452. Rational mThreshold;
  453. };
  454. }