UIntegerALU32.h 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. // David Eberly, Geometric Tools, Redmond WA 98052
  2. // Copyright (c) 1998-2020
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // https://www.boost.org/LICENSE_1_0.txt
  5. // https://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
  6. // Version: 4.0.2019.09.03
  7. #pragma once
  8. #include <Mathematics/BitHacks.h>
  9. #include <algorithm>
  // Support for unsigned integer arithmetic in BSNumber and BSRational. The
  // Curiously Recurring Template Pattern (CRTP) is used to allow the UInteger
  // types to share code without introducing virtual functions.
  13. namespace WwiseGTE
  14. {
  15. template <typename UInteger>
  16. class UIntegerALU32
  17. {
  18. public:
  19. // Comparisons. These are not generic. They rely on their being
  20. // called when the two BSNumber arguments to BSNumber::operatorX()
  21. // are of the form 1.u*2^p and 1.v*2^p. The comparisons apply to
  22. // 1.u and 1.v as unsigned integers with their leading 1-bits aligned.
  23. bool operator==(UInteger const& number) const
  24. {
  25. UInteger const& self = *(UInteger const*)this;
  26. int32_t numBits = self.GetNumBits();
  27. if (numBits != number.GetNumBits())
  28. {
  29. return false;
  30. }
  31. if (numBits > 0)
  32. {
  33. auto const& bits = self.GetBits();
  34. auto const& nBits = number.GetBits();
  35. int32_t const last = self.GetSize() - 1;
  36. for (int32_t i = last; i >= 0; --i)
  37. {
  38. if (bits[i] != nBits[i])
  39. {
  40. return false;
  41. }
  42. }
  43. }
  44. return true;
  45. }
  46. bool operator!=(UInteger const& number) const
  47. {
  48. return !operator==(number);
  49. }
  50. bool operator< (UInteger const& number) const
  51. {
  52. UInteger const& self = *(UInteger const*)this;
  53. int32_t nNumBits = number.GetNumBits();
  54. auto const& nBits = number.GetBits();
  55. int32_t numBits = self.GetNumBits();
  56. if (numBits > 0 && nNumBits > 0)
  57. {
  58. // The numbers must be compared as if they are left-aligned
  59. // with each other. We got here because we had
  60. // self = 1.u * 2^p and number = 1.v * 2^p. Although they
  61. // have the same exponent, it is possible that
  62. // 'self < number' but 'numBits(1u) > numBits(1v)'. Compare
  63. // the bits one 32-bit block at a time.
  64. auto const& bits = self.GetBits();
  65. int bitIndex0 = numBits - 1;
  66. int bitIndex1 = nNumBits - 1;
  67. int block0 = bitIndex0 / 32;
  68. int block1 = bitIndex1 / 32;
  69. int numBlockBits0 = 1 + (bitIndex0 % 32);
  70. int numBlockBits1 = 1 + (bitIndex1 % 32);
  71. uint64_t n0shift = bits[block0];
  72. uint64_t n1shift = nBits[block1];
  73. while (block0 >= 0 && block1 >= 0)
  74. {
  75. // Shift the bits in the leading blocks to the high-order bit.
  76. uint32_t value0 = (uint32_t)((n0shift << (32 - numBlockBits0)) & 0x00000000FFFFFFFFull);
  77. uint32_t value1 = (uint32_t)((n1shift << (32 - numBlockBits1)) & 0x00000000FFFFFFFFull);
  78. // Shift bits in the next block (if any) to fill the current
  79. // block.
  80. if (--block0 >= 0)
  81. {
  82. n0shift = bits[block0];
  83. value0 |= (uint32_t)((n0shift >> numBlockBits0) & 0x00000000FFFFFFFFull);
  84. }
  85. if (--block1 >= 0)
  86. {
  87. n1shift = nBits[block1];
  88. value1 |= (uint32_t)((n1shift >> numBlockBits1) & 0x00000000FFFFFFFFull);
  89. }
  90. if (value0 < value1)
  91. {
  92. return true;
  93. }
  94. if (value0 > value1)
  95. {
  96. return false;
  97. }
  98. }
  99. return block0 < block1;
  100. }
  101. else
  102. {
  103. // One or both numbers are negative. The only time 'less than' is
  104. // 'true' is when 'number' is positive.
  105. return nNumBits > 0;
  106. }
  107. }
  108. bool operator<=(UInteger const& number) const
  109. {
  110. return operator<(number) || operator==(number);
  111. }
  112. bool operator> (UInteger const& number) const
  113. {
  114. return !operator<=(number);
  115. }
  116. bool operator>=(UInteger const& number) const
  117. {
  118. return !operator<(number);
  119. }
  120. // Arithmetic operations. These are performed in-place; that is, the
  121. // result is stored in 'this' object. The goal is to reduce the
  122. // number of object copies, much like the goal is for std::move. The
  123. // Sub function requires the inputs to satisfy n0 > n1.
  124. void Add(UInteger const& n0, UInteger const& n1)
  125. {
  126. UInteger& self = *(UInteger*)this;
  127. int32_t n0NumBits = n0.GetNumBits();
  128. int32_t n1NumBits = n1.GetNumBits();
  129. // Add the numbers considered as positive integers. Set the last
  130. // block to zero in case no carry-out occurs.
  131. int numBits = std::max(n0NumBits, n1NumBits) + 1;
  132. self.SetNumBits(numBits);
  133. self.SetBack(0);
  134. // Get the input array sizes.
  135. int32_t numElements0 = n0.GetSize();
  136. int32_t numElements1 = n1.GetSize();
  137. // Order the inputs so that the first has the most blocks.
  138. auto const& u0 = (numElements0 >= numElements1 ? n0.GetBits() : n1.GetBits());
  139. auto const& u1 = (numElements0 >= numElements1 ? n1.GetBits() : n0.GetBits());
  140. auto numElements = std::minmax(numElements0, numElements1);
  141. // Add the u1-blocks to u0-blocks.
  142. auto& bits = self.GetBits();
  143. uint64_t carry = 0, sum;
  144. int32_t i;
  145. for (i = 0; i < numElements.first; ++i)
  146. {
  147. sum = u0[i] + (u1[i] + carry);
  148. bits[i] = (uint32_t)(sum & 0x00000000FFFFFFFFull);
  149. carry = (sum >> 32);
  150. }
  151. // We have no more u1-blocks. Propagate the carry-out, if there is
  152. // one, or copy the remaining blocks if there is not.
  153. if (carry > 0)
  154. {
  155. for (/**/; i < numElements.second; ++i)
  156. {
  157. sum = u0[i] + carry;
  158. bits[i] = (uint32_t)(sum & 0x00000000FFFFFFFFull);
  159. carry = (sum >> 32);
  160. }
  161. if (carry > 0)
  162. {
  163. bits[i] = (uint32_t)(carry & 0x00000000FFFFFFFFull);
  164. }
  165. }
  166. else
  167. {
  168. for (/**/; i < numElements.second; ++i)
  169. {
  170. bits[i] = u0[i];
  171. }
  172. }
  173. // Reduce the number of bits if there was not a carry-out.
  174. uint32_t firstBitIndex = (numBits - 1) % 32;
  175. uint32_t mask = (1 << firstBitIndex);
  176. if ((mask & self.GetBack()) == 0)
  177. {
  178. self.SetNumBits(--numBits);
  179. }
  180. }
  181. void Sub(UInteger const& n0, UInteger const& n1)
  182. {
  183. UInteger& self = *(UInteger*)this;
  184. int32_t n0NumBits = n0.GetNumBits();
  185. auto const& n0Bits = n0.GetBits();
  186. auto const& n1Bits = n1.GetBits();
  187. // Subtract the numbers considered as positive integers. We know
  188. // that n0 > n1, so create a number n2 that has the same number of
  189. // bits as n0 and use two's-complement to generate -n2, and then
  190. // add n0 and -n2. The result is nonnegative, so we do not need
  191. // to apply two's complement to a negative result to extract the
  192. // sign and absolute value.
  193. // Get the input array sizes. We know
  194. // numElements0 >= numElements1.
  195. int32_t numElements0 = n0.GetSize();
  196. int32_t numElements1 = n1.GetSize();
  197. // Create the two's-complement number n2. We know
  198. // n2.GetNumElements() is the same as numElements0.
  199. UInteger n2;
  200. n2.SetNumBits(n0NumBits);
  201. auto& n2Bits = n2.GetBits();
  202. int32_t i;
  203. for (i = 0; i < numElements1; ++i)
  204. {
  205. n2Bits[i] = ~n1Bits[i];
  206. }
  207. for (/**/; i < numElements0; ++i)
  208. {
  209. n2Bits[i] = ~0u;
  210. }
  211. // Now add 1 to the bit-negated result to obtain -n1.
  212. uint64_t carry = 1, sum;
  213. for (i = 0; i < numElements0; ++i)
  214. {
  215. sum = n2Bits[i] + carry;
  216. n2Bits[i] = (uint32_t)(sum & 0x00000000FFFFFFFFull);
  217. carry = (sum >> 32);
  218. }
  219. // Add the numbers as positive integers. Set the last block to
  220. // zero in case no carry-out occurs.
  221. self.SetNumBits(n0NumBits + 1);
  222. self.SetBack(0);
  223. // Add the n0-blocks to n2-blocks.
  224. auto & bits = self.GetBits();
  225. for (i = 0, carry = 0; i < numElements0; ++i)
  226. {
  227. sum = n2Bits[i] + (n0Bits[i] + carry);
  228. bits[i] = (uint32_t)(sum & 0x00000000FFFFFFFFull);
  229. carry = (sum >> 32);
  230. }
  231. // Strip off the bits introduced by two's-complement.
  232. int32_t block;
  233. for (block = numElements0 - 1; block >= 0; --block)
  234. {
  235. if (bits[block] > 0)
  236. {
  237. break;
  238. }
  239. }
  240. if (block >= 0)
  241. {
  242. self.SetNumBits(32 * block + BitHacks::GetLeadingBit(bits[block]) + 1);
  243. }
  244. else
  245. {
  246. self.SetNumBits(0);
  247. }
  248. }
  249. void Mul(UInteger const& n0, UInteger const& n1)
  250. {
  251. UInteger& self = *(UInteger*)this;
  252. int32_t n0NumBits = n0.GetNumBits();
  253. int32_t n1NumBits = n1.GetNumBits();
  254. auto const& n0Bits = n0.GetBits();
  255. auto const& n1Bits = n1.GetBits();
  256. // The number of bits is at most this, possibly one bit smaller.
  257. int numBits = n0NumBits + n1NumBits;
  258. self.SetNumBits(numBits);
  259. auto& bits = self.GetBits();
  260. // Product of a single-block number with a multiple-block number.
  261. UInteger product;
  262. product.SetNumBits(numBits);
  263. auto& pBits = product.GetBits();
  264. // Get the array sizes.
  265. int32_t const numElements0 = n0.GetSize();
  266. int32_t const numElements1 = n1.GetSize();
  267. int32_t const numElements = self.GetSize();
  268. // Compute the product v = u0*u1.
  269. int32_t i0, i1, i2;
  270. uint64_t term, sum;
  271. // The case i0 == 0 is handled separately to initialize the
  272. // accumulator with u0[0]*v. This avoids having to fill the bytes
  273. // of 'bits' with zeros outside the double loop, something that
  274. // can be a performance issue when 'numBits' is large.
  275. uint64_t block0 = n0Bits[0];
  276. uint64_t carry = 0;
  277. for (i1 = 0; i1 < numElements1; ++i1)
  278. {
  279. term = block0 * n1Bits[i1] + carry;
  280. bits[i1] = (uint32_t)(term & 0x00000000FFFFFFFFull);
  281. carry = (term >> 32);
  282. }
  283. if (i1 < numElements)
  284. {
  285. bits[i1] = (uint32_t)(carry & 0x00000000FFFFFFFFull);
  286. }
  287. for (i0 = 1; i0 < numElements0; ++i0)
  288. {
  289. // Compute the product p = u0[i0]*u1.
  290. block0 = n0Bits[i0];
  291. carry = 0;
  292. for (i1 = 0, i2 = i0; i1 < numElements1; ++i1, ++i2)
  293. {
  294. term = block0 * n1Bits[i1] + carry;
  295. pBits[i2] = (uint32_t)(term & 0x00000000FFFFFFFFull);
  296. carry = (term >> 32);
  297. }
  298. if (i2 < numElements)
  299. {
  300. pBits[i2] = (uint32_t)(carry & 0x00000000FFFFFFFFull);
  301. }
  302. // Add p to the accumulator v.
  303. carry = 0;
  304. for (i1 = 0, i2 = i0; i1 < numElements1; ++i1, ++i2)
  305. {
  306. sum = pBits[i2] + (bits[i2] + carry);
  307. bits[i2] = (uint32_t)(sum & 0x00000000FFFFFFFFull);
  308. carry = (sum >> 32);
  309. }
  310. if (i2 < numElements)
  311. {
  312. sum = pBits[i2] + carry;
  313. bits[i2] = (uint32_t)(sum & 0x00000000FFFFFFFFull);
  314. }
  315. }
  316. // Reduce the number of bits if there was not a carry-out.
  317. uint32_t firstBitIndex = (numBits - 1) % 32;
  318. uint32_t mask = (1 << firstBitIndex);
  319. if ((mask & self.GetBack()) == 0)
  320. {
  321. self.SetNumBits(--numBits);
  322. }
  323. }
  324. // The shift is performed in-place; that is, the result is stored in
  325. // 'this' object.
  326. void ShiftLeft(UInteger const& number, int32_t shift)
  327. {
  328. UInteger& self = *(UInteger*)this;
  329. int32_t nNumBits = number.GetNumBits();
  330. auto const& nBits = number.GetBits();
  331. // Shift the 'number' considered as an odd positive integer.
  332. self.SetNumBits(nNumBits + shift);
  333. // Set the low-order bits to zero.
  334. auto& bits = self.GetBits();
  335. int32_t const shiftBlock = shift / 32;
  336. for (int32_t i = 0; i < shiftBlock; ++i)
  337. {
  338. bits[i] = 0;
  339. }
  340. // Get the location of the low-order 1-bit within the result.
  341. int32_t const numInElements = number.GetSize();
  342. int32_t const lshift = shift % 32;
  343. int32_t i, j;
  344. if (lshift > 0)
  345. {
  346. // The trailing 1-bits for source and target are at different
  347. // relative indices. Each shifted source block straddles a
  348. // boundary between two target blocks, so we must extract the
  349. // subblocks and copy accordingly.
  350. int32_t const rshift = 32 - lshift;
  351. uint32_t prev = 0, curr;
  352. for (i = shiftBlock, j = 0; j < numInElements; ++i, ++j)
  353. {
  354. curr = nBits[j];
  355. bits[i] = (curr << lshift) | (prev >> rshift);
  356. prev = curr;
  357. }
  358. if (i < self.GetSize())
  359. {
  360. // The leading 1-bit of the source is at a relative index
  361. // such that when you add the shift amount, that bit
  362. // occurs in a new block.
  363. bits[i] = (prev >> rshift);
  364. }
  365. }
  366. else
  367. {
  368. // The trailing 1-bits for source and target are at the same
  369. // relative index. The shift reduces to a block copy.
  370. for (i = shiftBlock, j = 0; j < numInElements; ++i, ++j)
  371. {
  372. bits[i] = nBits[j];
  373. }
  374. }
  375. }
  376. // The 'number' is even and positive. It is shifted right to become
  377. // an odd number and the return value is the amount shifted. The
  378. // operation is performed in-place; that is, the result is stored in
  379. // 'this' object.
  380. int32_t ShiftRightToOdd(UInteger const& number)
  381. {
  382. UInteger& self = *(UInteger*)this;
  383. auto const& nBits = number.GetBits();
  384. // Get the leading 1-bit.
  385. int32_t const numElements = number.GetSize();
  386. int32_t const numM1 = numElements - 1;
  387. int32_t firstBitIndex = 32 * numM1 + BitHacks::GetLeadingBit(nBits[numM1]);
  388. // Get the trailing 1-bit.
  389. int32_t lastBitIndex = -1;
  390. for (int32_t block = 0; block < numElements; ++block)
  391. {
  392. uint32_t value = nBits[block];
  393. if (value > 0)
  394. {
  395. lastBitIndex = 32 * block + BitHacks::GetTrailingBit(value);
  396. break;
  397. }
  398. }
  399. // The right-shifted result.
  400. self.SetNumBits(firstBitIndex - lastBitIndex + 1);
  401. auto& bits = self.GetBits();
  402. int32_t const numBlocks = self.GetSize();
  403. // Get the location of the low-order 1-bit within the result.
  404. int32_t const shiftBlock = lastBitIndex / 32;
  405. int32_t rshift = lastBitIndex % 32;
  406. if (rshift > 0)
  407. {
  408. int32_t const lshift = 32 - rshift;
  409. int32_t i, j = shiftBlock;
  410. uint32_t curr = nBits[j++];
  411. for (i = 0; j < numElements; ++i, ++j)
  412. {
  413. uint32_t next = nBits[j];
  414. bits[i] = (curr >> rshift) | (next << lshift);
  415. curr = next;
  416. }
  417. if (i < numBlocks)
  418. {
  419. bits[i] = (curr >> rshift);
  420. }
  421. }
  422. else
  423. {
  424. for (int32_t i = 0, j = shiftBlock; i < numBlocks; ++i, ++j)
  425. {
  426. bits[i] = nBits[j];
  427. }
  428. }
  429. return rshift + 32 * shiftBlock;
  430. }
  431. // Add 1 to 'this', useful for rounding modes in conversions of
  432. // BSNumber and BSRational. The operation is performed in-place;
  433. // that is, the result is stored in 'this' object. The return value
  434. // is the amount shifted after the addition in order to obtain an
  435. // odd integer.
  436. int32_t RoundUp()
  437. {
  438. UInteger const& self = *(UInteger const*)this;
  439. UInteger rounded;
  440. rounded.Add(self, UInteger(1u));
  441. return ShiftRightToOdd(rounded);
  442. }
  443. // Get a block of numRequested bits starting with the leading 1-bit of
  444. // the nonzero number. The returned number has the prefix stored in
  445. // the high-order bits. Additional bits are copied and used by the
  446. // caller for rounding. This function supports conversions from
  447. // 'float' and 'double'. The input 'numRequested' is smaller than 64.
  448. uint64_t GetPrefix(int32_t numRequested) const
  449. {
  450. UInteger const& self = *(UInteger const*)this;
  451. auto const& bits = self.GetBits();
  452. // Copy to 'prefix' the leading 32-bit block that is nonzero.
  453. int32_t bitIndex = self.GetNumBits() - 1;
  454. int32_t blockIndex = bitIndex / 32;
  455. uint64_t prefix = bits[blockIndex];
  456. // Get the number of bits in the block starting with the leading
  457. // 1-bit.
  458. int32_t firstBitIndex = bitIndex % 32;
  459. int32_t numBlockBits = firstBitIndex + 1;
  460. // Shift the leading 1-bit to bit-63 of prefix. We have consumed
  461. // numBlockBits, which might not be the entire budget.
  462. int32_t targetIndex = 63;
  463. prefix <<= targetIndex - firstBitIndex;
  464. numRequested -= numBlockBits;
  465. targetIndex -= numBlockBits;
  466. if (numRequested > 0 && --blockIndex >= 0)
  467. {
  468. // More bits are available. Copy and shift the entire 32-bit
  469. // next block and OR it into the 'prefix'. For 'float', we
  470. // will have consumed the entire budget. For 'double', we
  471. // might have to get bits from a third block.
  472. uint64_t nextBlock = bits[blockIndex];
  473. nextBlock <<= targetIndex - 31; // Shift amount is positive.
  474. prefix |= nextBlock;
  475. numRequested -= 32;
  476. targetIndex -= 32;
  477. if (numRequested > 0 && --blockIndex >= 0)
  478. {
  479. // We know that targetIndex > 0; only 'double' allows us
  480. // to get here, so numRequested is at most 53. We also
  481. // know that targetIndex < 32 because we started with 63
  482. // and subtracted at least 32 from it. Thus, the shift
  483. // amount is positive.
  484. nextBlock = bits[blockIndex];
  485. nextBlock >>= 31 - targetIndex;
  486. prefix |= nextBlock;
  487. }
  488. }
  489. return prefix;
  490. }
  491. };
  492. }