IEEEBinary16.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. // David Eberly, Geometric Tools, Redmond WA 98052
  2. // Copyright (c) 1998-2020
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // https://www.boost.org/LICENSE_1_0.txt
  5. // https://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
  6. // Version: 4.0.2020.01.08
  7. #pragma once
  8. #include <Mathematics/BitHacks.h>
  9. #include <Mathematics/Math.h>
  10. #include <Mathematics/IEEEBinary.h>
  11. namespace WwiseGTE
  12. {
  13. class IEEEBinary16 : public IEEEBinary<int16_t, uint16_t, 16, 11>
  14. {
  15. public:
  16. // Construction and destruction. The base class destructor is hidden,
  17. // but this is safe because there are no side effects of the
  18. // destruction.
  19. ~IEEEBinary16() = default;
  20. IEEEBinary16()
  21. :
  22. IEEEBinary<int16_t, uint16_t, 16, 11>()
  23. {
  24. // uninitialized
  25. }
  26. IEEEBinary16(IEEEBinary16 const& object)
  27. :
  28. IEEEBinary<int16_t, uint16_t, 16, 11>(object)
  29. {
  30. }
  31. IEEEBinary16(float inNumber)
  32. :
  33. IEEEBinary<int16_t, uint16_t, 16, 11>()
  34. {
  35. union { float n; uint32_t e; } temp = { inNumber };
  36. encoding = Convert32To16(temp.e);
  37. }
  38. IEEEBinary16(double inNumber)
  39. :
  40. IEEEBinary<int16_t, uint16_t, 16, 11>()
  41. {
  42. union { float n; uint32_t e; } temp;
  43. temp.n = (float)inNumber;
  44. encoding = Convert32To16(temp.e);
  45. }
  46. IEEEBinary16(uint16_t inEncoding)
  47. :
  48. IEEEBinary<int16_t, uint16_t, 16, 11>(inEncoding)
  49. {
  50. }
  51. // Implicit conversions.
  52. operator float() const
  53. {
  54. union { uint32_t e; float n; } temp = { Convert16To32(encoding) };
  55. return temp.n;
  56. }
  57. operator double() const
  58. {
  59. union { uint32_t e; float n; } temp = { Convert16To32(encoding) };
  60. return (double)temp.n;
  61. }
  62. // Assignment.
  63. IEEEBinary16& operator=(IEEEBinary16 const& object)
  64. {
  65. IEEEBinary<int16_t, uint16_t, 16, 11>::operator=(object);
  66. return *this;
  67. }
  68. // Comparison.
  69. bool operator==(IEEEBinary16 const& object) const
  70. {
  71. return (float)*this == (float)object;
  72. }
  73. bool operator!=(IEEEBinary16 const& object) const
  74. {
  75. return (float)*this != (float)object;
  76. }
  77. bool operator< (IEEEBinary16 const& object) const
  78. {
  79. return (float)*this < (float)object;
  80. }
  81. bool operator<=(IEEEBinary16 const& object) const
  82. {
  83. return (float)*this <= (float)object;
  84. }
  85. bool operator> (IEEEBinary16 const& object) const
  86. {
  87. return (float)* this > (float)object;
  88. }
  89. bool operator>=(IEEEBinary16 const& object) const
  90. {
  91. return (float)* this >= (float)object;
  92. }
  93. private:
  94. // Support for conversions between encodings.
  95. enum
  96. {
  97. F32_NUM_ENCODING_BITS = 32,
  98. F32_NUM_TRAILING_BITS = 23,
  99. F32_EXPONENT_BIAS = 127,
  100. F32_MAX_BIASED_EXPONENT = 255,
  101. F32_SIGN_MASK = 0x80000000,
  102. F32_NOT_SIGN_MASK = 0x7FFFFFFF,
  103. F32_BIASED_EXPONENT_MASK = 0x7F800000,
  104. F32_TRAILING_MASK = 0x007FFFFF,
  105. F16_AVR_MIN_SUBNORMAL_ZERO = 0x33000000,
  106. F16_MIN_SUBNORMAL = 0x33800000,
  107. F16_MIN_NORMAL = 0x38800000,
  108. F16_MAX_NORMAL = 0x477FE000,
  109. F16_AVR_MAX_NORMAL_INFINITY = 0x477FF000,
  110. DIFF_NUM_ENCODING_BITS = 16,
  111. DIFF_NUM_TRAILING_BITS = 13,
  112. DIFF_PAYLOAD_SHIFT = 13,
  113. INT_PART_MASK = 0x007FE000,
  114. FRC_PART_MASK = 0x00001FFF,
  115. FRC_HALF = 0x00001000
  116. };
  117. static uint16_t Convert32To16(uint32_t inEncoding)
  118. {
  119. // Extract the channels for the binary32 number.
  120. uint32_t sign32 = (inEncoding & F32_SIGN_MASK);
  121. uint32_t biased32 =
  122. ((inEncoding & F32_BIASED_EXPONENT_MASK) >> F32_NUM_TRAILING_BITS);
  123. uint32_t trailing32 = (inEncoding & F32_TRAILING_MASK);
  124. uint32_t nonneg32 = (inEncoding & F32_NOT_SIGN_MASK);
  125. // Generate the channels for the IEEEBinary16 number.
  126. uint16_t sign16 = static_cast<uint16_t>(sign32 >> DIFF_NUM_ENCODING_BITS);
  127. uint16_t biased16, trailing16;
  128. uint32_t frcpart;
  129. if (biased32 == 0)
  130. {
  131. // nonneg32 is 32-zero or 32-subnormal, nearest is 16-zero.
  132. return sign16;
  133. }
  134. if (biased32 < F32_MAX_BIASED_EXPONENT)
  135. {
  136. // nonneg32 is 32-normal.
  137. if (nonneg32 <= F16_AVR_MIN_SUBNORMAL_ZERO)
  138. {
  139. // nonneg32 <= 2^{-25}, nearest is 16-zero.
  140. return sign16;
  141. }
  142. if (nonneg32 <= F16_MIN_SUBNORMAL)
  143. {
  144. // 2^{-25} < nonneg32 <= 2^{-24}, nearest is
  145. // 16-min-subnormal.
  146. return sign16 | IEEEBinary16::MIN_SUBNORMAL;
  147. }
  148. if (nonneg32 < F16_MIN_NORMAL)
  149. {
  150. // 2^{-24} < nonneg32 < 2^{-14}, round to nearest
  151. // 16-subnormal with ties to even. Note that biased16 is
  152. // zero.
  153. trailing16 = static_cast<uint16_t>(((trailing32 & INT_PART_MASK) >> DIFF_NUM_TRAILING_BITS));
  154. frcpart = (trailing32 & FRC_PART_MASK);
  155. if (frcpart > FRC_HALF || (frcpart == FRC_HALF && (trailing16 & 1)))
  156. {
  157. // If there is a carry into the exponent, the nearest
  158. // is actually 16-min-normal 1.0*2^{-14}, so the
  159. // high-order bit of trailing16 makes biased16 equal
  160. // to 1 and the result is correct.
  161. ++trailing16;
  162. }
  163. return sign16 | trailing16;
  164. }
  165. if (nonneg32 <= F16_MAX_NORMAL)
  166. {
  167. // 2^{-14} <= nonneg32 <= 1.1111111111*2^{15}, round to
  168. // nearest 16-normal with ties to even.
  169. biased16 = static_cast<uint16_t>((biased32 - F32_EXPONENT_BIAS +
  170. IEEEBinary16::EXPONENT_BIAS)
  171. << IEEEBinary16::NUM_TRAILING_BITS);
  172. trailing16 = static_cast<uint16_t>(((trailing32 & INT_PART_MASK) >> DIFF_NUM_TRAILING_BITS));
  173. frcpart = (trailing32 & FRC_PART_MASK);
  174. if (frcpart > FRC_HALF || (frcpart == FRC_HALF && (trailing16 & 1)))
  175. {
  176. // If there is a carry into the exponent, the addition
  177. // of trailing16 to biased16 (rather than or-ing)
  178. // produces the correct result.
  179. ++trailing16;
  180. }
  181. return sign16 | (biased16 + trailing16);
  182. }
  183. if (nonneg32 < F16_AVR_MAX_NORMAL_INFINITY)
  184. {
  185. // 1.1111111111*2^{15} < nonneg32 < (MAX_NORMAL+INFINITY)/2,
  186. // so the number is closest to 16-max-normal.
  187. return sign16 | IEEEBinary16::MAX_NORMAL;
  188. }
  189. // nonneg32 >= (MAX_NORMAL+INFINITY)/2, so convert to
  190. // 16-infinite.
  191. return sign16 | IEEEBinary16::POS_INFINITY;
  192. }
  193. if (trailing32 == 0)
  194. {
  195. // The number is 32-infinite. Convert to 16-infinite.
  196. return sign16 | IEEEBinary16::POS_INFINITY;
  197. }
  198. // The number is 32-NaN. Convert to 16-NaN with 16-payload the
  199. // high-order 9 bits of the 32-payload. The code also grabs the
  200. // 32-quietNaN mask bit.
  201. uint16_t maskPayload = static_cast<uint16_t>((trailing32 & 0x007FE000u) >> 13);
  202. return sign16 | IEEEBinary16::EXPONENT_MASK | maskPayload;
  203. }
  204. static uint32_t Convert16To32(uint16_t inEncoding)
  205. {
  206. // Extract the channels for the IEEEBinary16 number.
  207. uint16_t sign16 = (inEncoding & IEEEBinary16::SIGN_MASK);
  208. uint16_t biased16 = ((inEncoding & IEEEBinary16::EXPONENT_MASK) >> IEEEBinary16::NUM_TRAILING_BITS);
  209. uint16_t trailing16 = (inEncoding & IEEEBinary16::TRAILING_MASK);
  210. // Generate the channels for the binary32 number.
  211. uint32_t sign32 = static_cast<uint32_t>(sign16 << DIFF_NUM_ENCODING_BITS);
  212. uint32_t biased32, trailing32;
  213. if (biased16 == 0)
  214. {
  215. if (trailing16 == 0)
  216. {
  217. // The number is 16-zero. Convert to 32-zero.
  218. return sign32;
  219. }
  220. else
  221. {
  222. // The number is 16-subnormal. Convert to 32-normal.
  223. trailing32 = static_cast<uint32_t>(trailing16);
  224. int32_t leading = BitHacks::GetLeadingBit(trailing32);
  225. int32_t shift = 23 - leading;
  226. biased32 = static_cast<uint32_t>(F32_EXPONENT_BIAS - 1 - shift);
  227. trailing32 = (trailing32 << shift) & F32_TRAILING_MASK;
  228. return sign32 | (biased32 << F32_NUM_TRAILING_BITS) | trailing32;
  229. }
  230. }
  231. if (biased16 < IEEEBinary16::MAX_BIASED_EXPONENT)
  232. {
  233. // The number is 16-normal. Convert to 32-normal.
  234. biased32 = static_cast<uint32_t>(biased16 - IEEEBinary16::EXPONENT_BIAS + F32_EXPONENT_BIAS);
  235. trailing32 = (static_cast<uint32_t>(trailing16) << DIFF_NUM_TRAILING_BITS);
  236. return sign32 | (biased32 << F32_NUM_TRAILING_BITS) | trailing32;
  237. }
  238. if (trailing16 == 0)
  239. {
  240. // The number is 16-infinite. Convert to 32-infinite.
  241. return sign32 | F32_BIASED_EXPONENT_MASK;
  242. }
  243. // The number is 16-NaN. Convert to 32-NaN with 16-payload
  244. // embedded in the high-order 9 bits of the 32-payload. The
  245. // code also copies the 16-quietNaN mask bit.
  246. uint32_t maskPayload = ((trailing16 & IEEEBinary16::TRAILING_MASK) << DIFF_PAYLOAD_SHIFT);
  247. return sign32 | F32_BIASED_EXPONENT_MASK | maskPayload;
  248. }
  249. };
  250. // Arithmetic operations (high-precision).
  251. inline IEEEBinary16 operator-(IEEEBinary16 x)
  252. {
  253. uint16_t result = static_cast<uint16_t>(x) ^ IEEEBinary16::SIGN_MASK;
  254. return result;
  255. }
  256. inline float operator+(IEEEBinary16 x, IEEEBinary16 y)
  257. {
  258. return static_cast<float>(x) + static_cast<float>(y);
  259. }
  260. inline float operator-(IEEEBinary16 x, IEEEBinary16 y)
  261. {
  262. return static_cast<float>(x) - static_cast<float>(y);
  263. }
  264. inline float operator*(IEEEBinary16 x, IEEEBinary16 y)
  265. {
  266. return static_cast<float>(x)* static_cast<float>(y);
  267. }
  268. inline float operator/(IEEEBinary16 x, IEEEBinary16 y)
  269. {
  270. return static_cast<float>(x) / static_cast<float>(y);
  271. }
  272. inline float operator+(IEEEBinary16 x, float y)
  273. {
  274. return static_cast<float>(x) + y;
  275. }
  276. inline float operator-(IEEEBinary16 x, float y)
  277. {
  278. return static_cast<float>(x) - y;
  279. }
  280. inline float operator*(IEEEBinary16 x, float y)
  281. {
  282. return static_cast<float>(x)* y;
  283. }
  284. inline float operator/(IEEEBinary16 x, float y)
  285. {
  286. return static_cast<float>(x) / y;
  287. }
  288. inline float operator+(float x, IEEEBinary16 y)
  289. {
  290. return x + static_cast<float>(y);
  291. }
  292. inline float operator-(float x, IEEEBinary16 y)
  293. {
  294. return x - static_cast<float>(y);
  295. }
  296. inline float operator*(float x, IEEEBinary16 y)
  297. {
  298. return x * static_cast<float>(y);
  299. }
  300. inline float operator/(float x, IEEEBinary16 y)
  301. {
  302. return x / static_cast<float>(y);
  303. }
  304. // Arithmetic updates.
  305. inline IEEEBinary16& operator+=(IEEEBinary16& x, IEEEBinary16 y)
  306. {
  307. x = static_cast<float>(x) + static_cast<float>(y);
  308. return x;
  309. }
  310. inline IEEEBinary16& operator-=(IEEEBinary16& x, IEEEBinary16 y)
  311. {
  312. x = static_cast<float>(x) - static_cast<float>(y);
  313. return x;
  314. }
  315. inline IEEEBinary16& operator*=(IEEEBinary16& x, IEEEBinary16 y)
  316. {
  317. x = static_cast<float>(x) * static_cast<float>(y);
  318. return x;
  319. }
  320. inline IEEEBinary16& operator/=(IEEEBinary16& x, IEEEBinary16 y)
  321. {
  322. x = static_cast<float>(x) / static_cast<float>(y);
  323. return x;
  324. }
  325. inline IEEEBinary16& operator+=(IEEEBinary16& x, float y)
  326. {
  327. x = static_cast<float>(x) + y;
  328. return x;
  329. }
  330. inline IEEEBinary16& operator-=(IEEEBinary16& x, float y)
  331. {
  332. x = static_cast<float>(x) - y;
  333. return x;
  334. }
  335. inline IEEEBinary16& operator*=(IEEEBinary16& x, float y)
  336. {
  337. x = static_cast<float>(x) * y;
  338. return x;
  339. }
  340. inline IEEEBinary16& operator/=(IEEEBinary16& x, float y)
  341. {
  342. x = static_cast<float>(x) / y;
  343. return x;
  344. }
  345. }
  346. namespace std
  347. {
  348. inline WwiseGTE::IEEEBinary16 acos(WwiseGTE::IEEEBinary16 x)
  349. {
  350. return (WwiseGTE::IEEEBinary16)std::acos((float)x);
  351. }
  352. inline WwiseGTE::IEEEBinary16 acosh(WwiseGTE::IEEEBinary16 x)
  353. {
  354. return (WwiseGTE::IEEEBinary16)std::acosh((float)x);
  355. }
  356. inline WwiseGTE::IEEEBinary16 asin(WwiseGTE::IEEEBinary16 x)
  357. {
  358. return (WwiseGTE::IEEEBinary16)std::asin((float)x);
  359. }
  360. inline WwiseGTE::IEEEBinary16 asinh(WwiseGTE::IEEEBinary16 x)
  361. {
  362. return (WwiseGTE::IEEEBinary16)std::asin((float)x);
  363. }
  364. inline WwiseGTE::IEEEBinary16 atan(WwiseGTE::IEEEBinary16 x)
  365. {
  366. return (WwiseGTE::IEEEBinary16)std::atan((float)x);
  367. }
  368. inline WwiseGTE::IEEEBinary16 atanh(WwiseGTE::IEEEBinary16 x)
  369. {
  370. return (WwiseGTE::IEEEBinary16)std::atanh((float)x);
  371. }
  372. inline WwiseGTE::IEEEBinary16 atan2(WwiseGTE::IEEEBinary16 y, WwiseGTE::IEEEBinary16 x)
  373. {
  374. return (WwiseGTE::IEEEBinary16)std::atan2((float)y, (float)x);
  375. }
  376. inline WwiseGTE::IEEEBinary16 ceil(WwiseGTE::IEEEBinary16 x)
  377. {
  378. return (WwiseGTE::IEEEBinary16)std::ceil((float)x);
  379. }
  380. inline WwiseGTE::IEEEBinary16 cos(WwiseGTE::IEEEBinary16 x)
  381. {
  382. return (WwiseGTE::IEEEBinary16)std::cos((float)x);
  383. }
  384. inline WwiseGTE::IEEEBinary16 cosh(WwiseGTE::IEEEBinary16 x)
  385. {
  386. return (WwiseGTE::IEEEBinary16)std::cosh((float)x);
  387. }
  388. inline WwiseGTE::IEEEBinary16 exp(WwiseGTE::IEEEBinary16 x)
  389. {
  390. return (WwiseGTE::IEEEBinary16)std::exp((float)x);
  391. }
  392. inline WwiseGTE::IEEEBinary16 exp2(WwiseGTE::IEEEBinary16 x)
  393. {
  394. return (WwiseGTE::IEEEBinary16)std::exp2((float)x);
  395. }
  396. inline WwiseGTE::IEEEBinary16 fabs(WwiseGTE::IEEEBinary16 x)
  397. {
  398. return (WwiseGTE::IEEEBinary16)std::fabs((float)x);
  399. }
  400. inline WwiseGTE::IEEEBinary16 floor(WwiseGTE::IEEEBinary16 x)
  401. {
  402. return (WwiseGTE::IEEEBinary16)std::floor((float)x);
  403. }
  404. inline WwiseGTE::IEEEBinary16 fmod(WwiseGTE::IEEEBinary16 x, WwiseGTE::IEEEBinary16 y)
  405. {
  406. return (WwiseGTE::IEEEBinary16)std::fmod((float)x, (float)y);
  407. }
  408. inline WwiseGTE::IEEEBinary16 frexp(WwiseGTE::IEEEBinary16 x, int* exponent)
  409. {
  410. return (WwiseGTE::IEEEBinary16)std::frexp((float)x, exponent);
  411. }
  412. inline WwiseGTE::IEEEBinary16 ldexp(WwiseGTE::IEEEBinary16 x, int exponent)
  413. {
  414. return (WwiseGTE::IEEEBinary16)std::ldexp((float)x, exponent);
  415. }
  416. inline WwiseGTE::IEEEBinary16 log(WwiseGTE::IEEEBinary16 x)
  417. {
  418. return (WwiseGTE::IEEEBinary16)std::log((float)x);
  419. }
  420. inline WwiseGTE::IEEEBinary16 log2(WwiseGTE::IEEEBinary16 x)
  421. {
  422. return (WwiseGTE::IEEEBinary16)std::log2((float)x);
  423. }
  424. inline WwiseGTE::IEEEBinary16 log10(WwiseGTE::IEEEBinary16 x)
  425. {
  426. return (WwiseGTE::IEEEBinary16)std::log10((float)x);
  427. }
  428. inline WwiseGTE::IEEEBinary16 pow(WwiseGTE::IEEEBinary16 x, WwiseGTE::IEEEBinary16 y)
  429. {
  430. return (WwiseGTE::IEEEBinary16)std::pow((float)x, (float)y);
  431. }
  432. inline WwiseGTE::IEEEBinary16 sin(WwiseGTE::IEEEBinary16 x)
  433. {
  434. return (WwiseGTE::IEEEBinary16)std::sin((float)x);
  435. }
  436. inline WwiseGTE::IEEEBinary16 sinh(WwiseGTE::IEEEBinary16 x)
  437. {
  438. return (WwiseGTE::IEEEBinary16)std::sinh((float)x);
  439. }
  440. inline WwiseGTE::IEEEBinary16 sqrt(WwiseGTE::IEEEBinary16 x)
  441. {
  442. return (WwiseGTE::IEEEBinary16)std::sqrt((float)x);
  443. }
  444. inline WwiseGTE::IEEEBinary16 tan(WwiseGTE::IEEEBinary16 x)
  445. {
  446. return (WwiseGTE::IEEEBinary16)std::tan((float)x);
  447. }
  448. inline WwiseGTE::IEEEBinary16 tanh(WwiseGTE::IEEEBinary16 x)
  449. {
  450. return (WwiseGTE::IEEEBinary16)std::tanh((float)x);
  451. }
  452. }
  453. namespace WwiseGTE
  454. {
  455. inline IEEEBinary16 atandivpi(IEEEBinary16 x)
  456. {
  457. return (IEEEBinary16)atandivpi((float)x);
  458. }
  459. inline IEEEBinary16 atan2divpi(IEEEBinary16 y, IEEEBinary16 x)
  460. {
  461. return (IEEEBinary16)atan2divpi((float)y, (float)x);
  462. }
  463. inline IEEEBinary16 clamp(IEEEBinary16 x, IEEEBinary16 xmin, IEEEBinary16 xmax)
  464. {
  465. return (IEEEBinary16)clamp((float)x, (float)xmin, (float)xmax);
  466. }
  467. inline IEEEBinary16 cospi(IEEEBinary16 x)
  468. {
  469. return (IEEEBinary16)cospi((float)x);
  470. }
  471. inline IEEEBinary16 exp10(IEEEBinary16 x)
  472. {
  473. return (IEEEBinary16)exp10((float)x);
  474. }
  475. inline IEEEBinary16 invsqrt(IEEEBinary16 x)
  476. {
  477. return (IEEEBinary16)invsqrt((float)x);
  478. }
  479. inline int isign(IEEEBinary16 x)
  480. {
  481. return isign((float)x);
  482. }
  483. inline IEEEBinary16 saturate(IEEEBinary16 x)
  484. {
  485. return (IEEEBinary16)saturate((float)x);
  486. }
  487. inline IEEEBinary16 sign(IEEEBinary16 x)
  488. {
  489. return (IEEEBinary16)sign((float)x);
  490. }
  491. inline IEEEBinary16 sinpi(IEEEBinary16 x)
  492. {
  493. return (IEEEBinary16)sinpi((float)x);
  494. }
  495. inline IEEEBinary16 sqr(IEEEBinary16 x)
  496. {
  497. return (IEEEBinary16)sqr((float)x);
  498. }
  499. }