123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236 |
- #ifndef _AKSIMDMATH_H_
- #define _AKSIMDMATH_H_
- #include <AK/Tools/Common/AkPlatformFuncs.h>
- #include <AK/SoundEngine/Common/AkSimd.h>
- #include <AkMath.h>
- namespace AkMath
- {
-
- AkForceInline void PermuteVectors3(const AKSIMD_V4F32& v0, const AKSIMD_V4F32& v1, const AKSIMD_V4F32& v2, const AKSIMD_V4F32& v3,
- AKSIMD_V4F32& out_xxxx, AKSIMD_V4F32& out_yyyy, AKSIMD_V4F32& out_zzzz)
- {
- AKSIMD_V4F32 xyxy0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(1, 0, 1, 0));
- AKSIMD_V4F32 xyxy1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(1, 0, 1, 0));
- out_xxxx = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(2, 0, 2, 0));
- out_yyyy = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(3, 1, 3, 1));
- AKSIMD_V4F32 zwzw0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(3, 2, 3, 2));
- AKSIMD_V4F32 zwzw1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(3, 2, 3, 2));
- out_zzzz = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(2, 0, 2, 0));
- }
-
- AkForceInline void UnpermuteVectors3(const AKSIMD_V4F32& xxxx, const AKSIMD_V4F32& yyyy, const AKSIMD_V4F32& zzzz,
- AKSIMD_V4F32& out_v0, AKSIMD_V4F32& out_v1, AKSIMD_V4F32& out_v2, AKSIMD_V4F32& out_v3)
- {
-
- AKSIMD_V4F32 x0x1y0y1 = AKSIMD_SHUFFLE_V4F32(xxxx, yyyy, AKSIMD_SHUFFLE(1, 0, 1, 0));
- AKSIMD_V4F32 z0z1z0z1 = AKSIMD_SHUFFLE_V4F32(zzzz, zzzz, AKSIMD_SHUFFLE(1, 0, 1, 0));
-
- out_v0 = AKSIMD_SHUFFLE_V4F32(x0x1y0y1, z0z1z0z1, AKSIMD_SHUFFLE(2, 0, 2, 0));
- out_v1 = AKSIMD_SHUFFLE_V4F32(x0x1y0y1, z0z1z0z1, AKSIMD_SHUFFLE(3, 1, 3, 1));
- AKSIMD_V4F32 x2x3y2y3 = AKSIMD_SHUFFLE_V4F32(xxxx, yyyy, AKSIMD_SHUFFLE(3, 2, 3, 2));
- AKSIMD_V4F32 z2z3z2z3 = AKSIMD_SHUFFLE_V4F32(zzzz, zzzz, AKSIMD_SHUFFLE(3, 2, 3, 2));
-
- out_v2 = AKSIMD_SHUFFLE_V4F32(x2x3y2y3, z2z3z2z3, AKSIMD_SHUFFLE(2, 0, 2, 0));
- out_v3 = AKSIMD_SHUFFLE_V4F32(x2x3y2y3, z2z3z2z3, AKSIMD_SHUFFLE(3, 1, 3, 1));
- }
-
- AkForceInline void PermuteVectors4(const AKSIMD_V4F32& v0, const AKSIMD_V4F32& v1, const AKSIMD_V4F32& v2, const AKSIMD_V4F32& v3,
- AKSIMD_V4F32& out_xxxx, AKSIMD_V4F32& out_yyyy, AKSIMD_V4F32& out_zzzz, AKSIMD_V4F32& out_wwww)
- {
- AKSIMD_V4F32 xyxy0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(1, 0, 1, 0));
- AKSIMD_V4F32 xyxy1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(1, 0, 1, 0));
- out_xxxx = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(2, 0, 2, 0));
- out_yyyy = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(3, 1, 3, 1));
- AKSIMD_V4F32 zwzw0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(3, 2, 3, 2));
- AKSIMD_V4F32 zwzw1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(3, 2, 3, 2));
- out_zzzz = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(2, 0, 2, 0));
- out_wwww = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(3, 1, 3, 1));
- }
-
- AkForceInline AKSIMD_V4F32 DotPoduct3_4x4(const AKSIMD_V4F32& v0_x, const AKSIMD_V4F32& v0_y, const AKSIMD_V4F32& v0_z,
- const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z)
- {
- return AKSIMD_ADD_V4F32(AKSIMD_ADD_V4F32(AKSIMD_MUL_V4F32(v0_x, v1_x), AKSIMD_MUL_V4F32(v0_y, v1_y)), AKSIMD_MUL_V4F32(v0_z, v1_z));
- }
-
- AkForceInline AKSIMD_V4F32 DotPoduct3_1x4(const AKSIMD_V4F32& v0_xyz, const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z)
- {
- const AKSIMD_V4F32 v0_x = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(0, 0, 0, 0));
- const AKSIMD_V4F32 v0_y = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(1, 1, 1, 1));
- const AKSIMD_V4F32 v0_z = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
- return DotPoduct3_4x4(v0_x, v0_y, v0_z, v1_x, v1_y, v1_z);
- }
-
- AkForceInline AKSIMD_V4F32 DotPoduct4_4x4(const AKSIMD_V4F32& v0_x, const AKSIMD_V4F32& v0_y, const AKSIMD_V4F32& v0_z, const AKSIMD_V4F32& v0_w,
- const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z, const AKSIMD_V4F32& v1_w)
- {
- return AKSIMD_ADD_V4F32(
- AKSIMD_ADD_V4F32(
- AKSIMD_MUL_V4F32(v0_x, v1_x),
- AKSIMD_MUL_V4F32(v0_y, v1_y)),
- AKSIMD_ADD_V4F32(
- AKSIMD_MUL_V4F32(v0_z, v1_z),
- AKSIMD_MUL_V4F32(v0_w, v1_w)));
- }
-
- AkForceInline AKSIMD_V4F32 DotPoduct4_1x4(const AKSIMD_V4F32& v0_xyz, const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z, const AKSIMD_V4F32& v1_w)
- {
- const AKSIMD_V4F32 v0_x = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(0, 0, 0, 0));
- const AKSIMD_V4F32 v0_y = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(1, 1, 1, 1));
- const AKSIMD_V4F32 v0_z = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
- const AKSIMD_V4F32 v0_w = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
- return DotPoduct4_4x4(v0_x, v0_y, v0_z, v0_w, v1_x, v1_y, v1_z, v1_w);
- }
-
- AkForceInline void CrossProduct3_4x4(
- const AKSIMD_V4F32& u_x, const AKSIMD_V4F32& u_y, const AKSIMD_V4F32& u_z,
- const AKSIMD_V4F32& v_x, const AKSIMD_V4F32& v_y, const AKSIMD_V4F32& v_z,
- AKSIMD_V4F32& uXv_x, AKSIMD_V4F32& uXv_y, AKSIMD_V4F32& uXv_z
- )
- {
- uXv_x = AKSIMD_SUB_V4F32(AKSIMD_MUL_V4F32(u_y, v_z), AKSIMD_MUL_V4F32(u_z, v_y));
- uXv_y = AKSIMD_SUB_V4F32(AKSIMD_MUL_V4F32(u_z, v_x), AKSIMD_MUL_V4F32(u_x, v_z));
- uXv_z = AKSIMD_SUB_V4F32(AKSIMD_MUL_V4F32(u_x, v_y), AKSIMD_MUL_V4F32(u_y, v_x));
- }
-
- AkForceInline AKSIMD_V4F32 AKSIMD_SIN_V4F32(const AKSIMD_V4F32 x)
- {
- const AKSIMD_V4F32 B = AKSIMD_SET_V4F32(4 / PI);
- const AKSIMD_V4F32 C = AKSIMD_SET_V4F32(-4 / (PI * PI));
- const AKSIMD_V4F32 P = AKSIMD_SET_V4F32(0.225f);
-
- AKSIMD_V4F32 y = AKSIMD_ABS_V4F32(x);
- y = AKSIMD_MADD_V4F32(y, C, B);
- y = AKSIMD_MUL_V4F32(y, x);
-
- AKSIMD_V4F32 sine = AKSIMD_ABS_V4F32(y);
- sine = AKSIMD_MSUB_V4F32(y, sine, y);
- sine = AKSIMD_MADD_V4F32(sine, P, y);
- return sine;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_COS_V4F32(const AKSIMD_V4F32 x)
- {
-
- const AKSIMD_V4F32 offsetNoWrap = AKSIMD_SET_V4F32(PI / 2);
- const AKSIMD_V4F32 offsetWrap = AKSIMD_SET_V4F32(PI / 2 - 2 * PI);
- const AKSIMD_V4F32 vHalfPI = AKSIMD_SET_V4F32(PI / 2);
-
- AKSIMD_V4F32 offset = AKSIMD_SEL_GTEZ_V4F32(AKSIMD_SUB_V4F32(x, vHalfPI), offsetWrap, offsetNoWrap);
- return AKSIMD_SIN_V4F32(AKSIMD_ADD_V4F32(x, offset));
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_ATAN2_V4F32(AKSIMD_V4F32 y, AKSIMD_V4F32 x)
- {
- const AKSIMD_V4F32 vNeg = AKSIMD_SET_V4F32(-1.0f);
- const AKSIMD_V4F32 vOne = AKSIMD_SET_V4F32(1.0f);
- const AKSIMD_V4F32 vZero = AKSIMD_SET_V4F32(0.0f);
- const AKSIMD_V4F32 vK = AKSIMD_SET_V4F32(0.28f);
- const AKSIMD_V4F32 vKRepro = AKSIMD_SET_V4F32(1.f / 0.28f);
- const AKSIMD_V4F32 vHalfPI = AKSIMD_SET_V4F32(PI / 2);
- const AKSIMD_V4F32 vPI = AKSIMD_SET_V4F32(PI);
- const AKSIMD_V4F32 vEpsilon = AKSIMD_SET_V4F32(1e-20f);
-
- x = AKSIMD_VSEL_V4F32(x, vEpsilon, AKSIMD_EQ_V4F32(x, vZero));
- AKSIMD_V4F32 z = AKSIMD_DIV_V4F32(y, x);
- AKSIMD_V4F32 absz = AKSIMD_ABS_V4F32(z);
- AKSIMD_V4COND zcond = AKSIMD_GTEQ_V4F32(vOne, absz);
-
-
- AKSIMD_V4F32 a = AKSIMD_VSEL_V4F32(vNeg, vK, zcond);
-
- AKSIMD_V4F32 b = AKSIMD_VSEL_V4F32(vK, vKRepro, zcond);
- AKSIMD_V4F32 atan = AKSIMD_MADD_V4F32(z, z, b);
- atan = AKSIMD_MUL_V4F32(atan, a);
- atan = AKSIMD_DIV_V4F32(z, atan);
-
-
-
-
-
-
-
-
-
-
- AKSIMD_V4F32 offsetByX = AKSIMD_SEL_GTEZ_V4F32(x, vZero, vPI);
- AKSIMD_V4F32 offset = AKSIMD_VSEL_V4F32(vHalfPI, offsetByX, zcond);
- AKSIMD_V4F32 sign = AKSIMD_SEL_GTEZ_V4F32(y, vOne, vNeg);
-
- atan = AKSIMD_MADD_V4F32(offset, sign, atan);
- return atan;
- }
-
- AkForceInline AkReal32 FastSqrtLarge(AkReal32 x)
- {
- AKSIMD_V4F32 y = AKSIMD_SQRT_V4F32(AKSIMD_SET_V4F32(x));
- return AKSIMD_GETELEMENT_V4F32(y, 0);
- }
-
- AkForceInline AkReal32 FastRSqrt(AkReal32 x)
- {
- AKSIMD_V4F32 y = AKSIMD_RSQRT_V4F32(AKSIMD_SET_V4F32(x));
- return AKSIMD_GETELEMENT_V4F32(y, 0);
- }
- AkForceInline AkReal32 FastRcp(AkReal32 x)
- {
- AKSIMD_V4F32 y = AKSIMD_RECIP_V4F32(AKSIMD_SET_V4F32(x));
- return AKSIMD_GETELEMENT_V4F32(y, 0);
- }
- }
- #endif
|