12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851 |
- #ifndef _AKSIMD_GENERIC_H_
- #define _AKSIMD_GENERIC_H_
- #include <math.h>
- #include <string.h>
- #include <AK/SoundEngine/Common/AkTypes.h>
- #include <AK/Tools/Common/AkPlatformFuncs.h>
- typedef AkInt32 AKSIMD_I32;
- typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32;
- typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32;
- typedef AkReal32 AKSIMD_F32;
- typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32;
- typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32;
- typedef AKSIMD_V4UI32 AKSIMD_V4COND;
- #pragma pack(push,1)
- typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32_UNALIGNED;
- typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32_UNALIGNED;
- typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32_UNALIGNED;
- typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32_UNALIGNED;
- #pragma pack(pop)
/// Cache line size, in bytes.
#define AKSIMD_ARCHCACHELINESIZE	(32)
/// Maximum prefetch size, in bytes.
#define AKSIMD_ARCHMAXPREFETCHSIZE	(512)
/// Prefetch request; expands to nothing in this implementation.
#define AKSIMD_PREFETCHMEMORY( offset, add )
/// Rounds size up to the next multiple of 16.
#define AKSIMD_ALIGNSIZE( size ) (((size) + 15) & ~15)
/// Reads the integer vector that pData points to.
#define AKSIMD_LOADU_V4I32( pData ) (*(pData))
/// Reads four floats starting at pValue, viewed as an AKSIMD_V4F32.
#define AKSIMD_LOADU_V4F32( pValue ) (*(AKSIMD_V4F32*)(pValue))
/// Same expansion as AKSIMD_LOADU_V4F32.
#define AKSIMD_LOAD_V4F32( pValue ) (*(AKSIMD_V4F32*)(pValue))
- AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32( AKSIMD_F32 in_value )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = in_value;
- vector.m_data[1] = in_value;
- vector.m_data[2] = in_value;
- vector.m_data[3] = in_value;
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32( AKSIMD_F32 in_value )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = in_value;
- vector.m_data[1] = in_value;
- vector.m_data[2] = in_value;
- vector.m_data[3] = in_value;
-
- return vector;
- }
- AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32( AKSIMD_F32 in_value )
- {
- AKSIMD_V2F32 vector;
- vector.m_data[0] = in_value;
- vector.m_data[1] = in_value;
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32()
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = 0.f;
- vector.m_data[1] = 0.f;
- vector.m_data[2] = 0.f;
- vector.m_data[3] = 0.f;
-
- return vector;
- }
- AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32()
- {
- AKSIMD_V2F32 vector;
- vector.m_data[0] = 0.f;
- vector.m_data[1] = 0.f;
-
- return vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32()
- {
- AKSIMD_V4I32 vector;
- vector.m_data[0] = 0;
- vector.m_data[1] = 0;
- vector.m_data[2] = 0;
- vector.m_data[3] = 0;
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32( const AKSIMD_F32* in_pData )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = *in_pData;
- vector.m_data[1] = 0.f;
- vector.m_data[2] = 0.f;
- vector.m_data[3] = 0.f;
-
- return vector;
- }
/// Writes the float vector in_vec to the address in_pTo.
/// The whole expansion is parenthesized so the macro behaves as a single expression.
#define AKSIMD_STOREU_V4F32( in_pTo, in_vec ) ((*(AKSIMD_V4F32*)(in_pTo)) = (in_vec))
/// Forwards to AKSIMD_STOREU_V4F32.
#define AKSIMD_STORE_V4F32( __addr__, __vName__ ) AKSIMD_STOREU_V4F32((__addr__), (__vName__))
/// Writes the integer vector in_vec to the address in_pTo.
#define AKSIMD_STOREU_V4I32( in_pTo, in_vec ) ((*(AKSIMD_V4I32*)(in_pTo)) = (in_vec))
- AkForceInline void AKSIMD_STORE1_V4F32( AKSIMD_F32* in_pTo, const AKSIMD_V4F32& in_vec )
- {
- ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0];
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32( const AKSIMD_V4I32& in_from )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = (AkReal32)in_from.m_data[0];
- vector.m_data[1] = (AkReal32)in_from.m_data[1];
- vector.m_data[2] = (AkReal32)in_from.m_data[2];
- vector.m_data[3] = (AkReal32)in_from.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_TRUNCATE_V4F32_TO_V4I32( const AKSIMD_V4F32& in_from )
- {
- AKSIMD_V4I32 vector;
- vector.m_data[0] = (AkInt32)in_from.m_data[0];
- vector.m_data[1] = (AkInt32)in_from.m_data[1];
- vector.m_data[2] = (AkInt32)in_from.m_data[2];
- vector.m_data[3] = (AkInt32)in_from.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
- {
- AKSIMD_V4I32 vector;
- vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1];
- vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2];
- vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
- {
- AKSIMD_V4I32 vector;
-
- AkInt16 *pVec1,*pVec2,*pVec3;
- pVec1 = (AkInt16*)&in_vec1;
- pVec2 = (AkInt16*)&in_vec2;
- pVec3 = (AkInt16*)&vector;
-
- pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0;
- pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0;
- pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0;
- pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0;
- pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0;
- pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0;
- pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0;
- pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0;
- return vector;
- }
- AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4UI32 vector;
-
- vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0;
- vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0;
- vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0;
- vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0;
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_GTEQ_V4F32(const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2)
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] >= in_vec2.m_data[0]) ? 0xffffffff : 0x0);
- vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] >= in_vec2.m_data[1]) ? 0xffffffff : 0x0);
- vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] >= in_vec2.m_data[2]) ? 0xffffffff : 0x0);
- vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] >= in_vec2.m_data[3]) ? 0xffffffff : 0x0);
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_GT_V4F32(const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2)
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] > in_vec2.m_data[0]) ? 0xffffffff : 0x0);
- vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] > in_vec2.m_data[1]) ? 0xffffffff : 0x0);
- vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] > in_vec2.m_data[2]) ? 0xffffffff : 0x0);
- vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] > in_vec2.m_data[3]) ? 0xffffffff : 0x0);
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_LTEQ_V4F32(const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2)
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0);
- vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0);
- vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0);
- vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0);
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_LT_V4F32(const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2)
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] < in_vec2.m_data[0]) ? 0xffffffff : 0x0);
- vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] < in_vec2.m_data[1]) ? 0xffffffff : 0x0);
- vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] < in_vec2.m_data[2]) ? 0xffffffff : 0x0);
- vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] < in_vec2.m_data[3]) ? 0xffffffff : 0x0);
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_EQ_V4F32(const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2)
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] == in_vec2.m_data[0]) ? 0xffffffff : 0x0);
- vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] == in_vec2.m_data[1]) ? 0xffffffff : 0x0);
- vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] == in_vec2.m_data[2]) ? 0xffffffff : 0x0);
- vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] == in_vec2.m_data[3]) ? 0xffffffff : 0x0);
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_XOR_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = (AkReal32)(((AkUInt32)in_vec1.m_data[0]) ^ ((AkUInt32)in_vec2.m_data[0]));
- vector.m_data[1] = (AkReal32)(((AkUInt32)in_vec1.m_data[1]) ^ ((AkUInt32)in_vec2.m_data[1]));
- vector.m_data[2] = (AkReal32)(((AkUInt32)in_vec1.m_data[2]) ^ ((AkUInt32)in_vec2.m_data[2]));
- vector.m_data[3] = (AkReal32)(((AkUInt32)in_vec1.m_data[3]) ^ ((AkUInt32)in_vec2.m_data[3]));
-
- return vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy)
- {
- in_vector.m_data[0] <<= in_shiftBy;
- in_vector.m_data[1] <<= in_shiftBy;
- in_vector.m_data[2] <<= in_shiftBy;
- in_vector.m_data[3] <<= in_shiftBy;
-
- return in_vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy)
- {
- in_vector.m_data[0] = (AkInt32)((AkUInt32)in_vector.m_data[0] >> in_shiftBy);
- in_vector.m_data[1] = (AkInt32)((AkUInt32)in_vector.m_data[1] >> in_shiftBy);
- in_vector.m_data[2] = (AkInt32)((AkUInt32)in_vector.m_data[2] >> in_shiftBy);
- in_vector.m_data[3] = (AkInt32)((AkUInt32)in_vector.m_data[3] >> in_shiftBy);
- return in_vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy)
- {
- in_vector.m_data[0] >>= in_shiftBy;
- in_vector.m_data[1] >>= in_shiftBy;
- in_vector.m_data[2] >>= in_shiftBy;
- in_vector.m_data[3] >>= in_shiftBy;
-
- return in_vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1];
- vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2];
- vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1];
- vector.m_data[2] = in_vec1.m_data[2];
- vector.m_data[3] = in_vec1.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
- vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2];
- vector.m_data[3] = in_vec1.m_data[3] + in_vec2.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = in_vec1.m_data[0] / in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1] / in_vec2.m_data[1];
- vector.m_data[2] = in_vec1.m_data[2] / in_vec2.m_data[2];
- vector.m_data[3] = in_vec1.m_data[3] / in_vec2.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
- {
- AKSIMD_V2F32 vector;
-
- vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = a.m_data[0] + b.m_data[0];
- vector.m_data[1] = a.m_data[1];
- vector.m_data[2] = a.m_data[2];
- vector.m_data[3] = a.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
- vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2];
- vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
- {
- AKSIMD_V2F32 vector;
-
- vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
- vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = a.m_data[0] * b.m_data[0];
- vector.m_data[1] = a.m_data[1];
- vector.m_data[2] = a.m_data[2];
- vector.m_data[3] = a.m_data[3];
-
- return vector;
- }
/// Lane-wise (a * b) + c, built from AKSIMD_MUL_V4F32 and AKSIMD_ADD_V4F32.
#define AKSIMD_MADD_V4F32( a, b, c ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (a), (b) ), (c) )
/// Lane-wise (a * b) - c, built from AKSIMD_MUL_V4F32 and AKSIMD_SUB_V4F32.
#define AKSIMD_MSUB_V4F32( a, b, c ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (a), (b) ), (c) )
/// Lane 0 only: (a * b) + c; the remaining lanes come from a.
#define AKSIMD_MADD_SS_V4F32( a, b, c ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (a), (b) ), (c) )
- AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
- vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
- vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]);
- vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]);
-
- return vector;
- }
- AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
- {
- AKSIMD_V2F32 vector;
-
- vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
- vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
-
- vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
- vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
- vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]);
- vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]);
-
- return vector;
- }
- AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
- {
- AKSIMD_V2F32 vector;
-
- vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
- vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32( const AKSIMD_V4F32& in_vec1 )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = fabsf(in_vec1.m_data[0]);
- vector.m_data[1] = fabsf(in_vec1.m_data[1]);
- vector.m_data[2] = fabsf(in_vec1.m_data[2]);
- vector.m_data[3] = fabsf(in_vec1.m_data[3]);
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32( const AKSIMD_V4F32& in_vec1 )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = -in_vec1.m_data[0];
- vector.m_data[1] = -in_vec1.m_data[1];
- vector.m_data[2] = -in_vec1.m_data[2];
- vector.m_data[3] = -in_vec1.m_data[3];
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32( const AKSIMD_V4F32& in_vec )
- {
- AKSIMD_V4F32 vCompare;
- AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
- AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
- AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,2) );
- AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,3) );
-
- return vCompare ;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_RSQRT_V4F32(const AKSIMD_V4F32& in_vec)
- {
- AKSIMD_V4F32 vCompare;
- AKSIMD_GETELEMENT_V4F32(vCompare, 0) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 0));
- AKSIMD_GETELEMENT_V4F32(vCompare, 1) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 1));
- AKSIMD_GETELEMENT_V4F32(vCompare, 2) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 2));
- AKSIMD_GETELEMENT_V4F32(vCompare, 3) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 3));
- return vCompare;
- }
- AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32( const AKSIMD_V2F32& in_vec )
- {
- AKSIMD_V2F32 vCompare;
- AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
- AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
-
-
-
- return vCompare ;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
- {
- AKSIMD_V4I32 vector;
- AkInt16 *pVec1,*pVec2,*pDest;
- pVec1 = (AkInt16*)&in_vec1;
- pVec2 = (AkInt16*)&in_vec2;
- pDest = (AkInt16*)&vector;
-
- pDest[0] = pVec1[0];
- pDest[1] = pVec2[0];
- pDest[2] = pVec1[1];
- pDest[3] = pVec2[1];
- pDest[4] = pVec1[2];
- pDest[5] = pVec2[2];
- pDest[6] = pVec1[3];
- pDest[7] = pVec2[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
- {
- AKSIMD_V4I32 vector;
- AkInt16 *pVec1,*pVec2,*pDest;
- pVec1 = (AkInt16*)&in_vec1;
- pVec2 = (AkInt16*)&in_vec2;
- pDest = (AkInt16*)&vector;
-
- pDest[0] = pVec1[4];
- pDest[1] = pVec2[4];
- pDest[2] = pVec1[5];
- pDest[3] = pVec2[5];
- pDest[4] = pVec1[6];
- pDest[5] = pVec2[6];
- pDest[6] = pVec1[7];
- pDest[7] = pVec2[7];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = in_vec1.m_data[0];
- vector.m_data[1] = in_vec2.m_data[0];
- vector.m_data[2] = in_vec1.m_data[1];
- vector.m_data[3] = in_vec2.m_data[1];
-
- return vector;
- }
- AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = in_vec1.m_data[2];
- vector.m_data[1] = in_vec2.m_data[2];
- vector.m_data[2] = in_vec1.m_data[3];
- vector.m_data[3] = in_vec2.m_data[3];
-
- return vector;
- }
- AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
- {
- AKSIMD_V4I32 vector;
- AkInt16 *pDest = (AkInt16*)&vector;
-
- pDest[0] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[0], -32768, 32767);
- pDest[1] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[1], -32768, 32767);
- pDest[2] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[2], -32768, 32767);
- pDest[3] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[3], -32768, 32767);
- pDest[4] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[0], -32768, 32767);
- pDest[5] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[1], -32768, 32767);
- pDest[6] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[2], -32768, 32767);
- pDest[7] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[3], -32768, 32767);
-
- return vector;
- }
/// Packs four 2-bit lane selectors into one mask: sel0 occupies bits 0-1,
/// sel1 bits 2-3, sel2 bits 4-5 and sel3 bits 6-7.
#define AKSIMD_SHUFFLE( sel3, sel2, sel1, sel0 ) ((sel0) | ((sel1) << 2) | ((sel2) << 4) | ((sel3) << 6))
- AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd, int mask )
- {
- AKSIMD_V4F32 vector;
- vector.m_data[0] = xyzw.m_data[(mask) & 0x3];
- vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3];
- vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3];
- vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3];
-
- return vector;
- }
/// Result = { b[2], b[3], a[2], a[3] }.
#define AKSIMD_MOVEHL_V4F32( a, b ) \
	AKSIMD_SHUFFLE_V4F32( (b), (a), AKSIMD_SHUFFLE(3, 2, 3, 2) )
/// Result = { a[0], a[1], b[0], b[1] }.
#define AKSIMD_MOVELH_V4F32( a, b ) \
	AKSIMD_SHUFFLE_V4F32( (a), (b), AKSIMD_SHUFFLE(1, 0, 1, 0) )
/// Swaps lanes within each pair: { v[1], v[0], v[3], v[2] }.
/// No trailing semicolon, so the macro can be used inside an expression.
#define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1))
/// Swaps the two halves: { v[2], v[3], v[0], v[1] }.
/// No trailing semicolon, so the macro can be used inside an expression.
#define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2))
/// Rotates lanes by one: { v[1], v[2], v[3], v[0] }.
#define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(0,3,2,1))

/// Duplicates the odd lanes: { v[1], v[1], v[3], v[3] }.
#define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32((__vv), (__vv), AKSIMD_SHUFFLE(3,3,1,1))

/// Duplicates the even lanes: { v[0], v[0], v[2], v[2] }.
#define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32((__vv), (__vv), AKSIMD_SHUFFLE(2,2,0,0))
/// Alias for the four-float vector type.
#define AkReal32Vector AKSIMD_V4F32
/// Broadcasts a scalar to all four lanes. AKSIMD_LOAD1_V4F32 takes its argument
/// by value, so the scalar itself is passed, not its address.
#define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( (__scalar__) )
/// Loads a four-float vector from the address v.
#define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v)))
/// Alias for AKSIMD_MUL_V4F32.
#define AKSIMD_MUL AKSIMD_MUL_V4F32
/// Alias for AKSIMD_STORE_V4F32.
#define AKSIMD_STOREVEC AKSIMD_STORE_V4F32
- static AkForceInline AKSIMD_V4F32 AKSIMD_HORIZONTALADD_V4F32( AKSIMD_V4F32 vVec )
- {
- AKSIMD_V4F32 vAb = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0xB1);
- AKSIMD_V4F32 vHaddAb = AKSIMD_ADD_V4F32(vVec, vAb);
- AKSIMD_V4F32 vHaddCd = AKSIMD_SHUFFLE_V4F32(vHaddAb, vHaddAb, 0x4E);
- AKSIMD_V4F32 vHaddAbcd = AKSIMD_ADD_V4F32(vHaddAb, vHaddCd);
- return vHaddAbcd;
- }
- static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL_V4F32( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 )
- {
- static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f };
- AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0));
- vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 );
- AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1));
- vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign );
- vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 );
- vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 );
- vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 );
- return vTmp2;
- }
/// Copies lane idx of var into all four lanes. Arguments are parenthesized so
/// that compound expressions expand safely.
#define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32((var), (var), AKSIMD_SHUFFLE((idx),(idx),(idx),(idx)))
/// Converts val to AkUInt32 and returns its top bit (bit 31).
#define AK_SIGN_BIT( val ) (((AkUInt32)(val)) >> 31)
- static AkForceInline int AKSIMD_MASK_V4F32( const AKSIMD_V4F32& in_vec )
- {
- return AK_SIGN_BIT(in_vec.m_data[0]) | AK_SIGN_BIT(in_vec.m_data[1]) << 1 | AK_SIGN_BIT(in_vec.m_data[2]) << 2 | AK_SIGN_BIT(in_vec.m_data[3]) << 3;
- }
- static AkForceInline AKSIMD_V4F32 AKSIMD_RECIP_V4F32(const AKSIMD_V4F32 &v)
- {
- AKSIMD_V4F32 r;
- r.m_data[0] = 1.f / v.m_data[0];
- r.m_data[1] = 1.f / v.m_data[1];
- r.m_data[2] = 1.f / v.m_data[2];
- r.m_data[3] = 1.f / v.m_data[3];
- return r;
- }
- static AkForceInline AKSIMD_V4F32 AKSIMD_CEIL_V4F32(const AKSIMD_V4F32 & x)
- {
- AKSIMD_V4F32 r;
- r.m_data[0] = ceil(x.m_data[0]);
- r.m_data[1] = ceil(x.m_data[1]);
- r.m_data[2] = ceil(x.m_data[2]);
- r.m_data[3] = ceil(x.m_data[3]);
- return r;
- }
- #endif
|