3PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u()
5 PL_CHECK_SIMD_ALIGNMENT(
this);
7#if PL_ENABLED(PL_MATH_CHECK_FOR_NAN)
8 m_v = _mm_set1_epi32(0xCDCDCDCD);
12PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u(plUInt32 uiXyzw)
14 PL_CHECK_SIMD_ALIGNMENT(
this);
16 m_v = _mm_set1_epi32(uiXyzw);
19PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u(plUInt32 x, plUInt32 y, plUInt32 z, plUInt32 w)
21 PL_CHECK_SIMD_ALIGNMENT(
this);
23 m_v = _mm_setr_epi32(x, y, z, w);
31PL_ALWAYS_INLINE
void plSimdVec4u::Set(plUInt32 uiXyzw)
33 m_v = _mm_set1_epi32(uiXyzw);
36PL_ALWAYS_INLINE
void plSimdVec4u::Set(plUInt32 x, plUInt32 y, plUInt32 z, plUInt32 w)
38 m_v = _mm_setr_epi32(x, y, z, w);
41PL_ALWAYS_INLINE
void plSimdVec4u::SetZero()
43 m_v = _mm_setzero_si128();
47PL_ALWAYS_INLINE plSimdVec4i::plSimdVec4i(
const plSimdVec4u& u)
52PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u(
const plSimdVec4i& i)
57PL_ALWAYS_INLINE
plSimdVec4f plSimdVec4u::ToFloat()
const
59 __m128 two16 = _mm_set1_ps((
float)0x10000);
60 __m128i high = _mm_srli_epi32(m_v, 16);
61 __m128i low = _mm_srli_epi32(_mm_slli_epi32(m_v, 16), 16);
62 __m128 fHigh = _mm_mul_ps(_mm_cvtepi32_ps(high), two16);
63 __m128 fLow = _mm_cvtepi32_ps(low);
65 return _mm_add_ps(fHigh, fLow);
71 alignas(16)
const float fmax[4] = {2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f};
72 alignas(16)
const float fmax_unsigned[4] = {4.29496729e+009f, 4.29496729e+009f, 4.29496729e+009f, 4.29496729e+009f};
73 __m128i zero = _mm_setzero_si128();
74 __m128i mask = _mm_cmpgt_epi32(_mm_castps_si128(f.m_v), zero);
75 __m128 min = _mm_and_ps(_mm_castsi128_ps(mask), f.m_v);
76 __m128 max = _mm_min_ps(min, _mm_load_ps(fmax_unsigned));
78 __m128 diff = _mm_sub_ps(max, _mm_load_ps(fmax));
79 mask = _mm_cmpgt_epi32(_mm_castps_si128(diff), zero);
80 diff = _mm_and_ps(_mm_castsi128_ps(mask), diff);
82 __m128i res1 = _mm_cvttps_epi32(diff);
83 __m128i res2 = _mm_cvttps_epi32(max);
84 return _mm_add_epi32(res1, res2);
88PL_ALWAYS_INLINE plUInt32 plSimdVec4u::GetComponent()
const
90#if PL_SSE_LEVEL >= PL_SSE_41
91 return _mm_extract_epi32(m_v, N);
93 return m_v.m128i_i32[N];
97PL_ALWAYS_INLINE plUInt32 plSimdVec4u::x()
const
99 return GetComponent<0>();
102PL_ALWAYS_INLINE plUInt32 plSimdVec4u::y()
const
104 return GetComponent<1>();
107PL_ALWAYS_INLINE plUInt32 plSimdVec4u::z()
const
109 return GetComponent<2>();
112PL_ALWAYS_INLINE plUInt32 plSimdVec4u::w()
const
114 return GetComponent<3>();
117template <plSwizzle::Enum s>
118PL_ALWAYS_INLINE
plSimdVec4u plSimdVec4u::Get()
const
120 return _mm_shuffle_epi32(m_v, PL_TO_SHUFFLE(s));
125 return _mm_add_epi32(m_v, v.m_v);
130 return _mm_sub_epi32(m_v, v.m_v);
135#if PL_SSE_LEVEL >= PL_SSE_41
136 return _mm_mullo_epi32(m_v, v.m_v);
138 PL_ASSERT_NOT_IMPLEMENTED;
139 __m128i tmp1 = _mm_mul_epu32(m_v, v.m_v);
140 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_v, 4), _mm_srli_si128(v.m_v, 4));
141 return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, PL_SHUFFLE(0, 2, 0, 0)), _mm_shuffle_epi32(tmp2, PL_SHUFFLE(0, 2, 0, 0)));
147 return _mm_or_si128(m_v, v.m_v);
152 return _mm_and_si128(m_v, v.m_v);
157 return _mm_xor_si128(m_v, v.m_v);
160PL_ALWAYS_INLINE
plSimdVec4u plSimdVec4u::operator~()
const
162 __m128i ones = _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128());
163 return _mm_xor_si128(ones, m_v);
166PL_ALWAYS_INLINE
plSimdVec4u plSimdVec4u::operator<<(plUInt32 uiShift)
const
168 return _mm_slli_epi32(m_v, uiShift);
171PL_ALWAYS_INLINE
plSimdVec4u plSimdVec4u::operator>>(plUInt32 uiShift)
const
173 return _mm_srli_epi32(m_v, uiShift);
178 m_v = _mm_add_epi32(m_v, v.m_v);
184 m_v = _mm_sub_epi32(m_v, v.m_v);
190 m_v = _mm_or_si128(m_v, v.m_v);
196 m_v = _mm_and_si128(m_v, v.m_v);
202 m_v = _mm_xor_si128(m_v, v.m_v);
206PL_ALWAYS_INLINE
plSimdVec4u& plSimdVec4u::operator<<=(plUInt32 uiShift)
208 m_v = _mm_slli_epi32(m_v, uiShift);
212PL_ALWAYS_INLINE
plSimdVec4u& plSimdVec4u::operator>>=(plUInt32 uiShift)
214 m_v = _mm_srli_epi32(m_v, uiShift);
220#if PL_SSE_LEVEL >= PL_SSE_41
221 return _mm_min_epu32(m_v, v.m_v);
223 __m128i mask = _mm_cmplt_epi32(m_v, v.m_v);
224 return _mm_or_si128(_mm_and_si128(mask, m_v), _mm_andnot_si128(mask, v.m_v));
230#if PL_SSE_LEVEL >= PL_SSE_41
231 return _mm_max_epu32(m_v, v.m_v);
233 __m128i mask = _mm_cmpgt_epi32(m_v, v.m_v);
234 return _mm_or_si128(_mm_and_si128(mask, m_v), _mm_andnot_si128(mask, v.m_v));
240 return _mm_castsi128_ps(_mm_cmpeq_epi32(m_v, v.m_v));
245 return !(*
this == v);
250#if PL_SSE_LEVEL >= PL_SSE_41
251 __m128i minValue = _mm_min_epu32(m_v, v.m_v);
252 return _mm_castsi128_ps(_mm_cmpeq_epi32(minValue, m_v));
260 __m128i signBit = _mm_set1_epi32(0x80000000);
261 __m128i a = _mm_sub_epi32(m_v, signBit);
262 __m128i b = _mm_sub_epi32(v.m_v, signBit);
263 return _mm_castsi128_ps(_mm_cmplt_epi32(a, b));
268#if PL_SSE_LEVEL >= PL_SSE_41
269 __m128i maxValue = _mm_max_epu32(m_v, v.m_v);
270 return _mm_castsi128_ps(_mm_cmpeq_epi32(maxValue, m_v));
278 __m128i signBit = _mm_set1_epi32(0x80000000);
279 __m128i a = _mm_sub_epi32(m_v, signBit);
280 __m128i b = _mm_sub_epi32(v.m_v, signBit);
281 return _mm_castsi128_ps(_mm_cmpgt_epi32(a, b));
287 return _mm_setzero_si128();
294 __m128i T0 = _mm_unpacklo_epi32(v0.m_v, v1.m_v);
295 __m128i T1 = _mm_unpacklo_epi32(v2.m_v, v3.m_v);
296 __m128i T2 = _mm_unpackhi_epi32(v0.m_v, v1.m_v);
297 __m128i T3 = _mm_unpackhi_epi32(v2.m_v, v3.m_v);
299 v0.m_v = _mm_unpacklo_epi64(T0, T1);
300 v1.m_v = _mm_unpackhi_epi64(T0, T1);
301 v2.m_v = _mm_unpacklo_epi64(T2, T3);
302 v3.m_v = _mm_unpackhi_epi64(T2, T3);
A 4-component SIMD vector class.
Definition SimdVec4f.h:8
A SIMD 4-component vector class of signed 32b integers.
Definition SimdVec4i.h:9
A SIMD 4-component vector class of unsigned 32b integers.
Definition SimdVec4u.h:7
static plSimdVec4u MakeZero()
Creates a plSimdVec4u that is initialized to zero.
Definition FPUVec4u_inl.h:313