Plasma Engine 2.0
SSEVec4u_inl.h
#pragma once

PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u()
{
  PL_CHECK_SIMD_ALIGNMENT(this);

#if PL_ENABLED(PL_MATH_CHECK_FOR_NAN)
  // Fill with a recognizable garbage pattern so reads of uninitialized vectors are easy to spot in the debugger.
  m_v = _mm_set1_epi32(0xCDCDCDCD);
#endif
}

PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u(plUInt32 uiXyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_set1_epi32(uiXyzw);
}

PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u(plUInt32 x, plUInt32 y, plUInt32 z, plUInt32 w)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_setr_epi32(x, y, z, w);
}

PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u(plInternal::QuadInt v)
{
  m_v = v;
}

PL_ALWAYS_INLINE void plSimdVec4u::Set(plUInt32 uiXyzw)
{
  m_v = _mm_set1_epi32(uiXyzw);
}

PL_ALWAYS_INLINE void plSimdVec4u::Set(plUInt32 x, plUInt32 y, plUInt32 z, plUInt32 w)
{
  m_v = _mm_setr_epi32(x, y, z, w);
}

PL_ALWAYS_INLINE void plSimdVec4u::SetZero()
{
  m_v = _mm_setzero_si128();
}

// Needs to be implemented here because of include dependencies.
PL_ALWAYS_INLINE plSimdVec4i::plSimdVec4i(const plSimdVec4u& u)
  : m_v(u.m_v)
{
}

PL_ALWAYS_INLINE plSimdVec4u::plSimdVec4u(const plSimdVec4i& i)
  : m_v(i.m_v)
{
}
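// Note: these constructors reuse the underlying 128-bit register as-is, i.e. they
// reinterpret the bit pattern rather than converting values; as with the scalar
// plInt32 <-> plUInt32 conversion, values outside the target range wrap around.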

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4u::ToFloat() const
{
  // _mm_cvtepi32_ps converts *signed* integers, so values >= 2^31 would come out
  // negative. Convert the upper and lower 16 bits separately and recombine instead.
  __m128 two16 = _mm_set1_ps((float)0x10000); // 2^16
  __m128i high = _mm_srli_epi32(m_v, 16);
  __m128i low = _mm_srli_epi32(_mm_slli_epi32(m_v, 16), 16);
  __m128 fHigh = _mm_mul_ps(_mm_cvtepi32_ps(high), two16);
  __m128 fLow = _mm_cvtepi32_ps(low);

  return _mm_add_ps(fHigh, fLow);
}
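// Scalar sketch of the recombination above (illustration only):
//   float result = float(v >> 16) * 65536.0f + float(v & 0xFFFF);
// Each half fits into the signed range that _mm_cvtepi32_ps converts exactly.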

// static
PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::Truncate(const plSimdVec4f& f)
{
  alignas(16) const float fmax[4] = {2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f, 2.14748364e+009f}; // rounds to exactly 2^31
  // largest float strictly below 2^32; a constant that rounds up to exactly 2^32 would make the final addition wrap to zero
  alignas(16) const float fmax_unsigned[4] = {4294967040.0f, 4294967040.0f, 4294967040.0f, 4294967040.0f};
  __m128i zero = _mm_setzero_si128();

  // Clamp negative inputs to zero. For non-negative floats the IEEE-754 bit pattern
  // orders like an integer, so an integer sign test is sufficient.
  __m128i mask = _mm_cmpgt_epi32(_mm_castps_si128(f.m_v), zero);
  __m128 min = _mm_and_ps(_mm_castsi128_ps(mask), f.m_v);
  __m128 max = _mm_min_ps(min, _mm_load_ps(fmax_unsigned)); // clamped to [0, ~2^32)

  // _mm_cvttps_epi32 only covers the signed range. Truncate the amount above 2^31
  // separately and add both results: for inputs >= 2^31 the signed conversion of
  // 'max' yields 0x80000000, which supplies exactly the missing 2^31 when the sum
  // is read as unsigned.
  __m128 diff = _mm_sub_ps(max, _mm_load_ps(fmax));
  mask = _mm_cmpgt_epi32(_mm_castps_si128(diff), zero);
  diff = _mm_and_ps(_mm_castsi128_ps(mask), diff);

  __m128i res1 = _mm_cvttps_epi32(diff);
  __m128i res2 = _mm_cvttps_epi32(max);
  return _mm_add_epi32(res1, res2);
}
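// Rough scalar equivalent (illustration only; plMath::Clamp is assumed here):
//   float c = plMath::Clamp(f, 0.0f, 4294967040.0f);
//   plUInt32 bias = (c >= 2147483648.0f) ? 0x80000000u : 0u;
//   plUInt32 result = plUInt32(plInt32(c - float(bias))) + bias;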

template <int N>
PL_ALWAYS_INLINE plUInt32 plSimdVec4u::GetComponent() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_extract_epi32(m_v, N);
#else
  return m_v.m128i_i32[N];
#endif
}
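// Note: m128i_i32 is an MSVC-specific member of __m128i and does not exist on
// GCC/Clang. A portable pre-SSE4.1 fallback would go through memory, e.g.:
//   alignas(16) plUInt32 tmp[4];
//   _mm_store_si128(reinterpret_cast<__m128i*>(tmp), m_v);
//   return tmp[N];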

PL_ALWAYS_INLINE plUInt32 plSimdVec4u::x() const
{
  return GetComponent<0>();
}

PL_ALWAYS_INLINE plUInt32 plSimdVec4u::y() const
{
  return GetComponent<1>();
}

PL_ALWAYS_INLINE plUInt32 plSimdVec4u::z() const
{
  return GetComponent<2>();
}

PL_ALWAYS_INLINE plUInt32 plSimdVec4u::w() const
{
  return GetComponent<3>();
}

template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::Get() const
{
  return _mm_shuffle_epi32(m_v, PL_TO_SHUFFLE(s));
}
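// Usage sketch (assuming plSwizzle provides the usual four-letter permutations):
//   plSimdVec4u v(1, 2, 3, 4);
//   plSimdVec4u s = v.Get<plSwizzle::WZYX>(); // (4, 3, 2, 1)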

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator+(const plSimdVec4u& v) const
{
  return _mm_add_epi32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator-(const plSimdVec4u& v) const
{
  return _mm_sub_epi32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::CompMul(const plSimdVec4u& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_mullo_epi32(m_v, v.m_v);
#else
  PL_ASSERT_NOT_IMPLEMENTED; // this fallback is untested, so assert rather than risk silently wrong results
  // Standard SSE2 emulation of a lane-wise 32-bit multiply: _mm_mul_epu32 forms
  // 64-bit products of the even lanes, so multiply even and odd lanes separately
  // and re-interleave the low 32 bits of each product.
  __m128i tmp1 = _mm_mul_epu32(m_v, v.m_v);
  __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_v, 4), _mm_srli_si128(v.m_v, 4));
  return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, PL_SHUFFLE(0, 2, 0, 0)), _mm_shuffle_epi32(tmp2, PL_SHUFFLE(0, 2, 0, 0)));
#endif
}
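// Note: addition, subtraction and the low 32 bits of multiplication are identical
// for signed and unsigned operands in two's complement, which is why the same
// intrinsics serve both plSimdVec4i and plSimdVec4u here.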

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator|(const plSimdVec4u& v) const
{
  return _mm_or_si128(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator&(const plSimdVec4u& v) const
{
  return _mm_and_si128(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator^(const plSimdVec4u& v) const
{
  return _mm_xor_si128(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator~() const
{
  __m128i ones = _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128());
  return _mm_xor_si128(ones, m_v);
}
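// SSE has no NOT instruction; comparing a register with itself for equality yields
// all-ones, and XOR with all-ones flips every bit.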

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator<<(plUInt32 uiShift) const
{
  return _mm_slli_epi32(m_v, uiShift);
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::operator>>(plUInt32 uiShift) const
{
  return _mm_srli_epi32(m_v, uiShift);
}
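// All four lanes are shifted by the same amount. Per Intel's documentation,
// _mm_slli_epi32 / _mm_srli_epi32 produce zero for shift counts greater than 31.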

PL_ALWAYS_INLINE plSimdVec4u& plSimdVec4u::operator+=(const plSimdVec4u& v)
{
  m_v = _mm_add_epi32(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4u& plSimdVec4u::operator-=(const plSimdVec4u& v)
{
  m_v = _mm_sub_epi32(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4u& plSimdVec4u::operator|=(const plSimdVec4u& v)
{
  m_v = _mm_or_si128(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4u& plSimdVec4u::operator&=(const plSimdVec4u& v)
{
  m_v = _mm_and_si128(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4u& plSimdVec4u::operator^=(const plSimdVec4u& v)
{
  m_v = _mm_xor_si128(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4u& plSimdVec4u::operator<<=(plUInt32 uiShift)
{
  m_v = _mm_slli_epi32(m_v, uiShift);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4u& plSimdVec4u::operator>>=(plUInt32 uiShift)
{
  m_v = _mm_srli_epi32(m_v, uiShift);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::CompMin(const plSimdVec4u& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_min_epu32(m_v, v.m_v);
#else
  // SSE2 has no unsigned 32-bit compare; bias both operands by 2^31 so that a
  // signed compare produces the unsigned ordering, then blend via the mask.
  __m128i signBit = _mm_set1_epi32(0x80000000);
  __m128i mask = _mm_cmplt_epi32(_mm_sub_epi32(m_v, signBit), _mm_sub_epi32(v.m_v, signBit));
  return _mm_or_si128(_mm_and_si128(mask, m_v), _mm_andnot_si128(mask, v.m_v));
#endif
}

PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::CompMax(const plSimdVec4u& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_max_epu32(m_v, v.m_v);
#else
  // Same sign-bias trick as CompMin, with the comparison direction flipped.
  __m128i signBit = _mm_set1_epi32(0x80000000);
  __m128i mask = _mm_cmpgt_epi32(_mm_sub_epi32(m_v, signBit), _mm_sub_epi32(v.m_v, signBit));
  return _mm_or_si128(_mm_and_si128(mask, m_v), _mm_andnot_si128(mask, v.m_v));
#endif
}
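// A plain signed compare in the fallbacks would pick the wrong element whenever
// exactly one operand has its top bit set (e.g. CompMin(0x80000000, 1) must be 1);
// the 2^31 bias maps unsigned order onto signed order, as in operator< below.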

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4u::operator==(const plSimdVec4u& v) const
{
  return _mm_castsi128_ps(_mm_cmpeq_epi32(m_v, v.m_v));
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4u::operator!=(const plSimdVec4u& v) const
{
  return !(*this == v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4u::operator<=(const plSimdVec4u& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  __m128i minValue = _mm_min_epu32(m_v, v.m_v);
  return _mm_castsi128_ps(_mm_cmpeq_epi32(minValue, m_v));
#else
  return !(*this > v);
#endif
}
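// The SSE4.1 path uses the identity (a <= b) <=> (min(a, b) == a), since SSE
// provides no direct unsigned less-equal compare.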

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4u::operator<(const plSimdVec4u& v) const
{
  __m128i signBit = _mm_set1_epi32(0x80000000);
  __m128i a = _mm_sub_epi32(m_v, signBit);
  __m128i b = _mm_sub_epi32(v.m_v, signBit);
  return _mm_castsi128_ps(_mm_cmplt_epi32(a, b));
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4u::operator>=(const plSimdVec4u& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  __m128i maxValue = _mm_max_epu32(m_v, v.m_v);
  return _mm_castsi128_ps(_mm_cmpeq_epi32(maxValue, m_v));
#else
  return !(*this < v);
#endif
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4u::operator>(const plSimdVec4u& v) const
{
  __m128i signBit = _mm_set1_epi32(0x80000000);
  __m128i a = _mm_sub_epi32(m_v, signBit);
  __m128i b = _mm_sub_epi32(v.m_v, signBit);
  return _mm_castsi128_ps(_mm_cmpgt_epi32(a, b));
}
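// Subtracting 0x80000000 (equivalently, XOR-ing the sign bit) maps the unsigned
// range [0, 2^32) onto the signed range [-2^31, 2^31), so the signed compare
// intrinsics produce the unsigned ordering. Scalar sketch (illustration only):
//   bool uless(plUInt32 a, plUInt32 b) { return plInt32(a ^ 0x80000000u) < plInt32(b ^ 0x80000000u); }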

// static
PL_ALWAYS_INLINE plSimdVec4u plSimdVec4u::MakeZero()
{
  return _mm_setzero_si128();
}

// not needed atm
#if 0
void plSimdVec4u::Transpose(plSimdVec4u& v0, plSimdVec4u& v1, plSimdVec4u& v2, plSimdVec4u& v3)
{
  __m128i T0 = _mm_unpacklo_epi32(v0.m_v, v1.m_v);
  __m128i T1 = _mm_unpacklo_epi32(v2.m_v, v3.m_v);
  __m128i T2 = _mm_unpackhi_epi32(v0.m_v, v1.m_v);
  __m128i T3 = _mm_unpackhi_epi32(v2.m_v, v3.m_v);

  v0.m_v = _mm_unpacklo_epi64(T0, T1);
  v1.m_v = _mm_unpackhi_epi64(T0, T1);
  v2.m_v = _mm_unpacklo_epi64(T2, T3);
  v3.m_v = _mm_unpackhi_epi64(T2, T3);
}
#endif
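// The disabled Transpose is the standard 4x4 transpose in two unpack rounds: the
// epi32 unpacks interleave row pairs, and the epi64 unpacks then assemble the
// 64-bit halves into whole columns.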