Plasma Engine 2.0
SSEVec4i_inl.h
#pragma once

#if PL_ENABLED(PL_COMPILER_MSVC)
#  include <intrin.h>
#endif

PL_ALWAYS_INLINE plSimdVec4i::plSimdVec4i()
{
  PL_CHECK_SIMD_ALIGNMENT(this);

#if PL_ENABLED(PL_MATH_CHECK_FOR_NAN)
  m_v = _mm_set1_epi32(0xCDCDCDCD);
#endif
}

PL_ALWAYS_INLINE plSimdVec4i::plSimdVec4i(plInt32 iXyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_set1_epi32(iXyzw);
}

PL_ALWAYS_INLINE plSimdVec4i::plSimdVec4i(plInt32 x, plInt32 y, plInt32 z, plInt32 w)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_setr_epi32(x, y, z, w);
}

PL_ALWAYS_INLINE plSimdVec4i::plSimdVec4i(plInternal::QuadInt v)
{
  m_v = v;
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::MakeZero()
{
  return _mm_setzero_si128();
}

PL_ALWAYS_INLINE void plSimdVec4i::Set(plInt32 iXyzw)
{
  m_v = _mm_set1_epi32(iXyzw);
}

PL_ALWAYS_INLINE void plSimdVec4i::Set(plInt32 x, plInt32 y, plInt32 z, plInt32 w)
{
  m_v = _mm_setr_epi32(x, y, z, w);
}

PL_ALWAYS_INLINE void plSimdVec4i::SetZero()
{
  m_v = _mm_setzero_si128();
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Load<1>(const plInt32* pInts)
{
  m_v = _mm_loadu_si32(pInts);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Load<2>(const plInt32* pInts)
{
  m_v = _mm_loadu_si64(pInts);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Load<3>(const plInt32* pInts)
{
  m_v = _mm_setr_epi32(pInts[0], pInts[1], pInts[2], 0);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Load<4>(const plInt32* pInts)
{
  m_v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pInts));
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Store<1>(plInt32* pInts) const
{
  _mm_storeu_si32(pInts, m_v);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Store<2>(plInt32* pInts) const
{
  _mm_storeu_si64(pInts, m_v);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Store<3>(plInt32* pInts) const
{
  _mm_storeu_si64(pInts, m_v);
  _mm_storeu_si32(pInts + 2, _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(m_v), _mm_castsi128_ps(m_v))));
}

template <>
PL_ALWAYS_INLINE void plSimdVec4i::Store<4>(plInt32* pInts) const
{
  _mm_storeu_si128(reinterpret_cast<__m128i*>(pInts), m_v);
}
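
// Usage sketch (illustrative, not part of the engine; the example function name
// is hypothetical): the Load/Store specializations above touch exactly N ints,
// so they are safe on buffers smaller than 16 bytes. Load<3> zeroes the w lane;
// Store<3> writes 8 bytes and then the third lane separately.
#if 0
inline void ExampleLoadStore()
{
  plInt32 src[3] = {1, 2, 3};

  plSimdVec4i v;
  v.Load<3>(src); // v = (1, 2, 3, 0); reads exactly 12 bytes

  plInt32 dst[3] = {0, 0, 0};
  v.Store<3>(dst); // writes exactly 12 bytes: dst = {1, 2, 3}
}
#endif
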
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4i::ToFloat() const
{
  return _mm_cvtepi32_ps(m_v);
}

// static
PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::Truncate(const plSimdVec4f& f)
{
  return _mm_cvttps_epi32(f.m_v);
}
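
// Usage sketch (illustrative; assumes plSimdVec4f has a matching 4-component
// constructor): Truncate uses _mm_cvttps_epi32, which truncates toward zero
// rather than rounding to nearest.
#if 0
inline void ExampleFloatConversion()
{
  plSimdVec4i i(7);
  plSimdVec4f f = i.ToFloat(); // (7.0f, 7.0f, 7.0f, 7.0f)

  // truncation toward zero: 1.9f -> 1, -1.9f -> -1
  plSimdVec4i t = plSimdVec4i::Truncate(plSimdVec4f(1.9f, -1.9f, 0.5f, -0.5f)); // (1, -1, 0, 0)
}
#endif
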
template <int N>
PL_ALWAYS_INLINE plInt32 plSimdVec4i::GetComponent() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_extract_epi32(m_v, N);
#else
  return m_v.m128i_i32[N];
#endif
}

PL_ALWAYS_INLINE plInt32 plSimdVec4i::x() const
{
  return GetComponent<0>();
}

PL_ALWAYS_INLINE plInt32 plSimdVec4i::y() const
{
  return GetComponent<1>();
}

PL_ALWAYS_INLINE plInt32 plSimdVec4i::z() const
{
  return GetComponent<2>();
}

PL_ALWAYS_INLINE plInt32 plSimdVec4i::w() const
{
  return GetComponent<3>();
}

template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::Get() const
{
  return _mm_shuffle_epi32(m_v, PL_TO_SHUFFLE(s));
}

template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::GetCombined(const plSimdVec4i& other) const
{
  return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(m_v), _mm_castsi128_ps(other.m_v), PL_TO_SHUFFLE(s)));
}
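
// Usage sketch (illustrative; assumes plSwizzle provides enumerators spelling
// the component order, e.g. plSwizzle::ZYXW): Get() shuffles this vector's own
// lanes, while GetCombined() takes x/y from this vector and z/w from 'other'
// (x = this[s0], y = this[s1], z = other[s2], w = other[s3]).
#if 0
inline void ExampleSwizzle()
{
  plSimdVec4i a(1, 2, 3, 4);
  plSimdVec4i b(5, 6, 7, 8);

  plSimdVec4i r = a.Get<plSwizzle::ZYXW>();          // (3, 2, 1, 4)
  plSimdVec4i c = a.GetCombined<plSwizzle::XYZW>(b); // (1, 2, 7, 8)
}
#endif
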
PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator-() const
{
  return _mm_sub_epi32(_mm_setzero_si128(), m_v);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator+(const plSimdVec4i& v) const
{
  return _mm_add_epi32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator-(const plSimdVec4i& v) const
{
  return _mm_sub_epi32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::CompMul(const plSimdVec4i& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_mullo_epi32(m_v, v.m_v);
#else
  PL_ASSERT_NOT_IMPLEMENTED; // this fallback is untested, so assert until it has been verified
  __m128i tmp1 = _mm_mul_epu32(m_v, v.m_v);
  __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_v, 4), _mm_srli_si128(v.m_v, 4));
  return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, PL_SHUFFLE(0, 2, 0, 0)), _mm_shuffle_epi32(tmp2, PL_SHUFFLE(0, 2, 0, 0)));
#endif
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::CompDiv(const plSimdVec4i& v) const
{
#if PL_ENABLED(PL_COMPILER_MSVC)
  return _mm_div_epi32(m_v, v.m_v);
#else
  int a[4];
  int b[4];
  Store<4>(a);
  v.Store<4>(b);

  for (plUInt32 i = 0; i < 4; ++i)
  {
    a[i] = a[i] / b[i];
  }

  plSimdVec4i r;
  r.Load<4>(a);
  return r;
#endif
}
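
// Usage sketch (illustrative): multiplication and division are exposed lane by
// lane as CompMul/CompDiv. The pre-SSE4.1 CompMul path emulates _mm_mullo_epi32
// with two _mm_mul_epu32 calls on the even and odd lanes; since SSE has no
// integer divide instruction, the non-MSVC CompDiv path divides through a
// scalar round-trip.
#if 0
inline void ExampleMulDiv()
{
  plSimdVec4i a(10, 20, 30, 40);
  plSimdVec4i b(2, 4, 5, 8);

  plSimdVec4i m = a.CompMul(b); // (20, 80, 150, 320)
  plSimdVec4i d = a.CompDiv(b); // (5, 5, 6, 5)
}
#endif
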
PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator|(const plSimdVec4i& v) const
{
  return _mm_or_si128(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator&(const plSimdVec4i& v) const
{
  return _mm_and_si128(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator^(const plSimdVec4i& v) const
{
  return _mm_xor_si128(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator~() const
{
  __m128i ones = _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128());
  return _mm_xor_si128(ones, m_v);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator<<(plUInt32 uiShift) const
{
  return _mm_slli_epi32(m_v, uiShift);
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::operator>>(plUInt32 uiShift) const
{
  return _mm_srai_epi32(m_v, uiShift);
}

PL_FORCE_INLINE plSimdVec4i plSimdVec4i::operator<<(const plSimdVec4i& v) const
{
  int a[4];
  int b[4];
  Store<4>(a);
  v.Store<4>(b);

  for (plUInt32 i = 0; i < 4; ++i)
  {
    a[i] = a[i] << b[i];
  }

  plSimdVec4i r;
  r.Load<4>(a);
  return r;
}

PL_FORCE_INLINE plSimdVec4i plSimdVec4i::operator>>(const plSimdVec4i& v) const
{
  int a[4];
  int b[4];
  Store<4>(a);
  v.Store<4>(b);

  for (plUInt32 i = 0; i < 4; ++i)
  {
    a[i] = a[i] >> b[i];
  }

  plSimdVec4i r;
  r.Load<4>(a);
  return r;
}
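
// Usage sketch (illustrative): operator>> is an arithmetic shift
// (_mm_srai_epi32), so the sign bit is replicated for negative values. SSE up
// through SSE4.2 has no per-lane variable shift (that arrives with AVX2's
// _mm_sllv_epi32), which is why the vector-shift overloads above fall back to
// a scalar round-trip.
#if 0
inline void ExampleShifts()
{
  plSimdVec4i a(-8, 8, 1, 256);

  plSimdVec4i l = a << 2;                       // (-32, 32, 4, 1024)
  plSimdVec4i r = a >> 2;                       // (-2, 2, 0, 64), sign-preserving
  plSimdVec4i p = a << plSimdVec4i(0, 1, 2, 3); // (-8, 16, 4, 2048), per-lane shift amounts
}
#endif
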
PL_ALWAYS_INLINE plSimdVec4i& plSimdVec4i::operator+=(const plSimdVec4i& v)
{
  m_v = _mm_add_epi32(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4i& plSimdVec4i::operator-=(const plSimdVec4i& v)
{
  m_v = _mm_sub_epi32(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4i& plSimdVec4i::operator|=(const plSimdVec4i& v)
{
  m_v = _mm_or_si128(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4i& plSimdVec4i::operator&=(const plSimdVec4i& v)
{
  m_v = _mm_and_si128(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4i& plSimdVec4i::operator^=(const plSimdVec4i& v)
{
  m_v = _mm_xor_si128(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4i& plSimdVec4i::operator<<=(plUInt32 uiShift)
{
  m_v = _mm_slli_epi32(m_v, uiShift);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4i& plSimdVec4i::operator>>=(plUInt32 uiShift)
{
  m_v = _mm_srai_epi32(m_v, uiShift);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::CompMin(const plSimdVec4i& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_min_epi32(m_v, v.m_v);
#else
  __m128i mask = _mm_cmplt_epi32(m_v, v.m_v);
  return _mm_or_si128(_mm_and_si128(mask, m_v), _mm_andnot_si128(mask, v.m_v));
#endif
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::CompMax(const plSimdVec4i& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_max_epi32(m_v, v.m_v);
#else
  __m128i mask = _mm_cmpgt_epi32(m_v, v.m_v);
  return _mm_or_si128(_mm_and_si128(mask, m_v), _mm_andnot_si128(mask, v.m_v));
#endif
}

PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::Abs() const
{
#if PL_SSE_LEVEL >= PL_SSE_31
  return _mm_abs_epi32(m_v);
#else
  __m128i negMask = _mm_cmplt_epi32(m_v, _mm_setzero_si128());
  __m128i neg = _mm_sub_epi32(_mm_setzero_si128(), m_v);
  return _mm_or_si128(_mm_and_si128(negMask, neg), _mm_andnot_si128(negMask, m_v));
#endif
}
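
// Usage sketch (illustrative; the helper name is hypothetical): clamping a
// vector into a range with CompMax/CompMin. The pre-SSE4.1 paths above build
// the result by blending through a comparison mask, (mask & a) | (~mask & b).
#if 0
inline plSimdVec4i ExampleClamp(const plSimdVec4i& v, const plSimdVec4i& vMin, const plSimdVec4i& vMax)
{
  return v.CompMax(vMin).CompMin(vMax); // per-lane clamp into [vMin, vMax]
}
#endif
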
PL_ALWAYS_INLINE plSimdVec4b plSimdVec4i::operator==(const plSimdVec4i& v) const
{
  return _mm_castsi128_ps(_mm_cmpeq_epi32(m_v, v.m_v));
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4i::operator!=(const plSimdVec4i& v) const
{
  return !(*this == v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4i::operator<=(const plSimdVec4i& v) const
{
  return !(*this > v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4i::operator<(const plSimdVec4i& v) const
{
  return _mm_castsi128_ps(_mm_cmplt_epi32(m_v, v.m_v));
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4i::operator>=(const plSimdVec4i& v) const
{
  return !(*this < v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4i::operator>(const plSimdVec4i& v) const
{
  return _mm_castsi128_ps(_mm_cmpgt_epi32(m_v, v.m_v));
}

// static
PL_ALWAYS_INLINE plSimdVec4i plSimdVec4i::Select(const plSimdVec4b& vCmp, const plSimdVec4i& vTrue, const plSimdVec4i& vFalse)
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(vFalse.m_v), _mm_castsi128_ps(vTrue.m_v), vCmp.m_v));
#else
  return _mm_castps_si128(_mm_or_ps(_mm_andnot_ps(vCmp.m_v, _mm_castsi128_ps(vFalse.m_v)), _mm_and_ps(vCmp.m_v, _mm_castsi128_ps(vTrue.m_v))));
#endif
}
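
// Usage sketch (illustrative; the helper name is hypothetical): comparisons
// return a plSimdVec4b mask that feeds directly into Select, allowing
// branch-free per-lane logic, here an absolute value equivalent to Abs().
#if 0
inline plSimdVec4i ExampleSelect(const plSimdVec4i& v)
{
  plSimdVec4i zero = plSimdVec4i::MakeZero();
  return plSimdVec4i::Select(v < zero, -v, v); // negative lanes take the negated value
}
#endif
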
// not needed at the moment
#if 0
void plSimdVec4i::Transpose(plSimdVec4i& v0, plSimdVec4i& v1, plSimdVec4i& v2, plSimdVec4i& v3)
{
  __m128i T0 = _mm_unpacklo_epi32(v0.m_v, v1.m_v);
  __m128i T1 = _mm_unpacklo_epi32(v2.m_v, v3.m_v);
  __m128i T2 = _mm_unpackhi_epi32(v0.m_v, v1.m_v);
  __m128i T3 = _mm_unpackhi_epi32(v2.m_v, v3.m_v);

  v0.m_v = _mm_unpacklo_epi64(T0, T1);
  v1.m_v = _mm_unpackhi_epi64(T0, T1);
  v2.m_v = _mm_unpacklo_epi64(T2, T3);
  v3.m_v = _mm_unpackhi_epi64(T2, T3);
}
#endif