Plasma Engine 2.0
SSEVec4f_inl.h
#pragma once

PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f()
{
  PL_CHECK_SIMD_ALIGNMENT(this);

#if PL_ENABLED(PL_MATH_CHECK_FOR_NAN)
  // Initialize all data to NaN in debug mode to make problems with uninitialized data easier to find.
  m_v = _mm_set1_ps(plMath::NaN<float>());
#endif
}

PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(float fXyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_set1_ps(fXyzw);
}

PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(const plSimdFloat& fXyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = fXyzw.m_v;
}

PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(float x, float y, float z, float w)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_setr_ps(x, y, z, w);
}

PL_ALWAYS_INLINE void plSimdVec4f::Set(float fXyzw)
{
  m_v = _mm_set1_ps(fXyzw);
}

PL_ALWAYS_INLINE void plSimdVec4f::Set(float x, float y, float z, float w)
{
  m_v = _mm_setr_ps(x, y, z, w);
}

PL_ALWAYS_INLINE void plSimdVec4f::SetX(const plSimdFloat& f)
{
  m_v = _mm_move_ss(m_v, f.m_v);
}

PL_ALWAYS_INLINE void plSimdVec4f::SetY(const plSimdFloat& f)
{
  m_v = _mm_shuffle_ps(_mm_unpacklo_ps(m_v, f.m_v), m_v, PL_TO_SHUFFLE(plSwizzle::XYZW));
}

PL_ALWAYS_INLINE void plSimdVec4f::SetZ(const plSimdFloat& f)
{
  m_v = _mm_shuffle_ps(m_v, _mm_unpackhi_ps(f.m_v, m_v), PL_TO_SHUFFLE(plSwizzle::XYZW));
}

PL_ALWAYS_INLINE void plSimdVec4f::SetW(const plSimdFloat& f)
{
  m_v = _mm_shuffle_ps(m_v, _mm_unpackhi_ps(m_v, f.m_v), PL_TO_SHUFFLE(plSwizzle::XYXY));
}

PL_ALWAYS_INLINE void plSimdVec4f::SetZero()
{
  m_v = _mm_setzero_ps();
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<1>(const float* pFloat)
{
  m_v = _mm_load_ss(pFloat);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<2>(const float* pFloat)
{
  m_v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(pFloat)));
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<3>(const float* pFloat)
{
// There is a compiler bug in GCC that causes the faster implementation in the #else branch to be optimized incorrectly, so use plain scalar loads instead.
#if PL_ENABLED(PL_COMPILER_GCC)
  m_v = _mm_set_ps(0.0f, pFloat[2], pFloat[1], pFloat[0]);
#else
  m_v = _mm_movelh_ps(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(pFloat))), _mm_load_ss(pFloat + 2));
#endif
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<4>(const float* pFloat)
{
  m_v = _mm_loadu_ps(pFloat);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<1>(float* pFloat) const
{
  _mm_store_ss(pFloat, m_v);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<2>(float* pFloat) const
{
  _mm_store_sd(reinterpret_cast<double*>(pFloat), _mm_castps_pd(m_v));
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<3>(float* pFloat) const
{
  _mm_store_sd(reinterpret_cast<double*>(pFloat), _mm_castps_pd(m_v));
  _mm_store_ss(pFloat + 2, _mm_movehl_ps(m_v, m_v));
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<4>(float* pFloat) const
{
  _mm_storeu_ps(pFloat, m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::BITS_12>() const
{
  return _mm_rcp_ps(m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::BITS_23>() const
{
  __m128 x0 = _mm_rcp_ps(m_v);

  // One Newton-Raphson iteration
  __m128 x1 = _mm_mul_ps(x0, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(m_v, x0)));

  return x1;
}
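
// Sketch of the refinement above (illustrative, not part of the API surface):
// Newton-Raphson on f(x) = 1/x - a gives x1 = x0 * (2 - a * x0). Since
// _mm_rcp_ps guarantees roughly 12 bits of relative accuracy, one iteration
// approximately doubles that to the ~23 bits a float mantissa can hold, e.g.:
//
//   plSimdVec4f v(4.0f);
//   plSimdVec4f r = v.GetReciprocal<plMathAcc::BITS_23>(); // ~0.25f in all lanes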

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::FULL>() const
{
  return _mm_div_ps(_mm_set1_ps(1.0f), m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::BITS_12>() const
{
  return _mm_mul_ps(m_v, _mm_rsqrt_ps(m_v));
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::BITS_23>() const
{
  __m128 x0 = _mm_rsqrt_ps(m_v);

  // One iteration of Newton-Raphson
  __m128 x1 = _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(0.5f), x0), _mm_sub_ps(_mm_set1_ps(3.0f), _mm_mul_ps(_mm_mul_ps(m_v, x0), x0)));

  return _mm_mul_ps(m_v, x1);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::FULL>() const
{
  return _mm_sqrt_ps(m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::FULL>() const
{
  return _mm_div_ps(_mm_set1_ps(1.0f), _mm_sqrt_ps(m_v));
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::BITS_23>() const
{
  const __m128 x0 = _mm_rsqrt_ps(m_v);

  // One iteration of Newton-Raphson
  return _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(0.5f), x0), _mm_sub_ps(_mm_set1_ps(3.0f), _mm_mul_ps(_mm_mul_ps(m_v, x0), x0)));
}
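
// The analogous sketch for the inverse square root (illustrative only):
// Newton-Raphson on f(x) = 1/x^2 - a gives x1 = 0.5 * x0 * (3 - a * x0 * x0),
// which is exactly the expression above; _mm_rsqrt_ps supplies the ~12-bit
// starting estimate x0 and one iteration brings it to roughly 23 bits.
// GetSqrt<BITS_23> reuses this by computing sqrt(a) as a * rsqrt(a).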

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::BITS_12>() const
{
  return _mm_rsqrt_ps(m_v);
}

template <int N, plMathAcc::Enum acc>
void plSimdVec4f::NormalizeIfNotZero(const plSimdFloat& fEpsilon)
{
  plSimdFloat sqLength = GetLengthSquared<N>();
  __m128 isNotZero = _mm_cmpgt_ps(sqLength.m_v, fEpsilon.m_v);
  m_v = _mm_mul_ps(m_v, sqLength.GetInvSqrt<acc>().m_v);

  // Mask the result to zero (discarding any NaN/Inf produced by InvSqrt of a tiny length)
  // if the squared length was not above the epsilon.
  m_v = _mm_and_ps(isNotZero, m_v);
}
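
// Illustrative usage (hypothetical values, not part of this header):
//
//   plSimdVec4f v(0.0f, 3.0f, 4.0f, 0.0f);
//   v.NormalizeIfNotZero<3, plMathAcc::FULL>(plSimdFloat(1e-12f));
//   // v is now (0, 0.6, 0.8, 0); a vector whose squared length is below the
//   // epsilon would instead come out as all zeros rather than NaN.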

template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsZero() const
{
  const int mask = PL_BIT(N) - 1;
  return (_mm_movemask_ps(_mm_cmpeq_ps(m_v, _mm_setzero_ps())) & mask) == mask;
}

template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsZero(const plSimdFloat& fEpsilon) const
{
  const int mask = PL_BIT(N) - 1;
  __m128 absVal = Abs().m_v;
  return (_mm_movemask_ps(_mm_cmplt_ps(absVal, fEpsilon.m_v)) & mask) == mask;
}

template <int N>
inline bool plSimdVec4f::IsNaN() const
{
  // NaN -> (exponent = all 1, mantissa = non-zero)

  alignas(16) const plUInt32 s_exponentMask[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000};
  alignas(16) const plUInt32 s_mantissaMask[4] = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF};

  __m128 exponentMask = _mm_load_ps(reinterpret_cast<const float*>(s_exponentMask));
  __m128 mantissaMask = _mm_load_ps(reinterpret_cast<const float*>(s_mantissaMask));

  __m128 exponentAll1 = _mm_cmpeq_ps(_mm_and_ps(m_v, exponentMask), exponentMask);
  __m128 mantissaNon0 = _mm_cmpneq_ps(_mm_and_ps(m_v, mantissaMask), _mm_setzero_ps());

  const int mask = PL_BIT(N) - 1;
  return (_mm_movemask_ps(_mm_and_ps(exponentAll1, mantissaNon0)) & mask) != 0;
}
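
// Worked bit-pattern example (illustrative only): the quiet NaN 0x7FC00000
// has all exponent bits (0x7f800000) set and a non-zero mantissa (0x00400000),
// so both comparisons above report true for that lane. An infinity
// (0x7F800000) passes the exponent test but fails the mantissa test,
// and is therefore not reported as NaN.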

template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsValid() const
{
  // Check the 8 exponent bits.
  // NaN -> (exponent = all 1, mantissa = non-zero)
  // Inf -> (exponent = all 1, mantissa = zero)

  alignas(16) const plUInt32 s_exponentMask[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000};

  __m128 exponentMask = _mm_load_ps(reinterpret_cast<const float*>(s_exponentMask));

  __m128 exponentNot1 = _mm_cmpneq_ps(_mm_and_ps(m_v, exponentMask), exponentMask);

  const int mask = PL_BIT(N) - 1;
  return (_mm_movemask_ps(exponentNot1) & mask) == mask;
}

template <int N>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::GetComponent() const
{
  return _mm_shuffle_ps(m_v, m_v, PL_SHUFFLE(N, N, N, N));
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::x() const
{
  return GetComponent<0>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::y() const
{
  return GetComponent<1>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::z() const
{
  return GetComponent<2>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::w() const
{
  return GetComponent<3>();
}

template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Get() const
{
  return _mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(s));
}

template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetCombined(const plSimdVec4f& other) const
{
  // x = this[s0], y = this[s1], z = other[s2], w = other[s3]
  return _mm_shuffle_ps(m_v, other.m_v, PL_TO_SHUFFLE(s));
}
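
// Illustrative swizzle usage (hypothetical values, not part of this header):
//
//   plSimdVec4f a(1.0f, 2.0f, 3.0f, 4.0f);
//   plSimdVec4f b(5.0f, 6.0f, 7.0f, 8.0f);
//   plSimdVec4f r1 = a.Get<plSwizzle::ZWXY>();          // (3, 4, 1, 2)
//   plSimdVec4f r2 = a.GetCombined<plSwizzle::XYXY>(b); // (1, 2, 5, 6)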

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator-() const
{
  return _mm_sub_ps(_mm_setzero_ps(), m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator+(const plSimdVec4f& v) const
{
  return _mm_add_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator-(const plSimdVec4f& v) const
{
  return _mm_sub_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator*(const plSimdFloat& f) const
{
  return _mm_mul_ps(m_v, f.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator/(const plSimdFloat& f) const
{
  return _mm_div_ps(m_v, f.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMul(const plSimdVec4f& v) const
{
  return _mm_mul_ps(m_v, v.m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::FULL>(const plSimdVec4f& v) const
{
  return _mm_div_ps(m_v, v.m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::BITS_23>(const plSimdVec4f& v) const
{
  __m128 x0 = _mm_rcp_ps(v.m_v);

  // One iteration of Newton-Raphson
  __m128 x1 = _mm_mul_ps(x0, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(v.m_v, x0)));

  return _mm_mul_ps(m_v, x1);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::BITS_12>(const plSimdVec4f& v) const
{
  return _mm_mul_ps(m_v, _mm_rcp_ps(v.m_v));
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMin(const plSimdVec4f& v) const
{
  return _mm_min_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMax(const plSimdVec4f& v) const
{
  return _mm_max_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Abs() const
{
  // Clear the sign bit of each component.
  return _mm_andnot_ps(_mm_set1_ps(-0.0f), m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Round() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_NINT);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Floor() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_FLOOR);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Ceil() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_CEIL);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Trunc() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_TRUNC);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::FlipSign(const plSimdVec4b& vCmp) const
{
  // XOR the sign bit of each component for which the mask is set.
  return _mm_xor_ps(m_v, _mm_and_ps(vCmp.m_v, _mm_set1_ps(-0.0f)));
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Select(const plSimdVec4b& vCmp, const plSimdVec4f& vTrue, const plSimdVec4f& vFalse)
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_blendv_ps(vFalse.m_v, vTrue.m_v, vCmp.m_v);
#else
  return _mm_or_ps(_mm_andnot_ps(vCmp.m_v, vFalse.m_v), _mm_and_ps(vCmp.m_v, vTrue.m_v));
#endif
}
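
// Branchless select, illustrated (hypothetical values, not part of this header):
//
//   plSimdVec4f a(1.0f, 2.0f, 3.0f, 4.0f);
//   plSimdVec4f b(5.0f, 6.0f, 7.0f, 8.0f);
//   plSimdVec4f r = plSimdVec4f::Select(a < b, a, b); // per-lane minimum: (1, 2, 3, 4)
//
// The pre-SSE4.1 fallback computes (vCmp & vTrue) | (~vCmp & vFalse), which is
// equivalent to _mm_blendv_ps as long as each mask lane is all ones or all zeros,
// as is the case for comparison results.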

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator+=(const plSimdVec4f& v)
{
  m_v = _mm_add_ps(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator-=(const plSimdVec4f& v)
{
  m_v = _mm_sub_ps(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator*=(const plSimdFloat& f)
{
  m_v = _mm_mul_ps(m_v, f.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator/=(const plSimdFloat& f)
{
  m_v = _mm_div_ps(m_v, f.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator==(const plSimdVec4f& v) const
{
  return _mm_cmpeq_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator!=(const plSimdVec4f& v) const
{
  return _mm_cmpneq_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator<=(const plSimdVec4f& v) const
{
  return _mm_cmple_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator<(const plSimdVec4f& v) const
{
  return _mm_cmplt_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator>=(const plSimdVec4f& v) const
{
  return _mm_cmpge_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator>(const plSimdVec4f& v) const
{
  return _mm_cmpgt_ps(m_v, v.m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<2>() const
{
#if PL_SSE_LEVEL >= PL_SSE_31
  __m128 a = _mm_hadd_ps(m_v, m_v);
  return _mm_shuffle_ps(a, a, PL_TO_SHUFFLE(plSwizzle::XXXX));
#else
  return GetComponent<0>() + GetComponent<1>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<3>() const
{
  return HorizontalSum<2>() + GetComponent<2>();
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<4>() const
{
#if PL_SSE_LEVEL >= PL_SSE_31
  __m128 a = _mm_hadd_ps(m_v, m_v);
  return _mm_hadd_ps(a, a);
#else
  return (GetComponent<0>() + GetComponent<1>()) + (GetComponent<2>() + GetComponent<3>());
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<2>() const
{
  return _mm_min_ps(GetComponent<0>().m_v, GetComponent<1>().m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<3>() const
{
  return _mm_min_ps(_mm_min_ps(GetComponent<0>().m_v, GetComponent<1>().m_v), GetComponent<2>().m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<4>() const
{
  __m128 xyxyzwzw = _mm_min_ps(_mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(plSwizzle::ZWXY)), m_v);
  __m128 zwzwxyxy = _mm_shuffle_ps(xyxyzwzw, xyxyzwzw, PL_TO_SHUFFLE(plSwizzle::YXWZ));
  return _mm_min_ps(xyxyzwzw, zwzwxyxy);
}
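
// The 4-wide reduction above finishes in log2(4) = 2 min steps (sketch, with
// hypothetical values): for v = (3, 1, 4, 2), min(v.zwxy, v) = (3, 1, 3, 1),
// and taking the min of that with its own pair-swap (1, 3, 1, 3) leaves the
// overall minimum 1 broadcast to every lane. HorizontalMax<4> below is the
// same pattern with _mm_max_ps.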

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<2>() const
{
  return _mm_max_ps(GetComponent<0>().m_v, GetComponent<1>().m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<3>() const
{
  return _mm_max_ps(_mm_max_ps(GetComponent<0>().m_v, GetComponent<1>().m_v), GetComponent<2>().m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<4>() const
{
  __m128 xyxyzwzw = _mm_max_ps(_mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(plSwizzle::ZWXY)), m_v);
  __m128 zwzwxyxy = _mm_shuffle_ps(xyxyzwzw, xyxyzwzw, PL_TO_SHUFFLE(plSwizzle::YXWZ));
  return _mm_max_ps(xyxyzwzw, zwzwxyxy);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<1>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0x1f);
#else
  return CompMul(v).HorizontalSum<1>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<2>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0x3f);
#else
  return CompMul(v).HorizontalSum<2>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<3>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0x7f);
#else
  return CompMul(v).HorizontalSum<3>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<4>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0xff);
#else
  return CompMul(v).HorizontalSum<4>();
#endif
}
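
// Reading the _mm_dp_ps immediates above: the high nibble selects which input
// lanes enter the products and the low nibble selects which result lanes
// receive the sum. For example, 0x7f means multiply lanes x, y and z and
// broadcast the sum to all four lanes, which is what the plSimdFloat return
// type expects.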

// 3D cross product, w is ignored.
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CrossRH(const plSimdVec4f& v) const
{
  __m128 a = _mm_mul_ps(m_v, _mm_shuffle_ps(v.m_v, v.m_v, PL_TO_SHUFFLE(plSwizzle::YZXW)));
  __m128 b = _mm_mul_ps(v.m_v, _mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(plSwizzle::YZXW)));
  __m128 c = _mm_sub_ps(a, b);

  return _mm_shuffle_ps(c, c, PL_TO_SHUFFLE(plSwizzle::YZXW));
}
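
// This uses the three-shuffle identity (illustrative derivation):
//   cross(a, b) = ((a * b.yzxw) - (b * a.yzxw)).yzxw
// The x lane of the inner difference is a.x*b.y - b.x*a.y, i.e. the z component
// of the cross product, so the final yzxw shuffle rotates the lanes into place,
// saving a shuffle compared with the textbook four-shuffle formulation.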

// Generates an arbitrary vector such that Dot<3>(GetOrthogonalVector()) == 0.
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetOrthogonalVector() const
{
  // See http://blog.selfshadow.com/2011/10/17/perp-vectors/ - this is Stark's first variant, SIMDified:
  // zero out all components except the smallest one, then use the cross product with the original vector.
  return CrossRH(_mm_and_ps(m_v, _mm_cmpeq_ps(m_v, HorizontalMin<3>().m_v)));
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulAdd(const plSimdVec4f& a, const plSimdVec4f& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmadd_ps(a.m_v, b.m_v, c.m_v);
#else
  return a.CompMul(b) + c;
#endif
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulAdd(const plSimdVec4f& a, const plSimdFloat& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmadd_ps(a.m_v, b.m_v, c.m_v);
#else
  return a * b + c;
#endif
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulSub(const plSimdVec4f& a, const plSimdVec4f& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmsub_ps(a.m_v, b.m_v, c.m_v);
#else
  return a.CompMul(b) - c;
#endif
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulSub(const plSimdVec4f& a, const plSimdFloat& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmsub_ps(a.m_v, b.m_v, c.m_v);
#else
  return a * b - c;
#endif
}
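
// Both overload families compute a * b + c (MulAdd) and a * b - c (MulSub)
// lane by lane. Note that _mm_fmadd_ps/_mm_fmsub_ps fuse the multiply and the
// add into a single rounding step, so results on the AVX2 path can differ from
// the mul-then-add fallback in the last bit.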

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CopySign(const plSimdVec4f& vMagnitude, const plSimdVec4f& vSign)
{
  // Combine the magnitude bits of vMagnitude with the sign bit of vSign.
  __m128 minusZero = _mm_set1_ps(-0.0f);
  return _mm_or_ps(_mm_andnot_ps(minusZero, vMagnitude.m_v), _mm_and_ps(minusZero, vSign.m_v));
}
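
// Illustrative usage (hypothetical values, not part of this header):
//
//   plSimdVec4f mag(1.0f, 2.0f, 3.0f, 4.0f);
//   plSimdVec4f sgn(-1.0f, 1.0f, -1.0f, 1.0f);
//   plSimdVec4f r = plSimdVec4f::CopySign(mag, sgn); // (-1, 2, -3, 4)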