PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f()
{
  PL_CHECK_SIMD_ALIGNMENT(this);

#if PL_ENABLED(PL_MATH_CHECK_FOR_NAN)
  // Initialize all data to NaN to make it easier to detect the use of uninitialized vectors.
  m_v = _mm_set1_ps(plMath::NaN<float>());
#endif
}
PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(float fXyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_set1_ps(fXyzw);
}
PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(const plSimdFloat& fXyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = fXyzw.m_v;
}
PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(float x, float y, float z, float w)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = _mm_setr_ps(x, y, z, w);
}
PL_ALWAYS_INLINE void plSimdVec4f::Set(float fXyzw)
{
  m_v = _mm_set1_ps(fXyzw);
}

PL_ALWAYS_INLINE void plSimdVec4f::Set(float x, float y, float z, float w)
{
  m_v = _mm_setr_ps(x, y, z, w);
}
PL_ALWAYS_INLINE void plSimdVec4f::SetX(const plSimdFloat& f)
{
  m_v = _mm_move_ss(m_v, f.m_v);
}

PL_ALWAYS_INLINE void plSimdVec4f::SetY(const plSimdFloat& f)
{
  m_v = _mm_shuffle_ps(_mm_unpacklo_ps(m_v, f.m_v), m_v, PL_TO_SHUFFLE(plSwizzle::XYZW));
}

PL_ALWAYS_INLINE void plSimdVec4f::SetZ(const plSimdFloat& f)
{
  m_v = _mm_shuffle_ps(m_v, _mm_unpackhi_ps(f.m_v, m_v), PL_TO_SHUFFLE(plSwizzle::XYZW));
}

PL_ALWAYS_INLINE void plSimdVec4f::SetW(const plSimdFloat& f)
{
  m_v = _mm_shuffle_ps(m_v, _mm_unpackhi_ps(m_v, f.m_v), PL_TO_SHUFFLE(plSwizzle::XYXY));
}

PL_ALWAYS_INLINE void plSimdVec4f::SetZero()
{
  m_v = _mm_setzero_ps();
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<1>(const float* pFloat)
{
  m_v = _mm_load_ss(pFloat);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<2>(const float* pFloat)
{
  m_v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(pFloat)));
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<3>(const float* pFloat)
{
#if PL_ENABLED(PL_COMPILER_GCC)
  m_v = _mm_set_ps(0.0f, pFloat[2], pFloat[1], pFloat[0]);
#else
  m_v = _mm_movelh_ps(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(pFloat))), _mm_load_ss(pFloat + 2));
#endif
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<4>(const float* pFloat)
{
  m_v = _mm_loadu_ps(pFloat);
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<1>(float* pFloat) const
{
  _mm_store_ss(pFloat, m_v);
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<2>(float* pFloat) const
{
  _mm_store_sd(reinterpret_cast<double*>(pFloat), _mm_castps_pd(m_v));
}

template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<3>(float* pFloat) const
{
  _mm_store_sd(reinterpret_cast<double*>(pFloat), _mm_castps_pd(m_v));
  _mm_store_ss(pFloat + 2, _mm_movehl_ps(m_v, m_v));
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<4>(float* pFloat) const
{
  _mm_storeu_ps(pFloat, m_v);
}
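// The Load<N>/Store<N> specializations above read and write exactly N floats, so they are safe on
// arrays that are neither 16 bytes large nor 16-byte aligned. A minimal usage sketch (hypothetical
// variable names, not part of this file); note that both Load<3> code paths leave w at 0.0f:
//
//   float src[3] = {1.0f, 2.0f, 3.0f};
//   float dst[3];
//   plSimdVec4f v;
//   v.Load<3>(src);  // loads x, y as one 64-bit chunk plus z separately (or via _mm_set_ps on GCC)
//   v.Store<3>(dst); // stores x, y as one 64-bit chunk, then z via _mm_store_ss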
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::BITS_12>() const
{
  return _mm_rcp_ps(m_v);
}
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::BITS_23>() const
{
  __m128 x0 = _mm_rcp_ps(m_v);

  // One iteration of Newton-Raphson
  __m128 x1 = _mm_mul_ps(x0, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(m_v, x0)));

  return x1;
}
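// The refinement above is one Newton-Raphson step for f(x) = 1/x - a, whose update rule is
// x1 = x0 * (2 - a * x0). Each step roughly doubles the number of correct bits, lifting the
// ~12-bit estimate of _mm_rcp_ps to the ~23 bits this specialization advertises. Scalar sketch
// (illustrative only, approx_rcp stands in for _mm_rcp_ps):
//
//   float x0 = approx_rcp(a);          // ~12-bit estimate
//   float x1 = x0 * (2.0f - a * x0);   // one Newton-Raphson iteration -> ~23 bits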
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::FULL>() const
{
  return _mm_div_ps(_mm_set1_ps(1.0f), m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::BITS_12>() const
{
  // sqrt(x) = x * (1 / sqrt(x))
  return _mm_mul_ps(m_v, _mm_rsqrt_ps(m_v));
}
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::BITS_23>() const
{
  __m128 x0 = _mm_rsqrt_ps(m_v);

  // One iteration of Newton-Raphson
  __m128 x1 = _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(0.5f), x0), _mm_sub_ps(_mm_set1_ps(3.0f), _mm_mul_ps(_mm_mul_ps(m_v, x0), x0)));

  return _mm_mul_ps(m_v, x1);
}
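// The pattern 0.5 * x0 * (3 - a * x0 * x0) above is one Newton-Raphson step for f(x) = 1/x^2 - a,
// i.e. a refinement of the _mm_rsqrt_ps estimate of 1/sqrt(a). The final _mm_mul_ps(m_v, x1) then
// uses sqrt(a) = a * (1/sqrt(a)) to turn the refined reciprocal square root into the square root.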
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::FULL>() const
{
  return _mm_sqrt_ps(m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::FULL>() const
{
  return _mm_div_ps(_mm_set1_ps(1.0f), _mm_sqrt_ps(m_v));
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::BITS_23>() const
{
  const __m128 x0 = _mm_rsqrt_ps(m_v);

  // One iteration of Newton-Raphson
  return _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(0.5f), x0), _mm_sub_ps(_mm_set1_ps(3.0f), _mm_mul_ps(_mm_mul_ps(m_v, x0), x0)));
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::BITS_12>() const
{
  return _mm_rsqrt_ps(m_v);
}
template <int N, plMathAcc::Enum acc>
void plSimdVec4f::NormalizeIfNotZero(const plSimdFloat& fEpsilon)
{
  plSimdFloat sqLength = GetLengthSquared<N>();
  __m128 isNotZero = _mm_cmpgt_ps(sqLength.m_v, fEpsilon.m_v);
  m_v = _mm_mul_ps(m_v, sqLength.GetInvSqrt<acc>().m_v);
  m_v = _mm_and_ps(isNotZero, m_v);
}
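// Note on the masking above: _mm_cmpgt_ps yields an all-ones lane wherever the squared length
// exceeds fEpsilon and an all-zeros lane otherwise, so the final _mm_and_ps keeps the normalized
// vector for "real" input and forces the result to exactly zero for (near-)zero-length input,
// without any branching.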
template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsZero() const
{
  const int mask = PL_BIT(N) - 1;
  return (_mm_movemask_ps(_mm_cmpeq_ps(m_v, _mm_setzero_ps())) & mask) == mask;
}
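// PL_BIT(N) - 1 builds a mask with the lowest N bits set (e.g. N = 3 -> 0b0111). Since
// _mm_movemask_ps packs the sign bit of each comparison lane into the low four bits of an int,
// "(movemask & mask) == mask" means "the first N components all passed the comparison" while the
// remaining components are ignored. The same idiom is used by the IsZero/IsNaN/IsValid variants
// below.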
template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsZero(const plSimdFloat& fEpsilon) const
{
  const int mask = PL_BIT(N) - 1;
  __m128 absVal = Abs().m_v;
  return (_mm_movemask_ps(_mm_cmplt_ps(absVal, fEpsilon.m_v)) & mask) == mask;
}
template <int N>
inline bool plSimdVec4f::IsNaN() const
{
  alignas(16) const plUInt32 s_exponentMask[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000};
  alignas(16) const plUInt32 s_mantissaMask[4] = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF};

  __m128 exponentMask = _mm_load_ps(reinterpret_cast<const float*>(s_exponentMask));
  __m128 mantissaMask = _mm_load_ps(reinterpret_cast<const float*>(s_mantissaMask));

  __m128 exponentAll1 = _mm_cmpeq_ps(_mm_and_ps(m_v, exponentMask), exponentMask);
  __m128 mantissaNon0 = _mm_cmpneq_ps(_mm_and_ps(m_v, mantissaMask), _mm_setzero_ps());

  const int mask = PL_BIT(N) - 1;
  return (_mm_movemask_ps(_mm_and_ps(exponentAll1, mantissaNon0)) & mask) != 0;
}
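// Background for the bit tests above: an IEEE-754 single is NaN exactly when all 8 exponent bits
// (0x7f800000) are set and the 23 mantissa bits (0x007fffff) are not all zero; with a zero
// mantissa the value is +/-infinity instead. For example, the quiet NaN 0x7fc00000 has exponent
// 0xff and mantissa 0x400000, so both comparisons produce an all-ones lane and the corresponding
// movemask bit is set. IsValid() below only has to test the exponent, since "exponent not all
// ones" rules out both NaN and infinity.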
template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsValid() const
{
  alignas(16) const plUInt32 s_exponentMask[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000};

  __m128 exponentMask = _mm_load_ps(reinterpret_cast<const float*>(s_exponentMask));

  __m128 exponentNot1 = _mm_cmpneq_ps(_mm_and_ps(m_v, exponentMask), exponentMask);

  const int mask = PL_BIT(N) - 1;
  return (_mm_movemask_ps(exponentNot1) & mask) == mask;
}
template <int N>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::GetComponent() const
{
  return _mm_shuffle_ps(m_v, m_v, PL_SHUFFLE(N, N, N, N));
}
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::x() const
{
  return GetComponent<0>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::y() const
{
  return GetComponent<1>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::z() const
{
  return GetComponent<2>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::w() const
{
  return GetComponent<3>();
}
template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Get() const
{
  return _mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(s));
}
template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetCombined(const plSimdVec4f& other) const
{
  // x = this[s0], y = this[s1], z = other[s2], w = other[s3]
  return _mm_shuffle_ps(m_v, other.m_v, PL_TO_SHUFFLE(s));
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator-() const
{
  return _mm_sub_ps(_mm_setzero_ps(), m_v);
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator+(const plSimdVec4f& v) const
{
  return _mm_add_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator-(const plSimdVec4f& v) const
{
  return _mm_sub_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator*(const plSimdFloat& f) const
{
  return _mm_mul_ps(m_v, f.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator/(const plSimdFloat& f) const
{
  return _mm_div_ps(m_v, f.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMul(const plSimdVec4f& v) const
{
  return _mm_mul_ps(m_v, v.m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::FULL>(const plSimdVec4f& v) const
{
  return _mm_div_ps(m_v, v.m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::BITS_23>(const plSimdVec4f& v) const
{
  __m128 x0 = _mm_rcp_ps(v.m_v);

  // One iteration of Newton-Raphson
  __m128 x1 = _mm_mul_ps(x0, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(v.m_v, x0)));

  return _mm_mul_ps(m_v, x1);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::BITS_12>(const plSimdVec4f& v) const
{
  return _mm_mul_ps(m_v, _mm_rcp_ps(v.m_v));
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMin(const plSimdVec4f& v) const
{
  return _mm_min_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMax(const plSimdVec4f& v) const
{
  return _mm_max_ps(m_v, v.m_v);
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Abs() const
{
  return _mm_andnot_ps(_mm_set1_ps(-0.0f), m_v);
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Round() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_NINT);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Floor() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_FLOOR);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Ceil() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_CEIL);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Trunc() const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_round_ps(m_v, _MM_FROUND_TRUNC);
#else
  PL_ASSERT_NOT_IMPLEMENTED;
#endif
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::FlipSign(const plSimdVec4b& vCmp) const
{
  // Flip the sign bit of every component for which vCmp is set.
  return _mm_xor_ps(m_v, _mm_and_ps(vCmp.m_v, _mm_set1_ps(-0.0f)));
}
// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Select(const plSimdVec4b& vCmp, const plSimdVec4f& vTrue, const plSimdVec4f& vFalse)
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_blendv_ps(vFalse.m_v, vTrue.m_v, vCmp.m_v);
#else
  return _mm_or_ps(_mm_andnot_ps(vCmp.m_v, vFalse.m_v), _mm_and_ps(vCmp.m_v, vTrue.m_v));
#endif
}
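// _mm_blendv_ps(vFalse, vTrue, vCmp) picks the vTrue lane wherever the sign bit of the vCmp lane
// is set and the vFalse lane otherwise; for comparison masks (all-ones or all-zeros lanes) that is
// the same result the and/andnot/or fallback computes. A small usage sketch (hypothetical values,
// not part of this file):
//
//   plSimdVec4f a(1, 2, 3, 4), b(4, 3, 2, 1);
//   plSimdVec4f m = plSimdVec4f::Select(a < b, a, b); // component-wise minimum via Select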
PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator+=(const plSimdVec4f& v)
{
  m_v = _mm_add_ps(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator-=(const plSimdVec4f& v)
{
  m_v = _mm_sub_ps(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator*=(const plSimdFloat& f)
{
  m_v = _mm_mul_ps(m_v, f.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator/=(const plSimdFloat& f)
{
  m_v = _mm_div_ps(m_v, f.m_v);
  return *this;
}
PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator==(const plSimdVec4f& v) const
{
  return _mm_cmpeq_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator!=(const plSimdVec4f& v) const
{
  return _mm_cmpneq_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator<=(const plSimdVec4f& v) const
{
  return _mm_cmple_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator<(const plSimdVec4f& v) const
{
  return _mm_cmplt_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator>=(const plSimdVec4f& v) const
{
  return _mm_cmpge_ps(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator>(const plSimdVec4f& v) const
{
  return _mm_cmpgt_ps(m_v, v.m_v);
}
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<2>() const
{
#if PL_SSE_LEVEL >= PL_SSE_31
  __m128 a = _mm_hadd_ps(m_v, m_v);
  return _mm_shuffle_ps(a, a, PL_TO_SHUFFLE(plSwizzle::XXXX));
#else
  return GetComponent<0>() + GetComponent<1>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<3>() const
{
  return HorizontalSum<2>() + GetComponent<2>();
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<4>() const
{
#if PL_SSE_LEVEL >= PL_SSE_31
  __m128 a = _mm_hadd_ps(m_v, m_v);
  return _mm_hadd_ps(a, a);
#else
  return (GetComponent<0>() + GetComponent<1>()) + (GetComponent<2>() + GetComponent<3>());
#endif
}
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<2>() const
{
  return _mm_min_ps(GetComponent<0>().m_v, GetComponent<1>().m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<3>() const
{
  return _mm_min_ps(_mm_min_ps(GetComponent<0>().m_v, GetComponent<1>().m_v), GetComponent<2>().m_v);
}
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<4>() const
{
  __m128 xyxyzwzw = _mm_min_ps(_mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(plSwizzle::ZWXY)), m_v);
  __m128 zwzwxyxy = _mm_shuffle_ps(xyxyzwzw, xyxyzwzw, PL_TO_SHUFFLE(plSwizzle::YXWZ));
  return _mm_min_ps(xyxyzwzw, zwzwxyxy);
}
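// The two min steps above (and the matching max steps in HorizontalMax<4> below) are a
// shuffle-based reduction: the first step combines every component with the one two lanes away
// (x with z, y with w), the second step combines the two surviving candidates, so afterwards all
// four lanes hold min(x, y, z, w) and any lane can be returned as the plSimdFloat.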
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<2>() const
{
  return _mm_max_ps(GetComponent<0>().m_v, GetComponent<1>().m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<3>() const
{
  return _mm_max_ps(_mm_max_ps(GetComponent<0>().m_v, GetComponent<1>().m_v), GetComponent<2>().m_v);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<4>() const
{
  __m128 xyxyzwzw = _mm_max_ps(_mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(plSwizzle::ZWXY)), m_v);
  __m128 zwzwxyxy = _mm_shuffle_ps(xyxyzwzw, xyxyzwzw, PL_TO_SHUFFLE(plSwizzle::YXWZ));
  return _mm_max_ps(xyxyzwzw, zwzwxyxy);
}
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<1>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0x1f);
#else
  return CompMul(v).HorizontalSum<1>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<2>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0x3f);
#else
  return CompMul(v).HorizontalSum<2>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<3>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0x7f);
#else
  return CompMul(v).HorizontalSum<3>();
#endif
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<4>(const plSimdVec4f& v) const
{
#if PL_SSE_LEVEL >= PL_SSE_41
  return _mm_dp_ps(m_v, v.m_v, 0xff);
#else
  return CompMul(v).HorizontalSum<4>();
#endif
}
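// About the _mm_dp_ps immediates used above: the high nibble selects which component products
// enter the sum and the low nibble selects which result lanes receive it. 0x1f sums only x*x',
// 0x3f adds y*y', 0x7f adds z*z', 0xff includes w*w', and the low 0xf broadcasts the result to
// all four lanes in every case. The pre-SSE4.1 fallback computes the same value with a
// component-wise multiply followed by a horizontal sum.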
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CrossRH(const plSimdVec4f& v) const
{
  // 3D cross product, w is ignored.
  __m128 a = _mm_mul_ps(m_v, _mm_shuffle_ps(v.m_v, v.m_v, PL_TO_SHUFFLE(plSwizzle::YZXW)));
  __m128 b = _mm_mul_ps(v.m_v, _mm_shuffle_ps(m_v, m_v, PL_TO_SHUFFLE(plSwizzle::YZXW)));
  __m128 c = _mm_sub_ps(a, b);

  return _mm_shuffle_ps(c, c, PL_TO_SHUFFLE(plSwizzle::YZXW));
}
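// This is the common three-shuffle formulation of the 3D cross product: with s = (y, z, x, w),
//   cross(a, b) = shuffle_s(a * shuffle_s(b) - b * shuffle_s(a))
// which yields (ay*bz - az*by, az*bx - ax*bz, ax*by - ay*bx) in xyz and 0 in w (aw*bw - bw*aw).
// Applying the final shuffle once to the difference saves a shuffle compared to the naive
// four-shuffle version.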
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetOrthogonalVector() const
{
  // Generates an arbitrary vector such that Dot<3>(GetOrthogonalVector()) == 0:
  // keep only the component(s) equal to the smallest of x/y/z (zeroing the rest),
  // then take the cross product with the original vector.
  return CrossRH(_mm_and_ps(m_v, _mm_cmpeq_ps(m_v, HorizontalMin<3>().m_v)));
}
// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulAdd(const plSimdVec4f& a, const plSimdVec4f& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmadd_ps(a.m_v, b.m_v, c.m_v);
#else
  return a.CompMul(b) + c;
#endif
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulAdd(const plSimdVec4f& a, const plSimdFloat& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmadd_ps(a.m_v, b.m_v, c.m_v);
#else
  return a * b + c;
#endif
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulSub(const plSimdVec4f& a, const plSimdVec4f& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmsub_ps(a.m_v, b.m_v, c.m_v);
#else
  return a.CompMul(b) - c;
#endif
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulSub(const plSimdVec4f& a, const plSimdFloat& b, const plSimdVec4f& c)
{
#if PL_SSE_LEVEL >= PL_SSE_AVX2
  return _mm_fmsub_ps(a.m_v, b.m_v, c.m_v);
#else
  return a * b - c;
#endif
}
// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CopySign(const plSimdVec4f& vMagnitude, const plSimdVec4f& vSign)
{
  __m128 minusZero = _mm_set1_ps(-0.0f);
  return _mm_or_ps(_mm_andnot_ps(minusZero, vMagnitude.m_v), _mm_and_ps(minusZero, vSign.m_v));
}
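// -0.0f is the sign-bit mask 0x80000000, so the expression above clears the sign bit of
// vMagnitude (andnot) and ORs in the sign bit of vSign, i.e. a four-wide equivalent of
// copysignf(magnitude, sign) per component.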