PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f()
{
  PL_CHECK_SIMD_ALIGNMENT(this);

#if PL_ENABLED(PL_MATH_CHECK_FOR_NAN)
  // Initialize all data to NaN in debug builds to make the use of uninitialized data easier to find.
  m_v = vmovq_n_f32(plMath::NaN<float>());
#endif
}
PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(float xyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = vmovq_n_f32(xyzw);
}
PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(const plSimdFloat& xyzw)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  m_v = xyzw.m_v;
}
PL_ALWAYS_INLINE plSimdVec4f::plSimdVec4f(float x, float y, float z, float w)
{
  PL_CHECK_SIMD_ALIGNMENT(this);

  alignas(16) float values[4] = {x, y, z, w};
  m_v = vld1q_f32(values);
}
PL_ALWAYS_INLINE void plSimdVec4f::Set(float xyzw)
{
  m_v = vmovq_n_f32(xyzw);
}
PL_ALWAYS_INLINE void plSimdVec4f::Set(float x, float y, float z, float w)
{
  alignas(16) float values[4] = {x, y, z, w};
  m_v = vld1q_f32(values);
}
PL_ALWAYS_INLINE void plSimdVec4f::SetX(const plSimdFloat& f)
{
  m_v = vsetq_lane_f32(f, m_v, 0);
}

PL_ALWAYS_INLINE void plSimdVec4f::SetY(const plSimdFloat& f)
{
  m_v = vsetq_lane_f32(f, m_v, 1);
}

PL_ALWAYS_INLINE void plSimdVec4f::SetZ(const plSimdFloat& f)
{
  m_v = vsetq_lane_f32(f, m_v, 2);
}

PL_ALWAYS_INLINE void plSimdVec4f::SetW(const plSimdFloat& f)
{
  m_v = vsetq_lane_f32(f, m_v, 3);
}

PL_ALWAYS_INLINE void plSimdVec4f::SetZero()
{
  m_v = vmovq_n_f32(0.0f);
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<1>(const float* pFloat)
{
  m_v = vld1q_lane_f32(pFloat, vmovq_n_f32(0.0f), 0);
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<2>(const float* pFloat)
{
  m_v = vreinterpretq_f32_f64(vld1q_lane_f64(reinterpret_cast<const float64_t*>(pFloat), vmovq_n_f64(0.0), 0));
}
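// Note on the partial loads: NEON has no single intrinsic for "load N of 4 floats
// and zero the rest". Load<1> loads one float into lane 0 of a zeroed register.
// Load<2> reinterprets the destination as float64x2_t so that a single 64-bit lane
// load fetches both floats at once; this relies on AArch64 tolerating an 8-byte
// load from a pointer that is only guaranteed 4-byte aligned, which it does.
// Load<3> below combines a 64-bit load of x/y with a single-lane load of z.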
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<3>(const float* pFloat)
{
  m_v = vcombine_f32(vld1_f32(pFloat), vld1_lane_f32(pFloat + 2, vmov_n_f32(0.0f), 0));
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Load<4>(const float* pFloat)
{
  m_v = vld1q_f32(pFloat);
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<1>(float* pFloat) const
{
  vst1q_lane_f32(pFloat, m_v, 0);
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<2>(float* pFloat) const
{
  vst1q_lane_f64(reinterpret_cast<float64_t*>(pFloat), vreinterpretq_f64_f32(m_v), 0);
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<3>(float* pFloat) const
{
  vst1q_lane_f64(reinterpret_cast<float64_t*>(pFloat), vreinterpretq_f64_f32(m_v), 0);
  vst1q_lane_f32(pFloat + 2, m_v, 2);
}
template <>
PL_ALWAYS_INLINE void plSimdVec4f::Store<4>(float* pFloat) const
{
  vst1q_f32(pFloat, m_v);
}
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::BITS_12>() const
{
  float32x4_t x0 = vrecpeq_f32(m_v);

  // One iteration of Newton-Raphson
  float32x4_t x1 = vmulq_f32(vrecpsq_f32(m_v, x0), x0);

  return x1;
}
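// How the refinement works (a sketch of the math, not additional library code):
// vrecpeq_f32 returns an estimate e ~ 1/a with roughly 8 bits of precision, and
// vrecpsq_f32(a, x) computes (2 - a * x). The Newton-Raphson step for f(x) = 1/x - a
// is x' = x * (2 - a * x), which roughly doubles the number of correct bits per
// iteration: one step for ~12 bits, two steps (below) for ~23 bits.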
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::BITS_23>() const
{
  float32x4_t x0 = vrecpeq_f32(m_v);

  // Two iterations of Newton-Raphson
  float32x4_t x1 = vmulq_f32(vrecpsq_f32(m_v, x0), x0);
  float32x4_t x2 = vmulq_f32(vrecpsq_f32(m_v, x1), x1);

  return x2;
}
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetReciprocal<plMathAcc::FULL>() const
{
  return vdivq_f32(vmovq_n_f32(1.0f), m_v);
}
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::FULL>() const
{
  return vdivq_f32(vmovq_n_f32(1.0f), vsqrtq_f32(m_v));
}
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::BITS_23>() const
{
  const float32x4_t x0 = vrsqrteq_f32(m_v);

  // Two iterations of Newton-Raphson
  const float32x4_t x1 = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x0, m_v), x0), x0);
  return vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, m_v), x1), x1);
}
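// The reciprocal-square-root refinement follows the same pattern: vrsqrteq_f32
// gives an estimate e ~ 1/sqrt(a), and vrsqrtsq_f32(a * x, x) computes
// (3 - a * x * x) / 2. The Newton-Raphson step for f(x) = 1/x^2 - a is
// x' = x * (3 - a * x^2) / 2, again roughly doubling the correct bits per step.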
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetInvSqrt<plMathAcc::BITS_12>() const
{
  const float32x4_t x0 = vrsqrteq_f32(m_v);

  // One iteration of Newton-Raphson
  return vmulq_f32(vrsqrtsq_f32(vmulq_f32(x0, m_v), x0), x0);
}
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::BITS_12>() const
{
  return CompMul(GetInvSqrt<plMathAcc::BITS_12>());
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::BITS_23>() const
{
  return CompMul(GetInvSqrt<plMathAcc::BITS_23>());
}
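// The approximate square roots exploit the identity sqrt(a) = a * (1 / sqrt(a)):
// component-wise multiplying the vector by its own approximate reciprocal square
// root avoids the slower vsqrtq_f32 instruction at the cost of some precision.
// One caveat: for a lane containing 0 this computes 0 * inf = NaN, whereas the
// FULL variant below returns 0.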
template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetSqrt<plMathAcc::FULL>() const
{
  return vsqrtq_f32(m_v);
}
template <int N, plMathAcc::Enum acc>
void plSimdVec4f::NormalizeIfNotZero(const plSimdFloat& fEpsilon)
{
  plSimdFloat sqLength = GetLengthSquared<N>();

  uint32x4_t isNotZero = vcgtq_f32(sqLength.m_v, fEpsilon.m_v);
  m_v = vmulq_f32(m_v, sqLength.GetInvSqrt<acc>().m_v);
  m_v = vreinterpretq_f32_u32(vandq_u32(isNotZero, vreinterpretq_u32_f32(m_v)));
}
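// Branchless "normalize or zero": isNotZero is all-ones per lane if the squared
// length exceeds fEpsilon, all-zeros otherwise. Multiplying by the reciprocal
// length first (which may produce inf/NaN for a zero vector) and then ANDing with
// the mask forces the result to exactly zero in the degenerate case instead of
// propagating the invalid values.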
template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsZero() const
{
  const int mask = PL_BIT(N) - 1;
  return (plInternal::NeonMoveMask(vceqzq_f32(m_v)) & mask) == mask;
}
template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsZero(const plSimdFloat& fEpsilon) const
{
  const int mask = PL_BIT(N) - 1;
  float32x4_t absVal = Abs().m_v;
  return (plInternal::NeonMoveMask(vcltq_f32(absVal, fEpsilon.m_v)) & mask) == mask;
}
template <int N>
inline bool plSimdVec4f::IsNaN() const
{
  // NaN is the only value that does not compare equal to itself.
  const int mask = PL_BIT(N) - 1;
  return (plInternal::NeonMoveMask(vceqq_f32(m_v, m_v)) & mask) != mask;
}
template <int N>
PL_ALWAYS_INLINE bool plSimdVec4f::IsValid() const
{
  // A float is invalid (NaN or infinity) exactly when all 8 exponent bits are set.
  // Mask out the sign and mantissa before comparing; comparing the raw bits against
  // 0x7f800000 would also flag every negative finite value as invalid.
  const int mask = PL_BIT(N) - 1;
  const uint32x4_t exponentMask = vmovq_n_u32(0x7f800000);
  const uint32x4_t exponentAll1 = vceqq_u32(vandq_u32(vreinterpretq_u32_f32(m_v), exponentMask), exponentMask);
  return (plInternal::NeonMoveMask(exponentAll1) & mask) == 0;
}
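// plInternal::NeonMoveMask (an SSE-movemask equivalent assumed to pack the top bit
// of each 32-bit lane into the low 4 bits of an integer) drives all of the
// predicates above. The PL_BIT(N) - 1 mask then restricts the test to the first N
// components, so e.g. IsZero<3>() ignores w.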
template <int N>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::GetComponent() const
{
  return vdupq_laneq_f32(m_v, N);
}
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::x() const
{
  return GetComponent<0>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::y() const
{
  return GetComponent<1>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::z() const
{
  return GetComponent<2>();
}

PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::w() const
{
  return GetComponent<3>();
}
template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Get() const
{
  return __builtin_shufflevector(m_v, m_v, PL_TO_SHUFFLE(s));
}
template <plSwizzle::Enum s>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetCombined(const plSimdVec4f& other) const
{
  // x = this[s0], y = this[s1], z = other[s2], w = other[s3]
  return __builtin_shufflevector(m_v, other.m_v, PL_TO_SHUFFLE(s));
}
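// Both swizzle overloads compile down to a single shuffle: __builtin_shufflevector
// is a Clang/GCC builtin that selects lanes from its two vector arguments by
// compile-time constant indices, and PL_TO_SHUFFLE presumably expands a
// plSwizzle::Enum such as XYZW into that index list.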
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator-() const
{
  return vnegq_f32(m_v);
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator+(const plSimdVec4f& v) const
{
  return vaddq_f32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator-(const plSimdVec4f& v) const
{
  return vsubq_f32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator*(const plSimdFloat& f) const
{
  return vmulq_f32(m_v, f.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::operator/(const plSimdFloat& f) const
{
  return vdivq_f32(m_v, f.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMul(const plSimdVec4f& v) const
{
  return vmulq_f32(m_v, v.m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::FULL>(const plSimdVec4f& v) const
{
  return vdivq_f32(m_v, v.m_v);
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::BITS_23>(const plSimdVec4f& v) const
{
  return CompMul(v.GetReciprocal<plMathAcc::BITS_23>());
}

template <>
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompDiv<plMathAcc::BITS_12>(const plSimdVec4f& v) const
{
  return CompMul(v.GetReciprocal<plMathAcc::BITS_12>());
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMin(const plSimdVec4f& v) const
{
  return vminq_f32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CompMax(const plSimdVec4f& v) const
{
  return vmaxq_f32(m_v, v.m_v);
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Abs() const
{
  return vabsq_f32(m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Round() const
{
  // Round to nearest, ties to even
  return vrndnq_f32(m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Floor() const
{
  // Round toward negative infinity
  return vrndmq_f32(m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Ceil() const
{
  // Round toward positive infinity
  return vrndpq_f32(m_v);
}

PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Trunc() const
{
  // Round toward zero
  return vrndq_f32(m_v);
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::FlipSign(const plSimdVec4b& cmp) const
{
  // Shift each lane of the (all-ones or all-zeros) comparison mask so that only the
  // sign bit remains, then XOR it in: lanes selected by the mask get their sign flipped.
  return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(m_v), vshlq_n_u32(cmp.m_v, 31)));
}
// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::Select(const plSimdVec4b& cmp, const plSimdVec4f& ifTrue, const plSimdVec4f& ifFalse)
{
  return vbslq_f32(cmp.m_v, ifTrue.m_v, ifFalse.m_v);
}
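// vbslq_f32 is a bitwise select: for every bit set in cmp.m_v it takes the bit from
// ifTrue, otherwise from ifFalse. Since comparison masks are all-ones or all-zeros
// per lane, this acts as a per-component branchless ternary, e.g.
// plSimdVec4f::Select(a > b, a, b) computes a component-wise maximum.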
PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator+=(const plSimdVec4f& v)
{
  m_v = vaddq_f32(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator-=(const plSimdVec4f& v)
{
  m_v = vsubq_f32(m_v, v.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator*=(const plSimdFloat& f)
{
  m_v = vmulq_f32(m_v, f.m_v);
  return *this;
}

PL_ALWAYS_INLINE plSimdVec4f& plSimdVec4f::operator/=(const plSimdFloat& f)
{
  m_v = vdivq_f32(m_v, f.m_v);
  return *this;
}
PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator==(const plSimdVec4f& v) const
{
  return vceqq_f32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator!=(const plSimdVec4f& v) const
{
  return vmvnq_u32(vceqq_f32(m_v, v.m_v));
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator<=(const plSimdVec4f& v) const
{
  return vcleq_f32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator<(const plSimdVec4f& v) const
{
  return vcltq_f32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator>=(const plSimdVec4f& v) const
{
  return vcgeq_f32(m_v, v.m_v);
}

PL_ALWAYS_INLINE plSimdVec4b plSimdVec4f::operator>(const plSimdVec4f& v) const
{
  return vcgtq_f32(m_v, v.m_v);
}
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<2>() const
{
  return vpadds_f32(vget_low_f32(m_v));
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<3>() const
{
  return HorizontalSum<2>() + GetComponent<2>();
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalSum<4>() const
{
  float32x2_t x0 = vpadd_f32(vget_low_f32(m_v), vget_high_f32(m_v));
  return vpadds_f32(x0);
}
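// HorizontalSum<4> uses two pairwise adds: vpadd_f32 turns (x, y) and (z, w) into
// (x + y, z + w), and vpadds_f32 collapses the remaining pair into a scalar. The
// min/max reductions below follow the same pairwise pattern with vpmin/vpmax.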
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<2>() const
{
  return vpmins_f32(vget_low_f32(m_v));
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<3>() const
{
  return vminq_f32(vmovq_n_f32(vpmins_f32(vget_low_f32(m_v))), vdupq_laneq_f32(m_v, 2));
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMin<4>() const
{
  return vpmins_f32(vpmin_f32(vget_low_f32(m_v), vget_high_f32(m_v)));
}
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<2>() const
{
  return vpmaxs_f32(vget_low_f32(m_v));
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<3>() const
{
  return vmaxq_f32(vmovq_n_f32(vpmaxs_f32(vget_low_f32(m_v))), vdupq_laneq_f32(m_v, 2));
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::HorizontalMax<4>() const
{
  return vpmaxs_f32(vpmax_f32(vget_low_f32(m_v), vget_high_f32(m_v)));
}
template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<1>(const plSimdVec4f& v) const
{
  return vdupq_laneq_f32(vmulq_f32(m_v, v.m_v), 0);
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<2>(const plSimdVec4f& v) const
{
  return vpadds_f32(vmul_f32(vget_low_f32(m_v), vget_low_f32(v.m_v)));
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<3>(const plSimdVec4f& v) const
{
  return CompMul(v).HorizontalSum<3>();
}

template <>
PL_ALWAYS_INLINE plSimdFloat plSimdVec4f::Dot<4>(const plSimdVec4f& v) const
{
  return CompMul(v).HorizontalSum<4>();
}
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CrossRH(const plSimdVec4f& v) const
{
  float32x4_t a = vmulq_f32(m_v, __builtin_shufflevector(v.m_v, v.m_v, PL_TO_SHUFFLE(plSwizzle::YZXW)));
  float32x4_t b = vmulq_f32(v.m_v, __builtin_shufflevector(m_v, m_v, PL_TO_SHUFFLE(plSwizzle::YZXW)));
  float32x4_t c = vsubq_f32(a, b);

  return __builtin_shufflevector(c, c, PL_TO_SHUFFLE(plSwizzle::YZXW));
}
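// This is the classic three-shuffle cross product. Writing u = *this:
//   c = u * v.yzxw - v * u.yzxw
//     = (u.x*v.y - v.x*u.y, u.y*v.z - v.y*u.z, u.z*v.x - v.z*u.x, 0)
// and shuffling c by YZXW reorders that into the familiar
//   (u.y*v.z - u.z*v.y, u.z*v.x - u.x*v.z, u.x*v.y - u.y*v.x),
// saving one shuffle compared with swizzling both inputs twice.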
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::GetOrthogonalVector() const
{
  // Generates an arbitrary vector such that Dot<3>(GetOrthogonalVector()) == 0:
  // keep only the component(s) equal to the smallest of x/y/z, then cross with that.
  return CrossRH(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(m_v), vceqq_f32(m_v, HorizontalMin<3>().m_v))));
}
// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulAdd(const plSimdVec4f& a, const plSimdVec4f& b, const plSimdVec4f& c)
{
  return vfmaq_f32(c.m_v, a.m_v, b.m_v);
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulAdd(const plSimdVec4f& a, const plSimdFloat& b, const plSimdVec4f& c)
{
  return vfmaq_f32(c.m_v, a.m_v, b.m_v);
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulSub(const plSimdVec4f& a, const plSimdVec4f& b, const plSimdVec4f& c)
{
  return vnegq_f32(vfmsq_f32(c.m_v, a.m_v, b.m_v));
}

// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::MulSub(const plSimdVec4f& a, const plSimdFloat& b, const plSimdVec4f& c)
{
  return vnegq_f32(vfmsq_f32(c.m_v, a.m_v, b.m_v));
}
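// vfmaq_f32(c, a, b) computes c + a * b as a single fused multiply-add, and
// vfmsq_f32(c, a, b) computes c - a * b; negating the latter yields the desired
// a * b - c. The fused forms avoid the intermediate rounding of a separate
// multiply followed by an add.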
// static
PL_ALWAYS_INLINE plSimdVec4f plSimdVec4f::CopySign(const plSimdVec4f& magnitude, const plSimdVec4f& sign)
{
  // Bitwise select with a sign-bit-only mask: take bit 31 from 'sign', everything else from 'magnitude'.
  return vbslq_f32(vmovq_n_u32(0x80000000), sign.m_v, magnitude.m_v);
}