3#include <Foundation/CodeUtils/Expression/ExpressionByteCode.h>
4#include <Foundation/Math/Float16.h>
5#include <Foundation/SimdMath/SimdMath.h>
// State handed to every op function in this file. Only two of its members appear in this excerpt.
9 struct ExecutionContext
// The Load/Store op functions take `m_uiNumInstances & 0x3` as the number of remainder instances.
12 plUInt32 m_uiNumInstances = 0;
// Used by the register macros as the element stride between consecutive virtual registers and as the
// length of one register (`re = r + m_uiNumSimd4Instances`).
// NOTE(review): presumably the instance count rounded up to whole groups of 4 - confirm where the context is filled in.
13 plUInt32 m_uiNumSimd4Instances = 0;
// Element type of the byte code stream, taken from plExpressionByteCode.
20 using ByteCodeType = plExpressionByteCode::StorageType;
// Common signature of all op functions: the byte code pointer is passed by reference together with the
// execution context. NOTE(review): presumably the decode helpers advance the pointer - confirm in plExpressionByteCode.
21 using OpFunc = void (*)(
const ByteCodeType*& pByteCode, ExecutionContext& context);
// Decoding helpers for use inside op functions; all of them expect `pByteCode` and `context` to be in scope.
//  - DEFINE_TARGET_REGISTER(): declares `r` at m_pRegisters + (decoded register index * m_uiNumSimd4Instances)
//    and `re` at `r + m_uiNumSimd4Instances`.
//  - DEFINE_OP_REGISTER(name): declares `name` as a const pointer to an operand register located the same way.
//  - DEFINE_CONSTANT(name): first copies the current byte code word (`*pByteCode`) into a variable whose name is
//    built from `name` and `Raw`, then decodes a constant via GetConstant into a local `tmp` and points `name` at it.
//    Because the local is always called `tmp`, the macro can only be used once per scope.
23#define DEFINE_TARGET_REGISTER() \
24 plExpression::Register* r = context.m_pRegisters + plExpressionByteCode::GetRegisterIndex(pByteCode) * context.m_uiNumSimd4Instances; \
25 plExpression::Register* re = r + context.m_uiNumSimd4Instances; \
28#define DEFINE_OP_REGISTER(name) \
29 const plExpression::Register* name = context.m_pRegisters + plExpressionByteCode::GetRegisterIndex(pByteCode) * context.m_uiNumSimd4Instances;
31#define DEFINE_CONSTANT(name) \
32 const plUInt32 PL_PP_CONCAT(name, Raw) = *pByteCode; \
33 PL_IGNORE_UNUSED(PL_PP_CONCAT(name, Raw)); \
34 const plExpression::Register tmp = plExpressionByteCode::GetConstant(pByteCode); \
35 const plExpression::Register* name = &tmp;
// Macro templates that generate the op functions.
// Each DEFINE_*_OP(name, code) emits a function with the OpFunc parameter list whose name is built from `name`
// and `_4`. The function declares the target register (`r`/`re`) and its operand registers (`a` for unary,
// `a` and `b` for binary, `a`, `b` and `c` for ternary) and passes `code` to the matching *_INNER_LOOP macro.
// DEFINE_BINARY_OP is additionally a template on `RightIsConstant`: in the constant case the right operand is
// decoded with GetConstant into `bConstant`; the other visible assignment points `b` at a register.
// NOTE(review): the inner-loop bodies and the declaration of `bRaw` are not visible in this listing; presumably
// the loops evaluate `code` for every element from `r` up to `re` - confirm against the full file.
37#define UNARY_OP_INNER_LOOP(code) \
42#define DEFINE_UNARY_OP(name, code) \
43 void PL_PP_CONCAT(name, _4)(const ByteCodeType*& pByteCode, ExecutionContext& context) \
45 DEFINE_TARGET_REGISTER(); \
46 DEFINE_OP_REGISTER(a); \
49 UNARY_OP_INNER_LOOP(code) \
53#define BINARY_OP_INNER_LOOP(code) \
57 if constexpr (RightIsConstant == false) \
62#define DEFINE_BINARY_OP(name, code) \
63 template <bool RightIsConstant> \
64 void PL_PP_CONCAT(name, _4)(const ByteCodeType*& pByteCode, ExecutionContext& context) \
66 DEFINE_TARGET_REGISTER(); \
67 DEFINE_OP_REGISTER(a); \
69 PL_IGNORE_UNUSED(bRaw); \
70 plExpression::Register bConstant; \
71 const plExpression::Register* b; \
72 PL_IGNORE_UNUSED(b); \
73 if constexpr (RightIsConstant) \
76 bConstant = plExpressionByteCode::GetConstant(pByteCode); \
81 b = context.m_pRegisters + plExpressionByteCode::GetRegisterIndex(pByteCode) * context.m_uiNumSimd4Instances; \
85 BINARY_OP_INNER_LOOP(code) \
89#define TERNARY_OP_INNER_LOOP(code) \
96#define DEFINE_TERNARY_OP(name, code) \
97 void PL_PP_CONCAT(name, _4)(const ByteCodeType*& pByteCode, ExecutionContext& context) \
99 DEFINE_TARGET_REGISTER(); \
100 DEFINE_OP_REGISTER(a); \
101 DEFINE_OP_REGISTER(b); \
102 DEFINE_OP_REGISTER(c); \
105 TERNARY_OP_INNER_LOOP(code) \
// Op function instantiations. In every `code` expression `r` is the target register element and `a`/`b`/`c`
// are the operand elements; the `.f`, `.i` and `.b` members select the float, integer and boolean view of a register.

// --- Unary: absolute value and square root ---
109 DEFINE_UNARY_OP(AbsF, r->f = a->f.Abs());
110 DEFINE_UNARY_OP(AbsI, r->i = a->i.Abs());
111 DEFINE_UNARY_OP(SqrtF, r->f = a->f.GetSqrt());
// --- Unary: logarithms and power of two (forwarded to plSimdMath) ---
114 DEFINE_UNARY_OP(LnF, r->f = plSimdMath::Ln(a->f));
115 DEFINE_UNARY_OP(Log2F, r->f = plSimdMath::Log2(a->f));
116 DEFINE_UNARY_OP(Log2I, r->i = plSimdMath::Log2i(a->i));
117 DEFINE_UNARY_OP(Log10F, r->f = plSimdMath::Log10(a->f));
118 DEFINE_UNARY_OP(Pow2F, r->f = plSimdMath::Pow2(a->f));
// --- Unary: trigonometry (forwarded to plSimdMath) ---
120 DEFINE_UNARY_OP(SinF, r->f = plSimdMath::Sin(a->f));
121 DEFINE_UNARY_OP(CosF, r->f = plSimdMath::Cos(a->f));
122 DEFINE_UNARY_OP(TanF, r->f = plSimdMath::Tan(a->f));
124 DEFINE_UNARY_OP(ASinF, r->f = plSimdMath::ASin(a->f));
125 DEFINE_UNARY_OP(ACosF, r->f = plSimdMath::ACos(a->f));
126 DEFINE_UNARY_OP(ATanF, r->f = plSimdMath::ATan(a->f));
// --- Unary: rounding ---
128 DEFINE_UNARY_OP(RoundF, r->f = a->f.Round());
129 DEFINE_UNARY_OP(FloorF, r->f = a->f.Floor());
130 DEFINE_UNARY_OP(CeilF, r->f = a->f.Ceil());
131 DEFINE_UNARY_OP(TruncF, r->f = a->f.Trunc());
// --- Unary: bitwise / logical negation ---
133 DEFINE_UNARY_OP(NotI, r->i = ~a->i);
134 DEFINE_UNARY_OP(NotB, r->b = !a->b);
// --- Unary: int <-> float conversion (float to int goes through plSimdVec4i::Truncate) ---
136 DEFINE_UNARY_OP(IToF, r->f = a->i.ToFloat());
137 DEFINE_UNARY_OP(FToI, r->i = plSimdVec4i::Truncate(a->f));
// --- Binary: arithmetic ---
139 DEFINE_BINARY_OP(AddF, r->f = a->f + b->f);
140 DEFINE_BINARY_OP(AddI, r->i = a->i + b->i);
142 DEFINE_BINARY_OP(SubF, r->f = a->f - b->f);
143 DEFINE_BINARY_OP(SubI, r->i = a->i - b->i);
145 DEFINE_BINARY_OP(MulF, r->f = a->f.CompMul(b->f));
146 DEFINE_BINARY_OP(MulI, r->i = a->i.CompMul(b->i));
148 DEFINE_BINARY_OP(DivF, r->f = a->f.CompDiv(b->f));
149 DEFINE_BINARY_OP(DivI, r->i = a->i.CompDiv(b->i));
// --- Binary: component-wise min / max ---
151 DEFINE_BINARY_OP(MinF, r->f = a->f.CompMin(b->f));
152 DEFINE_BINARY_OP(MinI, r->i = a->i.CompMin(b->i));
154 DEFINE_BINARY_OP(MaxF, r->f = a->f.CompMax(b->f));
155 DEFINE_BINARY_OP(MaxI, r->i = a->i.CompMax(b->i));
// --- Binary: shifts and bitwise ops ---
// The `_C` shift variants take the shift amount from `bRaw` instead of from the `b` operand.
// NOTE(review): `bRaw` is declared inside DEFINE_BINARY_OP; presumably it holds the raw byte code word - confirm.
157 DEFINE_BINARY_OP(ShlI, r->i = a->i << b->i);
158 DEFINE_BINARY_OP(ShrI, r->i = a->i >> b->i);
159 DEFINE_BINARY_OP(ShlI_C, r->i = a->i << bRaw);
160 DEFINE_BINARY_OP(ShrI_C, r->i = a->i >> bRaw);
161 DEFINE_BINARY_OP(AndI, r->i = a->i & b->i);
162 DEFINE_BINARY_OP(XorI, r->i = a->i ^ b->i);
163 DEFINE_BINARY_OP(OrI, r->i = a->i | b->i);
// --- Binary: comparisons, result is written to the boolean view `r->b` ---
165 DEFINE_BINARY_OP(EqF, r->b = a->f == b->f);
166 DEFINE_BINARY_OP(EqI, r->b = a->i == b->i);
167 DEFINE_BINARY_OP(EqB, r->b = a->b == b->b);
169 DEFINE_BINARY_OP(NEqF, r->b = a->f != b->f);
170 DEFINE_BINARY_OP(NEqI, r->b = a->i != b->i);
171 DEFINE_BINARY_OP(NEqB, r->b = a->b != b->b);
173 DEFINE_BINARY_OP(LtF, r->b = a->f < b->f);
174 DEFINE_BINARY_OP(LtI, r->b = a->i < b->i);
176 DEFINE_BINARY_OP(LEqF, r->b = a->f <= b->f);
177 DEFINE_BINARY_OP(LEqI, r->b = a->i <= b->i);
179 DEFINE_BINARY_OP(GtF, r->b = a->f > b->f);
180 DEFINE_BINARY_OP(GtI, r->b = a->i > b->i);
182 DEFINE_BINARY_OP(GEqF, r->b = a->f >= b->f);
183 DEFINE_BINARY_OP(GEqI, r->b = a->i >= b->i);
// --- Binary: boolean logic ---
185 DEFINE_BINARY_OP(AndB, r->b = a->b && b->b);
186 DEFINE_BINARY_OP(OrB, r->b = a->b || b->b);
// --- Ternary: select between `b` and `c` using the boolean mask in `a` ---
188 DEFINE_TERNARY_OP(SelF, r->f = plSimdVec4f::Select(a->b, b->f, c->f));
189 DEFINE_TERNARY_OP(SelI, r->i = plSimdVec4i::Select(a->b, b->i, c->i));
190 DEFINE_TERNARY_OP(SelB, r->b = plSimdVec4b::Select(a->b, b->b, c->b));
// Op function that decodes a target register (`r`/`re`) followed by one source register `a`.
// NOTE(review): judging by the name this copies register `a` into the target; the copy itself is not
// visible in this listing - confirm against the full file.
192 void VM_MovX_R_4(
const ByteCodeType*& pByteCode, ExecutionContext& context)
194 DEFINE_TARGET_REGISTER();
195 DEFINE_OP_REGISTER(a);
// Op function that decodes a target register (`r`/`re`).
// NOTE(review): judging by the name this fills the target with a constant; the constant decode and the
// fill are not visible in this listing - confirm against the full file.
204 void VM_MovX_C_4(
const ByteCodeType*& pByteCode, ExecutionContext& context)
// MSVC warning C4189 ("local variable is initialized but not referenced") is switched off for this function.
207 PL_WARNING_DISABLE_MSVC(4189)
209 DEFINE_TARGET_REGISTER();
// Reads one element from a byte stream.
// The bytes at `ref_pData` are interpreted as a `StreamType`, converted to `ValueType`, and `ref_pData` is then
// advanced by `uiStride` bytes so the next call reads the following element.
// Template parameters: ValueType - type handed to the caller; StreamType - type actually stored in the stream.
220 template <
typename ValueType,
typename StreamType>
221 PL_ALWAYS_INLINE ValueType ReadInputData(
const plUInt8*& ref_pData, plUInt32 uiStride)
223 ValueType value = *
reinterpret_cast<const StreamType*
>(ref_pData);
224 ref_pData += uiStride;
// Copies the contents of a processing stream into the register range starting at `r` (end marker `pRe`).
// Template parameters: RegisterType - SIMD register type written to; ValueType - scalar type of one lane;
// StreamType - scalar type stored in the stream.
// NOTE(review): the loop headers and the definition of `uiByteStride` are not visible in this listing;
// presumably the stride is the stream's element stride and the 4-wide paths repeat until `r` reaches `pRe`.
228 template <
typename RegisterType,
typename ValueType,
typename StreamType>
229 void LoadInput(RegisterType* r, RegisterType* pRe,
const plProcessingStream& input, plUInt32 uiNumRemainderInstances)
// Walk the stream as raw bytes so arbitrary strides can be handled.
231 const plUInt8* pInputData = input.
GetData<plUInt8>();
// Fast path: elements are tightly packed and already of the lane type, so four of them are loaded directly.
234 if (uiByteStride ==
sizeof(ValueType) && std::is_same<ValueType, StreamType>::value)
238 r->template Load<4>(
reinterpret_cast<const ValueType*
>(pInputData));
241 pInputData +=
sizeof(ValueType) * 4;
// General path: gather four (possibly strided / differently typed) elements into `x`, then load them at once.
249 x[0] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
250 x[1] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
251 x[2] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
252 x[3] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
254 r->template Load<4>(x);
// Remainder (1-3 instances): only the available elements are read from the stream; missing lanes repeat
// the previously read value, so nothing past the end of the stream is touched.
260 if (uiNumRemainderInstances > 0)
263 x[0] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
264 x[1] = uiNumRemainderInstances >= 2 ? ReadInputData<ValueType, StreamType>(pInputData, uiByteStride) : x[0];
265 x[2] = uiNumRemainderInstances >= 3 ? ReadInputData<ValueType, StreamType>(pInputData, uiByteStride) : x[1];
// The fourth lane is never backed by stream data here, so it duplicates x[2].
267 r->Set(x[0], x[1], x[2], x[2]);
// Writes one element to a byte stream.
// `value` is converted to `StreamType` with static_cast and stored at `ref_pData`, which is then advanced by
// `uiStride` bytes so the next call writes the following element.
271 template <
typename ValueType,
typename StreamType>
272 PL_ALWAYS_INLINE
void StoreOutputData(plUInt8*& ref_pData, plUInt32 uiStride, ValueType value)
274 *
reinterpret_cast<StreamType*
>(ref_pData) =
static_cast<StreamType
>(value);
275 ref_pData += uiStride;
// Copies the register range starting at `r` (end marker `pRe`) into a processing stream.
// Template parameters: RegisterType - SIMD register type read from; ValueType - scalar type of one lane;
// StreamType - scalar type stored in the stream.
// NOTE(review): the loop headers and the definitions of `pOutputData`, `uiByteStride` and `x` are not visible
// in this listing; presumably the 4-wide paths repeat until `r` reaches `pRe`.
278 template <
typename RegisterType,
typename ValueType,
typename StreamType>
279 void StoreOutput(RegisterType* r, RegisterType* pRe,
plProcessingStream& ref_output, plUInt32 uiNumRemainderInstances)
// Fast path: elements are tightly packed and already of the lane type, so four lanes are stored directly.
284 if (uiByteStride ==
sizeof(ValueType) && std::is_same<ValueType, StreamType>::value)
288 r->template Store<4>(
reinterpret_cast<ValueType*
>(pOutputData));
291 pOutputData +=
sizeof(ValueType) * 4;
// General path: spill the four lanes into `x`, then write them one by one with stride and type conversion.
299 r->template Store<4>(x);
301 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[0]);
302 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[1]);
303 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[2]);
304 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[3]);
// Remainder (1-3 instances): spill all four lanes locally but write only the valid ones to the stream.
310 if (uiNumRemainderInstances > 0)
313 r->template Store<4>(x);
315 for (plUInt32 i = 0; i < uiNumRemainderInstances; ++i)
317 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[i]);
// Op function: loads a float input stream into a register.
// Decodes the target register first and the input stream index second, then dispatches on the stream's data type:
// Float streams are loaded as-is, anything else must be Half (checked with a debug assert) and is read as plFloat16.
322 void VM_LoadF_4(
const ByteCodeType*& pByteCode, ExecutionContext& context)
// Instances that do not fill a complete group of four.
324 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
326 DEFINE_TARGET_REGISTER();
// NOTE(review): the statement guarded by this condition is not visible in this listing; presumably it shortens
// the full-width range so the last, partial register is handled by LoadInput's remainder path - confirm.
327 if (uiNumRemainderInstances > 0)
330 const plUInt32 uiInputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
331 auto& input = *context.m_Inputs[uiInputIndex];
333 if (input.
GetDataType() == plProcessingStream::DataType::Float)
335 LoadInput<plSimdVec4f, float, float>(
reinterpret_cast<plSimdVec4f*
>(r),
reinterpret_cast<plSimdVec4f*
>(re), input, uiNumRemainderInstances);
339 PL_ASSERT_DEBUG(input.
GetDataType() == plProcessingStream::DataType::Half,
"Unsupported input type '{}' for LoadF instruction", plProcessingStream::GetDataTypeName(input.
GetDataType()));
340 LoadInput<plSimdVec4f, float, plFloat16>(
reinterpret_cast<plSimdVec4f*
>(r),
reinterpret_cast<plSimdVec4f*
>(re), input, uiNumRemainderInstances);
// Op function: loads an integer input stream into a register.
// Decodes the target register first and the input stream index second, then dispatches on the stream's data type:
// Int and Short streams have dedicated branches, anything else must be Byte (checked with a debug assert).
// Short and Byte elements are read as plInt16 / plInt8 and converted to int lanes.
344 void VM_LoadI_4(
const ByteCodeType*& pByteCode, ExecutionContext& context)
// Instances that do not fill a complete group of four.
346 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
348 DEFINE_TARGET_REGISTER();
// NOTE(review): the statement guarded by this condition is not visible in this listing; presumably it shortens
// the full-width range so the last, partial register is handled by LoadInput's remainder path - confirm.
349 if (uiNumRemainderInstances > 0)
352 const plUInt32 uiInputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
353 auto& input = *context.m_Inputs[uiInputIndex];
355 if (input.
GetDataType() == plProcessingStream::DataType::Int)
357 LoadInput<plSimdVec4i, int, int>(
reinterpret_cast<plSimdVec4i*
>(r),
reinterpret_cast<plSimdVec4i*
>(re), input, uiNumRemainderInstances);
359 else if (input.
GetDataType() == plProcessingStream::DataType::Short)
361 LoadInput<plSimdVec4i, int, plInt16>(
reinterpret_cast<plSimdVec4i*
>(r),
reinterpret_cast<plSimdVec4i*
>(re), input, uiNumRemainderInstances);
365 PL_ASSERT_DEBUG(input.
GetDataType() == plProcessingStream::DataType::Byte,
"Unsupported input type '{}' for LoadI instruction", plProcessingStream::GetDataTypeName(input.
GetDataType()));
366 LoadInput<plSimdVec4i, int, plInt8>(
reinterpret_cast<plSimdVec4i*
>(r),
reinterpret_cast<plSimdVec4i*
>(re), input, uiNumRemainderInstances);
// Op function: stores a register into a float output stream.
// Decodes the output stream index first and the source register second (the reverse of the load ops), then
// dispatches on the stream's data type: Float streams are written as-is, anything else must be Half (checked with
// a debug assert) and is written as plFloat16.
370 void VM_StoreF_4(
const ByteCodeType*& pByteCode, ExecutionContext& context)
// Instances that do not fill a complete group of four.
372 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
374 plUInt32 uiOutputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
375 auto& output = *context.m_Outputs[uiOutputIndex];
// Despite the macro's name, `r`/`re` delimit the register that is read from here.
378 DEFINE_TARGET_REGISTER();
// NOTE(review): the statement guarded by this condition is not visible in this listing; presumably it shortens
// the full-width range so the last, partial register is handled by StoreOutput's remainder path - confirm.
379 if (uiNumRemainderInstances > 0)
382 if (output.GetDataType() == plProcessingStream::DataType::Float)
384 StoreOutput<plSimdVec4f, float, float>(
reinterpret_cast<plSimdVec4f*
>(r),
reinterpret_cast<plSimdVec4f*
>(re), output, uiNumRemainderInstances);
// NOTE(review): the assert message says "input type" although the stream being checked is an output.
388 PL_ASSERT_DEBUG(output.GetDataType() == plProcessingStream::DataType::Half,
"Unsupported input type '{}' for StoreF instruction", plProcessingStream::GetDataTypeName(output.GetDataType()));
389 StoreOutput<plSimdVec4f, float, plFloat16>(
reinterpret_cast<plSimdVec4f*
>(r),
reinterpret_cast<plSimdVec4f*
>(re), output, uiNumRemainderInstances);
// Op function: stores a register into an integer output stream.
// Decodes the output stream index first and the source register second (the reverse of the load ops), then
// dispatches on the stream's data type: Int and Short streams have dedicated branches, anything else must be Byte
// (checked with a debug assert). Short and Byte outputs are narrowed from int lanes to plInt16 / plInt8.
393 void VM_StoreI_4(
const ByteCodeType*& pByteCode, ExecutionContext& context)
// Instances that do not fill a complete group of four.
395 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
397 plUInt32 uiOutputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
398 auto& output = *context.m_Outputs[uiOutputIndex];
// Despite the macro's name, `r`/`re` delimit the register that is read from here.
401 DEFINE_TARGET_REGISTER();
// NOTE(review): the statement guarded by this condition is not visible in this listing; presumably it shortens
// the full-width range so the last, partial register is handled by StoreOutput's remainder path - confirm.
402 if (uiNumRemainderInstances > 0)
405 if (output.GetDataType() == plProcessingStream::DataType::Int)
407 StoreOutput<plSimdVec4i, int, int>(
reinterpret_cast<plSimdVec4i*
>(r),
reinterpret_cast<plSimdVec4i*
>(re), output, uiNumRemainderInstances);
409 else if (output.GetDataType() == plProcessingStream::DataType::Short)
411 StoreOutput<plSimdVec4i, int, plInt16>(
reinterpret_cast<plSimdVec4i*
>(r),
reinterpret_cast<plSimdVec4i*
>(re), output, uiNumRemainderInstances);
// NOTE(review): the assert message says "input type" although the stream being checked is an output.
415 PL_ASSERT_DEBUG(output.GetDataType() == plProcessingStream::DataType::Byte,
"Unsupported input type '{}' for StoreI instruction", plProcessingStream::GetDataTypeName(output.GetDataType()));
416 StoreOutput<plSimdVec4i, int, plInt8>(
reinterpret_cast<plSimdVec4i*
>(r),
reinterpret_cast<plSimdVec4i*
>(re), output, uiNumRemainderInstances);
// Op function: calls one of the functions registered in the execution context.
// It looks up the function by index, decodes the target register, decodes `uiNumArgs` argument registers and
// wraps each one as an array of m_uiNumSimd4Instances elements, then invokes the function with the collected
// inputs, the output and the context's global data.
// NOTE(review): the decoding of `uiFunctionIndex` / `uiNumArgs` and the construction of `inputs` / `output`
// are not visible in this listing; presumably `output` wraps the target register - confirm.
420 void VM_Call(
const ByteCodeType*& pByteCode, ExecutionContext& context)
// MSVC warning C4189 ("local variable is initialized but not referenced") is switched off for this function.
423 PL_WARNING_DISABLE_MSVC(4189)
426 auto& function = *context.m_Functions[uiFunctionIndex];
428 DEFINE_TARGET_REGISTER();
// Reserve the argument list once, up front.
432 inputs.Reserve(uiNumArgs);
433 for (plUInt32 uiArgIndex = 0; uiArgIndex < uiNumArgs; ++uiArgIndex)
// Each iteration decodes one more register index from the byte code.
435 DEFINE_OP_REGISTER(x);
436 inputs.PushBack(plMakeArrayPtr(x, context.m_uiNumSimd4Instances));
441 function.m_Func(inputs, output, *context.m_pGlobalData);
// Compile-time table of op function pointers.
// NOTE(review): the entries are not visible in this listing; presumably the table is indexed by op code - confirm.
446 static constexpr OpFunc s_Simd4Funcs[] = {
// Fails the build if the table and the OpCode enumeration ever get out of sync in length.
603 static_assert(PL_ARRAY_SIZE(s_Simd4Funcs) == plExpressionByteCode::OpCode::Count);
607#undef DEFINE_TARGET_REGISTER
608#undef DEFINE_OP_REGISTER
609#undef DEFINE_CONSTANT
610#undef UNARY_OP_INNER_LOOP
611#undef DEFINE_UNARY_OP
612#undef BINARY_OP_INNER_LOOP
613#undef DEFINE_BINARY_OP
614#undef TERNARY_OP_INNER_LOOP
615#undef DEFINE_TERNARY_OP
This class encapsulates an array and its size. It is recommended to use this class instead of plain ...
Definition ArrayPtr.h:37
Definition ExpressionByteCode.h:10
A hybrid array uses in-place storage to handle the first few elements without any allocation....
Definition HybridArray.h:12
A single stream in a stream group holding contiguous data of a given type.
Definition ProcessingStream.h:8
T * GetWritableData() const
Returns a non-const pointer to the data cast to the type T. Note that no type check is done!
Definition ProcessingStream.h:60
const T * GetData() const
Returns a const pointer to the data cast to the type T. Note that no type check is done!
Definition ProcessingStream.h:50
plUInt16 GetElementStride() const
Returns the stride between two elements of the stream in bytes.
Definition ProcessingStream.h:83
DataType GetDataType() const
Returns the data type of the stream.
Definition ProcessingStream.h:77
A 4-component SIMD vector class.
Definition SimdVec4f.h:8
A SIMD 4-component vector class of signed 32b integers.
Definition SimdVec4i.h:9
Definition ExpressionDeclarations.h:17
static plSimdVec4f Exp(const plSimdVec4f &f)
Definition SimdMath_inl.h:6