Plasma Engine  2.0
Loading...
Searching...
No Matches
ExpressionVMOperations.h
1#pragma once
2
3#include <Foundation/CodeUtils/Expression/ExpressionByteCode.h>
4#include <Foundation/Math/Float16.h>
5#include <Foundation/SimdMath/SimdMath.h>
6
7namespace
8{
// State shared by all opcode handlers in this file while one expression is being executed.
// NOTE(review): the handlers below also access context.m_Inputs, context.m_Outputs and context.m_Functions.
// Those members are not declared in this listing (its embedded numbering jumps from 13 to 17) — confirm their
// declarations against the original header.
9 struct ExecutionContext
10 {
11 plExpression::Register* m_pRegisters = nullptr; // base pointer; handlers address a register as m_pRegisters + index * m_uiNumSimd4Instances
12 plUInt32 m_uiNumInstances = 0; // the load/store handlers use its low two bits as the count of instances in a trailing partial element
13 plUInt32 m_uiNumSimd4Instances = 0; // number of consecutive Register entries that make up one register
17 const plExpression::GlobalData* m_pGlobalData = nullptr; // dereferenced and passed to the callee by VM_Call
18 };
19
// Element type of the byte code stream that the handlers decode their operands from.
20 using ByteCodeType = plExpressionByteCode::StorageType;
// Signature shared by every opcode handler. The byte code pointer is taken by reference; handlers pass it to the
// plExpressionByteCode::Get* helpers to fetch their operands (presumably advancing it — confirm in ExpressionByteCode.h).
21 using OpFunc = void (*)(const ByteCodeType*& pByteCode, ExecutionContext& context);
22
// Declares two locals at the expansion site:
//   r  - first entry of the register whose index is decoded from the byte code
//   re - one past that register's last entry (r + context.m_uiNumSimd4Instances)
// Requires `pByteCode` and `context` to be in scope.
// `re` is handed to PL_IGNORE_UNUSED because not every user reads it (VM_Call only uses `r`).
23#define DEFINE_TARGET_REGISTER() \
24 plExpression::Register* r = context.m_pRegisters + plExpressionByteCode::GetRegisterIndex(pByteCode) * context.m_uiNumSimd4Instances; \
25 plExpression::Register* re = r + context.m_uiNumSimd4Instances; \
26 PL_IGNORE_UNUSED(re);
27
// Declares `name` as a read-only pointer to the first entry of an operand register; the register index is decoded
// from the byte code. Requires `pByteCode` and `context` to be in scope.
28#define DEFINE_OP_REGISTER(name) \
29 const plExpression::Register* name = context.m_pRegisters + plExpressionByteCode::GetRegisterIndex(pByteCode) * context.m_uiNumSimd4Instances;
30
// Declares a constant operand decoded from the byte code:
//   <name>Raw - the raw 32 bit value at *pByteCode, captured before GetConstant is called
//   tmp       - the Register returned by plExpressionByteCode::GetConstant
//   name      - pointer to `tmp`, so the constant can be used with the same `->` syntax as a register operand
// Because the temporary is always called `tmp`, the macro can only be expanded once per scope.
31#define DEFINE_CONSTANT(name) \
32 const plUInt32 PL_PP_CONCAT(name, Raw) = *pByteCode; \
33 PL_IGNORE_UNUSED(PL_PP_CONCAT(name, Raw)); \
34 const plExpression::Register tmp = plExpressionByteCode::GetConstant(pByteCode); \
35 const plExpression::Register* name = &tmp;
36
// Loop body of a unary op: runs `code` for the current entry, then advances the target (r) and operand (a) pointers.
37#define UNARY_OP_INNER_LOOP(code) \
38 code; \
39 ++r; \
40 ++a;
41
// Generates the handler `<name>_4` for a unary operation.
// The handler decodes the target register and one operand register from the byte code (in that order) and applies
// `code` to every entry of the register. `code` may refer to `r` (target entry) and `a` (operand entry).
42#define DEFINE_UNARY_OP(name, code) \
43 void PL_PP_CONCAT(name, _4)(const ByteCodeType*& pByteCode, ExecutionContext& context) \
44 { \
45 DEFINE_TARGET_REGISTER(); \
46 DEFINE_OP_REGISTER(a); \
47 while (r != re) \
48 { \
49 UNARY_OP_INNER_LOOP(code) \
50 } \
51 }
52
// Loop body of a binary op: runs `code`, then advances r and a. The right operand pointer b is only advanced when it
// refers to a register; for a constant right operand it keeps pointing at the same value for every entry.
53#define BINARY_OP_INNER_LOOP(code) \
54 code; \
55 ++r; \
56 ++a; \
57 if constexpr (RightIsConstant == false) \
58 { \
59 ++b; \
60 }
61
// Generates the handler template `<name>_4<RightIsConstant>` for a binary operation.
// Operands are decoded from the byte code in this order: target register, left operand register, then either a
// constant (RightIsConstant == true) or a right operand register (RightIsConstant == false).
// `code` may refer to `r`, `a`, `b` and `bRaw`. Note that `bRaw` (the raw 32 bit constant) is only assigned in the
// RightIsConstant == true instantiation, so ops whose code reads it must not be instantiated with false.
62#define DEFINE_BINARY_OP(name, code) \
63 template <bool RightIsConstant> \
64 void PL_PP_CONCAT(name, _4)(const ByteCodeType*& pByteCode, ExecutionContext& context) \
65 { \
66 DEFINE_TARGET_REGISTER(); \
67 DEFINE_OP_REGISTER(a); \
68 plUInt32 bRaw; \
69 PL_IGNORE_UNUSED(bRaw); \
70 plExpression::Register bConstant; \
71 const plExpression::Register* b; \
72 PL_IGNORE_UNUSED(b); \
73 if constexpr (RightIsConstant) \
74 { \
75 bRaw = *pByteCode; \
76 bConstant = plExpressionByteCode::GetConstant(pByteCode); \
77 b = &bConstant; \
78 } \
79 else \
80 { \
81 b = context.m_pRegisters + plExpressionByteCode::GetRegisterIndex(pByteCode) * context.m_uiNumSimd4Instances; \
82 } \
83 while (r != re) \
84 { \
85 BINARY_OP_INNER_LOOP(code) \
86 } \
87 }
88
// Loop body of a ternary op: runs `code`, then advances the target pointer and all three operand pointers.
89#define TERNARY_OP_INNER_LOOP(code) \
90 code; \
91 ++r; \
92 ++a; \
93 ++b; \
94 ++c;
95
// Generates the handler `<name>_4` for a ternary operation.
// The handler decodes the target register followed by three operand registers (a, b, c) from the byte code and
// applies `code` to every entry of the register.
96#define DEFINE_TERNARY_OP(name, code) \
97 void PL_PP_CONCAT(name, _4)(const ByteCodeType*& pByteCode, ExecutionContext& context) \
98 { \
99 DEFINE_TARGET_REGISTER(); \
100 DEFINE_OP_REGISTER(a); \
101 DEFINE_OP_REGISTER(b); \
102 DEFINE_OP_REGISTER(c); \
103 while (r != re) \
104 { \
105 TERNARY_OP_INNER_LOOP(code) \
106 } \
107 }
108
// Handler definitions. Each line expands to a function (or function template) named <first argument>_4.
// The register members used in the expressions are f (float vector), i (integer vector) and b (boolean vector).

// --- Unary: absolute value and square root
109 DEFINE_UNARY_OP(AbsF, r->f = a->f.Abs());
110 DEFINE_UNARY_OP(AbsI, r->i = a->i.Abs());
111 DEFINE_UNARY_OP(SqrtF, r->f = a->f.GetSqrt());
112
// --- Unary: exponentials and logarithms
113 DEFINE_UNARY_OP(ExpF, r->f = plSimdMath::Exp(a->f));
114 DEFINE_UNARY_OP(LnF, r->f = plSimdMath::Ln(a->f));
115 DEFINE_UNARY_OP(Log2F, r->f = plSimdMath::Log2(a->f));
116 DEFINE_UNARY_OP(Log2I, r->i = plSimdMath::Log2i(a->i));
117 DEFINE_UNARY_OP(Log10F, r->f = plSimdMath::Log10(a->f));
118 DEFINE_UNARY_OP(Pow2F, r->f = plSimdMath::Pow2(a->f));
119
// --- Unary: trigonometry
120 DEFINE_UNARY_OP(SinF, r->f = plSimdMath::Sin(a->f));
121 DEFINE_UNARY_OP(CosF, r->f = plSimdMath::Cos(a->f));
122 DEFINE_UNARY_OP(TanF, r->f = plSimdMath::Tan(a->f));
123
124 DEFINE_UNARY_OP(ASinF, r->f = plSimdMath::ASin(a->f));
125 DEFINE_UNARY_OP(ACosF, r->f = plSimdMath::ACos(a->f));
126 DEFINE_UNARY_OP(ATanF, r->f = plSimdMath::ATan(a->f));
127
// --- Unary: rounding
128 DEFINE_UNARY_OP(RoundF, r->f = a->f.Round());
129 DEFINE_UNARY_OP(FloorF, r->f = a->f.Floor());
130 DEFINE_UNARY_OP(CeilF, r->f = a->f.Ceil());
131 DEFINE_UNARY_OP(TruncF, r->f = a->f.Trunc());
132
// --- Unary: bitwise / logical negation
133 DEFINE_UNARY_OP(NotI, r->i = ~a->i);
134 DEFINE_UNARY_OP(NotB, r->b = !a->b);
135
// --- Unary: int <-> float conversion (float to int truncates)
136 DEFINE_UNARY_OP(IToF, r->f = a->i.ToFloat());
137 DEFINE_UNARY_OP(FToI, r->i = plSimdVec4i::Truncate(a->f));
138
// --- Binary: arithmetic
139 DEFINE_BINARY_OP(AddF, r->f = a->f + b->f);
140 DEFINE_BINARY_OP(AddI, r->i = a->i + b->i);
141
142 DEFINE_BINARY_OP(SubF, r->f = a->f - b->f);
143 DEFINE_BINARY_OP(SubI, r->i = a->i - b->i);
144
145 DEFINE_BINARY_OP(MulF, r->f = a->f.CompMul(b->f));
146 DEFINE_BINARY_OP(MulI, r->i = a->i.CompMul(b->i));
147
148 DEFINE_BINARY_OP(DivF, r->f = a->f.CompDiv(b->f));
149 DEFINE_BINARY_OP(DivI, r->i = a->i.CompDiv(b->i));
150
151 DEFINE_BINARY_OP(MinF, r->f = a->f.CompMin(b->f));
152 DEFINE_BINARY_OP(MinI, r->i = a->i.CompMin(b->i));
153
154 DEFINE_BINARY_OP(MaxF, r->f = a->f.CompMax(b->f));
155 DEFINE_BINARY_OP(MaxI, r->i = a->i.CompMax(b->i));
156
// --- Binary: shifts and bitwise logic
// ShlI / ShrI take the shift amount from a register operand; the _C variants shift by the raw constant (bRaw) and
// are therefore only meaningful when instantiated with RightIsConstant == true.
157 DEFINE_BINARY_OP(ShlI, r->i = a->i << b->i);
158 DEFINE_BINARY_OP(ShrI, r->i = a->i >> b->i);
159 DEFINE_BINARY_OP(ShlI_C, r->i = a->i << bRaw);
160 DEFINE_BINARY_OP(ShrI_C, r->i = a->i >> bRaw);
161 DEFINE_BINARY_OP(AndI, r->i = a->i & b->i);
162 DEFINE_BINARY_OP(XorI, r->i = a->i ^ b->i);
163 DEFINE_BINARY_OP(OrI, r->i = a->i | b->i);
164
// --- Binary: comparisons (result is written to the boolean member)
165 DEFINE_BINARY_OP(EqF, r->b = a->f == b->f);
166 DEFINE_BINARY_OP(EqI, r->b = a->i == b->i);
167 DEFINE_BINARY_OP(EqB, r->b = a->b == b->b);
168
169 DEFINE_BINARY_OP(NEqF, r->b = a->f != b->f);
170 DEFINE_BINARY_OP(NEqI, r->b = a->i != b->i);
171 DEFINE_BINARY_OP(NEqB, r->b = a->b != b->b);
172
173 DEFINE_BINARY_OP(LtF, r->b = a->f < b->f);
174 DEFINE_BINARY_OP(LtI, r->b = a->i < b->i);
175
176 DEFINE_BINARY_OP(LEqF, r->b = a->f <= b->f);
177 DEFINE_BINARY_OP(LEqI, r->b = a->i <= b->i);
178
179 DEFINE_BINARY_OP(GtF, r->b = a->f > b->f);
180 DEFINE_BINARY_OP(GtI, r->b = a->i > b->i);
181
182 DEFINE_BINARY_OP(GEqF, r->b = a->f >= b->f);
183 DEFINE_BINARY_OP(GEqI, r->b = a->i >= b->i);
184
// --- Binary: boolean logic
185 DEFINE_BINARY_OP(AndB, r->b = a->b && b->b);
186 DEFINE_BINARY_OP(OrB, r->b = a->b || b->b);
187
// --- Ternary: select; operand a supplies the boolean mask, b and c the two candidate values
188 DEFINE_TERNARY_OP(SelF, r->f = plSimdVec4f::Select(a->b, b->f, c->f));
189 DEFINE_TERNARY_OP(SelI, r->i = plSimdVec4i::Select(a->b, b->i, c->i));
190 DEFINE_TERNARY_OP(SelB, r->b = plSimdVec4b::Select(a->b, b->b, c->b));
191
192 void VM_MovX_R_4(const ByteCodeType*& pByteCode, ExecutionContext& context)
193 {
194 DEFINE_TARGET_REGISTER();
195 DEFINE_OP_REGISTER(a);
196 while (r != re)
197 {
198 r->i = a->i;
199 ++r;
200 ++a;
201 }
202 }
203
204 void VM_MovX_C_4(const ByteCodeType*& pByteCode, ExecutionContext& context)
205 {
206 PL_WARNING_PUSH()
207 PL_WARNING_DISABLE_MSVC(4189)
208
209 DEFINE_TARGET_REGISTER();
210 DEFINE_CONSTANT(a);
211 while (r != re)
212 {
213 r->i = a->i;
214 ++r;
215 }
216
217 PL_WARNING_POP()
218 }
219
220 template <typename ValueType, typename StreamType>
221 PL_ALWAYS_INLINE ValueType ReadInputData(const plUInt8*& ref_pData, plUInt32 uiStride)
222 {
223 ValueType value = *reinterpret_cast<const StreamType*>(ref_pData);
224 ref_pData += uiStride;
225 return value;
226 }
227
228 template <typename RegisterType, typename ValueType, typename StreamType>
229 void LoadInput(RegisterType* r, RegisterType* pRe, const plProcessingStream& input, plUInt32 uiNumRemainderInstances)
230 {
231 const plUInt8* pInputData = input.GetData<plUInt8>();
232 const plUInt32 uiByteStride = input.GetElementStride();
233
234 if (uiByteStride == sizeof(ValueType) && std::is_same<ValueType, StreamType>::value)
235 {
236 while (r != pRe)
237 {
238 r->template Load<4>(reinterpret_cast<const ValueType*>(pInputData));
239
240 ++r;
241 pInputData += sizeof(ValueType) * 4;
242 }
243 }
244 else
245 {
246 ValueType x[4] = {};
247 while (r != pRe)
248 {
249 x[0] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
250 x[1] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
251 x[2] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
252 x[3] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
253
254 r->template Load<4>(x);
255
256 ++r;
257 }
258 }
259
260 if (uiNumRemainderInstances > 0)
261 {
262 ValueType x[3];
263 x[0] = ReadInputData<ValueType, StreamType>(pInputData, uiByteStride);
264 x[1] = uiNumRemainderInstances >= 2 ? ReadInputData<ValueType, StreamType>(pInputData, uiByteStride) : x[0];
265 x[2] = uiNumRemainderInstances >= 3 ? ReadInputData<ValueType, StreamType>(pInputData, uiByteStride) : x[1];
266
267 r->Set(x[0], x[1], x[2], x[2]);
268 }
269 }
270
271 template <typename ValueType, typename StreamType>
272 PL_ALWAYS_INLINE void StoreOutputData(plUInt8*& ref_pData, plUInt32 uiStride, ValueType value)
273 {
274 *reinterpret_cast<StreamType*>(ref_pData) = static_cast<StreamType>(value);
275 ref_pData += uiStride;
276 }
277
278 template <typename RegisterType, typename ValueType, typename StreamType>
279 void StoreOutput(RegisterType* r, RegisterType* pRe, plProcessingStream& ref_output, plUInt32 uiNumRemainderInstances)
280 {
281 plUInt8* pOutputData = ref_output.GetWritableData<plUInt8>();
282 const plUInt32 uiByteStride = ref_output.GetElementStride();
283
284 if (uiByteStride == sizeof(ValueType) && std::is_same<ValueType, StreamType>::value)
285 {
286 while (r != pRe)
287 {
288 r->template Store<4>(reinterpret_cast<ValueType*>(pOutputData));
289
290 ++r;
291 pOutputData += sizeof(ValueType) * 4;
292 }
293 }
294 else
295 {
296 ValueType x[4] = {};
297 while (r != pRe)
298 {
299 r->template Store<4>(x);
300
301 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[0]);
302 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[1]);
303 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[2]);
304 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[3]);
305
306 ++r;
307 }
308 }
309
310 if (uiNumRemainderInstances > 0)
311 {
312 ValueType x[4];
313 r->template Store<4>(x);
314
315 for (plUInt32 i = 0; i < uiNumRemainderInstances; ++i)
316 {
317 StoreOutputData<ValueType, StreamType>(pOutputData, uiByteStride, x[i]);
318 }
319 }
320 }
321
322 void VM_LoadF_4(const ByteCodeType*& pByteCode, ExecutionContext& context)
323 {
324 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
325
326 DEFINE_TARGET_REGISTER();
327 if (uiNumRemainderInstances > 0)
328 --re;
329
330 const plUInt32 uiInputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
331 auto& input = *context.m_Inputs[uiInputIndex];
332
333 if (input.GetDataType() == plProcessingStream::DataType::Float)
334 {
335 LoadInput<plSimdVec4f, float, float>(reinterpret_cast<plSimdVec4f*>(r), reinterpret_cast<plSimdVec4f*>(re), input, uiNumRemainderInstances);
336 }
337 else
338 {
339 PL_ASSERT_DEBUG(input.GetDataType() == plProcessingStream::DataType::Half, "Unsupported input type '{}' for LoadF instruction", plProcessingStream::GetDataTypeName(input.GetDataType()));
340 LoadInput<plSimdVec4f, float, plFloat16>(reinterpret_cast<plSimdVec4f*>(r), reinterpret_cast<plSimdVec4f*>(re), input, uiNumRemainderInstances);
341 }
342 }
343
344 void VM_LoadI_4(const ByteCodeType*& pByteCode, ExecutionContext& context)
345 {
346 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
347
348 DEFINE_TARGET_REGISTER();
349 if (uiNumRemainderInstances > 0)
350 --re;
351
352 const plUInt32 uiInputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
353 auto& input = *context.m_Inputs[uiInputIndex];
354
355 if (input.GetDataType() == plProcessingStream::DataType::Int)
356 {
357 LoadInput<plSimdVec4i, int, int>(reinterpret_cast<plSimdVec4i*>(r), reinterpret_cast<plSimdVec4i*>(re), input, uiNumRemainderInstances);
358 }
359 else if (input.GetDataType() == plProcessingStream::DataType::Short)
360 {
361 LoadInput<plSimdVec4i, int, plInt16>(reinterpret_cast<plSimdVec4i*>(r), reinterpret_cast<plSimdVec4i*>(re), input, uiNumRemainderInstances);
362 }
363 else
364 {
365 PL_ASSERT_DEBUG(input.GetDataType() == plProcessingStream::DataType::Byte, "Unsupported input type '{}' for LoadI instruction", plProcessingStream::GetDataTypeName(input.GetDataType()));
366 LoadInput<plSimdVec4i, int, plInt8>(reinterpret_cast<plSimdVec4i*>(r), reinterpret_cast<plSimdVec4i*>(re), input, uiNumRemainderInstances);
367 }
368 }
369
370 void VM_StoreF_4(const ByteCodeType*& pByteCode, ExecutionContext& context)
371 {
372 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
373
374 plUInt32 uiOutputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
375 auto& output = *context.m_Outputs[uiOutputIndex];
376
377 // actually not target register but operand register in the is case, but we need something to loop over so we use the target register macro here.
378 DEFINE_TARGET_REGISTER();
379 if (uiNumRemainderInstances > 0)
380 --re;
381
382 if (output.GetDataType() == plProcessingStream::DataType::Float)
383 {
384 StoreOutput<plSimdVec4f, float, float>(reinterpret_cast<plSimdVec4f*>(r), reinterpret_cast<plSimdVec4f*>(re), output, uiNumRemainderInstances);
385 }
386 else
387 {
388 PL_ASSERT_DEBUG(output.GetDataType() == plProcessingStream::DataType::Half, "Unsupported input type '{}' for StoreF instruction", plProcessingStream::GetDataTypeName(output.GetDataType()));
389 StoreOutput<plSimdVec4f, float, plFloat16>(reinterpret_cast<plSimdVec4f*>(r), reinterpret_cast<plSimdVec4f*>(re), output, uiNumRemainderInstances);
390 }
391 }
392
393 void VM_StoreI_4(const ByteCodeType*& pByteCode, ExecutionContext& context)
394 {
395 const plUInt32 uiNumRemainderInstances = context.m_uiNumInstances & 0x3;
396
397 plUInt32 uiOutputIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
398 auto& output = *context.m_Outputs[uiOutputIndex];
399
400 // actually not target register but operand register in the is case, but we need something to loop over so we use the target register macro here.
401 DEFINE_TARGET_REGISTER();
402 if (uiNumRemainderInstances > 0)
403 --re;
404
405 if (output.GetDataType() == plProcessingStream::DataType::Int)
406 {
407 StoreOutput<plSimdVec4i, int, int>(reinterpret_cast<plSimdVec4i*>(r), reinterpret_cast<plSimdVec4i*>(re), output, uiNumRemainderInstances);
408 }
409 else if (output.GetDataType() == plProcessingStream::DataType::Short)
410 {
411 StoreOutput<plSimdVec4i, int, plInt16>(reinterpret_cast<plSimdVec4i*>(r), reinterpret_cast<plSimdVec4i*>(re), output, uiNumRemainderInstances);
412 }
413 else
414 {
415 PL_ASSERT_DEBUG(output.GetDataType() == plProcessingStream::DataType::Byte, "Unsupported input type '{}' for StoreI instruction", plProcessingStream::GetDataTypeName(output.GetDataType()));
416 StoreOutput<plSimdVec4i, int, plInt8>(reinterpret_cast<plSimdVec4i*>(r), reinterpret_cast<plSimdVec4i*>(re), output, uiNumRemainderInstances);
417 }
418 }
419
420 void VM_Call(const ByteCodeType*& pByteCode, ExecutionContext& context)
421 {
422 PL_WARNING_PUSH()
423 PL_WARNING_DISABLE_MSVC(4189)
424
425 plUInt32 uiFunctionIndex = plExpressionByteCode::GetRegisterIndex(pByteCode);
426 auto& function = *context.m_Functions[uiFunctionIndex];
427
428 DEFINE_TARGET_REGISTER();
429 plUInt32 uiNumArgs = plExpressionByteCode::GetFunctionArgCount(pByteCode);
430
431 plHybridArray<plArrayPtr<const plExpression::Register>, 32> inputs;
432 inputs.Reserve(uiNumArgs);
433 for (plUInt32 uiArgIndex = 0; uiArgIndex < uiNumArgs; ++uiArgIndex)
434 {
435 DEFINE_OP_REGISTER(x);
436 inputs.PushBack(plMakeArrayPtr(x, context.m_uiNumSimd4Instances));
437 }
438
439 plExpression::Output output = plMakeArrayPtr(r, context.m_uiNumSimd4Instances);
440
441 function.m_Func(inputs, output, *context.m_pGlobalData);
442
443 PL_WARNING_POP()
444 }
445
// Table of opcode handlers. Each entry is annotated with the opcode it belongs to, and the static_assert below
// requires the table to have exactly plExpressionByteCode::OpCode::Count entries, so the order of the entries must
// mirror the OpCode enumeration (presumably the table is indexed by opcode value — the lookup is not part of this file).
// nullptr entries stand for Nop and for the First*/Last* range markers, which have no handler.
// Binary ops appear twice: instantiated with <false> for register/register (_RR) and with <true> for
// register/constant (_RC) operands.
446 static constexpr OpFunc s_Simd4Funcs[] = {
447 nullptr, // Nop,
448
449 nullptr, // FirstUnary,
450
451 &AbsF_4, // AbsF_R,
452 &AbsI_4, // AbsI_R,
453 &SqrtF_4, // SqrtF_R,
454
455 &ExpF_4, // ExpF_R,
456 &LnF_4, // LnF_R,
457 &Log2F_4, // Log2F_R,
458 &Log2I_4, // Log2I_R,
459 &Log10F_4, // Log10F_R,
460 &Pow2F_4, // Pow2F_R,
461
462 &SinF_4, // SinF_R,
463 &CosF_4, // CosF_R,
464 &TanF_4, // TanF_R,
465
466 &ASinF_4, // ASinF_R,
467 &ACosF_4, // ACosF_R,
468 &ATanF_4, // ATanF_R,
469
470 &RoundF_4, // RoundF_R,
471 &FloorF_4, // FloorF_R,
472 &CeilF_4, // CeilF_R,
473 &TruncF_4, // TruncF_R,
474
475 &NotI_4, // NotI_R,
476 &NotB_4, // NotB_R,
477
478 &IToF_4, // IToF_R,
479 &FToI_4, // FToI_R,
480
481 nullptr, // LastUnary,
482 nullptr, // FirstBinary,
483
484 &AddF_4<false>, // AddF_RR,
485 &AddI_4<false>, // AddI_RR,
486
487 &SubF_4<false>, // SubF_RR,
488 &SubI_4<false>, // SubI_RR,
489
490 &MulF_4<false>, // MulF_RR,
491 &MulI_4<false>, // MulI_RR,
492
493 &DivF_4<false>, // DivF_RR,
494 &DivI_4<false>, // DivI_RR,
495
496 &MinF_4<false>, // MinF_RR,
497 &MinI_4<false>, // MinI_RR,
498
499 &MaxF_4<false>, // MaxF_RR,
500 &MaxI_4<false>, // MaxI_RR,
501
502 &ShlI_4<false>, // ShlI_RR,
503 &ShrI_4<false>, // ShrI_RR,
504 &AndI_4<false>, // AndI_RR,
505 &XorI_4<false>, // XorI_RR,
506 &OrI_4<false>, // OrI_RR,
507
508 &EqF_4<false>, // EqF_RR,
509 &EqI_4<false>, // EqI_RR,
510 &EqB_4<false>, // EqB_RR,
511
512 &NEqF_4<false>, // NEqF_RR,
513 &NEqI_4<false>, // NEqI_RR,
514 &NEqB_4<false>, // NEqB_RR,
515
516 &LtF_4<false>, // LtF_RR,
517 &LtI_4<false>, // LtI_RR,
518
519 &LEqF_4<false>, // LEqF_RR,
520 &LEqI_4<false>, // LEqI_RR,
521
522 &GtF_4<false>, // GtF_RR,
523 &GtI_4<false>, // GtI_RR,
524
525 &GEqF_4<false>, // GEqF_RR,
526 &GEqI_4<false>, // GEqI_RR,
527
528 &AndB_4<false>, // AndB_RR,
529 &OrB_4<false>, // OrB_RR,
530
531 nullptr, // LastBinary,
532 nullptr, // FirstBinaryWithConstant,
533
534 &AddF_4<true>, // AddF_RC,
535 &AddI_4<true>, // AddI_RC,
536
537 &SubF_4<true>, // SubF_RC,
538 &SubI_4<true>, // SubI_RC,
539
540 &MulF_4<true>, // MulF_RC,
541 &MulI_4<true>, // MulI_RC,
542
543 &DivF_4<true>, // DivF_RC,
544 &DivI_4<true>, // DivI_RC,
545
546 &MinF_4<true>, // MinF_RC,
547 &MinI_4<true>, // MinI_RC,
548
549 &MaxF_4<true>, // MaxF_RC,
550 &MaxI_4<true>, // MaxI_RC,
551
552 &ShlI_C_4<true>, // ShlI_RC,
553 &ShrI_C_4<true>, // ShrI_RC,
554 &AndI_4<true>, // AndI_RC,
555 &XorI_4<true>, // XorI_RC,
556 &OrI_4<true>, // OrI_RC,
557
558 &EqF_4<true>, // EqF_RC,
559 &EqI_4<true>, // EqI_RC,
560 &EqB_4<true>, // EqB_RC
561
562 &NEqF_4<true>, // NEqF_RC,
563 &NEqI_4<true>, // NEqI_RC,
564 &NEqB_4<true>, // NEqB_RC
565
566 &LtF_4<true>, // LtF_RC,
567 &LtI_4<true>, // LtI_RC
568
569 &LEqF_4<true>, // LEqF_RC,
570 &LEqI_4<true>, // LEqI_RC
571
572 &GtF_4<true>, // GtF_RC,
573 &GtI_4<true>, // GtI_RC
574
575 &GEqF_4<true>, // GEqF_RC,
576 &GEqI_4<true>, // GEqI_RC
577
578 &AndB_4<true>, // AndB_RC,
579 &OrB_4<true>, // OrB_RC,
580
581 nullptr, // LastBinaryWithConstant,
582 nullptr, // FirstTernary,
583
584 &SelF_4, // SelF_RRR,
585 &SelI_4, // SelI_RRR,
586 &SelB_4, // SelB_RRR,
587
588 nullptr, // LastTernary,
589 nullptr, // FirstSpecial,
590
591 &VM_MovX_R_4, // MovX_R,
592 &VM_MovX_C_4, // MovX_C,
593 &VM_LoadF_4, // LoadF,
594 &VM_LoadI_4, // LoadI,
595 &VM_StoreF_4, // StoreF,
596 &VM_StoreI_4, // StoreI,
597
598 &VM_Call, // Call,
599
600 nullptr, // LastSpecial,
601 };
602
// Compile-time guard: adding or removing an opcode without updating the table above fails the build.
603 static_assert(PL_ARRAY_SIZE(s_Simd4Funcs) == plExpressionByteCode::OpCode::Count);
604
605} // namespace
606
607#undef DEFINE_TARGET_REGISTER
608#undef DEFINE_OP_REGISTER
609#undef DEFINE_CONSTANT
610#undef UNARY_OP_INNER_LOOP
611#undef DEFINE_UNARY_OP
612#undef BINARY_OP_INNER_LOOP
613#undef DEFINE_BINARY_OP
614#undef TERNARY_OP_INNER_LOOP
615#undef DEFINE_TERNARY_OP
This class encapsulates an array and its size. It is recommended to use this class instead of plain ...
Definition ArrayPtr.h:37
Definition ExpressionByteCode.h:10
A hybrid array uses in-place storage to handle the first few elements without any allocation....
Definition HybridArray.h:12
A single stream in a stream group holding contiguous data of a given type.
Definition ProcessingStream.h:8
T * GetWritableData() const
Returns a non-const pointer to the data casted to the type T, note that no type check is done!
Definition ProcessingStream.h:60
const T * GetData() const
Returns a const pointer to the data casted to the type T, note that no type check is done!
Definition ProcessingStream.h:50
plUInt16 GetElementStride() const
Returns the stride between two elements of the stream in bytes.
Definition ProcessingStream.h:83
DataType GetDataType() const
Returns the data type of the stream.
Definition ProcessingStream.h:77
A 4-component SIMD vector class.
Definition SimdVec4f.h:8
A SIMD 4-component vector class of signed 32b integers.
Definition SimdVec4i.h:9
Definition ExpressionDeclarations.h:17
static plSimdVec4f Exp(const plSimdVec4f &f)
Definition SimdMath_inl.h:6