Nebula
Loading...
Searching...
No Matches
vec4.h
Go to the documentation of this file.
1#pragma once
2//------------------------------------------------------------------------------
14#include "core/types.h"
15#include "math/scalar.h"
16#include "vec3.h"
17
18//------------------------------------------------------------------------------
19namespace Math
20{
21struct mat4;
22
24{
25public:
27 vec4() = default;
31 explicit vec4(scalar v);
33 vec4(const vec4& rhs) = default;
35 vec4(const vec3& rhs, float w);
37 vec4(const __m128& rhs);
38
40 void operator=(const __m128 &rhs);
42 void operator+=(const vec4 &rhs);
44 void operator-=(const vec4 &rhs);
46 void operator*=(scalar s);
48 void operator*=(const vec4& rhs);
50 void operator/=(const vec4& rhs);
52 bool operator==(const vec4 &rhs) const;
54 bool operator!=(const vec4 &rhs) const;
55
57 void load(const scalar* ptr);
59 void loadu(const scalar* ptr);
61 void store(scalar* ptr) const;
63 void storeu(scalar* ptr) const;
65 void store3(scalar* ptr) const;
67 void storeu3(scalar* ptr) const;
69 void stream(scalar* ptr) const;
70
72 void load_float3(const void* ptr, float w);
74 void load_ubyte4n(const void* ptr);
76 void load_byte4n(const void* ptr);
78 void set(scalar x, scalar y, scalar z, scalar w);
79
81 template<int X, int Y, int Z, int W>
82 vec4 swizzle(const vec4& v);
83
85 scalar& operator[](const int index);
87 scalar operator[](const int index) const;
89 operator vec3() const { return Math::vec3(this->x, this->y, this->z); }
90
91 union
92 {
93 struct
94 {
95 float x, y, z, w;
96 };
97 __m128 vec;
98 float v[4];
99 };
100};
101
102//------------------------------------------------------------------------------
105__forceinline
107{
108 this->vec = _mm_setr_ps(x, y, z, w);
109}
110
111//------------------------------------------------------------------------------
114__forceinline
116{
117 this->vec = _mm_set1_ps(v);
118}
119
120//------------------------------------------------------------------------------
123__forceinline
124vec4::vec4(const __m128& rhs)
125{
126 this->vec = rhs;
127}
128
129//------------------------------------------------------------------------------
132__forceinline
133vec4::vec4(const vec3& rhs, float w)
134{
135 this->vec = rhs.vec;
136 this->w = w;
137}
138
139//------------------------------------------------------------------------------
142__forceinline void
143vec4::operator=(const __m128& rhs)
144{
145 this->vec = rhs;
146}
147
148//------------------------------------------------------------------------------
151__forceinline bool
152vec4::operator==(const vec4& rhs) const
153{
154 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
155 return ((_mm_movemask_ps(vTemp) == 0x0f) != 0);
156}
157
158//------------------------------------------------------------------------------
161__forceinline bool
162vec4::operator!=(const vec4& rhs) const
163{
164 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
165 return ((_mm_movemask_ps(vTemp) == 0x0f) == 0);
166}
167
168//------------------------------------------------------------------------------
172__forceinline void
174{
175 this->vec = _mm_load_ps(ptr);
176}
177
178//------------------------------------------------------------------------------
182__forceinline void
184{
185 this->vec = _mm_loadu_ps(ptr);
186}
187
188//------------------------------------------------------------------------------
192__forceinline void
194{
195 _mm_store_ps(ptr, this->vec);
196}
197
198//------------------------------------------------------------------------------
202__forceinline void
204{
205 _mm_storeu_ps(ptr, this->vec);
206}
207
208//------------------------------------------------------------------------------
211__forceinline void
213{
214 __m128 vv = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
215 _mm_storel_epi64(reinterpret_cast<__m128i*>(ptr), _mm_castps_si128(this->vec));
216 _mm_store_ss(&ptr[2], vv);
217}
218
219//------------------------------------------------------------------------------
222__forceinline void
224{
225 __m128 t1 = _mm_permute_ps(this->vec, _MM_SHUFFLE(1, 1, 1, 1));
226 __m128 t2 = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
227 _mm_store_ss(&ptr[0], this->vec);
228 _mm_store_ss(&ptr[1], t1);
229 _mm_store_ss(&ptr[2], t2);
230}
231
232//------------------------------------------------------------------------------
235__forceinline void
237{
238 this->store(ptr);
239}
240
241//------------------------------------------------------------------------------
244__forceinline void
245vec4::load_float3(const void* ptr, float w)
246{
247 float* source = (float*)ptr;
248 this->vec = _mm_load_ps(source);
249 this->w = w;
250}
251
252
253//------------------------------------------------------------------------------
256__forceinline void
258{
259 this->vec = _mm_mul_ps(this->vec, rhs.vec);
260}
261
262//------------------------------------------------------------------------------
265__forceinline void
267{
268 this->vec = _mm_div_ps(this->vec, rhs.vec);
269}
270
271//------------------------------------------------------------------------------
274__forceinline void
276{
277 this->vec = _mm_add_ps(this->vec, rhs.vec);
278}
279
280//------------------------------------------------------------------------------
283__forceinline void
285{
286 this->vec = _mm_sub_ps(this->vec, rhs.vec);
287}
288
289//------------------------------------------------------------------------------
292__forceinline void
294{
295 __m128 temp = _mm_set1_ps(s);
296 this->vec = _mm_mul_ps(this->vec, temp);
297}
298
299//------------------------------------------------------------------------------
302__forceinline void
304{
305 this->vec = _mm_setr_ps(x, y, z, w);
306}
307
308//------------------------------------------------------------------------------
311template<int X, int Y, int Z, int W>
312inline vec4
314{
315 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(W, Z, Y, X));
316}
317
318//------------------------------------------------------------------------------
321__forceinline scalar&
322vec4::operator[](const int index)
323{
324 n_assert(index < 4);
325 return this->v[index];
326}
327
328//------------------------------------------------------------------------------
331__forceinline scalar
332vec4::operator[](const int index) const
333{
334 n_assert(index < 4);
335 return this->v[index];
336}
337
338//------------------------------------------------------------------------------
341__forceinline vec4
342operator-(const vec4& lhs)
343{
344 return vec4(_mm_xor_ps(_mm_castsi128_ps(_sign), lhs.vec));
345}
346
347//------------------------------------------------------------------------------
350__forceinline vec4
351operator*(const vec4& lhs, scalar t)
352{
353 __m128 temp = _mm_set1_ps(t);
354 return _mm_mul_ps(lhs.vec, temp);
355}
356
357//------------------------------------------------------------------------------
360__forceinline vec4
361operator*(const vec4& lhs, const vec4& rhs)
362{
363 return _mm_mul_ps(lhs.vec, rhs.vec);
364}
365
366//------------------------------------------------------------------------------
369__forceinline vec4
370operator+(const vec4& lhs, const vec4& rhs)
371{
372 return _mm_add_ps(lhs.vec, rhs.vec);
373}
374
375//------------------------------------------------------------------------------
378__forceinline vec4
379operator-(const vec4& lhs, const vec4& rhs)
380{
381 return _mm_sub_ps(lhs.vec, rhs.vec);
382}
383
384//------------------------------------------------------------------------------
387__forceinline scalar
388length(const vec4& v)
389{
390 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.vec, v.vec, 0xF1)));
391}
392
393//------------------------------------------------------------------------------
396__forceinline scalar
397length3(const vec4& v)
398{
399 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.vec, v.vec, 0x71)));
400}
401
402//------------------------------------------------------------------------------
405__forceinline scalar
406lengthsq(const vec4& v)
407{
408 return _mm_cvtss_f32(_mm_dp_ps(v.vec, v.vec, 0xF1));
409}
410
411//------------------------------------------------------------------------------
414__forceinline scalar
416{
417 return _mm_cvtss_f32(_mm_dp_ps(v.vec, v.vec, 0x71));
418}
419
420//------------------------------------------------------------------------------
423__forceinline vec4
425{
426 return _mm_div_ps(_plus1, v.vec);
427}
428
429//------------------------------------------------------------------------------
432__forceinline vec4
434{
435 return _mm_rcp_ps(v.vec);
436}
437
438//------------------------------------------------------------------------------
441__forceinline vec4
442multiply(const vec4& v0, const vec4& v1)
443{
444 return _mm_mul_ps(v0.vec, v1.vec);
445}
446
447//------------------------------------------------------------------------------
450__forceinline vec4
451multiplyadd(const vec4& v0, const vec4& v1, const vec4& v2)
452{
453#if NEBULA_MATH_FMA
454 return _mm_fmadd_ps(v0.vec, v1.vec, v2.vec);
455#else
456 return _mm_add_ps(_mm_mul_ps(v0.vec, v1.vec), v2.vec);
457#endif
458}
459
460//------------------------------------------------------------------------------
463__forceinline vec4
464divide(const vec4& v0, const vec4& v1)
465{
466 return _mm_div_ps(v0.vec, v1.vec);
467}
468
469//------------------------------------------------------------------------------
472__forceinline vec4
473abs(const vec4& v)
474{
475 unsigned int val = 0x7fffffff;
476 __m128 temp = _mm_set1_ps(*(float*)&val);
477 return _mm_and_ps(v.vec, temp);
478}
479
480//------------------------------------------------------------------------------
483__forceinline vec4
484cross3(const vec4& v0, const vec4& v1)
485{
486 __m128 tmp0, tmp1, tmp2, tmp3, result;
487 tmp0 = _mm_shuffle_ps(v0.vec, v0.vec, _MM_SHUFFLE(3, 0, 2, 1));
488 tmp1 = _mm_shuffle_ps(v1.vec, v1.vec, _MM_SHUFFLE(3, 1, 0, 2));
489 tmp2 = _mm_shuffle_ps(v0.vec, v0.vec, _MM_SHUFFLE(3, 1, 0, 2));
490 tmp3 = _mm_shuffle_ps(v1.vec, v1.vec, _MM_SHUFFLE(3, 0, 2, 1));
491 result = _mm_mul_ps(tmp0, tmp1);
492 result = _mm_sub_ps(result, _mm_mul_ps(tmp2, tmp3));
493 return result;
494}
495
496//------------------------------------------------------------------------------
499__forceinline scalar
500dot(const vec4& v0, const vec4& v1)
501{
502 return _mm_cvtss_f32(_mm_dp_ps(v0.vec, v1.vec, 0xF1));
503}
504
505//------------------------------------------------------------------------------
508__forceinline scalar
509dot3(const vec4& v0, const vec4& v1)
510{
511 return _mm_cvtss_f32(_mm_dp_ps(v0.vec, v1.vec, 0x71));
512}
513
514//------------------------------------------------------------------------------
518__forceinline vec4
519barycentric(const vec4& v0, const vec4& v1, const vec4& v2, scalar f, scalar g)
520{
521 __m128 R1 = _mm_sub_ps(v1.vec, v0.vec);
522 __m128 SF = _mm_set_ps1(f);
523 __m128 R2 = _mm_sub_ps(v2.vec, v0.vec);
524 __m128 SG = _mm_set_ps1(g);
525 R1 = _mm_mul_ps(R1, SF);
526 R2 = _mm_mul_ps(R2, SG);
527 R1 = _mm_add_ps(R1, v0.vec);
528 R1 = _mm_add_ps(R1, R2);
529 return R1;
530}
531
532//------------------------------------------------------------------------------
535__forceinline vec4
536catmullrom(const vec4& v0, const vec4& v1, const vec4& v2, const vec4& v3, scalar s)
537{
538 scalar s2 = s * s;
539 scalar s3 = s * s2;
540
541 __m128 P0 = _mm_set_ps1((-s3 + 2.0f * s2 - s) * 0.5f);
542 __m128 P1 = _mm_set_ps1((3.0f * s3 - 5.0f * s2 + 2.0f) * 0.5f);
543 __m128 P2 = _mm_set_ps1((-3.0f * s3 + 4.0f * s2 + s) * 0.5f);
544 __m128 P3 = _mm_set_ps1((s3 - s2) * 0.5f);
545
546 P0 = _mm_mul_ps(P0, v0.vec);
547 P1 = _mm_mul_ps(P1, v1.vec);
548 P2 = _mm_mul_ps(P2, v2.vec);
549 P3 = _mm_mul_ps(P3, v3.vec);
550 P0 = _mm_add_ps(P0, P1);
551 P2 = _mm_add_ps(P2, P3);
552 P0 = _mm_add_ps(P0, P2);
553 return P0;
554}
555
556//------------------------------------------------------------------------------
559__forceinline vec4
560hermite(const vec4& v1, const vec4& t1, const vec4& v2, const vec4& t2, scalar s)
561{
562 scalar s2 = s * s;
563 scalar s3 = s * s2;
564
565 __m128 P0 = _mm_set_ps1(2.0f * s3 - 3.0f * s2 + 1.0f);
566 __m128 T0 = _mm_set_ps1(s3 - 2.0f * s2 + s);
567 __m128 P1 = _mm_set_ps1(-2.0f * s3 + 3.0f * s2);
568 __m128 T1 = _mm_set_ps1(s3 - s2);
569
570 __m128 vResult = _mm_mul_ps(P0, v1.vec);
571 __m128 vTemp = _mm_mul_ps(T0, t1.vec);
572 vResult = _mm_add_ps(vResult, vTemp);
573 vTemp = _mm_mul_ps(P1, v2.vec);
574 vResult = _mm_add_ps(vResult, vTemp);
575 vTemp = _mm_mul_ps(T1, t2.vec);
576 vResult = _mm_add_ps(vResult, vTemp);
577 return vResult;
578}
579
580//------------------------------------------------------------------------------
583__forceinline scalar
584angle(const vec4& v0, const vec4& v1)
585{
586
587 __m128 l0 = _mm_mul_ps(v0.vec, v0.vec);
588 l0 = _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(0, 0, 0, 0)),
589 _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l0, l0, _MM_SHUFFLE(2, 2, 2, 2))));
590
591 __m128 l1 = _mm_mul_ps(v1.vec, v1.vec);
592 l1 = _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(0, 0, 0, 0)),
593 _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l1, l1, _MM_SHUFFLE(2, 2, 2, 2))));
594
595 __m128 l = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(0, 0, 0, 0));
596 l = _mm_rsqrt_ps(l);
597 l = _mm_mul_ss(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)));
598
599
600 __m128 dot = _mm_mul_ps(v0.vec, v1.vec);
601 dot = _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(0, 0, 0, 0)),
602 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(1, 1, 1, 1)),
603 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(dot, dot, _MM_SHUFFLE(3, 3, 3, 3)))));
604
605 dot = _mm_mul_ss(dot, l);
606
607 dot = _mm_max_ss(dot, _minus1);
608 dot = _mm_min_ss(dot, _plus1);
609
610 scalar cangle;
611 _mm_store_ss(&cangle, dot);
612 return Math::acos(cangle);
613}
614
615//------------------------------------------------------------------------------
618__forceinline vec4
619lerp(const vec4& v0, const vec4& v1, scalar s)
620{
621 return v0 + ((v1 - v0) * s);
622}
623
624//------------------------------------------------------------------------------
627__forceinline vec4
628maximize(const vec4& v0, const vec4& v1)
629{
630 return _mm_max_ps(v0.vec, v1.vec);
631}
632
633//------------------------------------------------------------------------------
636__forceinline vec4
637minimize(const vec4& v0, const vec4& v1)
638{
639 return _mm_min_ps(v0.vec, v1.vec);
640}
641
642//------------------------------------------------------------------------------
645__forceinline vec4
646clamp(const vec4& clamp, const vec4& min, const vec4& max)
647{
648 __m128 temp = _mm_max_ps(min.vec, clamp.vec);
649 temp = _mm_min_ps(temp, max.vec);
650 return vec4(temp);
651}
652
653//------------------------------------------------------------------------------
656__forceinline vec4
658{
659 if (v == vec4(0)) return v;
660 return _mm_div_ps(v.vec, _mm_sqrt_ps(_mm_dp_ps(v.vec, v.vec, 0xFF)));
661}
662
663//------------------------------------------------------------------------------
666__forceinline vec4
668{
669 if (v == vec4(0)) return v;
670 return _mm_mul_ps(v.vec, _mm_rsqrt_ps(_mm_dp_ps(v.vec, v.vec, 0xFF)));
671}
672
673//------------------------------------------------------------------------------
676__forceinline vec4
678{
679 if (v == vec4(0)) return v;
680 __m128 t = _mm_div_ps(v.vec, _mm_sqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x77)));
681 return _mm_insert_ps(t, v.vec, 0xF0);
682}
683
684//------------------------------------------------------------------------------
687__forceinline vec4
689{
690 if (v == vec4(0)) return v;
691 __m128 t = _mm_mul_ps(v.vec, _mm_rsqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x77)));
692 return _mm_insert_ps(t, v.vec, 0xF0);
693}
694//------------------------------------------------------------------------------
697__forceinline vec4
698reflect(const vec4& normal, const vec4& incident)
699{
700 __m128 res = _mm_mul_ps(incident.vec, normal.vec);
701 res = _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(0, 0, 0, 0)),
702 _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(res, res, _MM_SHUFFLE(2, 2, 2, 2))));
703 res = _mm_add_ps(res, res);
704 res = _mm_mul_ps(res, normal.vec);
705 res = _mm_sub_ps(incident.vec, res);
706 return res;
707}
708
709//------------------------------------------------------------------------------
712__forceinline vec4
714{
715 __m128 d = _mm_set_ps1(1.0f / v.w);
716 return _mm_mul_ps(v.vec, d);
717}
718
719//------------------------------------------------------------------------------
722__forceinline bool
723less_any(const vec4& v0, const vec4& v1)
724{
725 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
726 int res = _mm_movemask_ps(vTemp);
727 return res != 0xf;
728}
729
730//------------------------------------------------------------------------------
733__forceinline bool
734less_all(const vec4& v0, const vec4& v1)
735{
736 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
737 int res = _mm_movemask_ps(vTemp);
738 return res == 0;
739}
740
741//------------------------------------------------------------------------------
744__forceinline bool
745lessequal_any(const vec4& v0, const vec4& v1)
746{
747 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
748 int res = _mm_movemask_ps(vTemp);
749 return res != 0xf;
750}
751
752//------------------------------------------------------------------------------
755__forceinline bool
756lessequal_all(const vec4& v0, const vec4& v1)
757{
758 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
759 int res = _mm_movemask_ps(vTemp);
760 return res == 0;
761}
762
763//------------------------------------------------------------------------------
766__forceinline bool
767greater_any(const vec4& v0, const vec4& v1)
768{
769 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
770 int res = _mm_movemask_ps(vTemp);
771 return res != 0;
772}
773
774//------------------------------------------------------------------------------
777__forceinline bool
778greater_all(const vec4& v0, const vec4& v1)
779{
780 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
781 int res = _mm_movemask_ps(vTemp);
782 return res == 0xf;
783}
784
785//------------------------------------------------------------------------------
788__forceinline bool
789greaterequal_any(const vec4& v0, const vec4& v1)
790{
791 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
792 int res = _mm_movemask_ps(vTemp);
793 return res != 0;
794}
795
796//------------------------------------------------------------------------------
799__forceinline bool
800greaterequal_all(const vec4& v0, const vec4& v1)
801{
802 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
803 int res = _mm_movemask_ps(vTemp);
804 return res == 0xf;
805}
806
807//------------------------------------------------------------------------------
810__forceinline bool
811equal_any(const vec4& v0, const vec4& v1)
812{
813 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
814 int res = _mm_movemask_ps(vTemp);
815 return res != 0;
816}
817
818//------------------------------------------------------------------------------
821__forceinline bool
822nearequal(const vec4& v0, const vec4& v1, const float epsilon)
823{
824 __m128 eps = _mm_set1_ps(epsilon);
825 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
826 __m128 temp = _mm_setzero_ps();
827 temp = _mm_sub_ps(temp, delta);
828 temp = _mm_max_ps(temp, delta);
829 temp = _mm_cmple_ps(temp, eps);
830 return (_mm_movemask_ps(temp) == 0xf) != 0;
831}
832
833//------------------------------------------------------------------------------
836__forceinline bool
837nearequal(const vec4& v0, const vec4& v1, const vec4& epsilon)
838{
839 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
840 __m128 temp = _mm_setzero_ps();
841 temp = _mm_sub_ps(temp, delta);
842 temp = _mm_max_ps(temp, delta);
843 temp = _mm_cmple_ps(temp, epsilon.vec);
844 return (_mm_movemask_ps(temp) == 0xf) != 0;
845}
846
847//------------------------------------------------------------------------------
850__forceinline vec4
851less(const vec4& v0, const vec4& v1)
852{
853 return _mm_min_ps(_mm_cmplt_ps(v0.vec, v1.vec), _plus1);
854}
855
856//------------------------------------------------------------------------------
859__forceinline vec4
860greater(const vec4& v0, const vec4& v1)
861{
862 return _mm_min_ps(_mm_cmpgt_ps(v0.vec, v1.vec), _plus1);
863}
864
865//------------------------------------------------------------------------------
868__forceinline vec4
869equal(const vec4& v0, const vec4& v1)
870{
871 return _mm_min_ps(_mm_cmpeq_ps(v0.vec, v1.vec), _plus1);
872}
873
874//------------------------------------------------------------------------------
877__forceinline bool
878less3_any(const vec4& v0, const vec4& v1)
879{
880 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
881 int res = _mm_movemask_ps(vTemp) & 7;
882 return res != 7;
883}
884
885//------------------------------------------------------------------------------
888__forceinline bool
889less3_all(const vec4& v0, const vec4& v1)
890{
891 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
892 int res = _mm_movemask_ps(vTemp) & 7;
893 return res == 0;
894}
895
896//------------------------------------------------------------------------------
899__forceinline bool
900lessequal3_any(const vec4& v0, const vec4& v1)
901{
902 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
903 int res = _mm_movemask_ps(vTemp) & 7;
904 return res != 0x7;
905}
906
907//------------------------------------------------------------------------------
910__forceinline bool
911lessequal3_all(const vec4& v0, const vec4& v1)
912{
913 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
914 int res = _mm_movemask_ps(vTemp) & 7;
915 return res == 0;
916}
917
918//------------------------------------------------------------------------------
921__forceinline bool
922greater3_any(const vec4& v0, const vec4& v1)
923{
924 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
925 int res = _mm_movemask_ps(vTemp) & 7;
926 return res != 0;
927}
928
929//------------------------------------------------------------------------------
932__forceinline bool
933greater3_all(const vec4& v0, const vec4& v1)
934{
935 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
936 int res = _mm_movemask_ps(vTemp) & 7;
937 return res == 0x7;
938}
939
940//------------------------------------------------------------------------------
943__forceinline bool
944greaterequal3_any(const vec4& v0, const vec4& v1)
945{
946 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
947 int res = _mm_movemask_ps(vTemp) & 7;
948 return res != 0;
949}
950
951//------------------------------------------------------------------------------
954__forceinline bool
955greaterequal3_all(const vec4& v0, const vec4& v1)
956{
957 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
958 int res = _mm_movemask_ps(vTemp) & 7;
959 return res == 0x7;
960}
961
962//------------------------------------------------------------------------------
965__forceinline bool
966equal3_any(const vec4& v0, const vec4& v1)
967{
968 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
969 int res = _mm_movemask_ps(vTemp) & 7;
970 return res != 0;
971}
972
973//------------------------------------------------------------------------------
976__forceinline bool
977equal3_all(const vec4& v0, const vec4& v1)
978{
979 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
980 int res = _mm_movemask_ps(vTemp) & 7;
981 return res == 0x7;
982}
983
984//------------------------------------------------------------------------------
987__forceinline bool
988nearequal3(const vec4& v0, const vec4& v1, const vec4& epsilon)
989{
990 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
991 __m128 temp = _mm_setzero_ps();
992 temp = _mm_sub_ps(temp, delta);
993 temp = _mm_max_ps(temp, delta);
994 temp = _mm_cmple_ps(temp, epsilon.vec);
995 return (_mm_movemask_ps(temp) == 0x7) != 0;
996}
997
998//------------------------------------------------------------------------------
1001__forceinline vec4
1002splat(const vec4& v, uint element)
1003{
1004 n_assert(element < 4 && element >= 0);
1005
1006 switch (element)
1007 {
1008 case 0:
1009 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
1010 case 1:
1011 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
1012 case 2:
1013 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
1014 }
1015 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(3, 3, 3, 3));
1016}
1017
1018//------------------------------------------------------------------------------
1021__forceinline vec4
1022splat_x(const vec4& v)
1023{
1024 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
1025}
1026
1027//------------------------------------------------------------------------------
1030__forceinline vec4
1031splat_y(const vec4& v)
1032{
1033 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
1034}
1035
1036//------------------------------------------------------------------------------
1039__forceinline vec4
1040splat_z(const vec4& v)
1041{
1042 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
1043}
1044
1045//------------------------------------------------------------------------------
1048__forceinline vec4
1049splat_w(const vec4& v)
1050{
1051 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(3, 3, 3, 3));
1052}
1053
// Element selectors for permute(): 0..3 pick x, y, z, w of the first vector,
// 4..7 pick x, y, z, w of the second vector.
static constexpr unsigned int PERMUTE_0X = 0;
static constexpr unsigned int PERMUTE_0Y = 1;
static constexpr unsigned int PERMUTE_0Z = 2;
static constexpr unsigned int PERMUTE_0W = 3;
static constexpr unsigned int PERMUTE_1X = 4;
static constexpr unsigned int PERMUTE_1Y = 5;
static constexpr unsigned int PERMUTE_1Z = 6;
static constexpr unsigned int PERMUTE_1W = 7;
1062//------------------------------------------------------------------------------
1065__forceinline vec4
1066permute(const vec4& v0, const vec4& v1, unsigned int i0, unsigned int i1, unsigned int i2, unsigned int i3)
1067{
1068 static __m128i three = _mm_set_epi32(3, 3, 3, 3);
1069
1070 NEBULA_ALIGN16 unsigned int elem[4] = { i0, i1, i2, i3 };
1071 __m128i vControl = _mm_load_si128(reinterpret_cast<const __m128i*>(&elem[0]));
1072
1073 __m128i vSelect = _mm_cmpgt_epi32(vControl, three);
1074 vControl = _mm_and_si128(vControl, three);
1075
1076 __m128 shuffled1 = _mm_permutevar_ps(v0.vec, vControl);
1077 __m128 shuffled2 = _mm_permutevar_ps(v1.vec, vControl);
1078
1079 __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1);
1080 __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2);
1081
1082 return _mm_or_ps(masked1, masked2);
1083}
1084
1085//------------------------------------------------------------------------------
1088__forceinline vec4
1089select(const vec4& v0, const vec4& v1, const uint i0, const uint i1, const uint i2, const uint i3)
1090{
1091 //FIXME this should be converted to something similiar as XMVectorSelect
1092 return permute(v0, v1, i0, i1, i2, i3);
1093}
1094
1095//------------------------------------------------------------------------------
1098__forceinline vec4
1099select(const vec4& v0, const vec4& v1, const vec4& control)
1100{
1101 __m128 v0masked = _mm_andnot_ps(control.vec, v0.vec);
1102 __m128 v1masked = _mm_and_ps(v1.vec, control.vec);
1103 return _mm_or_ps(v0masked, v1masked);
1104}
1105
1106//------------------------------------------------------------------------------
1109__forceinline vec4
1110floor(const vec4& v)
1111{
1112 return _mm_floor_ps(v.vec);
1113}
1114
1115//------------------------------------------------------------------------------
1118__forceinline vec4
1119ceil(const vec4& v)
1120{
1121 return _mm_ceil_ps(v.vec);
1122}
1123
1124//------------------------------------------------------------------------------
1127__forceinline vec3
1128xyz(const vec4& v)
1129{
1130 vec3 res;
1131 res.vec = _mm_and_ps(v.vec, _mask_xyz);
1132 return res;
1133}
1134
1135} // namespace Math
1136//------------------------------------------------------------------------------
#define n_assert(exp)
Definition debug.h:50
@ Y
Definition euler.h:8
@ Z
Definition euler.h:8
@ W
Definition euler.h:8
#define X(x)
Definition materialloader.cc:121
Different curves.
Definition angularpfeedbackloop.h:17
__forceinline point less(const point &v0, const point &v1)
Definition point.h:501
__forceinline bool equal3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:966
__forceinline point maximize(const point &v0, const point &v1)
Definition point.h:368
__forceinline quat barycentric(const quat &q0, const quat &q1, const quat &q2, scalar f, scalar g)
Definition quat.h:296
__forceinline scalar dot3(const vec4 &v0, const vec4 &v1)
Definition vec4.h:509
static const unsigned int PERMUTE_0Y
Definition vec4.h:1055
__forceinline vec4 normalize3approx(const vec4 &v)
Definition vec4.h:688
__forceinline bool equal_any(const point &v0, const point &v1)
Definition point.h:474
__forceinline bool lessequal3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:911
__forceinline bool greaterequal_any(const point &v0, const point &v1)
Definition point.h:452
__forceinline bool greaterequal3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:955
__forceinline vec3 splat_z(const vec3 &v)
Definition vec3.h:847
__forceinline vec4 perspective_div(const vec4 &v)
Definition vec4.h:713
__forceinline scalar length3(const vec4 &v)
Definition vec4.h:397
static const __m128i _sign
Definition vec3.h:36
__forceinline vec3 hermite(const vec3 &v1, const vec3 &t1, const vec3 &v2, const vec3 &t2, scalar s)
Definition vec3.h:508
__forceinline point equal(const point &v0, const point &v1)
Definition point.h:519
__forceinline vec3 splat_x(const vec3 &v)
Definition vec3.h:825
__forceinline vec3 splat(const vec3 &v, uint element)
Definition vec3.h:800
mat4 reflect(const vec4 &p)
based on this http://www.opengl.org/discussion_boards/showthread.php/169605-reflection-matrix-how-to-...
Definition mat4.cc:22
__forceinline vec3 multiply(const vec3 &v0, const vec3 &v1)
Definition vec3.h:399
__forceinline scalar angle(const vec3 &v0, const vec3 &v1)
Definition vec3.h:532
__forceinline vec3 divide(const vec3 &v0, const vec3 &v1)
Definition vec3.h:421
static const unsigned int PERMUTE_0W
Definition vec4.h:1057
static const unsigned int PERMUTE_1Y
Definition vec4.h:1059
__forceinline bool less3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:889
static const unsigned int PERMUTE_1X
Definition vec4.h:1058
__forceinline vec3 reciprocal(const vec3 &v)
Definition vec3.h:381
__forceinline scalar length(const quat &q)
Definition quat.h:260
__forceinline scalar lengthsq(const quat &q)
Definition quat.h:269
__forceinline scalar dot(const plane &p, const vec4 &v1)
Definition plane.h:252
__forceinline vec3 permute(const vec3 &v0, const vec3 &v1, unsigned int i0, unsigned int i1, unsigned int i2)
Definition vec3.h:858
__forceinline bool less3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:878
__forceinline plane normalize(const plane &p)
Definition plane.h:261
__forceinline float ceil(float val)
Floating point ceiling.
Definition scalar.h:532
__forceinline bool greaterequal_all(const point &v0, const point &v1)
Definition point.h:463
__forceinline bool greater_any(const point &v0, const point &v1)
Definition point.h:430
__forceinline float lerp(float x, float y, float l)
Linearly interpolate between 2 values: ret = x + l * (y - x)
Definition scalar.h:606
static const unsigned int PERMUTE_1Z
Definition vec4.h:1060
__forceinline float clamp(float val, float minVal, float maxVal)
Float clamping.
Definition scalar.h:496
half operator-(half one, half two)
Definition half.h:114
__forceinline bool less_any(const point &v0, const point &v1)
Definition point.h:386
__forceinline bool nearequal3(const vec4 &v0, const vec4 &v1, const vec4 &epsilon)
Definition vec4.h:988
__forceinline vec3 catmullrom(const vec3 &v0, const vec3 &v1, const vec3 &v2, const vec3 &v3, scalar s)
Definition vec3.h:484
__forceinline bool greater3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:922
__forceinline TYPE min(TYPE a, TYPE b)
Definition scalar.h:399
__forceinline bool less_all(const point &v0, const point &v1)
Definition point.h:397
__forceinline bool lessequal_all(const point &v0, const point &v1)
Definition point.h:419
__forceinline vec3 select(const vec3 &v0, const vec3 &v1, const uint i0, const uint i1, const uint i2)
Definition vec3.h:881
__forceinline vec3 splat_y(const vec3 &v)
Definition vec3.h:836
static const __m128 _minus1
Definition vec3.h:33
__forceinline point minimize(const point &v0, const point &v1)
Definition point.h:377
float scalar
Definition scalar.h:45
__forceinline vec4 normalize3(const vec4 &v)
Definition vec4.h:677
__forceinline bool greater3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:933
static const unsigned int PERMUTE_0X
Definition vec4.h:1054
__forceinline bool lessequal3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:900
__forceinline scalar lengthsq3(const vec4 &v)
Definition vec4.h:415
__forceinline point greater(const point &v0, const point &v1)
Definition point.h:510
half operator+(half one, half two)
Definition half.h:105
half operator*(half one, half two)
Definition half.h:123
__forceinline TYPE max(TYPE a, TYPE b)
Definition scalar.h:368
__forceinline scalar abs(scalar a)
Definition scalar.h:441
__forceinline vec3 multiplyadd(const vec3 &v0, const vec3 &v1, const vec3 &v2)
Definition vec3.h:408
__forceinline vec4 cross3(const vec4 &v0, const vec4 &v1)
Definition vec4.h:484
static const unsigned int PERMUTE_1W
Definition vec4.h:1061
__forceinline scalar acos(scalar x)
Definition scalar.h:218
__forceinline bool greater_all(const point &v0, const point &v1)
Definition point.h:441
__forceinline bool nearequal(const point &v0, const point &v1, float epsilon)
Definition point.h:485
static const __m128 _mask_xyz
Definition vec3.h:37
__forceinline bool lessequal_any(const point &v0, const point &v1)
Definition point.h:408
__forceinline bool greaterequal3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:944
__forceinline vec3 xyz(const point &v)
Definition point.h:528
static const unsigned int PERMUTE_0Z
Definition vec4.h:1056
__forceinline float floor(float val)
Floating point flooring.
Definition scalar.h:542
__forceinline vec3 normalizeapprox(const vec3 &v)
Definition vec3.h:616
__forceinline vec4 splat_w(const vec4 &v)
Definition vec4.h:1049
__forceinline bool equal3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:977
static const __m128 _plus1
Definition vec3.h:34
__forceinline vec3 reciprocalapprox(const vec3 &v)
Definition vec3.h:390
Nebula's scalar datatype.
A 4x4 single-precision floating-point matrix.
Definition mat4.h:49
A 3D vector.
Definition vec3.h:40
__m128 vec
Definition vec3.h:98
A 4D vector.
Definition vec4.h:24
void loadu(const scalar *ptr)
load content from unaligned memory
Definition vec4.h:183
void load_ubyte4n(const void *ptr)
load from UByte4N packed vector
Definition vec4.cc:16
void store(scalar *ptr) const
write content to 16-byte-aligned memory through the write cache
Definition vec4.h:193
void stream(scalar *ptr) const
stream content to 16-byte-aligned memory circumventing the write-cache
Definition vec4.h:236
scalar & operator[](const int index)
read-write access to indexed component (returns a mutable reference)
Definition vec4.h:322
float y
Definition vec4.h:95
void operator-=(const vec4 &rhs)
inplace sub
Definition vec4.h:284
float z
Definition vec4.h:95
void operator*=(scalar s)
inplace scalar multiply
Definition vec4.h:293
vec4()=default
default constructor, NOTE: does NOT initialize the components!
void store3(scalar *ptr) const
write the x, y, z components to 16-byte-aligned memory through the write cache
Definition vec4.h:212
vec4 swizzle(const vec4 &v)
swizzle vector
Definition vec4.h:313
__m128 vec
Definition vec4.h:97
float v[4]
Definition vec4.h:98
bool operator==(const vec4 &rhs) const
equality operator
Definition vec4.h:152
bool operator!=(const vec4 &rhs) const
inequality operator
Definition vec4.h:162
void storeu3(scalar *ptr) const
write the x, y, z components to unaligned memory through the write cache
Definition vec4.h:223
vec4(const vec4 &rhs)=default
copy constructor
void load_byte4n(const void *ptr)
load from Byte4N packed vector
Definition vec4.cc:32
void operator=(const __m128 &rhs)
assign from a raw __m128 value
Definition vec4.h:143
float w
Definition vec4.h:95
void load_float3(const void *ptr, float w)
load 3 floats into x,y,z from unaligned memory
Definition vec4.h:245
void storeu(scalar *ptr) const
write content to unaligned memory through the write cache
Definition vec4.h:203
void set(scalar x, scalar y, scalar z, scalar w)
set content
Definition vec4.h:303
float x
Definition vec4.h:95
void load(const scalar *ptr)
load content from 16-byte-aligned memory
Definition vec4.h:173
void operator/=(const vec4 &rhs)
divide by a vector component-wise
Definition vec4.h:266
void operator+=(const vec4 &rhs)
inplace add
Definition vec4.h:275
bool operator==(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:272
bool operator!=(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:282
#define NEBULA_ALIGN16
Definition types.h:146
unsigned int uint
Definition types.h:31