Nebula
Loading...
Searching...
No Matches
vec4.h
Go to the documentation of this file.
1#pragma once
2//------------------------------------------------------------------------------
14#include "core/types.h"
15#include "math/scalar.h"
16#include "vec3.h"
17
18//------------------------------------------------------------------------------
19namespace Math
20{
21struct mat4;
22
24{
25public:
27 vec4() = default;
29 vec4(scalar x, scalar y, scalar z, scalar w);
31 explicit vec4(scalar v);
33 vec4(const vec4& rhs) = default;
35 vec4(const vec3& rhs, float w);
37 vec4(const __m128& rhs);
38
40 void operator=(const __m128 &rhs);
42 void operator+=(const vec4 &rhs);
44 void operator-=(const vec4 &rhs);
46 void operator*=(scalar s);
48 void operator*=(const vec4& rhs);
50 void operator/=(const vec4& rhs);
52 bool operator==(const vec4 &rhs) const;
54 bool operator!=(const vec4 &rhs) const;
55
57 void load(const scalar* ptr);
59 void loadu(const scalar* ptr);
61 void store(scalar* ptr) const;
63 void storeu(scalar* ptr) const;
65 void store3(scalar* ptr) const;
67 void storeu3(scalar* ptr) const;
69 void stream(scalar* ptr) const;
70
72 void load_float3(const void* ptr, float w);
74 void load_ubyte4n(const void* ptr);
76 void load_byte4n(const void* ptr);
78 void set(scalar x, scalar y, scalar z, scalar w);
79
81 template<int X, int Y, int Z, int W>
82 vec4 swizzle(const vec4& v);
83
85 scalar& operator[](const int index);
87 scalar operator[](const int index) const;
88
89 union
90 {
91 struct
92 {
93 float x, y, z, w;
94 };
95 __m128 vec;
96 float v[4];
97 };
98};
99
100//------------------------------------------------------------------------------
103__forceinline
105{
106 this->vec = _mm_setr_ps(x, y, z, w);
107}
108
109//------------------------------------------------------------------------------
112__forceinline
114{
115 this->vec = _mm_set1_ps(v);
116}
117
118//------------------------------------------------------------------------------
121__forceinline
122vec4::vec4(const __m128& rhs)
123{
124 this->vec = rhs;
125}
126
127//------------------------------------------------------------------------------
130__forceinline
131vec4::vec4(const vec3& rhs, float w)
132{
133 this->vec = rhs.vec;
134 this->w = w;
135}
136
137//------------------------------------------------------------------------------
140__forceinline void
141vec4::operator=(const __m128& rhs)
142{
143 this->vec = rhs;
144}
145
146//------------------------------------------------------------------------------
149__forceinline bool
150vec4::operator==(const vec4& rhs) const
151{
152 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
153 return ((_mm_movemask_ps(vTemp) == 0x0f) != 0);
154}
155
156//------------------------------------------------------------------------------
159__forceinline bool
160vec4::operator!=(const vec4& rhs) const
161{
162 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
163 return ((_mm_movemask_ps(vTemp) == 0x0f) == 0);
164}
165
166//------------------------------------------------------------------------------
170__forceinline void
172{
173 this->vec = _mm_load_ps(ptr);
174}
175
176//------------------------------------------------------------------------------
180__forceinline void
182{
183 this->vec = _mm_loadu_ps(ptr);
184}
185
186//------------------------------------------------------------------------------
190__forceinline void
192{
193 _mm_store_ps(ptr, this->vec);
194}
195
196//------------------------------------------------------------------------------
200__forceinline void
202{
203 _mm_storeu_ps(ptr, this->vec);
204}
205
206//------------------------------------------------------------------------------
209__forceinline void
211{
212 __m128 vv = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
213 _mm_storel_epi64(reinterpret_cast<__m128i*>(ptr), _mm_castps_si128(this->vec));
214 _mm_store_ss(&ptr[2], vv);
215}
216
217//------------------------------------------------------------------------------
220__forceinline void
222{
223 __m128 t1 = _mm_permute_ps(this->vec, _MM_SHUFFLE(1, 1, 1, 1));
224 __m128 t2 = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
225 _mm_store_ss(&ptr[0], this->vec);
226 _mm_store_ss(&ptr[1], t1);
227 _mm_store_ss(&ptr[2], t2);
228}
229
230//------------------------------------------------------------------------------
233__forceinline void
235{
236 this->store(ptr);
237}
238
239//------------------------------------------------------------------------------
242__forceinline void
243vec4::load_float3(const void* ptr, float w)
244{
245 float* source = (float*)ptr;
246 this->vec = _mm_load_ps(source);
247 this->w = w;
248}
249
250
251//------------------------------------------------------------------------------
254__forceinline void
256{
257 this->vec = _mm_mul_ps(this->vec, rhs.vec);
258}
259
260//------------------------------------------------------------------------------
263__forceinline void
265{
266 this->vec = _mm_div_ps(this->vec, rhs.vec);
267}
268
269//------------------------------------------------------------------------------
272__forceinline void
274{
275 this->vec = _mm_add_ps(this->vec, rhs.vec);
276}
277
278//------------------------------------------------------------------------------
281__forceinline void
283{
284 this->vec = _mm_sub_ps(this->vec, rhs.vec);
285}
286
287//------------------------------------------------------------------------------
290__forceinline void
292{
293 __m128 temp = _mm_set1_ps(s);
294 this->vec = _mm_mul_ps(this->vec, temp);
295}
296
297//------------------------------------------------------------------------------
300__forceinline void
302{
303 this->vec = _mm_setr_ps(x, y, z, w);
304}
305
306//------------------------------------------------------------------------------
309template<int X, int Y, int Z, int W>
310inline vec4
312{
313 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(W, Z, Y, X));
314}
315
316//------------------------------------------------------------------------------
319__forceinline scalar&
320vec4::operator[](const int index)
321{
322 n_assert(index < 4);
323 return this->v[index];
324}
325
326//------------------------------------------------------------------------------
329__forceinline scalar
330vec4::operator[](const int index) const
331{
332 n_assert(index < 4);
333 return this->v[index];
334}
335
336//------------------------------------------------------------------------------
339__forceinline vec4
340operator-(const vec4& lhs)
341{
342 return vec4(_mm_xor_ps(_mm_castsi128_ps(_sign), lhs.vec));
343}
344
345//------------------------------------------------------------------------------
348__forceinline vec4
349operator*(const vec4& lhs, scalar t)
350{
351 __m128 temp = _mm_set1_ps(t);
352 return _mm_mul_ps(lhs.vec, temp);
353}
354
355//------------------------------------------------------------------------------
358__forceinline vec4
359operator*(const vec4& lhs, const vec4& rhs)
360{
361 return _mm_mul_ps(lhs.vec, rhs.vec);
362}
363
364//------------------------------------------------------------------------------
367__forceinline vec4
368operator+(const vec4& lhs, const vec4& rhs)
369{
370 return _mm_add_ps(lhs.vec, rhs.vec);
371}
372
373//------------------------------------------------------------------------------
376__forceinline vec4
377operator-(const vec4& lhs, const vec4& rhs)
378{
379 return _mm_sub_ps(lhs.vec, rhs.vec);
380}
381
382//------------------------------------------------------------------------------
385__forceinline scalar
386length(const vec4& v)
387{
388 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.vec, v.vec, 0xF1)));
389}
390
391//------------------------------------------------------------------------------
394__forceinline scalar
395length3(const vec4& v)
396{
397 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.vec, v.vec, 0x71)));
398}
399
400//------------------------------------------------------------------------------
403__forceinline scalar
404lengthsq(const vec4& v)
405{
406 return _mm_cvtss_f32(_mm_dp_ps(v.vec, v.vec, 0xF1));
407}
408
409//------------------------------------------------------------------------------
412__forceinline scalar
414{
415 return _mm_cvtss_f32(_mm_dp_ps(v.vec, v.vec, 0x71));
416}
417
418//------------------------------------------------------------------------------
421__forceinline vec4
423{
424 return _mm_div_ps(_plus1, v.vec);
425}
426
427//------------------------------------------------------------------------------
430__forceinline vec4
432{
433 return _mm_rcp_ps(v.vec);
434}
435
436//------------------------------------------------------------------------------
439__forceinline vec4
440multiply(const vec4& v0, const vec4& v1)
441{
442 return _mm_mul_ps(v0.vec, v1.vec);
443}
444
445//------------------------------------------------------------------------------
448__forceinline vec4
449multiplyadd(const vec4& v0, const vec4& v1, const vec4& v2)
450{
451#if NEBULA_MATH_FMA
452 return _mm_fmadd_ps(v0.vec, v1.vec, v2.vec);
453#else
454 return _mm_add_ps(_mm_mul_ps(v0.vec, v1.vec), v2.vec);
455#endif
456}
457
458//------------------------------------------------------------------------------
461__forceinline vec4
462divide(const vec4& v0, const vec4& v1)
463{
464 return _mm_div_ps(v0.vec, v1.vec);
465}
466
467//------------------------------------------------------------------------------
470__forceinline vec4
471abs(const vec4& v)
472{
473 unsigned int val = 0x7fffffff;
474 __m128 temp = _mm_set1_ps(*(float*)&val);
475 return _mm_and_ps(v.vec, temp);
476}
477
478//------------------------------------------------------------------------------
481__forceinline vec4
482cross3(const vec4& v0, const vec4& v1)
483{
484 __m128 tmp0, tmp1, tmp2, tmp3, result;
485 tmp0 = _mm_shuffle_ps(v0.vec, v0.vec, _MM_SHUFFLE(3, 0, 2, 1));
486 tmp1 = _mm_shuffle_ps(v1.vec, v1.vec, _MM_SHUFFLE(3, 1, 0, 2));
487 tmp2 = _mm_shuffle_ps(v0.vec, v0.vec, _MM_SHUFFLE(3, 1, 0, 2));
488 tmp3 = _mm_shuffle_ps(v1.vec, v1.vec, _MM_SHUFFLE(3, 0, 2, 1));
489 result = _mm_mul_ps(tmp0, tmp1);
490 result = _mm_sub_ps(result, _mm_mul_ps(tmp2, tmp3));
491 return result;
492}
493
494//------------------------------------------------------------------------------
497__forceinline scalar
498dot(const vec4& v0, const vec4& v1)
499{
500 return _mm_cvtss_f32(_mm_dp_ps(v0.vec, v1.vec, 0xF1));
501}
502
503//------------------------------------------------------------------------------
506__forceinline scalar
507dot3(const vec4& v0, const vec4& v1)
508{
509 return _mm_cvtss_f32(_mm_dp_ps(v0.vec, v1.vec, 0x71));
510}
511
512//------------------------------------------------------------------------------
516__forceinline vec4
517barycentric(const vec4& v0, const vec4& v1, const vec4& v2, scalar f, scalar g)
518{
519 __m128 R1 = _mm_sub_ps(v1.vec, v0.vec);
520 __m128 SF = _mm_set_ps1(f);
521 __m128 R2 = _mm_sub_ps(v2.vec, v0.vec);
522 __m128 SG = _mm_set_ps1(g);
523 R1 = _mm_mul_ps(R1, SF);
524 R2 = _mm_mul_ps(R2, SG);
525 R1 = _mm_add_ps(R1, v0.vec);
526 R1 = _mm_add_ps(R1, R2);
527 return R1;
528}
529
530//------------------------------------------------------------------------------
533__forceinline vec4
534catmullrom(const vec4& v0, const vec4& v1, const vec4& v2, const vec4& v3, scalar s)
535{
536 scalar s2 = s * s;
537 scalar s3 = s * s2;
538
539 __m128 P0 = _mm_set_ps1((-s3 + 2.0f * s2 - s) * 0.5f);
540 __m128 P1 = _mm_set_ps1((3.0f * s3 - 5.0f * s2 + 2.0f) * 0.5f);
541 __m128 P2 = _mm_set_ps1((-3.0f * s3 + 4.0f * s2 + s) * 0.5f);
542 __m128 P3 = _mm_set_ps1((s3 - s2) * 0.5f);
543
544 P0 = _mm_mul_ps(P0, v0.vec);
545 P1 = _mm_mul_ps(P1, v1.vec);
546 P2 = _mm_mul_ps(P2, v2.vec);
547 P3 = _mm_mul_ps(P3, v3.vec);
548 P0 = _mm_add_ps(P0, P1);
549 P2 = _mm_add_ps(P2, P3);
550 P0 = _mm_add_ps(P0, P2);
551 return P0;
552}
553
554//------------------------------------------------------------------------------
557__forceinline vec4
558hermite(const vec4& v1, const vec4& t1, const vec4& v2, const vec4& t2, scalar s)
559{
560 scalar s2 = s * s;
561 scalar s3 = s * s2;
562
563 __m128 P0 = _mm_set_ps1(2.0f * s3 - 3.0f * s2 + 1.0f);
564 __m128 T0 = _mm_set_ps1(s3 - 2.0f * s2 + s);
565 __m128 P1 = _mm_set_ps1(-2.0f * s3 + 3.0f * s2);
566 __m128 T1 = _mm_set_ps1(s3 - s2);
567
568 __m128 vResult = _mm_mul_ps(P0, v1.vec);
569 __m128 vTemp = _mm_mul_ps(T0, t1.vec);
570 vResult = _mm_add_ps(vResult, vTemp);
571 vTemp = _mm_mul_ps(P1, v2.vec);
572 vResult = _mm_add_ps(vResult, vTemp);
573 vTemp = _mm_mul_ps(T1, t2.vec);
574 vResult = _mm_add_ps(vResult, vTemp);
575 return vResult;
576}
577
578//------------------------------------------------------------------------------
581__forceinline scalar
582angle(const vec4& v0, const vec4& v1)
583{
584
585 __m128 l0 = _mm_mul_ps(v0.vec, v0.vec);
586 l0 = _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(0, 0, 0, 0)),
587 _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l0, l0, _MM_SHUFFLE(2, 2, 2, 2))));
588
589 __m128 l1 = _mm_mul_ps(v1.vec, v1.vec);
590 l1 = _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(0, 0, 0, 0)),
591 _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l1, l1, _MM_SHUFFLE(2, 2, 2, 2))));
592
593 __m128 l = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(0, 0, 0, 0));
594 l = _mm_rsqrt_ps(l);
595 l = _mm_mul_ss(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)));
596
597
598 __m128 dot = _mm_mul_ps(v0.vec, v1.vec);
599 dot = _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(0, 0, 0, 0)),
600 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(1, 1, 1, 1)),
601 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(dot, dot, _MM_SHUFFLE(3, 3, 3, 3)))));
602
603 dot = _mm_mul_ss(dot, l);
604
605 dot = _mm_max_ss(dot, _minus1);
606 dot = _mm_min_ss(dot, _plus1);
607
608 scalar cangle;
609 _mm_store_ss(&cangle, dot);
610 return Math::acos(cangle);
611}
612
613//------------------------------------------------------------------------------
616__forceinline vec4
617lerp(const vec4& v0, const vec4& v1, scalar s)
618{
619 return v0 + ((v1 - v0) * s);
620}
621
622//------------------------------------------------------------------------------
625__forceinline vec4
626maximize(const vec4& v0, const vec4& v1)
627{
628 return _mm_max_ps(v0.vec, v1.vec);
629}
630
631//------------------------------------------------------------------------------
634__forceinline vec4
635minimize(const vec4& v0, const vec4& v1)
636{
637 return _mm_min_ps(v0.vec, v1.vec);
638}
639
640//------------------------------------------------------------------------------
643__forceinline vec4
644clamp(const vec4& clamp, const vec4& min, const vec4& max)
645{
646 __m128 temp = _mm_max_ps(min.vec, clamp.vec);
647 temp = _mm_min_ps(temp, max.vec);
648 return vec4(temp);
649}
650
651//------------------------------------------------------------------------------
654__forceinline vec4
656{
657 if (v == vec4(0)) return v;
658 return _mm_div_ps(v.vec, _mm_sqrt_ps(_mm_dp_ps(v.vec, v.vec, 0xFF)));
659}
660
661//------------------------------------------------------------------------------
664__forceinline vec4
666{
667 if (v == vec4(0)) return v;
668 return _mm_mul_ps(v.vec, _mm_rsqrt_ps(_mm_dp_ps(v.vec, v.vec, 0xFF)));
669}
670
671//------------------------------------------------------------------------------
674__forceinline vec4
676{
677 if (v == vec4(0)) return v;
678 __m128 t = _mm_div_ps(v.vec, _mm_sqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x77)));
679 return _mm_insert_ps(t, v.vec, 0xF0);
680}
681
682//------------------------------------------------------------------------------
685__forceinline vec4
687{
688 if (v == vec4(0)) return v;
689 __m128 t = _mm_mul_ps(v.vec, _mm_rsqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x77)));
690 return _mm_insert_ps(t, v.vec, 0xF0);
691}
692//------------------------------------------------------------------------------
695__forceinline vec4
696reflect(const vec4& normal, const vec4& incident)
697{
698 __m128 res = _mm_mul_ps(incident.vec, normal.vec);
699 res = _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(0, 0, 0, 0)),
700 _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(res, res, _MM_SHUFFLE(2, 2, 2, 2))));
701 res = _mm_add_ps(res, res);
702 res = _mm_mul_ps(res, normal.vec);
703 res = _mm_sub_ps(incident.vec, res);
704 return res;
705}
706
707//------------------------------------------------------------------------------
710__forceinline vec4
712{
713 __m128 d = _mm_set_ps1(1.0f / v.w);
714 return _mm_mul_ps(v.vec, d);
715}
716
717//------------------------------------------------------------------------------
720__forceinline bool
721less_any(const vec4& v0, const vec4& v1)
722{
723 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
724 int res = _mm_movemask_ps(vTemp);
725 return res != 0xf;
726}
727
728//------------------------------------------------------------------------------
731__forceinline bool
732less_all(const vec4& v0, const vec4& v1)
733{
734 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
735 int res = _mm_movemask_ps(vTemp);
736 return res == 0;
737}
738
739//------------------------------------------------------------------------------
742__forceinline bool
743lessequal_any(const vec4& v0, const vec4& v1)
744{
745 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
746 int res = _mm_movemask_ps(vTemp);
747 return res != 0xf;
748}
749
750//------------------------------------------------------------------------------
753__forceinline bool
754lessequal_all(const vec4& v0, const vec4& v1)
755{
756 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
757 int res = _mm_movemask_ps(vTemp);
758 return res == 0;
759}
760
761//------------------------------------------------------------------------------
764__forceinline bool
765greater_any(const vec4& v0, const vec4& v1)
766{
767 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
768 int res = _mm_movemask_ps(vTemp);
769 return res != 0;
770}
771
772//------------------------------------------------------------------------------
775__forceinline bool
776greater_all(const vec4& v0, const vec4& v1)
777{
778 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
779 int res = _mm_movemask_ps(vTemp);
780 return res == 0xf;
781}
782
783//------------------------------------------------------------------------------
786__forceinline bool
787greaterequal_any(const vec4& v0, const vec4& v1)
788{
789 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
790 int res = _mm_movemask_ps(vTemp);
791 return res != 0;
792}
793
794//------------------------------------------------------------------------------
797__forceinline bool
798greaterequal_all(const vec4& v0, const vec4& v1)
799{
800 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
801 int res = _mm_movemask_ps(vTemp);
802 return res == 0xf;
803}
804
805//------------------------------------------------------------------------------
808__forceinline bool
809equal_any(const vec4& v0, const vec4& v1)
810{
811 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
812 int res = _mm_movemask_ps(vTemp);
813 return res != 0;
814}
815
816//------------------------------------------------------------------------------
819__forceinline bool
820nearequal(const vec4& v0, const vec4& v1, const float epsilon)
821{
822 __m128 eps = _mm_set1_ps(epsilon);
823 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
824 __m128 temp = _mm_setzero_ps();
825 temp = _mm_sub_ps(temp, delta);
826 temp = _mm_max_ps(temp, delta);
827 temp = _mm_cmple_ps(temp, eps);
828 return (_mm_movemask_ps(temp) == 0xf) != 0;
829}
830
831//------------------------------------------------------------------------------
834__forceinline bool
835nearequal(const vec4& v0, const vec4& v1, const vec4& epsilon)
836{
837 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
838 __m128 temp = _mm_setzero_ps();
839 temp = _mm_sub_ps(temp, delta);
840 temp = _mm_max_ps(temp, delta);
841 temp = _mm_cmple_ps(temp, epsilon.vec);
842 return (_mm_movemask_ps(temp) == 0xf) != 0;
843}
844
845//------------------------------------------------------------------------------
848__forceinline vec4
849less(const vec4& v0, const vec4& v1)
850{
851 return _mm_min_ps(_mm_cmplt_ps(v0.vec, v1.vec), _plus1);
852}
853
854//------------------------------------------------------------------------------
857__forceinline vec4
858greater(const vec4& v0, const vec4& v1)
859{
860 return _mm_min_ps(_mm_cmpgt_ps(v0.vec, v1.vec), _plus1);
861}
862
863//------------------------------------------------------------------------------
866__forceinline vec4
867equal(const vec4& v0, const vec4& v1)
868{
869 return _mm_min_ps(_mm_cmpeq_ps(v0.vec, v1.vec), _plus1);
870}
871
872//------------------------------------------------------------------------------
875__forceinline bool
876less3_any(const vec4& v0, const vec4& v1)
877{
878 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
879 int res = _mm_movemask_ps(vTemp) & 7;
880 return res != 7;
881}
882
883//------------------------------------------------------------------------------
886__forceinline bool
887less3_all(const vec4& v0, const vec4& v1)
888{
889 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
890 int res = _mm_movemask_ps(vTemp) & 7;
891 return res == 0;
892}
893
894//------------------------------------------------------------------------------
897__forceinline bool
898lessequal3_any(const vec4& v0, const vec4& v1)
899{
900 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
901 int res = _mm_movemask_ps(vTemp) & 7;
902 return res != 0x7;
903}
904
905//------------------------------------------------------------------------------
908__forceinline bool
909lessequal3_all(const vec4& v0, const vec4& v1)
910{
911 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
912 int res = _mm_movemask_ps(vTemp) & 7;
913 return res == 0;
914}
915
916//------------------------------------------------------------------------------
919__forceinline bool
920greater3_any(const vec4& v0, const vec4& v1)
921{
922 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
923 int res = _mm_movemask_ps(vTemp) & 7;
924 return res != 0;
925}
926
927//------------------------------------------------------------------------------
930__forceinline bool
931greater3_all(const vec4& v0, const vec4& v1)
932{
933 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
934 int res = _mm_movemask_ps(vTemp) & 7;
935 return res == 0x7;
936}
937
938//------------------------------------------------------------------------------
941__forceinline bool
942greaterequal3_any(const vec4& v0, const vec4& v1)
943{
944 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
945 int res = _mm_movemask_ps(vTemp) & 7;
946 return res != 0;
947}
948
949//------------------------------------------------------------------------------
952__forceinline bool
953greaterequal3_all(const vec4& v0, const vec4& v1)
954{
955 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
956 int res = _mm_movemask_ps(vTemp) & 7;
957 return res == 0x7;
958}
959
960//------------------------------------------------------------------------------
963__forceinline bool
964equal3_any(const vec4& v0, const vec4& v1)
965{
966 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
967 int res = _mm_movemask_ps(vTemp) & 7;
968 return res != 0;
969}
970
971//------------------------------------------------------------------------------
974__forceinline bool
975equal3_all(const vec4& v0, const vec4& v1)
976{
977 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
978 int res = _mm_movemask_ps(vTemp) & 7;
979 return res == 0x7;
980}
981
982//------------------------------------------------------------------------------
985__forceinline bool
986nearequal3(const vec4& v0, const vec4& v1, const vec4& epsilon)
987{
988 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
989 __m128 temp = _mm_setzero_ps();
990 temp = _mm_sub_ps(temp, delta);
991 temp = _mm_max_ps(temp, delta);
992 temp = _mm_cmple_ps(temp, epsilon.vec);
993 return (_mm_movemask_ps(temp) == 0x7) != 0;
994}
995
996//------------------------------------------------------------------------------
999__forceinline vec4
1000splat(const vec4& v, uint element)
1001{
1002 n_assert(element < 4 && element >= 0);
1003
1004 switch (element)
1005 {
1006 case 0:
1007 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
1008 case 1:
1009 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
1010 case 2:
1011 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
1012 }
1013 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(3, 3, 3, 3));
1014}
1015
1016//------------------------------------------------------------------------------
1019__forceinline vec4
1020splat_x(const vec4& v)
1021{
1022 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
1023}
1024
1025//------------------------------------------------------------------------------
1028__forceinline vec4
1029splat_y(const vec4& v)
1030{
1031 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
1032}
1033
1034//------------------------------------------------------------------------------
1037__forceinline vec4
1038splat_z(const vec4& v)
1039{
1040 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
1041}
1042
1043//------------------------------------------------------------------------------
1046__forceinline vec4
1047splat_w(const vec4& v)
1048{
1049 return _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(3, 3, 3, 3));
1050}
1051
// Lane selectors for permute(): 0..3 pick x, y, z, w of the first vector,
// 4..7 pick x, y, z, w of the second vector.
static constexpr unsigned int PERMUTE_0X = 0;
static constexpr unsigned int PERMUTE_0Y = 1;
static constexpr unsigned int PERMUTE_0Z = 2;
static constexpr unsigned int PERMUTE_0W = 3;
static constexpr unsigned int PERMUTE_1X = 4;
static constexpr unsigned int PERMUTE_1Y = 5;
static constexpr unsigned int PERMUTE_1Z = 6;
static constexpr unsigned int PERMUTE_1W = 7;
1060//------------------------------------------------------------------------------
1063__forceinline vec4
1064permute(const vec4& v0, const vec4& v1, unsigned int i0, unsigned int i1, unsigned int i2, unsigned int i3)
1065{
1066 static __m128i three = _mm_set_epi32(3, 3, 3, 3);
1067
1068 NEBULA_ALIGN16 unsigned int elem[4] = { i0, i1, i2, i3 };
1069 __m128i vControl = _mm_load_si128(reinterpret_cast<const __m128i*>(&elem[0]));
1070
1071 __m128i vSelect = _mm_cmpgt_epi32(vControl, three);
1072 vControl = _mm_and_si128(vControl, three);
1073
1074 __m128 shuffled1 = _mm_permutevar_ps(v0.vec, vControl);
1075 __m128 shuffled2 = _mm_permutevar_ps(v1.vec, vControl);
1076
1077 __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1);
1078 __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2);
1079
1080 return _mm_or_ps(masked1, masked2);
1081}
1082
1083//------------------------------------------------------------------------------
1086__forceinline vec4
1087select(const vec4& v0, const vec4& v1, const uint i0, const uint i1, const uint i2, const uint i3)
1088{
1089 //FIXME this should be converted to something similiar as XMVectorSelect
1090 return permute(v0, v1, i0, i1, i2, i3);
1091}
1092
1093//------------------------------------------------------------------------------
1096__forceinline vec4
1097select(const vec4& v0, const vec4& v1, const vec4& control)
1098{
1099 __m128 v0masked = _mm_andnot_ps(control.vec, v0.vec);
1100 __m128 v1masked = _mm_and_ps(v1.vec, control.vec);
1101 return _mm_or_ps(v0masked, v1masked);
1102}
1103
1104//------------------------------------------------------------------------------
1107__forceinline vec4
1108floor(const vec4& v)
1109{
1110 return _mm_floor_ps(v.vec);
1111}
1112
1113//------------------------------------------------------------------------------
1116__forceinline vec4
1117ceil(const vec4& v)
1118{
1119 return _mm_ceil_ps(v.vec);
1120}
1121
1122//------------------------------------------------------------------------------
1125__forceinline vec3
1126xyz(const vec4& v)
1127{
1128 vec3 res;
1129 res.vec = _mm_and_ps(v.vec, _mask_xyz);
1130 return res;
1131}
1132
1133} // namespace Math
1134//------------------------------------------------------------------------------
#define n_assert(exp)
Definition debug.h:50
@ Y
Definition euler.h:8
@ Z
Definition euler.h:8
@ W
Definition euler.h:8
#define X(x)
Definition materialloader.cc:153
Different curves.
Definition angularpfeedbackloop.h:17
__forceinline point less(const point &v0, const point &v1)
Definition point.h:501
__forceinline bool equal3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:964
__forceinline point maximize(const point &v0, const point &v1)
Definition point.h:368
__forceinline quat barycentric(const quat &q0, const quat &q1, const quat &q2, scalar f, scalar g)
Definition quat.h:295
__forceinline scalar dot3(const vec4 &v0, const vec4 &v1)
Definition vec4.h:507
static const unsigned int PERMUTE_0Y
Definition vec4.h:1053
__forceinline vec4 normalize3approx(const vec4 &v)
Definition vec4.h:686
__forceinline bool equal_any(const point &v0, const point &v1)
Definition point.h:474
__forceinline bool lessequal3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:909
__forceinline bool greaterequal_any(const point &v0, const point &v1)
Definition point.h:452
__forceinline bool greaterequal3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:953
__forceinline vec3 splat_z(const vec3 &v)
Definition vec3.h:823
__forceinline vec4 perspective_div(const vec4 &v)
Definition vec4.h:711
__forceinline scalar length3(const vec4 &v)
Definition vec4.h:395
static const __m128i _sign
Definition vec3.h:36
__forceinline vec3 hermite(const vec3 &v1, const vec3 &t1, const vec3 &v2, const vec3 &t2, scalar s)
Definition vec3.h:484
__forceinline point equal(const point &v0, const point &v1)
Definition point.h:519
__forceinline vec3 splat_x(const vec3 &v)
Definition vec3.h:801
__forceinline vec3 splat(const vec3 &v, uint element)
Definition vec3.h:776
mat4 reflect(const vec4 &p)
based on this http://www.opengl.org/discussion_boards/showthread.php/169605-reflection-matrix-how-to-...
Definition mat4.cc:22
__forceinline vec3 multiply(const vec3 &v0, const vec3 &v1)
Definition vec3.h:375
__forceinline scalar angle(const vec3 &v0, const vec3 &v1)
Definition vec3.h:508
__forceinline vec3 divide(const vec3 &v0, const vec3 &v1)
Definition vec3.h:397
static const unsigned int PERMUTE_0W
Definition vec4.h:1055
static const unsigned int PERMUTE_1Y
Definition vec4.h:1057
__forceinline bool less3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:887
static const unsigned int PERMUTE_1X
Definition vec4.h:1056
__forceinline vec3 reciprocal(const vec3 &v)
Definition vec3.h:357
__forceinline __m128 swizzle(__m128 v)
Definition sse.h:83
__forceinline scalar length(const quat &q)
Definition quat.h:259
__forceinline scalar lengthsq(const quat &q)
Definition quat.h:268
__forceinline scalar dot(const plane &p, const vec4 &v1)
Definition plane.h:246
__forceinline vec3 permute(const vec3 &v0, const vec3 &v1, unsigned int i0, unsigned int i1, unsigned int i2)
Definition vec3.h:834
__forceinline bool less3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:876
__forceinline plane normalize(const plane &p)
Definition plane.h:255
__forceinline float ceil(float val)
Floating point ceiling.
Definition scalar.h:523
__forceinline bool greaterequal_all(const point &v0, const point &v1)
Definition point.h:463
__forceinline bool greater_any(const point &v0, const point &v1)
Definition point.h:430
__forceinline float lerp(float x, float y, float l)
Linearly interpolate between 2 values: ret = x + l * (y - x)
Definition scalar.h:597
static const unsigned int PERMUTE_1Z
Definition vec4.h:1058
__forceinline float clamp(float val, float minVal, float maxVal)
Float clamping.
Definition scalar.h:487
half operator-(half one, half two)
Definition half.h:114
__forceinline bool less_any(const point &v0, const point &v1)
Definition point.h:386
__forceinline bool nearequal3(const vec4 &v0, const vec4 &v1, const vec4 &epsilon)
Definition vec4.h:986
__forceinline vec3 catmullrom(const vec3 &v0, const vec3 &v1, const vec3 &v2, const vec3 &v3, scalar s)
Definition vec3.h:460
__forceinline bool greater3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:920
__forceinline TYPE min(TYPE a, TYPE b)
Definition scalar.h:390
__forceinline bool less_all(const point &v0, const point &v1)
Definition point.h:397
__forceinline bool lessequal_all(const point &v0, const point &v1)
Definition point.h:419
__forceinline vec3 select(const vec3 &v0, const vec3 &v1, const uint i0, const uint i1, const uint i2)
Definition vec3.h:857
__forceinline vec3 splat_y(const vec3 &v)
Definition vec3.h:812
static const __m128 _minus1
Definition vec3.h:33
__forceinline point minimize(const point &v0, const point &v1)
Definition point.h:377
float scalar
Definition scalar.h:45
__forceinline vec4 normalize3(const vec4 &v)
Definition vec4.h:675
__forceinline bool greater3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:931
static const unsigned int PERMUTE_0X
Definition vec4.h:1052
__forceinline bool lessequal3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:898
__forceinline scalar lengthsq3(const vec4 &v)
Definition vec4.h:413
__forceinline point greater(const point &v0, const point &v1)
Definition point.h:510
half operator+(half one, half two)
Definition half.h:105
half operator*(half one, half two)
Definition half.h:123
__forceinline TYPE max(TYPE a, TYPE b)
Definition scalar.h:359
__forceinline scalar abs(scalar a)
Definition scalar.h:432
__forceinline vec3 multiplyadd(const vec3 &v0, const vec3 &v1, const vec3 &v2)
Definition vec3.h:384
__forceinline vec4 cross3(const vec4 &v0, const vec4 &v1)
Definition vec4.h:482
static const unsigned int PERMUTE_1W
Definition vec4.h:1059
__forceinline scalar acos(scalar x)
Definition scalar.h:218
__forceinline bool greater_all(const point &v0, const point &v1)
Definition point.h:441
__forceinline bool nearequal(const point &v0, const point &v1, float epsilon)
Definition point.h:485
static const __m128 _mask_xyz
Definition vec3.h:37
__forceinline bool lessequal_any(const point &v0, const point &v1)
Definition point.h:408
__forceinline bool greaterequal3_any(const vec4 &v0, const vec4 &v1)
Definition vec4.h:942
__forceinline vec3 xyz(const point &v)
Definition point.h:528
static const unsigned int PERMUTE_0Z
Definition vec4.h:1054
__forceinline float floor(float val)
Floating point flooring.
Definition scalar.h:533
__forceinline vec3 normalizeapprox(const vec3 &v)
Definition vec3.h:592
__forceinline vec4 splat_w(const vec4 &v)
Definition vec4.h:1047
__forceinline bool equal3_all(const vec4 &v0, const vec4 &v1)
Definition vec4.h:975
static const __m128 _plus1
Definition vec3.h:34
__forceinline vec3 reciprocalapprox(const vec3 &v)
Definition vec3.h:366
Nebula's scalar datatype.
A 3D vector.
Definition vec3.h:40
__m128 vec
Definition vec3.h:96
A 4D vector.
Definition vec4.h:24
void loadu(const scalar *ptr)
load content from unaligned memory
Definition vec4.h:181
void store(scalar *ptr) const
write content to 16-byte-aligned memory through the write cache
Definition vec4.h:191
void stream(scalar *ptr) const
stream content to 16-byte-aligned memory circumventing the write-cache
Definition vec4.h:234
scalar & operator[](const int index)
read/write access to indexed component
Definition vec4.h:320
float y
Definition vec4.h:93
void operator-=(const vec4 &rhs)
inplace sub
Definition vec4.h:282
float z
Definition vec4.h:93
void operator*=(scalar s)
inplace scalar multiply
Definition vec4.h:291
vec4()=default
default constructor, NOTE: does NOT set up components!
void store3(scalar *ptr) const
write the first three components (x, y, z) to 16-byte-aligned memory through the write cache
Definition vec4.h:210
vec4 swizzle(const vec4 &v)
swizzle vector
Definition vec4.h:311
__m128 vec
Definition vec4.h:95
float v[4]
Definition vec4.h:96
bool operator==(const vec4 &rhs) const
equality operator
Definition vec4.h:150
bool operator!=(const vec4 &rhs) const
inequality operator
Definition vec4.h:160
void storeu3(scalar *ptr) const
write the first three components (x, y, z) to unaligned memory through the write cache
Definition vec4.h:221
vec4(const vec4 &rhs)=default
copy constructor
void operator=(const __m128 &rhs)
assign from an __m128 value
Definition vec4.h:141
float w
Definition vec4.h:93
void load_float3(const void *ptr, float w)
load 3 floats into x,y,z from unaligned memory
Definition vec4.h:243
void storeu(scalar *ptr) const
write content to unaligned memory through the write cache
Definition vec4.h:201
void set(scalar x, scalar y, scalar z, scalar w)
set content
Definition vec4.h:301
float x
Definition vec4.h:93
void load(const scalar *ptr)
load content from 16-byte-aligned memory
Definition vec4.h:171
void operator/=(const vec4 &rhs)
divide by a vector component-wise
Definition vec4.h:264
void operator+=(const vec4 &rhs)
inplace add
Definition vec4.h:273
bool operator==(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:272
bool operator!=(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:282
#define NEBULA_ALIGN16
Definition types.h:181
unsigned int uint
Definition types.h:31