Nebula
Loading...
Searching...
No Matches
vec3.h
Go to the documentation of this file.
1#pragma once
2//------------------------------------------------------------------------------
16#include "core/types.h"
17#include "math/scalar.h"
18#include <xmmintrin.h>
19#include <emmintrin.h>
20#include <smmintrin.h>
21#include <immintrin.h>
22
23//------------------------------------------------------------------------------
24namespace Math
25{
26struct mat4;
27struct vec3;
28
// Unit vectors: exactly one lane is 1.0f, the others are 0.0f.
static const __m128 _id_x = _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f);
static const __m128 _id_y = _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f);
static const __m128 _id_z = _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f);
static const __m128 _id_w = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f);
// Broadcast constants.
static const __m128 _minus1 = _mm_set1_ps(-1.0f);
static const __m128 _plus1 = _mm_set1_ps(1.0f);
static const __m128 _zero = _mm_setzero_ps();
// IEEE-754 sign bit in every lane (XOR with it flips the sign).
static const __m128i _sign = _mm_set1_epi32(static_cast<int>(0x80000000));
// All bits set in lanes x, y, z and none in w (AND with it clears w).
static const __m128 _mask_xyz = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, -1, 0));
38
40{
41public:
43 vec3() = default;
45 vec3(scalar x, scalar y, scalar z);
47 explicit vec3(scalar v);
49 vec3(const vec3& rhs) = default;
51 vec3(const __m128& rhs);
52
54 void operator=(const __m128& rhs);
56 void operator+=(const vec3& rhs);
58 void operator-=(const vec3& rhs);
60 void operator*=(scalar s);
62 void operator*=(const vec3& rhs);
64 void operator/=(const vec3& rhs);
66 bool operator==(const vec3& rhs) const;
68 bool operator!=(const vec3& rhs) const;
69
71 void load(const scalar* ptr);
73 void loadu(const scalar* ptr);
75 void store(scalar* ptr) const;
77 void storeu(scalar* ptr) const;
79 void stream(scalar* ptr) const;
80
82 void set(scalar x, scalar y, scalar z);
83
85 scalar& operator[](const int index);
87 scalar operator[](const int index) const;
88
89 union
90 {
91 struct
92 {
93 // we can access __w to check it, but we don't actually use it
94 float x, y, z, __w;
95 };
96 __m128 vec;
97 float v[3];
98 };
99};
100
101//------------------------------------------------------------------------------
104__forceinline
106{
107 this->vec = _mm_setr_ps(x, y, z, 0);
108}
109
110//------------------------------------------------------------------------------
113__forceinline
115{
116 this->vec = _mm_setr_ps(v, v, v, 0.0f);
117}
118
119//------------------------------------------------------------------------------
122__forceinline
123vec3::vec3(const __m128& rhs)
124{
125 this->vec = _mm_insert_ps(rhs, _id_w, 0b111000);
126}
127
128//------------------------------------------------------------------------------
131__forceinline void
132vec3::operator=(const __m128& rhs)
133{
134 this->vec = _mm_insert_ps(rhs, _id_w, 0b111000);
135}
136
137//------------------------------------------------------------------------------
140__forceinline bool
141vec3::operator==(const vec3& rhs) const
142{
143 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
144 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
145}
146
147//------------------------------------------------------------------------------
150__forceinline bool
151vec3::operator!=(const vec3 &rhs) const
152{
153 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
154 return ((_mm_movemask_ps(vTemp)==0x0f) == 0);
155}
156
157//------------------------------------------------------------------------------
161__forceinline void
163{
164 this->vec = _mm_load_ps(ptr);
165 this->vec = _mm_and_ps(this->vec, _mask_xyz);
166}
167
168//------------------------------------------------------------------------------
172__forceinline void
174{
175 this->vec = _mm_loadu_ps(ptr);
176 this->vec = _mm_and_ps(this->vec, _mask_xyz);
177}
178
179//------------------------------------------------------------------------------
183__forceinline void
185{
186 __m128 vv = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
187 _mm_storel_epi64(reinterpret_cast<__m128i*>(ptr), _mm_castps_si128(this->vec));
188 _mm_store_ss(&ptr[2], vv);
189}
190
191//------------------------------------------------------------------------------
195__forceinline void
197{
198 __m128 t1 = _mm_permute_ps(this->vec, _MM_SHUFFLE(1, 1, 1, 1));
199 __m128 t2 = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
200 _mm_store_ss(&ptr[0], this->vec);
201 _mm_store_ss(&ptr[1], t1);
202 _mm_store_ss(&ptr[2], t2);
203}
204
205//------------------------------------------------------------------------------
208__forceinline void
210{
211 this->store(ptr);
212}
213
214//------------------------------------------------------------------------------
217__forceinline vec3
218operator-(const vec3& lhs)
219{
220 return vec3(_mm_xor_ps(_mm_castsi128_ps(_sign), lhs.vec));
221}
222
223//------------------------------------------------------------------------------
226__forceinline vec3
227operator*(const vec3& lhs, scalar t)
228{
229 __m128 temp = _mm_set1_ps(t);
230 return _mm_mul_ps(lhs.vec, temp);
231}
232
233//------------------------------------------------------------------------------
236__forceinline vec3
237operator*(const vec3& lhs, const vec3& rhs)
238{
239 return _mm_mul_ps(lhs.vec, rhs.vec);
240}
241
242//------------------------------------------------------------------------------
245__forceinline void
247{
248 this->vec = _mm_mul_ps(this->vec, rhs.vec);
249}
250
251//------------------------------------------------------------------------------
254__forceinline void
256{
257 this->vec = _mm_div_ps(this->vec, rhs.vec);
258}
259
260//------------------------------------------------------------------------------
263__forceinline void
265{
266 this->vec = _mm_add_ps(this->vec, rhs.vec);
267}
268
269//------------------------------------------------------------------------------
272__forceinline void
274{
275 this->vec = _mm_sub_ps(this->vec, rhs.vec);
276}
277
278//------------------------------------------------------------------------------
281__forceinline void
283{
284 __m128 temp = _mm_set1_ps(s);
285 this->vec = _mm_mul_ps(this->vec, temp);
286}
287
288//------------------------------------------------------------------------------
291__forceinline vec3
292operator+(const vec3& lhs, const vec3 &rhs)
293{
294 return _mm_add_ps(lhs.vec, rhs.vec);
295}
296
297//------------------------------------------------------------------------------
300__forceinline vec3
301operator-(const vec3& lhs, const vec3& rhs)
302{
303 return _mm_sub_ps(lhs.vec, rhs.vec);
304}
305
306//------------------------------------------------------------------------------
309__forceinline void
311{
312 this->vec = _mm_setr_ps(x, y, z, 0);
313}
314
315//------------------------------------------------------------------------------
318__forceinline scalar&
319vec3::operator[]( const int index )
320{
321 n_assert(index < 3);
322 return this->v[index];
323}
324
325//------------------------------------------------------------------------------
328__forceinline scalar
329vec3::operator[](const int index) const
330{
331 n_assert(index < 3);
332 return this->v[index];
333}
334
335//------------------------------------------------------------------------------
338__forceinline scalar
339length(const vec3& v)
340{
341 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.vec, v.vec, 0x71)));
342}
343
344//------------------------------------------------------------------------------
347__forceinline scalar
348lengthsq(const vec3& v)
349{
350 return _mm_cvtss_f32(_mm_dp_ps(v.vec, v.vec, 0x71));
351}
352
353//------------------------------------------------------------------------------
356__forceinline vec3
358{
359 return _mm_div_ps(_plus1, v.vec);
360}
361
362//------------------------------------------------------------------------------
365__forceinline vec3
367{
368 return _mm_rcp_ps(v.vec);
369}
370
371//------------------------------------------------------------------------------
374__forceinline vec3
375multiply(const vec3& v0, const vec3& v1)
376{
377 return _mm_mul_ps(v0.vec, v1.vec);
378}
379
380//------------------------------------------------------------------------------
383__forceinline vec3
384multiplyadd( const vec3& v0, const vec3& v1, const vec3& v2 )
385{
386#if NEBULA_MATH_FMA
387 return _mm_fmadd_ps(v0.vec, v1.vec, v2.vec);
388#else
389 return _mm_add_ps(_mm_mul_ps(v0.vec, v1.vec),v2.vec);
390#endif
391}
392
393//------------------------------------------------------------------------------
396__forceinline vec3
397divide(const vec3& v0, const vec3& v1)
398{
399 return _mm_div_ps(v0.vec, v1.vec);
400}
401
402//------------------------------------------------------------------------------
405__forceinline vec3
406abs(const vec3& v)
407{
408 unsigned int val = 0x7fffffff;
409 __m128 temp = _mm_set1_ps(*(float*)&val);
410 return _mm_and_ps(v.vec, temp);
411}
412
413//------------------------------------------------------------------------------
416__forceinline vec3
417cross(const vec3& v0, const vec3& v1)
418{
419 __m128 tmp0, tmp1, tmp2, tmp3, result;
420 tmp0 = _mm_shuffle_ps( v0.vec, v0.vec, _MM_SHUFFLE(3,0,2,1) );
421 tmp1 = _mm_shuffle_ps( v1.vec, v1.vec, _MM_SHUFFLE(3,1,0,2) );
422 tmp2 = _mm_shuffle_ps( v0.vec, v0.vec, _MM_SHUFFLE(3,1,0,2) );
423 tmp3 = _mm_shuffle_ps( v1.vec, v1.vec, _MM_SHUFFLE(3,0,2,1) );
424 result = _mm_mul_ps( tmp0, tmp1 );
425 result = _mm_sub_ps( result, _mm_mul_ps( tmp2, tmp3 ) );
426 return result;
427}
428
429//------------------------------------------------------------------------------
432__forceinline scalar
433dot(const vec3& v0, const vec3& v1)
434{
435 return _mm_cvtss_f32(_mm_dp_ps(v0.vec, v1.vec, 0x71));
436}
437
438//------------------------------------------------------------------------------
442__forceinline vec3
443barycentric(const vec3& v0, const vec3 &v1, const vec3 &v2, scalar f, scalar g)
444{
445 __m128 R1 = _mm_sub_ps(v1.vec,v0.vec);
446 __m128 SF = _mm_set_ps1(f);
447 __m128 R2 = _mm_sub_ps(v2.vec,v0.vec);
448 __m128 SG = _mm_set_ps1(g);
449 R1 = _mm_mul_ps(R1,SF);
450 R2 = _mm_mul_ps(R2,SG);
451 R1 = _mm_add_ps(R1,v0.vec);
452 R1 = _mm_add_ps(R1,R2);
453 return R1;
454}
455
456//------------------------------------------------------------------------------
459__forceinline vec3
460catmullrom(const vec3& v0, const vec3& v1, const vec3& v2, const vec3& v3, scalar s)
461{
462 scalar s2 = s * s;
463 scalar s3 = s * s2;
464
465 __m128 P0 = _mm_set_ps1((-s3 + 2.0f * s2 - s) * 0.5f);
466 __m128 P1 = _mm_set_ps1((3.0f * s3 - 5.0f * s2 + 2.0f) * 0.5f);
467 __m128 P2 = _mm_set_ps1((-3.0f * s3 + 4.0f * s2 + s) * 0.5f);
468 __m128 P3 = _mm_set_ps1((s3 - s2) * 0.5f);
469
470 P0 = _mm_mul_ps(P0, v0.vec);
471 P1 = _mm_mul_ps(P1, v1.vec);
472 P2 = _mm_mul_ps(P2, v2.vec);
473 P3 = _mm_mul_ps(P3, v3.vec);
474 P0 = _mm_add_ps(P0,P1);
475 P2 = _mm_add_ps(P2,P3);
476 P0 = _mm_add_ps(P0,P2);
477 return P0;
478}
479
480//------------------------------------------------------------------------------
483__forceinline vec3
484hermite(const vec3& v1, const vec3& t1, const vec3& v2, const vec3& t2, scalar s)
485{
486 scalar s2 = s * s;
487 scalar s3 = s * s2;
488
489 __m128 P0 = _mm_set_ps1(2.0f * s3 - 3.0f * s2 + 1.0f);
490 __m128 T0 = _mm_set_ps1(s3 - 2.0f * s2 + s);
491 __m128 P1 = _mm_set_ps1(-2.0f * s3 + 3.0f * s2);
492 __m128 T1 = _mm_set_ps1(s3 - s2);
493
494 __m128 vResult = _mm_mul_ps(P0, v1.vec);
495 __m128 vTemp = _mm_mul_ps(T0, t1.vec);
496 vResult = _mm_add_ps(vResult,vTemp);
497 vTemp = _mm_mul_ps(P1, v2.vec);
498 vResult = _mm_add_ps(vResult,vTemp);
499 vTemp = _mm_mul_ps(T1, t2.vec);
500 vResult = _mm_add_ps(vResult,vTemp);
501 return vResult;
502}
503
504//------------------------------------------------------------------------------
507__forceinline scalar
508angle(const vec3& v0, const vec3& v1)
509{
510
511 __m128 l0 = _mm_mul_ps(v0.vec, v0.vec);
512 l0 = _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(0, 0, 0, 0)),
513 _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l0, l0, _MM_SHUFFLE(2, 2, 2, 2))));
514
515 __m128 l1 = _mm_mul_ps(v1.vec, v1.vec);
516 l1 = _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(0, 0, 0, 0)),
517 _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l1, l1, _MM_SHUFFLE(2, 2, 2, 2))));
518
519 __m128 l = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(0, 0, 0, 0));
520 l = _mm_rsqrt_ps(l);
521 l = _mm_mul_ss(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)));
522
523
524 __m128 dot = _mm_mul_ps(v0.vec, v1.vec);
525 dot = _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(0, 0, 0, 0)),
526 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(1, 1, 1, 1)),
527 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(dot, dot, _MM_SHUFFLE(3, 3, 3, 3)))));
528
529 dot = _mm_mul_ss(dot, l);
530
531 dot = _mm_max_ss(dot, _minus1);
532 dot = _mm_min_ss(dot, _plus1);
533
534 scalar cangle;
535 _mm_store_ss(&cangle, dot);
536 return acos(cangle);
537}
538
539//------------------------------------------------------------------------------
542__forceinline vec3
543lerp(const vec3& v0, const vec3& v1, scalar s)
544{
545 return v0 + ((v1-v0) * s);
546}
547
548//------------------------------------------------------------------------------
551__forceinline vec3
552maximize(const vec3& v0, const vec3& v1)
553{
554 return _mm_max_ps(v0.vec, v1.vec);
555}
556
557//------------------------------------------------------------------------------
560__forceinline vec3
561minimize(const vec3& v0, const vec3& v1)
562{
563 return _mm_min_ps(v0.vec, v1.vec);
564}
565
566//------------------------------------------------------------------------------
569__forceinline vec3
570clamp(const vec3& clamp, const vec3& min, const vec3& max)
571{
572 __m128 temp = _mm_max_ps(min.vec, clamp.vec);
573 temp = _mm_min_ps(temp, max.vec);
574 return vec3(temp);
575}
576
577//------------------------------------------------------------------------------
580__forceinline vec3
582{
583 if (v == vec3(0)) return v;
584 __m128 t = _mm_div_ps(v.vec, _mm_sqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x77)));
585 return _mm_insert_ps(t, v.vec, 0xF0);
586}
587
588//------------------------------------------------------------------------------
591__forceinline vec3
593{
594 if (v == vec3(0)) return v;
595 __m128 t = _mm_rsqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x7f));
596 t = _mm_or_ps(t, _id_w);
597 return _mm_mul_ps(v.vec, t);
598}
599
600//------------------------------------------------------------------------------
603__forceinline vec3
604reflect(const vec3& normal, const vec3& incident)
605{
606 __m128 res = _mm_mul_ps(incident.vec, normal.vec);
607 res = _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(0,0,0,0)),
608 _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(res, res, _MM_SHUFFLE(2,2,2,2))));
609 res = _mm_add_ps(res, res);
610 res = _mm_mul_ps(res, normal.vec);
611 res = _mm_sub_ps(incident.vec,res);
612 return res;
613}
614
615//------------------------------------------------------------------------------
618__forceinline bool
619less_any(const vec3& v0, const vec3& v1)
620{
621 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
622 int res = _mm_movemask_ps(vTemp) & 7;
623 return res != 7;
624}
625
626//------------------------------------------------------------------------------
629__forceinline bool
630less_all(const vec3& v0, const vec3& v1)
631{
632 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
633 int res = _mm_movemask_ps(vTemp) & 7;
634 return res == 0;
635}
636
637//------------------------------------------------------------------------------
640__forceinline bool
641lessequal_any(const vec3& v0, const vec3& v1)
642{
643 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
644 int res = _mm_movemask_ps(vTemp) & 7;
645 return res != 0x7;
646}
647
648//------------------------------------------------------------------------------
651__forceinline bool
652lessequal_all(const vec3& v0, const vec3& v1)
653{
654 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
655 int res = _mm_movemask_ps(vTemp) & 7;
656 return res == 0;
657}
658
659//------------------------------------------------------------------------------
662__forceinline bool
663greater_any(const vec3& v0, const vec3& v1)
664{
665 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
666 int res = _mm_movemask_ps(vTemp) & 7;
667 return res != 0;
668}
669
670//------------------------------------------------------------------------------
673__forceinline bool
674greater_all(const vec3& v0, const vec3& v1)
675{
676 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
677 int res = _mm_movemask_ps(vTemp) & 7;
678 return res == 0x7;
679}
680
681//------------------------------------------------------------------------------
684__forceinline bool
685greaterequal_any(const vec3& v0, const vec3& v1)
686{
687 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
688 int res = _mm_movemask_ps(vTemp) & 7;
689 return res != 0;
690}
691
692//------------------------------------------------------------------------------
695__forceinline bool
696greaterequal_all(const vec3& v0, const vec3& v1)
697{
698 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
699 int res = _mm_movemask_ps(vTemp) & 7;
700 return res == 0x7;
701}
702
703//------------------------------------------------------------------------------
706__forceinline bool
707equal_any(const vec3& v0, const vec3& v1)
708{
709 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
710 int res = _mm_movemask_ps(vTemp) & 7;
711 return res != 0;
712}
713
714//------------------------------------------------------------------------------
717__forceinline bool
718nearequal(const vec3& v0, const vec3& v1, float epsilon)
719{
720 __m128 eps = _mm_setr_ps(epsilon, epsilon, epsilon, 0.0f);
721 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
722 __m128 temp = _mm_setzero_ps();
723 temp = _mm_sub_ps(temp, delta);
724 temp = _mm_max_ps(temp, delta);
725 temp = _mm_cmple_ps(temp, eps);
726 temp = _mm_and_ps(temp, _mask_xyz);
727 return (_mm_movemask_ps(temp) == 0x7) != 0;
728}
729
730//------------------------------------------------------------------------------
733__forceinline bool
734nearequal(const vec3& v0, const vec3& v1, const vec3& epsilon)
735{
736 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
737 __m128 temp = _mm_setzero_ps();
738 temp = _mm_sub_ps(temp, delta);
739 temp = _mm_max_ps(temp, delta);
740 temp = _mm_cmple_ps(temp, epsilon.vec);
741 temp = _mm_and_ps(temp, _mask_xyz);
742 return (_mm_movemask_ps(temp) == 0x7) != 0;
743}
744
745//------------------------------------------------------------------------------
748__forceinline vec3
749less(const vec3& v0, const vec3& v1)
750{
751 return _mm_min_ps(_mm_cmplt_ps(v0.vec, v1.vec), _plus1);
752}
753
754//------------------------------------------------------------------------------
757__forceinline vec3
758greater(const vec3& v0, const vec3& v1)
759{
760 return _mm_min_ps(_mm_cmpgt_ps(v0.vec, v1.vec), _plus1);
761}
762
763//------------------------------------------------------------------------------
766__forceinline vec3
767equal(const vec3& v0, const vec3& v1)
768{
769 return _mm_min_ps(_mm_cmpeq_ps(v0.vec, v1.vec), _plus1);
770}
771
772//------------------------------------------------------------------------------
775__forceinline vec3
776splat(const vec3& v, uint element)
777{
778 n_assert(element < 3 && element >= 0);
779
780 __m128 res;
781 switch (element)
782 {
783 case 0:
784 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
785 break;
786 case 1:
787 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
788 break;
789 case 2:
790 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
791 break;
792 }
793 res = _mm_and_ps(res, _mask_xyz);
794 return res;
795}
796
797//------------------------------------------------------------------------------
800__forceinline vec3
801splat_x(const vec3& v)
802{
803 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
804 res = _mm_and_ps(res, _mask_xyz);
805 return res;
806}
807
808//------------------------------------------------------------------------------
811__forceinline vec3
812splat_y(const vec3& v)
813{
814 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
815 res = _mm_and_ps(res, _mask_xyz);
816 return res;
817}
818
819//------------------------------------------------------------------------------
822__forceinline vec3
823splat_z(const vec3& v)
824{
825 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
826 res = _mm_and_ps(res, _mask_xyz);
827 return res;
828}
829
830//------------------------------------------------------------------------------
833__forceinline vec3
834permute(const vec3& v0, const vec3& v1, unsigned int i0, unsigned int i1, unsigned int i2)
835{
836 static __m128i three = _mm_set_epi32(3,3,3,3);
837
838 NEBULA_ALIGN16 unsigned int elem[4] = { i0, i1, i2, 7 };
839 __m128i vControl = _mm_load_si128(reinterpret_cast<const __m128i*>(&elem[0]));
840
841 __m128i vSelect = _mm_cmpgt_epi32(vControl, three);
842 vControl = _mm_and_si128(vControl, three);
843
844 __m128 shuffled1 = _mm_permutevar_ps(v0.vec, vControl);
845 __m128 shuffled2 = _mm_permutevar_ps(v1.vec, vControl);
846
847 __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1);
848 __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2);
849
850 return _mm_or_ps(masked1, masked2);
851}
852
853//------------------------------------------------------------------------------
856__forceinline vec3
857select(const vec3& v0, const vec3& v1, const uint i0, const uint i1, const uint i2)
858{
859 //FIXME this should be converted to something similiar as XMVectorSelect
860 return permute(v0, v1, i0, i1, i2);
861}
862
863//------------------------------------------------------------------------------
866__forceinline vec3
867select(const vec3& v0, const vec3& v1, const vec3& control)
868{
869 __m128 v0masked = _mm_andnot_ps(control.vec, v0.vec);
870 __m128 v1masked = _mm_and_ps(v1.vec, control.vec);
871 return _mm_or_ps(v0masked, v1masked);
872}
873
874//------------------------------------------------------------------------------
877__forceinline vec3
878floor(const vec3& v)
879{
880 return _mm_floor_ps(v.vec);
881}
882
883//------------------------------------------------------------------------------
886__forceinline vec3
887ceiling(const vec3& v)
888{
889 return _mm_ceil_ps(v.vec);
890}
891
892} // namespace Math
893//------------------------------------------------------------------------------
894
895
896
897
898
899
900
901
#define n_assert(exp)
Definition debug.h:50
Different curves.
Definition angularpfeedbackloop.h:17
static const __m128 _id_z
Definition vec3.h:31
__forceinline point less(const point &v0, const point &v1)
Definition point.h:501
__forceinline vec3 cross(const vec3 &v0, const vec3 &v1)
Definition vec3.h:417
static const __m128 _id_x
Definition vec3.h:29
__forceinline point maximize(const point &v0, const point &v1)
Definition point.h:368
__forceinline quat barycentric(const quat &q0, const quat &q1, const quat &q2, scalar f, scalar g)
Definition quat.h:295
static const __m128 _zero
Definition vec3.h:35
__forceinline bool equal_any(const point &v0, const point &v1)
Definition point.h:474
__forceinline bool greaterequal_any(const point &v0, const point &v1)
Definition point.h:452
__forceinline vec3 ceiling(const vec3 &v)
Definition vec3.h:887
__forceinline vec3 splat_z(const vec3 &v)
Definition vec3.h:823
static const __m128i _sign
Definition vec3.h:36
__forceinline vec3 hermite(const vec3 &v1, const vec3 &t1, const vec3 &v2, const vec3 &t2, scalar s)
Definition vec3.h:484
__forceinline point equal(const point &v0, const point &v1)
Definition point.h:519
__forceinline vec3 splat_x(const vec3 &v)
Definition vec3.h:801
__forceinline vec3 splat(const vec3 &v, uint element)
Definition vec3.h:776
mat4 reflect(const vec4 &p)
based on this http://www.opengl.org/discussion_boards/showthread.php/169605-reflection-matrix-how-to-...
Definition mat4.cc:22
__forceinline vec3 multiply(const vec3 &v0, const vec3 &v1)
Definition vec3.h:375
__forceinline scalar angle(const vec3 &v0, const vec3 &v1)
Definition vec3.h:508
__forceinline vec3 divide(const vec3 &v0, const vec3 &v1)
Definition vec3.h:397
__forceinline vec3 reciprocal(const vec3 &v)
Definition vec3.h:357
__forceinline scalar length(const quat &q)
Definition quat.h:259
__forceinline scalar lengthsq(const quat &q)
Definition quat.h:268
__forceinline scalar dot(const plane &p, const vec4 &v1)
Definition plane.h:246
__forceinline vec3 permute(const vec3 &v0, const vec3 &v1, unsigned int i0, unsigned int i1, unsigned int i2)
Definition vec3.h:834
__forceinline plane normalize(const plane &p)
Definition plane.h:255
__forceinline bool greaterequal_all(const point &v0, const point &v1)
Definition point.h:463
__forceinline bool greater_any(const point &v0, const point &v1)
Definition point.h:430
__forceinline float lerp(float x, float y, float l)
Linearly interpolate between 2 values: ret = x + l * (y - x)
Definition scalar.h:597
__forceinline float clamp(float val, float minVal, float maxVal)
Float clamping.
Definition scalar.h:487
half operator-(half one, half two)
Definition half.h:114
__forceinline bool less_any(const point &v0, const point &v1)
Definition point.h:386
__forceinline vec3 catmullrom(const vec3 &v0, const vec3 &v1, const vec3 &v2, const vec3 &v3, scalar s)
Definition vec3.h:460
static const __m128 _id_w
Definition vec3.h:32
__forceinline TYPE min(TYPE a, TYPE b)
Definition scalar.h:390
__forceinline bool less_all(const point &v0, const point &v1)
Definition point.h:397
__forceinline bool lessequal_all(const point &v0, const point &v1)
Definition point.h:419
__forceinline vec3 select(const vec3 &v0, const vec3 &v1, const uint i0, const uint i1, const uint i2)
Definition vec3.h:857
__forceinline vec3 splat_y(const vec3 &v)
Definition vec3.h:812
static const __m128 _minus1
Definition vec3.h:33
__forceinline point minimize(const point &v0, const point &v1)
Definition point.h:377
float scalar
Definition scalar.h:45
__forceinline point greater(const point &v0, const point &v1)
Definition point.h:510
half operator+(half one, half two)
Definition half.h:105
half operator*(half one, half two)
Definition half.h:123
__forceinline TYPE max(TYPE a, TYPE b)
Definition scalar.h:359
__forceinline scalar abs(scalar a)
Definition scalar.h:432
__forceinline vec3 multiplyadd(const vec3 &v0, const vec3 &v1, const vec3 &v2)
Definition vec3.h:384
__forceinline scalar acos(scalar x)
Definition scalar.h:218
__forceinline bool greater_all(const point &v0, const point &v1)
Definition point.h:441
__forceinline bool nearequal(const point &v0, const point &v1, float epsilon)
Definition point.h:485
static const __m128 _mask_xyz
Definition vec3.h:37
static const __m128 _id_y
Definition vec3.h:30
__forceinline bool lessequal_any(const point &v0, const point &v1)
Definition point.h:408
__forceinline float floor(float val)
Floating point flooring.
Definition scalar.h:533
__forceinline vec3 normalizeapprox(const vec3 &v)
Definition vec3.h:592
static const __m128 _plus1
Definition vec3.h:34
__forceinline vec3 reciprocalapprox(const vec3 &v)
Definition vec3.h:366
Nebula's scalar datatype.
A 3D vector.
Definition vec3.h:40
void loadu(const scalar *ptr)
load content from unaligned memory
Definition vec3.h:173
void stream(scalar *ptr) const
stream content to 16-byte-aligned memory circumventing the write-cache
Definition vec3.h:209
float v[3]
Definition vec3.h:97
bool operator==(const vec3 &rhs) const
equality operator
Definition vec3.h:141
void storeu(scalar *ptr) const
write content to unaligned memory through the write cache
Definition vec3.h:196
vec3()=default
default constructor, NOTE: does NOT setup components!
void load(const scalar *ptr)
load content from 16-byte-aligned memory
Definition vec3.h:162
void operator*=(scalar s)
inplace scalar multiply
Definition vec3.h:282
void operator-=(const vec3 &rhs)
inplace sub
Definition vec3.h:273
void store(scalar *ptr) const
write content to 16-byte-aligned memory through the write cache
Definition vec3.h:184
float x
Definition vec3.h:94
float z
Definition vec3.h:94
void operator/=(const vec3 &rhs)
divide by a vector component-wise
Definition vec3.h:255
float __w
Definition vec3.h:94
__m128 vec
Definition vec3.h:96
void operator+=(const vec3 &rhs)
inplace add
Definition vec3.h:264
scalar & operator[](const int index)
read-only access to indexed component
Definition vec3.h:319
bool operator!=(const vec3 &rhs) const
inequality operator
Definition vec3.h:151
void set(scalar x, scalar y, scalar z)
set content
Definition vec3.h:310
vec3(const vec3 &rhs)=default
copy constructor
float y
Definition vec3.h:94
void operator=(const __m128 &rhs)
assign from a raw __m128 register (the w lane is forced to zero)
Definition vec3.h:132
bool operator==(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:272
bool operator!=(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:282
#define NEBULA_ALIGN16
Definition types.h:154
unsigned int uint
Definition types.h:31