Nebula
Loading...
Searching...
No Matches
vec3.h
Go to the documentation of this file.
1#pragma once
2//------------------------------------------------------------------------------
16#include "core/types.h"
17#include "math/scalar.h"
18#include <xmmintrin.h>
19#include <emmintrin.h>
20#include <smmintrin.h>
21#include <immintrin.h>
22
23//------------------------------------------------------------------------------
24namespace Math
25{
26struct mat4;
27struct vec3;
28
// Shared SSE constants. Lane order is (x, y, z, w), i.e. _mm_setr_* argument order.

// unit axes: 1.0f in exactly one lane
static const __m128 _id_x = _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f);
static const __m128 _id_y = _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f);
static const __m128 _id_z = _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f);
static const __m128 _id_w = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f);

// the same value in all four lanes
static const __m128 _minus1 = _mm_set1_ps(-1.0f);
static const __m128 _plus1 = _mm_set1_ps(1.0f);
static const __m128 _zero = _mm_setzero_ps();

// IEEE-754 sign bit in every 32-bit lane
static const __m128i _sign = _mm_set1_epi32(0x80000000);
// all bits set in x, y and z, all bits clear in w
static const __m128 _mask_xyz = _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
38
40{
41public:
43 vec3() = default;
45 vec3(scalar x, scalar y, scalar z);
47 vec3(float3 f3);
49 explicit vec3(scalar v);
51 vec3(const vec3& rhs) = default;
53 vec3(const __m128& rhs);
54
56 void operator=(const __m128& rhs);
58 void operator+=(const vec3& rhs);
60 void operator-=(const vec3& rhs);
62 void operator*=(scalar s);
64 void operator*=(const vec3& rhs);
66 void operator/=(const vec3& rhs);
68 bool operator==(const vec3& rhs) const;
70 bool operator!=(const vec3& rhs) const;
71
73 void load(const scalar* ptr);
75 void loadu(const scalar* ptr);
77 void store(scalar* ptr) const;
79 void storeu(scalar* ptr) const;
81 void stream(scalar* ptr) const;
82
84 void set(scalar x, scalar y, scalar z);
85
87 scalar& operator[](const int index);
89 scalar operator[](const int index) const;
90
91 union
92 {
93 struct
94 {
95 // we can access __w to check it, but we don't actually use it
96 float x, y, z, __w;
97 };
98 __m128 vec;
99 float v[3];
100 };
101};
102
103//------------------------------------------------------------------------------
106__forceinline
108{
109 this->vec = _mm_setr_ps(x, y, z, 0);
110}
111
112//------------------------------------------------------------------------------
115__forceinline
117{
118 this->vec = _mm_setr_ps(f3.x, f3.y, f3.z, 0);
119}
120
121//------------------------------------------------------------------------------
124__forceinline
126{
127 this->vec = _mm_setr_ps(v, v, v, 0.0f);
128}
129
130//------------------------------------------------------------------------------
133__forceinline
134vec3::vec3(const __m128& rhs)
135{
136 this->vec = _mm_insert_ps(rhs, _id_w, 0b111000);
137}
138
139//------------------------------------------------------------------------------
142__forceinline void
143vec3::operator=(const __m128& rhs)
144{
145 this->vec = _mm_insert_ps(rhs, _id_w, 0b111000);
146}
147
148//------------------------------------------------------------------------------
151__forceinline bool
152vec3::operator==(const vec3& rhs) const
153{
154 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
155 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
156}
157
158//------------------------------------------------------------------------------
161__forceinline bool
162vec3::operator!=(const vec3 &rhs) const
163{
164 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
165 return ((_mm_movemask_ps(vTemp)==0x0f) == 0);
166}
167
168//------------------------------------------------------------------------------
172__forceinline void
174{
175 this->vec = _mm_load_ps(ptr);
176 this->vec = _mm_and_ps(this->vec, _mask_xyz);
177}
178
179//------------------------------------------------------------------------------
183__forceinline void
185{
186 this->vec = _mm_loadu_ps(ptr);
187 this->vec = _mm_and_ps(this->vec, _mask_xyz);
188}
189
190//------------------------------------------------------------------------------
194__forceinline void
196{
197 __m128 vv = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
198 _mm_storel_epi64(reinterpret_cast<__m128i*>(ptr), _mm_castps_si128(this->vec));
199 _mm_store_ss(&ptr[2], vv);
200}
201
202//------------------------------------------------------------------------------
206__forceinline void
208{
209 __m128 t1 = _mm_permute_ps(this->vec, _MM_SHUFFLE(1, 1, 1, 1));
210 __m128 t2 = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
211 _mm_store_ss(&ptr[0], this->vec);
212 _mm_store_ss(&ptr[1], t1);
213 _mm_store_ss(&ptr[2], t2);
214}
215
216//------------------------------------------------------------------------------
219__forceinline void
221{
222 this->store(ptr);
223}
224
225//------------------------------------------------------------------------------
228__forceinline vec3
229operator-(const vec3& lhs)
230{
231 return vec3(_mm_xor_ps(_mm_castsi128_ps(_sign), lhs.vec));
232}
233
234//------------------------------------------------------------------------------
238__forceinline vec3
239operator*(const vec3& lhs, scalar t)
240{
241 __m128 temp = _mm_set1_ps(t);
242 return _mm_mul_ps(lhs.vec, temp);
243}
244
245//------------------------------------------------------------------------------
249__forceinline vec3
250operator*(const vec3& lhs, const vec3& rhs)
251{
252 return _mm_mul_ps(lhs.vec, rhs.vec);
253}
254
255//------------------------------------------------------------------------------
259__forceinline vec3
260operator/(const vec3& lhs, scalar t)
261{
262 __m128 temp = _mm_set1_ps(t);
263 return _mm_div_ps(lhs.vec, temp);
264}
265
266//------------------------------------------------------------------------------
269__forceinline void
271{
272 this->vec = _mm_mul_ps(this->vec, rhs.vec);
273}
274
275//------------------------------------------------------------------------------
278__forceinline void
280{
281 this->vec = _mm_div_ps(this->vec, rhs.vec);
282}
283
284//------------------------------------------------------------------------------
287__forceinline void
289{
290 this->vec = _mm_add_ps(this->vec, rhs.vec);
291}
292
293//------------------------------------------------------------------------------
296__forceinline void
298{
299 this->vec = _mm_sub_ps(this->vec, rhs.vec);
300}
301
302//------------------------------------------------------------------------------
305__forceinline void
307{
308 __m128 temp = _mm_set1_ps(s);
309 this->vec = _mm_mul_ps(this->vec, temp);
310}
311
312//------------------------------------------------------------------------------
315__forceinline vec3
316operator+(const vec3& lhs, const vec3 &rhs)
317{
318 return _mm_add_ps(lhs.vec, rhs.vec);
319}
320
321//------------------------------------------------------------------------------
324__forceinline vec3
325operator-(const vec3& lhs, const vec3& rhs)
326{
327 return _mm_sub_ps(lhs.vec, rhs.vec);
328}
329
330//------------------------------------------------------------------------------
333__forceinline void
335{
336 this->vec = _mm_setr_ps(x, y, z, 0);
337}
338
339//------------------------------------------------------------------------------
342__forceinline scalar&
343vec3::operator[]( const int index )
344{
345 n_assert(index < 3);
346 return this->v[index];
347}
348
349//------------------------------------------------------------------------------
352__forceinline scalar
353vec3::operator[](const int index) const
354{
355 n_assert(index < 3);
356 return this->v[index];
357}
358
359//------------------------------------------------------------------------------
362__forceinline scalar
363length(const vec3& v)
364{
365 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.vec, v.vec, 0x71)));
366}
367
368//------------------------------------------------------------------------------
371__forceinline scalar
372lengthsq(const vec3& v)
373{
374 return _mm_cvtss_f32(_mm_dp_ps(v.vec, v.vec, 0x71));
375}
376
377//------------------------------------------------------------------------------
380__forceinline vec3
382{
383 return _mm_div_ps(_plus1, v.vec);
384}
385
386//------------------------------------------------------------------------------
389__forceinline vec3
391{
392 return _mm_rcp_ps(v.vec);
393}
394
395//------------------------------------------------------------------------------
398__forceinline vec3
399multiply(const vec3& v0, const vec3& v1)
400{
401 return _mm_mul_ps(v0.vec, v1.vec);
402}
403
404//------------------------------------------------------------------------------
407__forceinline vec3
408multiplyadd( const vec3& v0, const vec3& v1, const vec3& v2 )
409{
410#if NEBULA_MATH_FMA
411 return _mm_fmadd_ps(v0.vec, v1.vec, v2.vec);
412#else
413 return _mm_add_ps(_mm_mul_ps(v0.vec, v1.vec),v2.vec);
414#endif
415}
416
417//------------------------------------------------------------------------------
420__forceinline vec3
421divide(const vec3& v0, const vec3& v1)
422{
423 return _mm_div_ps(v0.vec, v1.vec);
424}
425
426//------------------------------------------------------------------------------
429__forceinline vec3
430abs(const vec3& v)
431{
432 unsigned int val = 0x7fffffff;
433 __m128 temp = _mm_set1_ps(*(float*)&val);
434 return _mm_and_ps(v.vec, temp);
435}
436
437//------------------------------------------------------------------------------
440__forceinline vec3
441cross(const vec3& v0, const vec3& v1)
442{
443 __m128 tmp0, tmp1, tmp2, tmp3, result;
444 tmp0 = _mm_shuffle_ps( v0.vec, v0.vec, _MM_SHUFFLE(3,0,2,1) );
445 tmp1 = _mm_shuffle_ps( v1.vec, v1.vec, _MM_SHUFFLE(3,1,0,2) );
446 tmp2 = _mm_shuffle_ps( v0.vec, v0.vec, _MM_SHUFFLE(3,1,0,2) );
447 tmp3 = _mm_shuffle_ps( v1.vec, v1.vec, _MM_SHUFFLE(3,0,2,1) );
448 result = _mm_mul_ps( tmp0, tmp1 );
449 result = _mm_sub_ps( result, _mm_mul_ps( tmp2, tmp3 ) );
450 return result;
451}
452
453//------------------------------------------------------------------------------
456__forceinline scalar
457dot(const vec3& v0, const vec3& v1)
458{
459 return _mm_cvtss_f32(_mm_dp_ps(v0.vec, v1.vec, 0x71));
460}
461
462//------------------------------------------------------------------------------
466__forceinline vec3
467barycentric(const vec3& v0, const vec3 &v1, const vec3 &v2, scalar f, scalar g)
468{
469 __m128 R1 = _mm_sub_ps(v1.vec,v0.vec);
470 __m128 SF = _mm_set_ps1(f);
471 __m128 R2 = _mm_sub_ps(v2.vec,v0.vec);
472 __m128 SG = _mm_set_ps1(g);
473 R1 = _mm_mul_ps(R1,SF);
474 R2 = _mm_mul_ps(R2,SG);
475 R1 = _mm_add_ps(R1,v0.vec);
476 R1 = _mm_add_ps(R1,R2);
477 return R1;
478}
479
480//------------------------------------------------------------------------------
483__forceinline vec3
484catmullrom(const vec3& v0, const vec3& v1, const vec3& v2, const vec3& v3, scalar s)
485{
486 scalar s2 = s * s;
487 scalar s3 = s * s2;
488
489 __m128 P0 = _mm_set_ps1((-s3 + 2.0f * s2 - s) * 0.5f);
490 __m128 P1 = _mm_set_ps1((3.0f * s3 - 5.0f * s2 + 2.0f) * 0.5f);
491 __m128 P2 = _mm_set_ps1((-3.0f * s3 + 4.0f * s2 + s) * 0.5f);
492 __m128 P3 = _mm_set_ps1((s3 - s2) * 0.5f);
493
494 P0 = _mm_mul_ps(P0, v0.vec);
495 P1 = _mm_mul_ps(P1, v1.vec);
496 P2 = _mm_mul_ps(P2, v2.vec);
497 P3 = _mm_mul_ps(P3, v3.vec);
498 P0 = _mm_add_ps(P0,P1);
499 P2 = _mm_add_ps(P2,P3);
500 P0 = _mm_add_ps(P0,P2);
501 return P0;
502}
503
504//------------------------------------------------------------------------------
507__forceinline vec3
508hermite(const vec3& v1, const vec3& t1, const vec3& v2, const vec3& t2, scalar s)
509{
510 scalar s2 = s * s;
511 scalar s3 = s * s2;
512
513 __m128 P0 = _mm_set_ps1(2.0f * s3 - 3.0f * s2 + 1.0f);
514 __m128 T0 = _mm_set_ps1(s3 - 2.0f * s2 + s);
515 __m128 P1 = _mm_set_ps1(-2.0f * s3 + 3.0f * s2);
516 __m128 T1 = _mm_set_ps1(s3 - s2);
517
518 __m128 vResult = _mm_mul_ps(P0, v1.vec);
519 __m128 vTemp = _mm_mul_ps(T0, t1.vec);
520 vResult = _mm_add_ps(vResult,vTemp);
521 vTemp = _mm_mul_ps(P1, v2.vec);
522 vResult = _mm_add_ps(vResult,vTemp);
523 vTemp = _mm_mul_ps(T1, t2.vec);
524 vResult = _mm_add_ps(vResult,vTemp);
525 return vResult;
526}
527
528//------------------------------------------------------------------------------
531__forceinline scalar
532angle(const vec3& v0, const vec3& v1)
533{
534
535 __m128 l0 = _mm_mul_ps(v0.vec, v0.vec);
536 l0 = _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(0, 0, 0, 0)),
537 _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l0, l0, _MM_SHUFFLE(2, 2, 2, 2))));
538
539 __m128 l1 = _mm_mul_ps(v1.vec, v1.vec);
540 l1 = _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(0, 0, 0, 0)),
541 _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l1, l1, _MM_SHUFFLE(2, 2, 2, 2))));
542
543 __m128 l = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(0, 0, 0, 0));
544 l = _mm_rsqrt_ps(l);
545 l = _mm_mul_ss(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)));
546
547
548 __m128 dot = _mm_mul_ps(v0.vec, v1.vec);
549 dot = _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(0, 0, 0, 0)),
550 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(1, 1, 1, 1)),
551 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(dot, dot, _MM_SHUFFLE(3, 3, 3, 3)))));
552
553 dot = _mm_mul_ss(dot, l);
554
555 dot = _mm_max_ss(dot, _minus1);
556 dot = _mm_min_ss(dot, _plus1);
557
558 scalar cangle;
559 _mm_store_ss(&cangle, dot);
560 return acos(cangle);
561}
562
563//------------------------------------------------------------------------------
566__forceinline vec3
567lerp(const vec3& v0, const vec3& v1, scalar s)
568{
569 return v0 + ((v1-v0) * s);
570}
571
572//------------------------------------------------------------------------------
575__forceinline vec3
576maximize(const vec3& v0, const vec3& v1)
577{
578 return _mm_max_ps(v0.vec, v1.vec);
579}
580
581//------------------------------------------------------------------------------
584__forceinline vec3
585minimize(const vec3& v0, const vec3& v1)
586{
587 return _mm_min_ps(v0.vec, v1.vec);
588}
589
590//------------------------------------------------------------------------------
593__forceinline vec3
594clamp(const vec3& clamp, const vec3& min, const vec3& max)
595{
596 __m128 temp = _mm_max_ps(min.vec, clamp.vec);
597 temp = _mm_min_ps(temp, max.vec);
598 return vec3(temp);
599}
600
601//------------------------------------------------------------------------------
604__forceinline vec3
606{
607 if (v == vec3(0)) return v;
608 __m128 t = _mm_div_ps(v.vec, _mm_sqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x77)));
609 return _mm_insert_ps(t, v.vec, 0xF0);
610}
611
612//------------------------------------------------------------------------------
615__forceinline vec3
617{
618 if (v == vec3(0)) return v;
619 __m128 t = _mm_rsqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x7f));
620 t = _mm_or_ps(t, _id_w);
621 return _mm_mul_ps(v.vec, t);
622}
623
624//------------------------------------------------------------------------------
627__forceinline vec3
628reflect(const vec3& normal, const vec3& incident)
629{
630 __m128 res = _mm_mul_ps(incident.vec, normal.vec);
631 res = _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(0,0,0,0)),
632 _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(res, res, _MM_SHUFFLE(2,2,2,2))));
633 res = _mm_add_ps(res, res);
634 res = _mm_mul_ps(res, normal.vec);
635 res = _mm_sub_ps(incident.vec,res);
636 return res;
637}
638
639//------------------------------------------------------------------------------
642__forceinline bool
643less_any(const vec3& v0, const vec3& v1)
644{
645 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
646 int res = _mm_movemask_ps(vTemp) & 7;
647 return res != 7;
648}
649
650//------------------------------------------------------------------------------
653__forceinline bool
654less_all(const vec3& v0, const vec3& v1)
655{
656 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
657 int res = _mm_movemask_ps(vTemp) & 7;
658 return res == 0;
659}
660
661//------------------------------------------------------------------------------
664__forceinline bool
665lessequal_any(const vec3& v0, const vec3& v1)
666{
667 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
668 int res = _mm_movemask_ps(vTemp) & 7;
669 return res != 0x7;
670}
671
672//------------------------------------------------------------------------------
675__forceinline bool
676lessequal_all(const vec3& v0, const vec3& v1)
677{
678 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
679 int res = _mm_movemask_ps(vTemp) & 7;
680 return res == 0;
681}
682
683//------------------------------------------------------------------------------
686__forceinline bool
687greater_any(const vec3& v0, const vec3& v1)
688{
689 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
690 int res = _mm_movemask_ps(vTemp) & 7;
691 return res != 0;
692}
693
694//------------------------------------------------------------------------------
697__forceinline bool
698greater_all(const vec3& v0, const vec3& v1)
699{
700 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
701 int res = _mm_movemask_ps(vTemp) & 7;
702 return res == 0x7;
703}
704
705//------------------------------------------------------------------------------
708__forceinline bool
709greaterequal_any(const vec3& v0, const vec3& v1)
710{
711 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
712 int res = _mm_movemask_ps(vTemp) & 7;
713 return res != 0;
714}
715
716//------------------------------------------------------------------------------
719__forceinline bool
720greaterequal_all(const vec3& v0, const vec3& v1)
721{
722 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
723 int res = _mm_movemask_ps(vTemp) & 7;
724 return res == 0x7;
725}
726
727//------------------------------------------------------------------------------
730__forceinline bool
731equal_any(const vec3& v0, const vec3& v1)
732{
733 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
734 int res = _mm_movemask_ps(vTemp) & 7;
735 return res != 0;
736}
737
738//------------------------------------------------------------------------------
741__forceinline bool
742nearequal(const vec3& v0, const vec3& v1, float epsilon)
743{
744 __m128 eps = _mm_setr_ps(epsilon, epsilon, epsilon, 0.0f);
745 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
746 __m128 temp = _mm_setzero_ps();
747 temp = _mm_sub_ps(temp, delta);
748 temp = _mm_max_ps(temp, delta);
749 temp = _mm_cmple_ps(temp, eps);
750 temp = _mm_and_ps(temp, _mask_xyz);
751 return (_mm_movemask_ps(temp) == 0x7) != 0;
752}
753
754//------------------------------------------------------------------------------
757__forceinline bool
758nearequal(const vec3& v0, const vec3& v1, const vec3& epsilon)
759{
760 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
761 __m128 temp = _mm_setzero_ps();
762 temp = _mm_sub_ps(temp, delta);
763 temp = _mm_max_ps(temp, delta);
764 temp = _mm_cmple_ps(temp, epsilon.vec);
765 temp = _mm_and_ps(temp, _mask_xyz);
766 return (_mm_movemask_ps(temp) == 0x7) != 0;
767}
768
769//------------------------------------------------------------------------------
772__forceinline vec3
773less(const vec3& v0, const vec3& v1)
774{
775 return _mm_min_ps(_mm_cmplt_ps(v0.vec, v1.vec), _plus1);
776}
777
778//------------------------------------------------------------------------------
781__forceinline vec3
782greater(const vec3& v0, const vec3& v1)
783{
784 return _mm_min_ps(_mm_cmpgt_ps(v0.vec, v1.vec), _plus1);
785}
786
787//------------------------------------------------------------------------------
790__forceinline vec3
791equal(const vec3& v0, const vec3& v1)
792{
793 return _mm_min_ps(_mm_cmpeq_ps(v0.vec, v1.vec), _plus1);
794}
795
796//------------------------------------------------------------------------------
799__forceinline vec3
800splat(const vec3& v, uint element)
801{
802 n_assert(element < 3 && element >= 0);
803
804 __m128 res;
805 switch (element)
806 {
807 case 0:
808 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
809 break;
810 case 1:
811 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
812 break;
813 case 2:
814 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
815 break;
816 }
817 res = _mm_and_ps(res, _mask_xyz);
818 return res;
819}
820
821//------------------------------------------------------------------------------
824__forceinline vec3
825splat_x(const vec3& v)
826{
827 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
828 res = _mm_and_ps(res, _mask_xyz);
829 return res;
830}
831
832//------------------------------------------------------------------------------
835__forceinline vec3
836splat_y(const vec3& v)
837{
838 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
839 res = _mm_and_ps(res, _mask_xyz);
840 return res;
841}
842
843//------------------------------------------------------------------------------
846__forceinline vec3
847splat_z(const vec3& v)
848{
849 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
850 res = _mm_and_ps(res, _mask_xyz);
851 return res;
852}
853
854//------------------------------------------------------------------------------
857__forceinline vec3
858permute(const vec3& v0, const vec3& v1, unsigned int i0, unsigned int i1, unsigned int i2)
859{
860 static __m128i three = _mm_set_epi32(3,3,3,3);
861
862 NEBULA_ALIGN16 unsigned int elem[4] = { i0, i1, i2, 7 };
863 __m128i vControl = _mm_load_si128(reinterpret_cast<const __m128i*>(&elem[0]));
864
865 __m128i vSelect = _mm_cmpgt_epi32(vControl, three);
866 vControl = _mm_and_si128(vControl, three);
867
868 __m128 shuffled1 = _mm_permutevar_ps(v0.vec, vControl);
869 __m128 shuffled2 = _mm_permutevar_ps(v1.vec, vControl);
870
871 __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1);
872 __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2);
873
874 return _mm_or_ps(masked1, masked2);
875}
876
877//------------------------------------------------------------------------------
880__forceinline vec3
881select(const vec3& v0, const vec3& v1, const uint i0, const uint i1, const uint i2)
882{
883 //FIXME this should be converted to something similiar as XMVectorSelect
884 return permute(v0, v1, i0, i1, i2);
885}
886
887//------------------------------------------------------------------------------
890__forceinline vec3
891select(const vec3& v0, const vec3& v1, const vec3& control)
892{
893 __m128 v0masked = _mm_andnot_ps(control.vec, v0.vec);
894 __m128 v1masked = _mm_and_ps(v1.vec, control.vec);
895 return _mm_or_ps(v0masked, v1masked);
896}
897
898//------------------------------------------------------------------------------
901__forceinline vec3
902floor(const vec3& v)
903{
904 return _mm_floor_ps(v.vec);
905}
906
907//------------------------------------------------------------------------------
910__forceinline vec3
911ceiling(const vec3& v)
912{
913 return _mm_ceil_ps(v.vec);
914}
915
916} // namespace Math
917//------------------------------------------------------------------------------
918
919
920
921
922
923
924
925
#define n_assert(exp)
Definition debug.h:50
Different curves.
Definition angularpfeedbackloop.h:17
static const __m128 _id_z
Definition vec3.h:31
__forceinline point less(const point &v0, const point &v1)
Definition point.h:501
__forceinline vec3 cross(const vec3 &v0, const vec3 &v1)
Definition vec3.h:441
static const __m128 _id_x
Definition vec3.h:29
__forceinline point maximize(const point &v0, const point &v1)
Definition point.h:368
__forceinline quat barycentric(const quat &q0, const quat &q1, const quat &q2, scalar f, scalar g)
Definition quat.h:296
static const __m128 _zero
Definition vec3.h:35
__forceinline bool equal_any(const point &v0, const point &v1)
Definition point.h:474
__forceinline bool greaterequal_any(const point &v0, const point &v1)
Definition point.h:452
__forceinline vec3 ceiling(const vec3 &v)
Definition vec3.h:911
__forceinline vec3 splat_z(const vec3 &v)
Definition vec3.h:847
static const __m128i _sign
Definition vec3.h:36
__forceinline vec3 hermite(const vec3 &v1, const vec3 &t1, const vec3 &v2, const vec3 &t2, scalar s)
Definition vec3.h:508
__forceinline point equal(const point &v0, const point &v1)
Definition point.h:519
__forceinline vec3 splat_x(const vec3 &v)
Definition vec3.h:825
__forceinline vec3 splat(const vec3 &v, uint element)
Definition vec3.h:800
mat4 reflect(const vec4 &p)
based on this http://www.opengl.org/discussion_boards/showthread.php/169605-reflection-matrix-how-to-...
Definition mat4.cc:22
__forceinline vec3 multiply(const vec3 &v0, const vec3 &v1)
Definition vec3.h:399
__forceinline scalar angle(const vec3 &v0, const vec3 &v1)
Definition vec3.h:532
__forceinline vec3 divide(const vec3 &v0, const vec3 &v1)
Definition vec3.h:421
__forceinline vec3 reciprocal(const vec3 &v)
Definition vec3.h:381
__forceinline scalar length(const quat &q)
Definition quat.h:260
half operator/(half one, half two)
Definition half.h:132
__forceinline scalar lengthsq(const quat &q)
Definition quat.h:269
__forceinline scalar dot(const plane &p, const vec4 &v1)
Definition plane.h:252
__forceinline vec3 permute(const vec3 &v0, const vec3 &v1, unsigned int i0, unsigned int i1, unsigned int i2)
Definition vec3.h:858
__forceinline plane normalize(const plane &p)
Definition plane.h:261
__forceinline bool greaterequal_all(const point &v0, const point &v1)
Definition point.h:463
__forceinline bool greater_any(const point &v0, const point &v1)
Definition point.h:430
__forceinline float lerp(float x, float y, float l)
Linearly interpolate between 2 values: ret = x + l * (y - x)
Definition scalar.h:606
__forceinline float clamp(float val, float minVal, float maxVal)
Float clamping.
Definition scalar.h:496
half operator-(half one, half two)
Definition half.h:114
__forceinline bool less_any(const point &v0, const point &v1)
Definition point.h:386
__forceinline vec3 catmullrom(const vec3 &v0, const vec3 &v1, const vec3 &v2, const vec3 &v3, scalar s)
Definition vec3.h:484
static const __m128 _id_w
Definition vec3.h:32
__forceinline TYPE min(TYPE a, TYPE b)
Definition scalar.h:399
__forceinline bool less_all(const point &v0, const point &v1)
Definition point.h:397
__forceinline bool lessequal_all(const point &v0, const point &v1)
Definition point.h:419
__forceinline vec3 select(const vec3 &v0, const vec3 &v1, const uint i0, const uint i1, const uint i2)
Definition vec3.h:881
__forceinline vec3 splat_y(const vec3 &v)
Definition vec3.h:836
static const __m128 _minus1
Definition vec3.h:33
__forceinline point minimize(const point &v0, const point &v1)
Definition point.h:377
float scalar
Definition scalar.h:45
__forceinline point greater(const point &v0, const point &v1)
Definition point.h:510
half operator+(half one, half two)
Definition half.h:105
half operator*(half one, half two)
Definition half.h:123
__forceinline TYPE max(TYPE a, TYPE b)
Definition scalar.h:368
__forceinline scalar abs(scalar a)
Definition scalar.h:441
__forceinline vec3 multiplyadd(const vec3 &v0, const vec3 &v1, const vec3 &v2)
Definition vec3.h:408
__forceinline scalar acos(scalar x)
Definition scalar.h:218
__forceinline bool greater_all(const point &v0, const point &v1)
Definition point.h:441
__forceinline bool nearequal(const point &v0, const point &v1, float epsilon)
Definition point.h:485
static const __m128 _mask_xyz
Definition vec3.h:37
static const __m128 _id_y
Definition vec3.h:30
__forceinline bool lessequal_any(const point &v0, const point &v1)
Definition point.h:408
__forceinline float floor(float val)
Floating point flooring.
Definition scalar.h:542
__forceinline vec3 normalizeapprox(const vec3 &v)
Definition vec3.h:616
static const __m128 _plus1
Definition vec3.h:34
__forceinline vec3 reciprocalapprox(const vec3 &v)
Definition vec3.h:390
Nebula's scalar datatype.
vec3()=default
default constructor, NOTE: does NOT setup components!
Definition scalar.h:67
scalar x
Definition scalar.h:70
scalar y
Definition scalar.h:70
scalar z
Definition scalar.h:70
A 4x4 single point precision float matrix.
Definition mat4.h:49
A 3D vector.
Definition vec3.h:40
void loadu(const scalar *ptr)
load content from unaligned memory
Definition vec3.h:184
void stream(scalar *ptr) const
stream content to 16-byte-aligned memory circumventing the write-cache
Definition vec3.h:220
float v[3]
Definition vec3.h:99
bool operator==(const vec3 &rhs) const
equality operator
Definition vec3.h:152
void storeu(scalar *ptr) const
write content to unaligned memory through the write cache
Definition vec3.h:207
vec3()=default
default constructor, NOTE: does NOT setup components!
void load(const scalar *ptr)
load content from 16-byte-aligned memory
Definition vec3.h:173
void operator*=(scalar s)
inplace scalar multiply
Definition vec3.h:306
void operator-=(const vec3 &rhs)
inplace sub
Definition vec3.h:297
void store(scalar *ptr) const
write content to 16-byte-aligned memory through the write cache
Definition vec3.h:195
float x
Definition vec3.h:96
float z
Definition vec3.h:96
void operator/=(const vec3 &rhs)
divide by a vector component-wise
Definition vec3.h:279
float __w
Definition vec3.h:96
__m128 vec
Definition vec3.h:98
void operator+=(const vec3 &rhs)
inplace add
Definition vec3.h:288
scalar & operator[](const int index)
read/write access to indexed component
Definition vec3.h:343
bool operator!=(const vec3 &rhs) const
inequality operator
Definition vec3.h:162
void set(scalar x, scalar y, scalar z)
set content
Definition vec3.h:334
vec3(const vec3 &rhs)=default
copy constructor
float y
Definition vec3.h:96
void operator=(const __m128 &rhs)
assign a vmVector4
Definition vec3.h:143
bool operator==(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:272
bool operator!=(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:282
#define NEBULA_ALIGN16
Definition types.h:146
unsigned int uint
Definition types.h:31