Nebula
Loading...
Searching...
No Matches
vec3.h
Go to the documentation of this file.
1#pragma once
2//------------------------------------------------------------------------------
16#include "core/types.h"
17#include "math/scalar.h"
18#include <xmmintrin.h>
19#include <emmintrin.h>
20#include <smmintrin.h>
21#include <immintrin.h>
22
23//------------------------------------------------------------------------------
24namespace Math
25{
26struct mat4;
27struct vec3;
28
// Unit basis vectors: exactly one lane (x, y, z or w) holds 1.0f, the rest 0.0f.
static const __m128 _id_x = _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f);
static const __m128 _id_y = _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f);
static const __m128 _id_z = _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f);
static const __m128 _id_w = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f);
// -1.0f and +1.0f replicated to all four lanes.
static const __m128 _minus1 = _mm_set1_ps(-1.0f);
static const __m128 _plus1 = _mm_set1_ps(1.0f);
// Only the IEEE sign bit set in every 32-bit lane; XOR with it negates a float vector.
static const __m128i _sign = _mm_set1_epi32(0x80000000);
// All bits set in lanes x, y, z and none in w; AND with it clears the w lane.
static const __m128 _mask_xyz = _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
37
39{
40public:
42 vec3() = default;
44 vec3(scalar x, scalar y, scalar z);
46 explicit vec3(scalar v);
48 vec3(const vec3& rhs) = default;
50 vec3(const __m128& rhs);
51
53 void operator=(const __m128& rhs);
55 void operator+=(const vec3& rhs);
57 void operator-=(const vec3& rhs);
59 void operator*=(scalar s);
61 void operator*=(const vec3& rhs);
63 void operator/=(const vec3& rhs);
65 bool operator==(const vec3& rhs) const;
67 bool operator!=(const vec3& rhs) const;
68
70 void load(const scalar* ptr);
72 void loadu(const scalar* ptr);
74 void store(scalar* ptr) const;
76 void storeu(scalar* ptr) const;
78 void stream(scalar* ptr) const;
79
81 void set(scalar x, scalar y, scalar z);
82
84 scalar& operator[](const int index);
86 scalar operator[](const int index) const;
87
88 union
89 {
90 struct
91 {
92 // we can access __w to check it, but we don't actually use it
93 float x, y, z, __w;
94 };
95 __m128 vec;
96 float v[3];
97 };
98};
99
100//------------------------------------------------------------------------------
103__forceinline
105{
106 this->vec = _mm_setr_ps(x, y, z, 0);
107}
108
109//------------------------------------------------------------------------------
112__forceinline
114{
115 this->vec = _mm_setr_ps(v, v, v, 0.0f);
116}
117
118//------------------------------------------------------------------------------
121__forceinline
122vec3::vec3(const __m128& rhs)
123{
124 this->vec = _mm_insert_ps(rhs, _id_w, 0b111000);
125}
126
127//------------------------------------------------------------------------------
130__forceinline void
131vec3::operator=(const __m128& rhs)
132{
133 this->vec = _mm_insert_ps(rhs, _id_w, 0b111000);
134}
135
136//------------------------------------------------------------------------------
139__forceinline bool
140vec3::operator==(const vec3& rhs) const
141{
142 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
143 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
144}
145
146//------------------------------------------------------------------------------
149__forceinline bool
150vec3::operator!=(const vec3 &rhs) const
151{
152 __m128 vTemp = _mm_cmpeq_ps(this->vec, rhs.vec);
153 return ((_mm_movemask_ps(vTemp)==0x0f) == 0);
154}
155
156//------------------------------------------------------------------------------
160__forceinline void
162{
163 this->vec = _mm_load_ps(ptr);
164 this->vec = _mm_and_ps(this->vec, _mask_xyz);
165}
166
167//------------------------------------------------------------------------------
171__forceinline void
173{
174 this->vec = _mm_loadu_ps(ptr);
175 this->vec = _mm_and_ps(this->vec, _mask_xyz);
176}
177
178//------------------------------------------------------------------------------
182__forceinline void
184{
185 __m128 vv = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
186 _mm_storel_epi64(reinterpret_cast<__m128i*>(ptr), _mm_castps_si128(this->vec));
187 _mm_store_ss(&ptr[2], vv);
188}
189
190//------------------------------------------------------------------------------
194__forceinline void
196{
197 __m128 t1 = _mm_permute_ps(this->vec, _MM_SHUFFLE(1, 1, 1, 1));
198 __m128 t2 = _mm_permute_ps(this->vec, _MM_SHUFFLE(2, 2, 2, 2));
199 _mm_store_ss(&ptr[0], this->vec);
200 _mm_store_ss(&ptr[1], t1);
201 _mm_store_ss(&ptr[2], t2);
202}
203
204//------------------------------------------------------------------------------
207__forceinline void
209{
210 this->store(ptr);
211}
212
213//------------------------------------------------------------------------------
216__forceinline vec3
217operator-(const vec3& lhs)
218{
219 return vec3(_mm_xor_ps(_mm_castsi128_ps(_sign), lhs.vec));
220}
221
222//------------------------------------------------------------------------------
225__forceinline vec3
226operator*(const vec3& lhs, scalar t)
227{
228 __m128 temp = _mm_set1_ps(t);
229 return _mm_mul_ps(lhs.vec, temp);
230}
231
232//------------------------------------------------------------------------------
235__forceinline vec3
236operator*(const vec3& lhs, const vec3& rhs)
237{
238 return _mm_mul_ps(lhs.vec, rhs.vec);
239}
240
241//------------------------------------------------------------------------------
244__forceinline void
246{
247 this->vec = _mm_mul_ps(this->vec, rhs.vec);
248}
249
250//------------------------------------------------------------------------------
253__forceinline void
255{
256 this->vec = _mm_div_ps(this->vec, rhs.vec);
257}
258
259//------------------------------------------------------------------------------
262__forceinline void
264{
265 this->vec = _mm_add_ps(this->vec, rhs.vec);
266}
267
268//------------------------------------------------------------------------------
271__forceinline void
273{
274 this->vec = _mm_sub_ps(this->vec, rhs.vec);
275}
276
277//------------------------------------------------------------------------------
280__forceinline void
282{
283 __m128 temp = _mm_set1_ps(s);
284 this->vec = _mm_mul_ps(this->vec, temp);
285}
286
287//------------------------------------------------------------------------------
290__forceinline vec3
291operator+(const vec3& lhs, const vec3 &rhs)
292{
293 return _mm_add_ps(lhs.vec, rhs.vec);
294}
295
296//------------------------------------------------------------------------------
299__forceinline vec3
300operator-(const vec3& lhs, const vec3& rhs)
301{
302 return _mm_sub_ps(lhs.vec, rhs.vec);
303}
304
305//------------------------------------------------------------------------------
308__forceinline void
310{
311 this->vec = _mm_setr_ps(x, y, z, 0);
312}
313
314//------------------------------------------------------------------------------
317__forceinline scalar&
318vec3::operator[]( const int index )
319{
320 n_assert(index < 3);
321 return this->v[index];
322}
323
324//------------------------------------------------------------------------------
327__forceinline scalar
328vec3::operator[](const int index) const
329{
330 n_assert(index < 3);
331 return this->v[index];
332}
333
334//------------------------------------------------------------------------------
337__forceinline scalar
338length(const vec3& v)
339{
340 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.vec, v.vec, 0x71)));
341}
342
343//------------------------------------------------------------------------------
346__forceinline scalar
347lengthsq(const vec3& v)
348{
349 return _mm_cvtss_f32(_mm_dp_ps(v.vec, v.vec, 0x71));
350}
351
352//------------------------------------------------------------------------------
355__forceinline vec3
357{
358 return _mm_div_ps(_plus1, v.vec);
359}
360
361//------------------------------------------------------------------------------
364__forceinline vec3
366{
367 return _mm_rcp_ps(v.vec);
368}
369
370//------------------------------------------------------------------------------
373__forceinline vec3
374multiply(const vec3& v0, const vec3& v1)
375{
376 return _mm_mul_ps(v0.vec, v1.vec);
377}
378
379//------------------------------------------------------------------------------
382__forceinline vec3
383multiplyadd( const vec3& v0, const vec3& v1, const vec3& v2 )
384{
385#if NEBULA_MATH_FMA
386 return _mm_fmadd_ps(v0.vec, v1.vec, v2.vec);
387#else
388 return _mm_add_ps(_mm_mul_ps(v0.vec, v1.vec),v2.vec);
389#endif
390}
391
392//------------------------------------------------------------------------------
395__forceinline vec3
396divide(const vec3& v0, const vec3& v1)
397{
398 return _mm_div_ps(v0.vec, v1.vec);
399}
400
401//------------------------------------------------------------------------------
404__forceinline vec3
405abs(const vec3& v)
406{
407 unsigned int val = 0x7fffffff;
408 __m128 temp = _mm_set1_ps(*(float*)&val);
409 return _mm_and_ps(v.vec, temp);
410}
411
412//------------------------------------------------------------------------------
415__forceinline vec3
416cross(const vec3& v0, const vec3& v1)
417{
418 __m128 tmp0, tmp1, tmp2, tmp3, result;
419 tmp0 = _mm_shuffle_ps( v0.vec, v0.vec, _MM_SHUFFLE(3,0,2,1) );
420 tmp1 = _mm_shuffle_ps( v1.vec, v1.vec, _MM_SHUFFLE(3,1,0,2) );
421 tmp2 = _mm_shuffle_ps( v0.vec, v0.vec, _MM_SHUFFLE(3,1,0,2) );
422 tmp3 = _mm_shuffle_ps( v1.vec, v1.vec, _MM_SHUFFLE(3,0,2,1) );
423 result = _mm_mul_ps( tmp0, tmp1 );
424 result = _mm_sub_ps( result, _mm_mul_ps( tmp2, tmp3 ) );
425 return result;
426}
427
428//------------------------------------------------------------------------------
431__forceinline scalar
432dot(const vec3& v0, const vec3& v1)
433{
434 return _mm_cvtss_f32(_mm_dp_ps(v0.vec, v1.vec, 0x71));
435}
436
437//------------------------------------------------------------------------------
441__forceinline vec3
442barycentric(const vec3& v0, const vec3 &v1, const vec3 &v2, scalar f, scalar g)
443{
444 __m128 R1 = _mm_sub_ps(v1.vec,v0.vec);
445 __m128 SF = _mm_set_ps1(f);
446 __m128 R2 = _mm_sub_ps(v2.vec,v0.vec);
447 __m128 SG = _mm_set_ps1(g);
448 R1 = _mm_mul_ps(R1,SF);
449 R2 = _mm_mul_ps(R2,SG);
450 R1 = _mm_add_ps(R1,v0.vec);
451 R1 = _mm_add_ps(R1,R2);
452 return R1;
453}
454
455//------------------------------------------------------------------------------
458__forceinline vec3
459catmullrom(const vec3& v0, const vec3& v1, const vec3& v2, const vec3& v3, scalar s)
460{
461 scalar s2 = s * s;
462 scalar s3 = s * s2;
463
464 __m128 P0 = _mm_set_ps1((-s3 + 2.0f * s2 - s) * 0.5f);
465 __m128 P1 = _mm_set_ps1((3.0f * s3 - 5.0f * s2 + 2.0f) * 0.5f);
466 __m128 P2 = _mm_set_ps1((-3.0f * s3 + 4.0f * s2 + s) * 0.5f);
467 __m128 P3 = _mm_set_ps1((s3 - s2) * 0.5f);
468
469 P0 = _mm_mul_ps(P0, v0.vec);
470 P1 = _mm_mul_ps(P1, v1.vec);
471 P2 = _mm_mul_ps(P2, v2.vec);
472 P3 = _mm_mul_ps(P3, v3.vec);
473 P0 = _mm_add_ps(P0,P1);
474 P2 = _mm_add_ps(P2,P3);
475 P0 = _mm_add_ps(P0,P2);
476 return P0;
477}
478
479//------------------------------------------------------------------------------
482__forceinline vec3
483hermite(const vec3& v1, const vec3& t1, const vec3& v2, const vec3& t2, scalar s)
484{
485 scalar s2 = s * s;
486 scalar s3 = s * s2;
487
488 __m128 P0 = _mm_set_ps1(2.0f * s3 - 3.0f * s2 + 1.0f);
489 __m128 T0 = _mm_set_ps1(s3 - 2.0f * s2 + s);
490 __m128 P1 = _mm_set_ps1(-2.0f * s3 + 3.0f * s2);
491 __m128 T1 = _mm_set_ps1(s3 - s2);
492
493 __m128 vResult = _mm_mul_ps(P0, v1.vec);
494 __m128 vTemp = _mm_mul_ps(T0, t1.vec);
495 vResult = _mm_add_ps(vResult,vTemp);
496 vTemp = _mm_mul_ps(P1, v2.vec);
497 vResult = _mm_add_ps(vResult,vTemp);
498 vTemp = _mm_mul_ps(T1, t2.vec);
499 vResult = _mm_add_ps(vResult,vTemp);
500 return vResult;
501}
502
503//------------------------------------------------------------------------------
506__forceinline scalar
507angle(const vec3& v0, const vec3& v1)
508{
509
510 __m128 l0 = _mm_mul_ps(v0.vec, v0.vec);
511 l0 = _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(0, 0, 0, 0)),
512 _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l0, l0, _MM_SHUFFLE(2, 2, 2, 2))));
513
514 __m128 l1 = _mm_mul_ps(v1.vec, v1.vec);
515 l1 = _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(0, 0, 0, 0)),
516 _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l1, l1, _MM_SHUFFLE(2, 2, 2, 2))));
517
518 __m128 l = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(0, 0, 0, 0));
519 l = _mm_rsqrt_ps(l);
520 l = _mm_mul_ss(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)));
521
522
523 __m128 dot = _mm_mul_ps(v0.vec, v1.vec);
524 dot = _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(0, 0, 0, 0)),
525 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(1, 1, 1, 1)),
526 _mm_add_ps(_mm_shuffle_ps(dot, dot, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(dot, dot, _MM_SHUFFLE(3, 3, 3, 3)))));
527
528 dot = _mm_mul_ss(dot, l);
529
530 dot = _mm_max_ss(dot, _minus1);
531 dot = _mm_min_ss(dot, _plus1);
532
533 scalar cangle;
534 _mm_store_ss(&cangle, dot);
535 return acos(cangle);
536}
537
538//------------------------------------------------------------------------------
541__forceinline vec3
542lerp(const vec3& v0, const vec3& v1, scalar s)
543{
544 return v0 + ((v1-v0) * s);
545}
546
547//------------------------------------------------------------------------------
550__forceinline vec3
551maximize(const vec3& v0, const vec3& v1)
552{
553 return _mm_max_ps(v0.vec, v1.vec);
554}
555
556//------------------------------------------------------------------------------
559__forceinline vec3
560minimize(const vec3& v0, const vec3& v1)
561{
562 return _mm_min_ps(v0.vec, v1.vec);
563}
564
565//------------------------------------------------------------------------------
568__forceinline vec3
569clamp(const vec3& clamp, const vec3& min, const vec3& max)
570{
571 __m128 temp = _mm_max_ps(min.vec, clamp.vec);
572 temp = _mm_min_ps(temp, max.vec);
573 return vec3(temp);
574}
575
576//------------------------------------------------------------------------------
579__forceinline vec3
581{
582 if (v == vec3(0)) return v;
583 __m128 t = _mm_div_ps(v.vec, _mm_sqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x77)));
584 return _mm_insert_ps(t, v.vec, 0xF0);
585}
586
587//------------------------------------------------------------------------------
590__forceinline vec3
592{
593 if (v == vec3(0)) return v;
594 __m128 t = _mm_rsqrt_ps(_mm_dp_ps(v.vec, v.vec, 0x7f));
595 t = _mm_or_ps(t, _id_w);
596 return _mm_mul_ps(v.vec, t);
597}
598
599//------------------------------------------------------------------------------
602__forceinline vec3
603reflect(const vec3& normal, const vec3& incident)
604{
605 __m128 res = _mm_mul_ps(incident.vec, normal.vec);
606 res = _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(0,0,0,0)),
607 _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(res, res, _MM_SHUFFLE(2,2,2,2))));
608 res = _mm_add_ps(res, res);
609 res = _mm_mul_ps(res, normal.vec);
610 res = _mm_sub_ps(incident.vec,res);
611 return res;
612}
613
614//------------------------------------------------------------------------------
617__forceinline bool
618less_any(const vec3& v0, const vec3& v1)
619{
620 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
621 int res = _mm_movemask_ps(vTemp) & 7;
622 return res != 7;
623}
624
625//------------------------------------------------------------------------------
628__forceinline bool
629less_all(const vec3& v0, const vec3& v1)
630{
631 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
632 int res = _mm_movemask_ps(vTemp) & 7;
633 return res == 0;
634}
635
636//------------------------------------------------------------------------------
639__forceinline bool
640lessequal_any(const vec3& v0, const vec3& v1)
641{
642 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
643 int res = _mm_movemask_ps(vTemp) & 7;
644 return res != 0x7;
645}
646
647//------------------------------------------------------------------------------
650__forceinline bool
651lessequal_all(const vec3& v0, const vec3& v1)
652{
653 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
654 int res = _mm_movemask_ps(vTemp) & 7;
655 return res == 0;
656}
657
658//------------------------------------------------------------------------------
661__forceinline bool
662greater_any(const vec3& v0, const vec3& v1)
663{
664 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
665 int res = _mm_movemask_ps(vTemp) & 7;
666 return res != 0;
667}
668
669//------------------------------------------------------------------------------
672__forceinline bool
673greater_all(const vec3& v0, const vec3& v1)
674{
675 __m128 vTemp = _mm_cmpgt_ps(v0.vec, v1.vec);
676 int res = _mm_movemask_ps(vTemp) & 7;
677 return res == 0x7;
678}
679
680//------------------------------------------------------------------------------
683__forceinline bool
684greaterequal_any(const vec3& v0, const vec3& v1)
685{
686 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
687 int res = _mm_movemask_ps(vTemp) & 7;
688 return res != 0;
689}
690
691//------------------------------------------------------------------------------
694__forceinline bool
695greaterequal_all(const vec3& v0, const vec3& v1)
696{
697 __m128 vTemp = _mm_cmpge_ps(v0.vec, v1.vec);
698 int res = _mm_movemask_ps(vTemp) & 7;
699 return res == 0x7;
700}
701
702//------------------------------------------------------------------------------
705__forceinline bool
706equal_any(const vec3& v0, const vec3& v1)
707{
708 __m128 vTemp = _mm_cmpeq_ps(v0.vec, v1.vec);
709 int res = _mm_movemask_ps(vTemp) & 7;
710 return res != 0;
711}
712
713//------------------------------------------------------------------------------
716__forceinline bool
717nearequal(const vec3& v0, const vec3& v1, float epsilon)
718{
719 __m128 eps = _mm_setr_ps(epsilon, epsilon, epsilon, 0.0f);
720 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
721 __m128 temp = _mm_setzero_ps();
722 temp = _mm_sub_ps(temp, delta);
723 temp = _mm_max_ps(temp, delta);
724 temp = _mm_cmple_ps(temp, eps);
725 temp = _mm_and_ps(temp, _mask_xyz);
726 return (_mm_movemask_ps(temp) == 0x7) != 0;
727}
728
729//------------------------------------------------------------------------------
732__forceinline bool
733nearequal(const vec3& v0, const vec3& v1, const vec3& epsilon)
734{
735 __m128 delta = _mm_sub_ps(v0.vec, v1.vec);
736 __m128 temp = _mm_setzero_ps();
737 temp = _mm_sub_ps(temp, delta);
738 temp = _mm_max_ps(temp, delta);
739 temp = _mm_cmple_ps(temp, epsilon.vec);
740 temp = _mm_and_ps(temp, _mask_xyz);
741 return (_mm_movemask_ps(temp) == 0x7) != 0;
742}
743
744//------------------------------------------------------------------------------
747__forceinline vec3
748less(const vec3& v0, const vec3& v1)
749{
750 return _mm_min_ps(_mm_cmplt_ps(v0.vec, v1.vec), _plus1);
751}
752
753//------------------------------------------------------------------------------
756__forceinline vec3
757greater(const vec3& v0, const vec3& v1)
758{
759 return _mm_min_ps(_mm_cmpgt_ps(v0.vec, v1.vec), _plus1);
760}
761
762//------------------------------------------------------------------------------
765__forceinline vec3
766equal(const vec3& v0, const vec3& v1)
767{
768 return _mm_min_ps(_mm_cmpeq_ps(v0.vec, v1.vec), _plus1);
769}
770
771//------------------------------------------------------------------------------
774__forceinline vec3
775splat(const vec3& v, uint element)
776{
777 n_assert(element < 3 && element >= 0);
778
779 __m128 res;
780 switch (element)
781 {
782 case 0:
783 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
784 break;
785 case 1:
786 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
787 break;
788 case 2:
789 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
790 break;
791 }
792 res = _mm_and_ps(res, _mask_xyz);
793 return res;
794}
795
796//------------------------------------------------------------------------------
799__forceinline vec3
800splat_x(const vec3& v)
801{
802 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(0, 0, 0, 0));
803 res = _mm_and_ps(res, _mask_xyz);
804 return res;
805}
806
807//------------------------------------------------------------------------------
810__forceinline vec3
811splat_y(const vec3& v)
812{
813 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
814 res = _mm_and_ps(res, _mask_xyz);
815 return res;
816}
817
818//------------------------------------------------------------------------------
821__forceinline vec3
822splat_z(const vec3& v)
823{
824 __m128 res = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
825 res = _mm_and_ps(res, _mask_xyz);
826 return res;
827}
828
829//------------------------------------------------------------------------------
832__forceinline vec3
833permute(const vec3& v0, const vec3& v1, unsigned int i0, unsigned int i1, unsigned int i2)
834{
835 static __m128i three = _mm_set_epi32(3,3,3,3);
836
837 NEBULA_ALIGN16 unsigned int elem[4] = { i0, i1, i2, 7 };
838 __m128i vControl = _mm_load_si128(reinterpret_cast<const __m128i*>(&elem[0]));
839
840 __m128i vSelect = _mm_cmpgt_epi32(vControl, three);
841 vControl = _mm_and_si128(vControl, three);
842
843 __m128 shuffled1 = _mm_permutevar_ps(v0.vec, vControl);
844 __m128 shuffled2 = _mm_permutevar_ps(v1.vec, vControl);
845
846 __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1);
847 __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2);
848
849 return _mm_or_ps(masked1, masked2);
850}
851
852//------------------------------------------------------------------------------
855__forceinline vec3
856select(const vec3& v0, const vec3& v1, const uint i0, const uint i1, const uint i2)
857{
858 //FIXME this should be converted to something similiar as XMVectorSelect
859 return permute(v0, v1, i0, i1, i2);
860}
861
862//------------------------------------------------------------------------------
865__forceinline vec3
866select(const vec3& v0, const vec3& v1, const vec3& control)
867{
868 __m128 v0masked = _mm_andnot_ps(control.vec, v0.vec);
869 __m128 v1masked = _mm_and_ps(v1.vec, control.vec);
870 return _mm_or_ps(v0masked, v1masked);
871}
872
873//------------------------------------------------------------------------------
876__forceinline vec3
877floor(const vec3& v)
878{
879 return _mm_floor_ps(v.vec);
880}
881
882//------------------------------------------------------------------------------
885__forceinline vec3
886ceiling(const vec3& v)
887{
888 return _mm_ceil_ps(v.vec);
889}
890
891} // namespace Math
892//------------------------------------------------------------------------------
893
894
895
896
897
898
899
900
#define n_assert(exp)
Definition debug.h:50
Half precision (16 bit) float implementation.
Definition angularpfeedbackloop.h:17
static const __m128 _id_z
Definition vec3.h:31
__forceinline point less(const point &v0, const point &v1)
Definition point.h:501
__forceinline vec3 cross(const vec3 &v0, const vec3 &v1)
Definition vec3.h:416
static const __m128 _id_x
Definition vec3.h:29
__forceinline point maximize(const point &v0, const point &v1)
Definition point.h:368
__forceinline quat barycentric(const quat &q0, const quat &q1, const quat &q2, scalar f, scalar g)
Definition quat.h:295
__forceinline bool equal_any(const point &v0, const point &v1)
Definition point.h:474
__forceinline bool greaterequal_any(const point &v0, const point &v1)
Definition point.h:452
__forceinline vec3 ceiling(const vec3 &v)
Definition vec3.h:886
__forceinline vec3 splat_z(const vec3 &v)
Definition vec3.h:822
static const __m128i _sign
Definition vec3.h:35
__forceinline vec3 hermite(const vec3 &v1, const vec3 &t1, const vec3 &v2, const vec3 &t2, scalar s)
Definition vec3.h:483
__forceinline point equal(const point &v0, const point &v1)
Definition point.h:519
__forceinline vec3 splat_x(const vec3 &v)
Definition vec3.h:800
__forceinline vec3 splat(const vec3 &v, uint element)
Definition vec3.h:775
mat4 reflect(const vec4 &p)
based on this http://www.opengl.org/discussion_boards/showthread.php/169605-reflection-matrix-how-to-...
Definition mat4.cc:21
__forceinline vec3 multiply(const vec3 &v0, const vec3 &v1)
Definition vec3.h:374
__forceinline scalar angle(const vec3 &v0, const vec3 &v1)
Definition vec3.h:507
__forceinline vec3 divide(const vec3 &v0, const vec3 &v1)
Definition vec3.h:396
__forceinline vec3 reciprocal(const vec3 &v)
Definition vec3.h:356
__forceinline scalar length(const quat &q)
Definition quat.h:259
__forceinline scalar lengthsq(const quat &q)
Definition quat.h:268
__forceinline scalar dot(const plane &p, const vec4 &v1)
Definition plane.h:246
__forceinline vec3 permute(const vec3 &v0, const vec3 &v1, unsigned int i0, unsigned int i1, unsigned int i2)
Definition vec3.h:833
__forceinline plane normalize(const plane &p)
Definition plane.h:255
__forceinline bool greaterequal_all(const point &v0, const point &v1)
Definition point.h:463
__forceinline bool greater_any(const point &v0, const point &v1)
Definition point.h:430
__forceinline float lerp(float x, float y, float l)
Linearly interpolate between 2 values: ret = x + l * (y - x)
Definition scalar.h:597
__forceinline float clamp(float val, float minVal, float maxVal)
Float clamping.
Definition scalar.h:487
half operator-(half one, half two)
Definition half.h:114
__forceinline bool less_any(const point &v0, const point &v1)
Definition point.h:386
__forceinline vec3 catmullrom(const vec3 &v0, const vec3 &v1, const vec3 &v2, const vec3 &v3, scalar s)
Definition vec3.h:459
static const __m128 _id_w
Definition vec3.h:32
__forceinline TYPE min(TYPE a, TYPE b)
Definition scalar.h:390
__forceinline bool less_all(const point &v0, const point &v1)
Definition point.h:397
__forceinline bool lessequal_all(const point &v0, const point &v1)
Definition point.h:419
__forceinline vec3 select(const vec3 &v0, const vec3 &v1, const uint i0, const uint i1, const uint i2)
Definition vec3.h:856
__forceinline vec3 splat_y(const vec3 &v)
Definition vec3.h:811
static const __m128 _minus1
Definition vec3.h:33
__forceinline point minimize(const point &v0, const point &v1)
Definition point.h:377
float scalar
Definition scalar.h:45
__forceinline point greater(const point &v0, const point &v1)
Definition point.h:510
half operator+(half one, half two)
Definition half.h:105
half operator*(half one, half two)
Definition half.h:123
__forceinline TYPE max(TYPE a, TYPE b)
Definition scalar.h:359
__forceinline scalar abs(scalar a)
Definition scalar.h:432
__forceinline vec3 multiplyadd(const vec3 &v0, const vec3 &v1, const vec3 &v2)
Definition vec3.h:383
__forceinline scalar acos(scalar x)
Definition scalar.h:218
__forceinline bool greater_all(const point &v0, const point &v1)
Definition point.h:441
__forceinline bool nearequal(const point &v0, const point &v1, float epsilon)
Definition point.h:485
static const __m128 _mask_xyz
Definition vec3.h:36
static const __m128 _id_y
Definition vec3.h:30
__forceinline bool lessequal_any(const point &v0, const point &v1)
Definition point.h:408
__forceinline float floor(float val)
Floating point flooring.
Definition scalar.h:533
__forceinline vec3 normalizeapprox(const vec3 &v)
Definition vec3.h:591
static const __m128 _plus1
Definition vec3.h:34
__forceinline vec3 reciprocalapprox(const vec3 &v)
Definition vec3.h:365
Nebula's scalar datatype.
A 3D vector.
Definition vec3.h:39
void loadu(const scalar *ptr)
load content from unaligned memory
Definition vec3.h:172
void stream(scalar *ptr) const
stream content to 16-byte-aligned memory circumventing the write-cache
Definition vec3.h:208
float v[3]
Definition vec3.h:96
bool operator==(const vec3 &rhs) const
equality operator
Definition vec3.h:140
void storeu(scalar *ptr) const
write content to unaligned memory through the write cache
Definition vec3.h:195
vec3()=default
default constructor, NOTE: does NOT setup components!
void load(const scalar *ptr)
load content from 16-byte-aligned memory
Definition vec3.h:161
void operator*=(scalar s)
inplace scalar multiply
Definition vec3.h:281
void operator-=(const vec3 &rhs)
inplace sub
Definition vec3.h:272
void store(scalar *ptr) const
write content to 16-byte-aligned memory through the write cache
Definition vec3.h:183
float x
Definition vec3.h:93
float z
Definition vec3.h:93
void operator/=(const vec3 &rhs)
divide by a vector component-wise
Definition vec3.h:254
float __w
Definition vec3.h:93
__m128 vec
Definition vec3.h:95
void operator+=(const vec3 &rhs)
inplace add
Definition vec3.h:263
scalar & operator[](const int index)
read/write access to indexed component
Definition vec3.h:318
bool operator!=(const vec3 &rhs) const
inequality operator
Definition vec3.h:150
void set(scalar x, scalar y, scalar z)
set content
Definition vec3.h:309
vec3(const vec3 &rhs)=default
copy constructor
float y
Definition vec3.h:93
void operator=(const __m128 &rhs)
assign a vmVector4
Definition vec3.h:131
bool operator==(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:272
bool operator!=(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:282
#define NEBULA_ALIGN16
Definition types.h:181
unsigned int uint
Definition types.h:31