// Constant SSE vectors shared by the vec3 routines.
// _mm_setr_ps / _mm_setr_epi32 take their arguments in lane order (first argument = lowest lane).

// (1, 0, 0, 0): only the x lane is one.
29static const __m128
_id_x = _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f);
// (0, 1, 0, 0): only the y lane is one.
30static const __m128
_id_y = _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f);
// (0, 0, 1, 0): only the z lane is one.
31static const __m128
_id_z = _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f);
// (0, 0, 0, 1): only the fourth (w) lane is one.
32static const __m128
_id_w = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f);
// -1.0f in all four lanes.
33static const __m128
_minus1 = _mm_setr_ps(-1.0f, -1.0f, -1.0f, -1.0f);
// +1.0f in all four lanes.
34static const __m128
_plus1 = _mm_setr_ps(1.0f, 1.0f, 1.0f, 1.0f);
// 0.0f in all four lanes.
35static const __m128
_zero = _mm_setr_ps(0.0f, 0.0f, 0.0f, 0.0f);
// 0x80000000 in every 32-bit lane, i.e. the sign bit of a single-precision float;
// XOR-ing a float vector with this pattern flips the sign of each lane.
// NOTE(review): 0x80000000 does not fit in a signed int, so this relies on the
// implicit conversion to the int parameters of _mm_setr_epi32 — confirm it
// compiles without narrowing/sign-conversion diagnostics on all target compilers.
36static const __m128i
_sign = _mm_setr_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000);
// All bits set in the first three lanes (x, y, z) and zero in the fourth,
// reinterpreted as a float vector so it can be used with the bitwise _ps operations.
37static const __m128
_mask_xyz = _mm_castsi128_ps(_mm_setr_epi32( 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0 ));
/// load content from 16-byte-aligned memory
/// (the implementation reads through _mm_load_ps, i.e. a full 4-float vector)
73 void load(
const scalar* ptr);
/// load content from unaligned memory
/// (the implementation reads through _mm_loadu_ps, i.e. a full 4-float vector)
75 void loadu(
const scalar* ptr);
/// write content to 16-byte-aligned memory through the write cache
/// (the implementation writes ptr[0..1] with one 64-bit store and ptr[2] with a scalar store)
77 void store(scalar* ptr)
const;
/// write content to unaligned memory through the write cache
/// (the implementation writes ptr[0], ptr[1] and ptr[2] with three scalar stores)
79 void storeu(scalar* ptr)
const;
/// stream content to 16-byte-aligned memory circumventing the write-cache
81 void stream(scalar* ptr)
const;
/// set content from three components
/// (the implementation builds the vector with _mm_setr_ps(x, y, z, 0), so the fourth lane becomes 0)
84 void set(scalar
x, scalar
y, scalar
z);
109 this->
vec = _mm_setr_ps(
x,
y,
z, 0);
118 this->
vec = _mm_setr_ps(f3.
x, f3.
y, f3.
z, 0);
127 this->
vec = _mm_setr_ps(
v,
v,
v, 0.0f);
136 this->
vec = _mm_insert_ps(rhs,
_id_w, 0b111000);
145 this->
vec = _mm_insert_ps(rhs,
_id_w, 0b111000);
154 __m128 vTemp = _mm_cmpeq_ps(this->
vec, rhs.
vec);
155 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
164 __m128 vTemp = _mm_cmpeq_ps(this->
vec, rhs.
vec);
165 return ((_mm_movemask_ps(vTemp)==0x0f) == 0);
175 this->
vec = _mm_load_ps(ptr);
186 this->
vec = _mm_loadu_ps(ptr);
197 __m128 vv = _mm_permute_ps(this->
vec, _MM_SHUFFLE(2, 2, 2, 2));
198 _mm_storel_epi64(
reinterpret_cast<__m128i*
>(ptr), _mm_castps_si128(this->
vec));
199 _mm_store_ss(&ptr[2], vv);
209 __m128 t1 = _mm_permute_ps(this->
vec, _MM_SHUFFLE(1, 1, 1, 1));
210 __m128 t2 = _mm_permute_ps(this->
vec, _MM_SHUFFLE(2, 2, 2, 2));
211 _mm_store_ss(&ptr[0], this->
vec);
212 _mm_store_ss(&ptr[1], t1);
213 _mm_store_ss(&ptr[2], t2);
231 return vec3(_mm_xor_ps(_mm_castsi128_ps(
_sign), lhs.
vec));
241 __m128 temp = _mm_set1_ps(t);
242 return _mm_mul_ps(lhs.
vec, temp);
252 return _mm_mul_ps(lhs.
vec, rhs.
vec);
262 __m128 temp = _mm_set1_ps(t);
263 return _mm_div_ps(lhs.
vec, temp);
272 this->
vec = _mm_mul_ps(this->
vec, rhs.
vec);
281 this->
vec = _mm_div_ps(this->
vec, rhs.
vec);
290 this->
vec = _mm_add_ps(this->
vec, rhs.
vec);
299 this->
vec = _mm_sub_ps(this->
vec, rhs.
vec);
308 __m128 temp = _mm_set1_ps(s);
309 this->
vec = _mm_mul_ps(this->
vec, temp);
318 return _mm_add_ps(lhs.
vec, rhs.
vec);
327 return _mm_sub_ps(lhs.
vec, rhs.
vec);
336 this->
vec = _mm_setr_ps(
x,
y,
z, 0);
346 return this->
v[index];
356 return this->
v[index];
365 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.
vec, v.
vec, 0x71)));
374 return _mm_cvtss_f32(_mm_dp_ps(v.
vec, v.
vec, 0x71));
392 return _mm_rcp_ps(v.
vec);
401 return _mm_mul_ps(v0.
vec, v1.
vec);
411 return _mm_fmadd_ps(v0.
vec, v1.
vec, v2.
vec);
413 return _mm_add_ps(_mm_mul_ps(v0.
vec, v1.
vec),v2.
vec);
423 return _mm_div_ps(v0.
vec, v1.
vec);
432 unsigned int val = 0x7fffffff;
433 __m128 temp = _mm_set1_ps(*(
float*)&val);
434 return _mm_and_ps(v.
vec, temp);
443 __m128 tmp0, tmp1, tmp2, tmp3, result;
444 tmp0 = _mm_shuffle_ps( v0.
vec, v0.
vec, _MM_SHUFFLE(3,0,2,1) );
445 tmp1 = _mm_shuffle_ps( v1.
vec, v1.
vec, _MM_SHUFFLE(3,1,0,2) );
446 tmp2 = _mm_shuffle_ps( v0.
vec, v0.
vec, _MM_SHUFFLE(3,1,0,2) );
447 tmp3 = _mm_shuffle_ps( v1.
vec, v1.
vec, _MM_SHUFFLE(3,0,2,1) );
448 result = _mm_mul_ps( tmp0, tmp1 );
449 result = _mm_sub_ps( result, _mm_mul_ps( tmp2, tmp3 ) );
459 return _mm_cvtss_f32(_mm_dp_ps(v0.
vec, v1.
vec, 0x71));
469 __m128 R1 = _mm_sub_ps(v1.
vec,v0.
vec);
470 __m128 SF = _mm_set_ps1(f);
471 __m128 R2 = _mm_sub_ps(v2.
vec,v0.
vec);
472 __m128 SG = _mm_set_ps1(g);
473 R1 = _mm_mul_ps(R1,SF);
474 R2 = _mm_mul_ps(R2,SG);
475 R1 = _mm_add_ps(R1,v0.
vec);
476 R1 = _mm_add_ps(R1,R2);
489 __m128 P0 = _mm_set_ps1((-s3 + 2.0f * s2 - s) * 0.5f);
490 __m128 P1 = _mm_set_ps1((3.0f * s3 - 5.0f * s2 + 2.0f) * 0.5f);
491 __m128 P2 = _mm_set_ps1((-3.0f * s3 + 4.0f * s2 + s) * 0.5f);
492 __m128 P3 = _mm_set_ps1((s3 - s2) * 0.5f);
494 P0 = _mm_mul_ps(P0, v0.
vec);
495 P1 = _mm_mul_ps(P1, v1.
vec);
496 P2 = _mm_mul_ps(P2, v2.
vec);
497 P3 = _mm_mul_ps(P3, v3.
vec);
498 P0 = _mm_add_ps(P0,P1);
499 P2 = _mm_add_ps(P2,P3);
500 P0 = _mm_add_ps(P0,P2);
513 __m128 P0 = _mm_set_ps1(2.0f * s3 - 3.0f * s2 + 1.0f);
514 __m128 T0 = _mm_set_ps1(s3 - 2.0f * s2 + s);
515 __m128 P1 = _mm_set_ps1(-2.0f * s3 + 3.0f * s2);
516 __m128 T1 = _mm_set_ps1(s3 - s2);
518 __m128 vResult = _mm_mul_ps(P0, v1.
vec);
519 __m128 vTemp = _mm_mul_ps(T0, t1.
vec);
520 vResult = _mm_add_ps(vResult,vTemp);
521 vTemp = _mm_mul_ps(P1, v2.
vec);
522 vResult = _mm_add_ps(vResult,vTemp);
523 vTemp = _mm_mul_ps(T1, t2.
vec);
524 vResult = _mm_add_ps(vResult,vTemp);
535 __m128 l0 = _mm_mul_ps(v0.
vec, v0.
vec);
536 l0 = _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(0, 0, 0, 0)),
537 _mm_add_ps(_mm_shuffle_ps(l0, l0, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l0, l0, _MM_SHUFFLE(2, 2, 2, 2))));
539 __m128 l1 = _mm_mul_ps(v1.
vec, v1.
vec);
540 l1 = _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(0, 0, 0, 0)),
541 _mm_add_ps(_mm_shuffle_ps(l1, l1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(l1, l1, _MM_SHUFFLE(2, 2, 2, 2))));
543 __m128 l = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(0, 0, 0, 0));
545 l = _mm_mul_ss(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)));
549 dot = _mm_add_ps(_mm_shuffle_ps(
dot,
dot, _MM_SHUFFLE(0, 0, 0, 0)),
550 _mm_add_ps(_mm_shuffle_ps(
dot,
dot, _MM_SHUFFLE(1, 1, 1, 1)),
551 _mm_add_ps(_mm_shuffle_ps(
dot,
dot, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(
dot,
dot, _MM_SHUFFLE(3, 3, 3, 3)))));
559 _mm_store_ss(&cangle,
dot);
569 return v0 + ((v1-v0) * s);
578 return _mm_max_ps(v0.
vec, v1.
vec);
587 return _mm_min_ps(v0.
vec, v1.
vec);
596 __m128 temp = _mm_max_ps(
min.vec,
clamp.vec);
597 temp = _mm_min_ps(temp,
max.vec);
607 if (v ==
vec3(0))
return v;
608 __m128 t = _mm_div_ps(v.
vec, _mm_sqrt_ps(_mm_dp_ps(v.
vec, v.
vec, 0x77)));
609 return _mm_insert_ps(t, v.
vec, 0xF0);
618 if (v ==
vec3(0))
return v;
619 __m128 t = _mm_rsqrt_ps(_mm_dp_ps(v.
vec, v.
vec, 0x7f));
620 t = _mm_or_ps(t,
_id_w);
621 return _mm_mul_ps(v.
vec, t);
630 __m128 res = _mm_mul_ps(incident.
vec, normal.
vec);
631 res = _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(0,0,0,0)),
632 _mm_add_ps(_mm_shuffle_ps(res, res, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(res, res, _MM_SHUFFLE(2,2,2,2))));
633 res = _mm_add_ps(res, res);
634 res = _mm_mul_ps(res, normal.
vec);
635 res = _mm_sub_ps(incident.
vec,res);
645 __m128 vTemp = _mm_cmpge_ps(v0.
vec, v1.
vec);
646 int res = _mm_movemask_ps(vTemp) & 7;
656 __m128 vTemp = _mm_cmpge_ps(v0.
vec, v1.
vec);
657 int res = _mm_movemask_ps(vTemp) & 7;
667 __m128 vTemp = _mm_cmpgt_ps(v0.
vec, v1.
vec);
668 int res = _mm_movemask_ps(vTemp) & 7;
678 __m128 vTemp = _mm_cmpgt_ps(v0.
vec, v1.
vec);
679 int res = _mm_movemask_ps(vTemp) & 7;
689 __m128 vTemp = _mm_cmpgt_ps(v0.
vec, v1.
vec);
690 int res = _mm_movemask_ps(vTemp) & 7;
700 __m128 vTemp = _mm_cmpgt_ps(v0.
vec, v1.
vec);
701 int res = _mm_movemask_ps(vTemp) & 7;
711 __m128 vTemp = _mm_cmpge_ps(v0.
vec, v1.
vec);
712 int res = _mm_movemask_ps(vTemp) & 7;
722 __m128 vTemp = _mm_cmpge_ps(v0.
vec, v1.
vec);
723 int res = _mm_movemask_ps(vTemp) & 7;
733 __m128 vTemp = _mm_cmpeq_ps(v0.
vec, v1.
vec);
734 int res = _mm_movemask_ps(vTemp) & 7;
744 __m128 eps = _mm_setr_ps(epsilon, epsilon, epsilon, 0.0f);
745 __m128 delta = _mm_sub_ps(v0.
vec, v1.
vec);
746 __m128 temp = _mm_setzero_ps();
747 temp = _mm_sub_ps(temp, delta);
748 temp = _mm_max_ps(temp, delta);
749 temp = _mm_cmple_ps(temp, eps);
751 return (_mm_movemask_ps(temp) == 0x7) != 0;
760 __m128 delta = _mm_sub_ps(v0.
vec, v1.
vec);
761 __m128 temp = _mm_setzero_ps();
762 temp = _mm_sub_ps(temp, delta);
763 temp = _mm_max_ps(temp, delta);
764 temp = _mm_cmple_ps(temp, epsilon.
vec);
766 return (_mm_movemask_ps(temp) == 0x7) != 0;
775 return _mm_min_ps(_mm_cmplt_ps(v0.
vec, v1.
vec),
_plus1);
784 return _mm_min_ps(_mm_cmpgt_ps(v0.
vec, v1.
vec),
_plus1);
793 return _mm_min_ps(_mm_cmpeq_ps(v0.
vec, v1.
vec),
_plus1);
802 n_assert(element < 3 && element >= 0);
808 res = _mm_shuffle_ps(v.
vec, v.
vec, _MM_SHUFFLE(0, 0, 0, 0));
811 res = _mm_shuffle_ps(v.
vec, v.
vec, _MM_SHUFFLE(1, 1, 1, 1));
814 res = _mm_shuffle_ps(v.
vec, v.
vec, _MM_SHUFFLE(2, 2, 2, 2));
827 __m128 res = _mm_shuffle_ps(v.
vec, v.
vec, _MM_SHUFFLE(0, 0, 0, 0));
838 __m128 res = _mm_shuffle_ps(v.
vec, v.
vec, _MM_SHUFFLE(1, 1, 1, 1));
849 __m128 res = _mm_shuffle_ps(v.
vec, v.
vec, _MM_SHUFFLE(2, 2, 2, 2));
858permute(
const vec3& v0,
const vec3& v1,
unsigned int i0,
unsigned int i1,
unsigned int i2)
860 static __m128i three = _mm_set_epi32(3,3,3,3);
863 __m128i vControl = _mm_load_si128(
reinterpret_cast<const __m128i*
>(&elem[0]));
865 __m128i vSelect = _mm_cmpgt_epi32(vControl, three);
866 vControl = _mm_and_si128(vControl, three);
868 __m128 shuffled1 = _mm_permutevar_ps(v0.
vec, vControl);
869 __m128 shuffled2 = _mm_permutevar_ps(v1.
vec, vControl);
871 __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1);
872 __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2);
874 return _mm_or_ps(masked1, masked2);
884 return permute(v0, v1, i0, i1, i2);
893 __m128 v0masked = _mm_andnot_ps(control.
vec, v0.
vec);
894 __m128 v1masked = _mm_and_ps(v1.
vec, control.
vec);
895 return _mm_or_ps(v0masked, v1masked);
904 return _mm_floor_ps(v.
vec);
913 return _mm_ceil_ps(v.
vec);
#define n_assert(exp)
Definition debug.h:50
Different curves.
Definition angularpfeedbackloop.h:17
static const __m128 _id_z
Definition vec3.h:31
__forceinline point less(const point &v0, const point &v1)
Definition point.h:501
__forceinline vec3 cross(const vec3 &v0, const vec3 &v1)
Definition vec3.h:441
static const __m128 _id_x
Definition vec3.h:29
__forceinline point maximize(const point &v0, const point &v1)
Definition point.h:368
__forceinline quat barycentric(const quat &q0, const quat &q1, const quat &q2, scalar f, scalar g)
Definition quat.h:296
static const __m128 _zero
Definition vec3.h:35
__forceinline bool equal_any(const point &v0, const point &v1)
Definition point.h:474
__forceinline bool greaterequal_any(const point &v0, const point &v1)
Definition point.h:452
__forceinline vec3 ceiling(const vec3 &v)
Definition vec3.h:911
__forceinline vec3 splat_z(const vec3 &v)
Definition vec3.h:847
static const __m128i _sign
Definition vec3.h:36
__forceinline vec3 hermite(const vec3 &v1, const vec3 &t1, const vec3 &v2, const vec3 &t2, scalar s)
Definition vec3.h:508
__forceinline point equal(const point &v0, const point &v1)
Definition point.h:519
__forceinline vec3 splat_x(const vec3 &v)
Definition vec3.h:825
__forceinline vec3 splat(const vec3 &v, uint element)
Definition vec3.h:800
mat4 reflect(const vec4 &p)
based on this http://www.opengl.org/discussion_boards/showthread.php/169605-reflection-matrix-how-to-...
Definition mat4.cc:22
__forceinline vec3 multiply(const vec3 &v0, const vec3 &v1)
Definition vec3.h:399
__forceinline scalar angle(const vec3 &v0, const vec3 &v1)
Definition vec3.h:532
__forceinline vec3 divide(const vec3 &v0, const vec3 &v1)
Definition vec3.h:421
__forceinline vec3 reciprocal(const vec3 &v)
Definition vec3.h:381
__forceinline scalar length(const quat &q)
Definition quat.h:260
half operator/(half one, half two)
Definition half.h:132
__forceinline scalar lengthsq(const quat &q)
Definition quat.h:269
__forceinline scalar dot(const plane &p, const vec4 &v1)
Definition plane.h:252
__forceinline vec3 permute(const vec3 &v0, const vec3 &v1, unsigned int i0, unsigned int i1, unsigned int i2)
Definition vec3.h:858
__forceinline plane normalize(const plane &p)
Definition plane.h:261
__forceinline bool greaterequal_all(const point &v0, const point &v1)
Definition point.h:463
__forceinline bool greater_any(const point &v0, const point &v1)
Definition point.h:430
__forceinline float lerp(float x, float y, float l)
Linearly interpolate between 2 values: ret = x + l * (y - x)
Definition scalar.h:606
__forceinline float clamp(float val, float minVal, float maxVal)
Float clamping.
Definition scalar.h:496
half operator-(half one, half two)
Definition half.h:114
__forceinline bool less_any(const point &v0, const point &v1)
Definition point.h:386
__forceinline vec3 catmullrom(const vec3 &v0, const vec3 &v1, const vec3 &v2, const vec3 &v3, scalar s)
Definition vec3.h:484
static const __m128 _id_w
Definition vec3.h:32
__forceinline TYPE min(TYPE a, TYPE b)
Definition scalar.h:399
__forceinline bool less_all(const point &v0, const point &v1)
Definition point.h:397
__forceinline bool lessequal_all(const point &v0, const point &v1)
Definition point.h:419
__forceinline vec3 select(const vec3 &v0, const vec3 &v1, const uint i0, const uint i1, const uint i2)
Definition vec3.h:881
__forceinline vec3 splat_y(const vec3 &v)
Definition vec3.h:836
static const __m128 _minus1
Definition vec3.h:33
__forceinline point minimize(const point &v0, const point &v1)
Definition point.h:377
float scalar
Definition scalar.h:45
__forceinline point greater(const point &v0, const point &v1)
Definition point.h:510
half operator+(half one, half two)
Definition half.h:105
half operator*(half one, half two)
Definition half.h:123
__forceinline TYPE max(TYPE a, TYPE b)
Definition scalar.h:368
__forceinline scalar abs(scalar a)
Definition scalar.h:441
__forceinline vec3 multiplyadd(const vec3 &v0, const vec3 &v1, const vec3 &v2)
Definition vec3.h:408
__forceinline scalar acos(scalar x)
Definition scalar.h:218
__forceinline bool greater_all(const point &v0, const point &v1)
Definition point.h:441
__forceinline bool nearequal(const point &v0, const point &v1, float epsilon)
Definition point.h:485
static const __m128 _mask_xyz
Definition vec3.h:37
static const __m128 _id_y
Definition vec3.h:30
__forceinline bool lessequal_any(const point &v0, const point &v1)
Definition point.h:408
__forceinline float floor(float val)
Floating point flooring.
Definition scalar.h:542
__forceinline vec3 normalizeapprox(const vec3 &v)
Definition vec3.h:616
static const __m128 _plus1
Definition vec3.h:34
__forceinline vec3 reciprocalapprox(const vec3 &v)
Definition vec3.h:390
Nebula's scalar datatype.
vec3()=default
default constructor, NOTE: does NOT set up components!
scalar x
Definition scalar.h:70
scalar y
Definition scalar.h:70
scalar z
Definition scalar.h:70
A 4x4 single-precision floating-point matrix.
Definition mat4.h:49
A 3D vector.
Definition vec3.h:40
void loadu(const scalar *ptr)
load content from unaligned memory
Definition vec3.h:184
void stream(scalar *ptr) const
stream content to 16-byte-aligned memory circumventing the write-cache
Definition vec3.h:220
float v[3]
Definition vec3.h:99
bool operator==(const vec3 &rhs) const
equality operator
Definition vec3.h:152
void storeu(scalar *ptr) const
write content to unaligned memory through the write cache
Definition vec3.h:207
vec3()=default
default constructor, NOTE: does NOT set up components!
void load(const scalar *ptr)
load content from 16-byte-aligned memory
Definition vec3.h:173
void operator*=(scalar s)
inplace scalar multiply
Definition vec3.h:306
void operator-=(const vec3 &rhs)
inplace sub
Definition vec3.h:297
void store(scalar *ptr) const
write content to 16-byte-aligned memory through the write cache
Definition vec3.h:195
float x
Definition vec3.h:96
float z
Definition vec3.h:96
void operator/=(const vec3 &rhs)
divide by a vector component-wise
Definition vec3.h:279
float __w
Definition vec3.h:96
__m128 vec
Definition vec3.h:98
void operator+=(const vec3 &rhs)
inplace add
Definition vec3.h:288
scalar & operator[](const int index)
read/write access to indexed component (returns a mutable reference)
Definition vec3.h:343
bool operator!=(const vec3 &rhs) const
inequality operator
Definition vec3.h:162
void set(scalar x, scalar y, scalar z)
set content
Definition vec3.h:334
vec3(const vec3 &rhs)=default
copy constructor
float y
Definition vec3.h:96
void operator=(const __m128 &rhs)
assign from an __m128 value
Definition vec3.h:143
bool operator==(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:272
bool operator!=(const TiXmlString &a, const TiXmlString &b)
Definition tinystr.h:282
#define NEBULA_ALIGN16
Definition types.h:146
unsigned int uint
Definition types.h:31