00001
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef NL_MATRIX_3X4_H
00027 #define NL_MATRIX_3X4_H
00028
00029 #include "nel/misc/types_nl.h"
00030
00031
00032 namespace NL3D
00033 {
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00050 class CMatrix3x4
00051 {
00052 public:
00053
00054 float a11, a12, a13, a14;
00055 float a21, a22, a23, a24;
00056 float a31, a32, a33, a34;
00057
00058
00059 void set(const CMatrix &mat)
00060 {
00061 const float *m =mat.get();
00062 a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12];
00063 a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13];
00064 a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14];
00065 }
00066
00067
00068
00069 void mulSetVector(const CVector &in, CVector &out)
00070 {
00071 out.x= (a11*in.x + a12*in.y + a13*in.z);
00072 out.y= (a21*in.x + a22*in.y + a23*in.z);
00073 out.z= (a31*in.x + a32*in.y + a33*in.z);
00074 }
00075
00076 void mulSetPoint(const CVector &in, CVector &out)
00077 {
00078 out.x= (a11*in.x + a12*in.y + a13*in.z + a14);
00079 out.y= (a21*in.x + a22*in.y + a23*in.z + a24);
00080 out.z= (a31*in.x + a32*in.y + a33*in.z + a34);
00081 }
00082
00083
00084
00085 void mulSetVector(const CVector &in, float scale, CVector &out)
00086 {
00087 out.x= (a11*in.x + a12*in.y + a13*in.z) * scale;
00088 out.y= (a21*in.x + a22*in.y + a23*in.z) * scale;
00089 out.z= (a31*in.x + a32*in.y + a33*in.z) * scale;
00090 }
00091
00092 void mulSetPoint(const CVector &in, float scale, CVector &out)
00093 {
00094 out.x= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
00095 out.y= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
00096 out.z= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
00097 }
00098
00099
00100
00101 void mulAddVector(const CVector &in, float scale, CVector &out)
00102 {
00103 out.x+= (a11*in.x + a12*in.y + a13*in.z) * scale;
00104 out.y+= (a21*in.x + a22*in.y + a23*in.z) * scale;
00105 out.z+= (a31*in.x + a32*in.y + a33*in.z) * scale;
00106 }
00107
00108 void mulAddPoint(const CVector &in, float scale, CVector &out)
00109 {
00110 out.x+= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
00111 out.y+= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
00112 out.z+= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
00113 }
00114
00115
00116
00117 };
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128 #ifdef NL_OS_WINDOWS
00129
00130
00131
00132
00133
00134
00135 class CMatrix3x4SSE
00136 {
00137 public:
00138
00139 float a11, a21, a31, a41;
00140 float a12, a22, a32, a42;
00141 float a13, a23, a33, a43;
00142 float a14, a24, a34, a44;
00143
00144
00145 void set(const CMatrix &mat)
00146 {
00147 const float *m =mat.get();
00148 a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12];
00149 a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13];
00150 a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14];
00151
00152
00153 }
00154
00155
00156
00157 void mulSetVector(const CVector &vin, CVector &vout)
00158 {
00159 #ifndef NL_DebugSSE
00160 __asm
00161 {
00162 mov eax, vin
00163 mov ebx, this
00164 mov edi, vout
00165
00166 movss xmm0, [eax]vin.x
00167 movss xmm1, [eax]vin.y
00168 movss xmm2, [eax]vin.z
00169
00170 shufps xmm0, xmm0, 0
00171 shufps xmm1, xmm1, 0
00172 shufps xmm2, xmm2, 0
00173
00174 mulps xmm0, [ebx]this.a11
00175 mulps xmm1, [ebx]this.a12
00176 mulps xmm2, [ebx]this.a13
00177
00178 addps xmm0, xmm1
00179 addps xmm0, xmm2
00180
00181
00182 movss [edi]vout.x, xmm0
00183 shufps xmm0, xmm0, 33
00184 movss [edi]vout.y, xmm0
00185 movhlps xmm0, xmm0
00186 movss [edi]vout.z, xmm0
00187 }
00188 #elif !defined (NL_DebugSSENoSkin)
00189 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z);
00190 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z);
00191 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z);
00192 #else
00193 vout= vin;
00194 #endif
00195 }
00196
00197 void mulSetPoint(const CVector &vin, CVector &vout)
00198 {
00199 #ifndef NL_DebugSSE
00200 __asm
00201 {
00202 mov eax, vin
00203 mov ebx, this
00204 mov edi, vout
00205
00206 movss xmm0, [eax]vin.x
00207 movss xmm1, [eax]vin.y
00208 movss xmm2, [eax]vin.z
00209
00210 shufps xmm0, xmm0, 0
00211 shufps xmm1, xmm1, 0
00212 shufps xmm2, xmm2, 0
00213
00214 mulps xmm0, [ebx]this.a11
00215 mulps xmm1, [ebx]this.a12
00216 mulps xmm2, [ebx]this.a13
00217
00218 addps xmm0, xmm1
00219 addps xmm0, xmm2
00220
00221 addps xmm0, [ebx]this.a14
00222
00223
00224 movss [edi]vout.x, xmm0
00225 shufps xmm0, xmm0, 33
00226 movss [edi]vout.y, xmm0
00227 movhlps xmm0, xmm0
00228 movss [edi]vout.z, xmm0
00229 }
00230 #elif !defined (NL_DebugSSENoSkin)
00231 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
00232 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
00233 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
00234 #else
00235 vout= vin;
00236 #endif
00237 }
00238
00239
00240
00241 void mulSetVector(const CVector &vin, float scale, CVector &vout)
00242 {
00243 #ifndef NL_DebugSSE
00244 __asm
00245 {
00246 mov eax, vin
00247 mov ebx, this
00248 mov edi, vout
00249
00250 movss xmm0, [eax]vin.x
00251 movss xmm1, [eax]vin.y
00252 movss xmm2, [eax]vin.z
00253
00254 movss xmm3, scale
00255
00256 shufps xmm0, xmm0, 0
00257 shufps xmm1, xmm1, 0
00258 shufps xmm2, xmm2, 0
00259 shufps xmm3, xmm3, 0
00260
00261 movaps xmm5, xmm0
00262 movaps xmm6, xmm1
00263 movaps xmm7, xmm2
00264
00265 mulps xmm0, [ebx]this.a11
00266 mulps xmm1, [ebx]this.a12
00267 mulps xmm2, [ebx]this.a13
00268
00269 addps xmm0, xmm1
00270 addps xmm0, xmm2
00271
00272
00273 mulps xmm0, xmm3
00274
00275
00276 movaps xmm4, xmm0
00277 }
00278 #elif !defined (NL_DebugSSENoSkin)
00279 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z) * scale;
00280 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z) * scale;
00281 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z) * scale;
00282 #else
00283 vout= vin;
00284 #endif
00285 }
00286
00287 void mulSetPoint(const CVector &vin, float scale, CVector &vout)
00288 {
00289 #ifndef NL_DebugSSE
00290 __asm
00291 {
00292 mov eax, vin
00293 mov ebx, this
00294 mov edi, vout
00295
00296 movss xmm0, [eax]vin.x
00297 movss xmm1, [eax]vin.y
00298 movss xmm2, [eax]vin.z
00299
00300 movss xmm3, scale
00301
00302 shufps xmm0, xmm0, 0
00303 shufps xmm1, xmm1, 0
00304 shufps xmm2, xmm2, 0
00305 shufps xmm3, xmm3, 0
00306
00307 movaps xmm5, xmm0
00308 movaps xmm6, xmm1
00309 movaps xmm7, xmm2
00310
00311 mulps xmm0, [ebx]this.a11
00312 mulps xmm1, [ebx]this.a12
00313 mulps xmm2, [ebx]this.a13
00314
00315 addps xmm0, xmm1
00316 addps xmm0, xmm2
00317
00318 addps xmm0, [ebx]this.a14
00319
00320
00321 mulps xmm0, xmm3
00322
00323
00324 movaps xmm4, xmm0
00325 }
00326 #elif !defined (NL_DebugSSENoSkin)
00327 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14) * scale;
00328 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24) * scale;
00329 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34) * scale;
00330 #else
00331 vout= vin;
00332 #endif
00333 }
00334
00335
00336
00337 void mulAddVector(const CVector &vin, float scale, CVector &vout)
00338 {
00339 #ifndef NL_DebugSSE
00340 __asm
00341 {
00342 mov ebx, this
00343 mov edi, vout
00344
00345 movaps xmm0, xmm5
00346 movaps xmm1, xmm6
00347 movaps xmm2, xmm7
00348
00349 movss xmm3, scale
00350
00351 shufps xmm3, xmm3, 0
00352
00353 mulps xmm0, [ebx]this.a11
00354 mulps xmm1, [ebx]this.a12
00355 mulps xmm2, [ebx]this.a13
00356
00357 addps xmm0, xmm1
00358 addps xmm0, xmm2
00359
00360
00361 mulps xmm0, xmm3
00362
00363
00364 addps xmm0, xmm4
00365
00366
00367 movaps xmm4, xmm0
00368
00369
00370 movss [edi]vout.x, xmm0
00371 shufps xmm0, xmm0, 33
00372 movss [edi]vout.y, xmm0
00373 movhlps xmm0, xmm0
00374 movss [edi]vout.z, xmm0
00375 }
00376 #elif !defined (NL_DebugSSENoSkin)
00377 vout.x+= (a11*vin.x + a12*vin.y + a13*vin.z) * scale;
00378 vout.y+= (a21*vin.x + a22*vin.y + a23*vin.z) * scale;
00379 vout.z+= (a31*vin.x + a32*vin.y + a33*vin.z) * scale;
00380 #else
00381 vout= vin;
00382 #endif
00383 }
00384
00385 void mulAddPoint(const CVector &vin, float scale, CVector &vout)
00386 {
00387 #ifndef NL_DebugSSE
00388 __asm
00389 {
00390 mov ebx, this
00391 mov edi, vout
00392
00393 movaps xmm0, xmm5
00394 movaps xmm1, xmm6
00395 movaps xmm2, xmm7
00396
00397 movss xmm3, scale
00398
00399 shufps xmm3, xmm3, 0
00400
00401 mulps xmm0, [ebx]this.a11
00402 mulps xmm1, [ebx]this.a12
00403 mulps xmm2, [ebx]this.a13
00404
00405 addps xmm0, xmm1
00406 addps xmm0, xmm2
00407
00408 addps xmm0, [ebx]this.a14
00409
00410
00411 mulps xmm0, xmm3
00412
00413
00414 addps xmm0, xmm4
00415
00416
00417 movaps xmm4, xmm0
00418
00419
00420 movss [edi]vout.x, xmm0
00421 shufps xmm0, xmm0, 33
00422 movss [edi]vout.y, xmm0
00423 movhlps xmm0, xmm0
00424 movss [edi]vout.z, xmm0
00425 }
00426 #elif !defined (NL_DebugSSENoSkin)
00427 vout.x+= (a11*vin.x + a12*vin.y + a13*vin.z + a14) * scale;
00428 vout.y+= (a21*vin.x + a22*vin.y + a23*vin.z + a24) * scale;
00429 vout.z+= (a31*vin.x + a32*vin.y + a33*vin.z + a34) * scale;
00430 #else
00431 vout= vin;
00432 #endif
00433 }
00434
00435 };
00436
00437 #else // NL_OS_WINDOWS
00438
00439 class CMatrix3x4SSE : public CMatrix3x4 { };
00440 #endif
00441
00442
00443
00444 }
00445
00446
00447 #endif // NL_MATRIX_3X4_H
00448
00449