nevrax.org : docs

00001 
+00007 /* Copyright, 2000-2002 Nevrax Ltd.
+00008  *
+00009  * This file is part of NEVRAX NEL.
+00010  * NEVRAX NEL is free software; you can redistribute it and/or modify
+00011  * it under the terms of the GNU General Public License as published by
+00012  * the Free Software Foundation; either version 2, or (at your option)
+00013  * any later version.
+00014 
+00015  * NEVRAX NEL is distributed in the hope that it will be useful, but
+00016  * WITHOUT ANY WARRANTY; without even the implied warranty of
+00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+00018  * General Public License for more details.
+00019 
+00020  * You should have received a copy of the GNU General Public License
+00021  * along with NEVRAX NEL; see the file COPYING. If not, write to the
+00022  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+00023  * MA 02111-1307, USA.
+00024  */
+00025 
+00026 #ifndef NL_MATRIX_3X4_H
+00027 #define NL_MATRIX_3X4_H
+00028 
#include "nel/misc/types_nl.h"

// SSE intrinsics used by CMatrix3x4SSE. Must come after types_nl.h, which defines NL_OS_WINDOWS.
#ifdef NL_OS_WINDOWS
#	include <xmmintrin.h>
#endif
+00030 
+00031 
+00032 namespace NL3D 
+00033 {
+00034 
+00035 
+00036 // ***************************************************************************
+00037 // ***************************************************************************
+00038 // STD Matrix
+00039 // ***************************************************************************
+00040 // ***************************************************************************
+00041 
+00042 
+00043 // ***************************************************************************
+00050 class   CMatrix3x4
+00051 {
+00052 public:
+00053         // Order them in memory line first, for faster memory access.
+00054         float   a11, a12, a13, a14;
+00055         float   a21, a22, a23, a24;
+00056         float   a31, a32, a33, a34;
+00057 
+00058         // Copy from a matrix.
+00059         void    set(const CMatrix &mat)
+00060         {
+00061                 const float     *m =mat.get();
+00062                 a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12]; 
+00063                 a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13]; 
+00064                 a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14]; 
+00065         }
+00066 
+00067 
+00068         // mulSetvector. NB: in should be different as v!! (else don't work).
+00069         void    mulSetVector(const CVector &in, CVector &out)
+00070         {
+00071                 out.x= (a11*in.x + a12*in.y + a13*in.z);
+00072                 out.y= (a21*in.x + a22*in.y + a23*in.z);
+00073                 out.z= (a31*in.x + a32*in.y + a33*in.z);
+00074         }
+00075         // mulSetpoint. NB: in should be different as v!! (else don't work).
+00076         void    mulSetPoint(const CVector &in, CVector &out)
+00077         {
+00078                 out.x= (a11*in.x + a12*in.y + a13*in.z + a14);
+00079                 out.y= (a21*in.x + a22*in.y + a23*in.z + a24);
+00080                 out.z= (a31*in.x + a32*in.y + a33*in.z + a34);
+00081         }
+00082 
+00083 
+00084         // mulSetvector. NB: in should be different as v!! (else don't work).
+00085         void    mulSetVector(const CVector &in, float scale, CVector &out)
+00086         {
+00087                 out.x= (a11*in.x + a12*in.y + a13*in.z) * scale;
+00088                 out.y= (a21*in.x + a22*in.y + a23*in.z) * scale;
+00089                 out.z= (a31*in.x + a32*in.y + a33*in.z) * scale;
+00090         }
+00091         // mulSetpoint. NB: in should be different as v!! (else don't work).
+00092         void    mulSetPoint(const CVector &in, float scale, CVector &out)
+00093         {
+00094                 out.x= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
+00095                 out.y= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
+00096                 out.z= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
+00097         }
+00098 
+00099 
+00100         // mulAddvector. NB: in should be different as v!! (else don't work).
+00101         void    mulAddVector(const CVector &in, float scale, CVector &out)
+00102         {
+00103                 out.x+= (a11*in.x + a12*in.y + a13*in.z) * scale;
+00104                 out.y+= (a21*in.x + a22*in.y + a23*in.z) * scale;
+00105                 out.z+= (a31*in.x + a32*in.y + a33*in.z) * scale;
+00106         }
+00107         // mulAddpoint. NB: in should be different as v!! (else don't work).
+00108         void    mulAddPoint(const CVector &in, float scale, CVector &out)
+00109         {
+00110                 out.x+= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
+00111                 out.y+= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
+00112                 out.z+= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
+00113         }
+00114 
+00115 
+00116 
+00117 };
+00118 
+00119 
+00120 // ***************************************************************************
+00121 // ***************************************************************************
+00122 // SSE Matrix
+00123 // ***************************************************************************
+00124 // ***************************************************************************
+00125 
+00126 
+00127 // ***************************************************************************
+00128 #ifdef NL_OS_WINDOWS
+00129 
+00130 //#define NL_DebugSSE
+00131 //#define NL_DebugSSENoSkin
+00132 
+00133 
+00134 // For fast vector/point multiplication.
+00135 class   CMatrix3x4SSE
+00136 {
+00137 public:
+00138         // Order them in memory column first, for SSE column multiplication.
+00139         float   a11, a21, a31, a41;
+00140         float   a12, a22, a32, a42;
+00141         float   a13, a23, a33, a43;
+00142         float   a14, a24, a34, a44;
+00143 
+00144         // Copy from a matrix.
+00145         void    set(const CMatrix &mat)
+00146         {
+00147                 const float     *m =mat.get();
+00148                 a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12]; 
+00149                 a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13]; 
+00150                 a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14]; 
+00151                 // not used.
+00152                 //a41= 0   ; a42= 0   ; a43= 0    ; a44= 1; 
+00153         }
+00154 
+00155 
+00156         // mulSetvector. NB: in should be different as v!! (else don't work).
+00157         void    mulSetVector(const CVector &vin, CVector &vout)
+00158         {
+00159         #ifndef NL_DebugSSE
+00160                 __asm
+00161                 {
+00162                         mov             eax, vin
+00163                         mov             ebx, this
+00164                         mov             edi, vout
+00165                         // Load in vector in op[0]
+00166                         movss   xmm0, [eax]vin.x
+00167                         movss   xmm1, [eax]vin.y
+00168                         movss   xmm2, [eax]vin.z
+00169                         // Expand op[0] to op[1], op[2], op[3]
+00170                         shufps  xmm0, xmm0, 0
+00171                         shufps  xmm1, xmm1, 0
+00172                         shufps  xmm2, xmm2, 0
+00173                         // Mul each vector with 3 Matrix column
+00174                         mulps   xmm0, [ebx]this.a11
+00175                         mulps   xmm1, [ebx]this.a12
+00176                         mulps   xmm2, [ebx]this.a13
+00177                         // Add each column vector.
+00178                         addps   xmm0, xmm1
+00179                         addps   xmm0, xmm2
+00180 
+00181                         // write the result.
+00182                         movss   [edi]vout.x, xmm0
+00183                         shufps  xmm0, xmm0, 33
+00184                         movss   [edi]vout.y, xmm0
+00185                         movhlps xmm0, xmm0
+00186                         movss   [edi]vout.z, xmm0
+00187                 }
+00188         #elif !defined (NL_DebugSSENoSkin)
+00189                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z);
+00190                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z);
+00191                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z);
+00192         #else
+00193                 vout= vin;
+00194         #endif
+00195         }
+00196         // mulSetpoint. NB: in should be different as v!! (else don't work).
+00197         void    mulSetPoint(const CVector &vin, CVector &vout)
+00198         {
+00199         #ifndef NL_DebugSSE
+00200                 __asm
+00201                 {
+00202                         mov             eax, vin
+00203                         mov             ebx, this
+00204                         mov             edi, vout
+00205                         // Load in vector in op[0]
+00206                         movss   xmm0, [eax]vin.x
+00207                         movss   xmm1, [eax]vin.y
+00208                         movss   xmm2, [eax]vin.z
+00209                         // Expand op[0] to op[1], op[2], op[3]
+00210                         shufps  xmm0, xmm0, 0
+00211                         shufps  xmm1, xmm1, 0
+00212                         shufps  xmm2, xmm2, 0
+00213                         // Mul each vector with 3 Matrix column
+00214                         mulps   xmm0, [ebx]this.a11
+00215                         mulps   xmm1, [ebx]this.a12
+00216                         mulps   xmm2, [ebx]this.a13
+00217                         // Add each column vector.
+00218                         addps   xmm0, xmm1
+00219                         addps   xmm0, xmm2
+00220                         // Add Matrix translate column vector
+00221                         addps   xmm0, [ebx]this.a14
+00222 
+00223                         // write the result.
+00224                         movss   [edi]vout.x, xmm0
+00225                         shufps  xmm0, xmm0, 33
+00226                         movss   [edi]vout.y, xmm0
+00227                         movhlps xmm0, xmm0
+00228                         movss   [edi]vout.z, xmm0
+00229                 }
+00230         #elif !defined (NL_DebugSSENoSkin)
+00231                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
+00232                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
+00233                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
+00234         #else
+00235                 vout= vin;
+00236         #endif
+00237         }
+00238 
+00239 
+00240         // mulSetvector. NB: vin should be different as v!! (else don't work).
+00241         void    mulSetVector(const CVector &vin, float scale, CVector &vout)
+00242         {
+00243         #ifndef NL_DebugSSE
+00244                 __asm
+00245                 {
+00246                         mov             eax, vin
+00247                         mov             ebx, this
+00248                         mov             edi, vout
+00249                         // Load in vector in op[0]
+00250                         movss   xmm0, [eax]vin.x
+00251                         movss   xmm1, [eax]vin.y
+00252                         movss   xmm2, [eax]vin.z
+00253                         // Load scale in op[0]
+00254                         movss   xmm3, scale
+00255                         // Expand op[0] to op[1], op[2], op[3]
+00256                         shufps  xmm0, xmm0, 0
+00257                         shufps  xmm1, xmm1, 0
+00258                         shufps  xmm2, xmm2, 0
+00259                         shufps  xmm3, xmm3, 0
+00260                         // Store vertex column in other regs.
+00261                         movaps  xmm5, xmm0
+00262                         movaps  xmm6, xmm1
+00263                         movaps  xmm7, xmm2
+00264                         // Mul each vector with 3 Matrix column
+00265                         mulps   xmm0, [ebx]this.a11
+00266                         mulps   xmm1, [ebx]this.a12
+00267                         mulps   xmm2, [ebx]this.a13
+00268                         // Add each column vector.
+00269                         addps   xmm0, xmm1
+00270                         addps   xmm0, xmm2
+00271 
+00272                         // mul final result with scale
+00273                         mulps   xmm0, xmm3
+00274 
+00275                         // store it in xmm4 for future use.
+00276                         movaps  xmm4, xmm0
+00277                 }
+00278         #elif !defined (NL_DebugSSENoSkin)
+00279                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z) * scale;
+00280                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z) * scale;
+00281                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z) * scale;
+00282         #else
+00283                 vout= vin;
+00284         #endif
+00285         }
+00286         // mulSetpoint. NB: vin should be different as v!! (else don't work).
+00287         void    mulSetPoint(const CVector &vin, float scale, CVector &vout)
+00288         {
+00289         #ifndef NL_DebugSSE
+00290                 __asm
+00291                 {
+00292                         mov             eax, vin
+00293                         mov             ebx, this
+00294                         mov             edi, vout
+00295                         // Load in vector in op[0]
+00296                         movss   xmm0, [eax]vin.x
+00297                         movss   xmm1, [eax]vin.y
+00298                         movss   xmm2, [eax]vin.z
+00299                         // Load scale in op[0]
+00300                         movss   xmm3, scale
+00301                         // Expand op[0] to op[1], op[2], op[3]
+00302                         shufps  xmm0, xmm0, 0
+00303                         shufps  xmm1, xmm1, 0
+00304                         shufps  xmm2, xmm2, 0
+00305                         shufps  xmm3, xmm3, 0
+00306                         // Store vertex column in other regs.
+00307                         movaps  xmm5, xmm0
+00308                         movaps  xmm6, xmm1
+00309                         movaps  xmm7, xmm2
+00310                         // Mul each vector with 3 Matrix column
+00311                         mulps   xmm0, [ebx]this.a11
+00312                         mulps   xmm1, [ebx]this.a12
+00313                         mulps   xmm2, [ebx]this.a13
+00314                         // Add each column vector.
+00315                         addps   xmm0, xmm1
+00316                         addps   xmm0, xmm2
+00317                         // Add Matrix translate column vector
+00318                         addps   xmm0, [ebx]this.a14
+00319 
+00320                         // mul final result with scale
+00321                         mulps   xmm0, xmm3
+00322 
+00323                         // store it in xmm4 for future use.
+00324                         movaps  xmm4, xmm0
+00325                 }
+00326         #elif !defined (NL_DebugSSENoSkin)
+00327                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14) * scale;
+00328                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24) * scale;
+00329                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34) * scale;
+00330         #else
+00331                 vout= vin;
+00332         #endif
+00333         }
+00334 
+00335 
+00336         // mulAddvector. NB: vin should be different as v!! (else don't work).
+00337         void    mulAddVector(const CVector &vin, float scale, CVector &vout)
+00338         {
+00339         #ifndef NL_DebugSSE
+00340                 __asm
+00341                 {
+00342                         mov             ebx, this
+00343                         mov             edi, vout
+00344                         // Load vin vector loaded in mulSetVector
+00345                         movaps  xmm0, xmm5
+00346                         movaps  xmm1, xmm6
+00347                         movaps  xmm2, xmm7
+00348                         // Load scale in op[0]
+00349                         movss   xmm3, scale
+00350                         // Expand op[0] to op[1], op[2], op[3]
+00351                         shufps  xmm3, xmm3, 0
+00352                         // Mul each vector with 3 Matrix column
+00353                         mulps   xmm0, [ebx]this.a11
+00354                         mulps   xmm1, [ebx]this.a12
+00355                         mulps   xmm2, [ebx]this.a13
+00356                         // Add each column vector.
+00357                         addps   xmm0, xmm1
+00358                         addps   xmm0, xmm2
+00359 
+00360                         // mul final result with scale
+00361                         mulps   xmm0, xmm3
+00362 
+00363                         // Add result, with prec sum.
+00364                         addps   xmm0, xmm4
+00365 
+00366                         // store it in xmm4 for future use.
+00367                         movaps  xmm4, xmm0
+00368 
+00369                         // write the result.
+00370                         movss   [edi]vout.x, xmm0
+00371                         shufps  xmm0, xmm0, 33
+00372                         movss   [edi]vout.y, xmm0
+00373                         movhlps xmm0, xmm0
+00374                         movss   [edi]vout.z, xmm0
+00375                 }
+00376         #elif !defined (NL_DebugSSENoSkin)
+00377                 vout.x+= (a11*vin.x + a12*vin.y + a13*vin.z) * scale;
+00378                 vout.y+= (a21*vin.x + a22*vin.y + a23*vin.z) * scale;
+00379                 vout.z+= (a31*vin.x + a32*vin.y + a33*vin.z) * scale;
+00380         #else
+00381                 vout= vin;
+00382         #endif
+00383         }
+00384         // mulAddpoint. NB: vin should be different as v!! (else don't work).
+00385         void    mulAddPoint(const CVector &vin, float scale, CVector &vout)
+00386         {
+00387         #ifndef NL_DebugSSE
+00388                 __asm
+00389                 {
+00390                         mov             ebx, this
+00391                         mov             edi, vout
+00392                         // Load vin vector loaded in mulSetPoint
+00393                         movaps  xmm0, xmm5
+00394                         movaps  xmm1, xmm6
+00395                         movaps  xmm2, xmm7
+00396                         // Load scale in op[0]
+00397                         movss   xmm3, scale
+00398                         // Expand op[0] to op[1], op[2], op[3]
+00399                         shufps  xmm3, xmm3, 0
+00400                         // Mul each vector with 3 Matrix column
+00401                         mulps   xmm0, [ebx]this.a11
+00402                         mulps   xmm1, [ebx]this.a12
+00403                         mulps   xmm2, [ebx]this.a13
+00404                         // Add each column vector.
+00405                         addps   xmm0, xmm1
+00406                         addps   xmm0, xmm2
+00407                         // Add Matrix translate column vector
+00408                         addps   xmm0, [ebx]this.a14
+00409 
+00410                         // mul final result with scale
+00411                         mulps   xmm0, xmm3
+00412 
+00413                         // Add result, with prec sum.
+00414                         addps   xmm0, xmm4
+00415 
+00416                         // store it in xmm4 for future use.
+00417                         movaps  xmm4, xmm0
+00418 
+00419                         // write the result.
+00420                         movss   [edi]vout.x, xmm0
+00421                         shufps  xmm0, xmm0, 33
+00422                         movss   [edi]vout.y, xmm0
+00423                         movhlps xmm0, xmm0
+00424                         movss   [edi]vout.z, xmm0
+00425                 }
+00426         #elif !defined (NL_DebugSSENoSkin)
+00427                 vout.x+= (a11*vin.x + a12*vin.y + a13*vin.z + a14) * scale;
+00428                 vout.y+= (a21*vin.x + a22*vin.y + a23*vin.z + a24) * scale;
+00429                 vout.z+= (a31*vin.x + a32*vin.y + a33*vin.z + a34) * scale;
+00430         #else
+00431                 vout= vin;
+00432         #endif
+00433         }
+00434 
+00435 };
+00436 
+00437 #else // NL_OS_WINDOWS
+00438 
+00439 class CMatrix3x4SSE : public  CMatrix3x4 { };
+00440 #endif
+00441 
+00442 
+00443 
+00444 } // NL3D
+00445 
+00446 
+00447 #endif // NL_MATRIX_3X4_H
+00448 
+00449 /* End of matrix_3x4.h */
+