nevrax.org : docs

00001 
00007 /* Copyright, 2000-2002 Nevrax Ltd.
00008  *
00009  * This file is part of NEVRAX NEL.
00010  * NEVRAX NEL is free software; you can redistribute it and/or modify
00011  * it under the terms of the GNU General Public License as published by
00012  * the Free Software Foundation; either version 2, or (at your option)
00013  * any later version.
00014 
00015  * NEVRAX NEL is distributed in the hope that it will be useful, but
00016  * WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00018  * General Public License for more details.
00019 
00020  * You should have received a copy of the GNU General Public License
00021  * along with NEVRAX NEL; see the file COPYING. If not, write to the
00022  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00023  * MA 02111-1307, USA.
00024  */
00025 
00026 #ifndef NL_MATRIX_3X4_H
00027 #define NL_MATRIX_3X4_H
00028 
00029 #include "nel/misc/types_nl.h"
00030 
00031 
00032 namespace NL3D 
00033 {
00034 
00035 
00036 // ***************************************************************************
00037 // ***************************************************************************
00038 // STD Matrix
00039 // ***************************************************************************
00040 // ***************************************************************************
00041 
00042 
00043 // ***************************************************************************
00050 class   CMatrix3x4
00051 {
00052 public:
00053         // Order them in memory line first, for faster memory access.
00054         float   a11, a12, a13, a14;
00055         float   a21, a22, a23, a24;
00056         float   a31, a32, a33, a34;
00057 
00058         // Copy from a matrix.
00059         void    set(const CMatrix &mat)
00060         {
00061                 const float     *m =mat.get();
00062                 a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12]; 
00063                 a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13]; 
00064                 a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14]; 
00065         }
00066 
00067 
00068         // mulSetvector. NB: in should be different as v!! (else don't work).
00069         void    mulSetVector(const CVector &in, CVector &out)
00070         {
00071                 out.x= (a11*in.x + a12*in.y + a13*in.z);
00072                 out.y= (a21*in.x + a22*in.y + a23*in.z);
00073                 out.z= (a31*in.x + a32*in.y + a33*in.z);
00074         }
00075         // mulSetpoint. NB: in should be different as v!! (else don't work).
00076         void    mulSetPoint(const CVector &in, CVector &out)
00077         {
00078                 out.x= (a11*in.x + a12*in.y + a13*in.z + a14);
00079                 out.y= (a21*in.x + a22*in.y + a23*in.z + a24);
00080                 out.z= (a31*in.x + a32*in.y + a33*in.z + a34);
00081         }
00082 
00083 
00084         // mulSetvector. NB: in should be different as v!! (else don't work).
00085         void    mulSetVector(const CVector &in, float scale, CVector &out)
00086         {
00087                 out.x= (a11*in.x + a12*in.y + a13*in.z) * scale;
00088                 out.y= (a21*in.x + a22*in.y + a23*in.z) * scale;
00089                 out.z= (a31*in.x + a32*in.y + a33*in.z) * scale;
00090         }
00091         // mulSetpoint. NB: in should be different as v!! (else don't work).
00092         void    mulSetPoint(const CVector &in, float scale, CVector &out)
00093         {
00094                 out.x= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
00095                 out.y= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
00096                 out.z= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
00097         }
00098 
00099 
00100         // mulAddvector. NB: in should be different as v!! (else don't work).
00101         void    mulAddVector(const CVector &in, float scale, CVector &out)
00102         {
00103                 out.x+= (a11*in.x + a12*in.y + a13*in.z) * scale;
00104                 out.y+= (a21*in.x + a22*in.y + a23*in.z) * scale;
00105                 out.z+= (a31*in.x + a32*in.y + a33*in.z) * scale;
00106         }
00107         // mulAddpoint. NB: in should be different as v!! (else don't work).
00108         void    mulAddPoint(const CVector &in, float scale, CVector &out)
00109         {
00110                 out.x+= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
00111                 out.y+= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
00112                 out.z+= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
00113         }
00114 
00115 
00116 
00117 };
00118 
00119 
00120 // ***************************************************************************
00121 // ***************************************************************************
00122 // SSE Matrix
00123 // ***************************************************************************
00124 // ***************************************************************************
00125 
00126 
00127 // ***************************************************************************
00128 #ifdef NL_OS_WINDOWS
00129 
00130 //#define NL_DebugSSE
00131 //#define NL_DebugSSENoSkin
00132 
00133 
00134 // For fast vector/point multiplication.
00135 class   CMatrix3x4SSE
00136 {
00137 public:
00138         // Order them in memory column first, for SSE column multiplication.
00139         float   a11, a21, a31, a41;
00140         float   a12, a22, a32, a42;
00141         float   a13, a23, a33, a43;
00142         float   a14, a24, a34, a44;
00143 
00144         // Copy from a matrix.
00145         void    set(const CMatrix &mat)
00146         {
00147                 const float     *m =mat.get();
00148                 a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12]; 
00149                 a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13]; 
00150                 a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14]; 
00151                 // not used.
00152                 //a41= 0   ; a42= 0   ; a43= 0    ; a44= 1; 
00153         }
00154 
00155 
00156         // mulSetvector. NB: in should be different as v!! (else don't work).
00157         void    mulSetVector(const CVector &vin, CVector &vout)
00158         {
00159         #ifndef NL_DebugSSE
00160                 __asm
00161                 {
00162                         mov             eax, vin
00163                         mov             ebx, this
00164                         mov             edi, vout
00165                         // Load in vector in op[0]
00166                         movss   xmm0, [eax]vin.x
00167                         movss   xmm1, [eax]vin.y
00168                         movss   xmm2, [eax]vin.z
00169                         // Expand op[0] to op[1], op[2], op[3]
00170                         shufps  xmm0, xmm0, 0
00171                         shufps  xmm1, xmm1, 0
00172                         shufps  xmm2, xmm2, 0
00173                         // Mul each vector with 3 Matrix column
00174                         mulps   xmm0, [ebx]this.a11
00175                         mulps   xmm1, [ebx]this.a12
00176                         mulps   xmm2, [ebx]this.a13
00177                         // Add each column vector.
00178                         addps   xmm0, xmm1
00179                         addps   xmm0, xmm2
00180 
00181                         // write the result.
00182                         movss   [edi]vout.x, xmm0
00183                         shufps  xmm0, xmm0, 33
00184                         movss   [edi]vout.y, xmm0
00185                         movhlps xmm0, xmm0
00186                         movss   [edi]vout.z, xmm0
00187                 }
00188         #elif !defined (NL_DebugSSENoSkin)
00189                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z);
00190                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z);
00191                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z);
00192         #else
00193                 vout= vin;
00194         #endif
00195         }
00196         // mulSetpoint. NB: in should be different as v!! (else don't work).
00197         void    mulSetPoint(const CVector &vin, CVector &vout)
00198         {
00199         #ifndef NL_DebugSSE
00200                 __asm
00201                 {
00202                         mov             eax, vin
00203                         mov             ebx, this
00204                         mov             edi, vout
00205                         // Load in vector in op[0]
00206                         movss   xmm0, [eax]vin.x
00207                         movss   xmm1, [eax]vin.y
00208                         movss   xmm2, [eax]vin.z
00209                         // Expand op[0] to op[1], op[2], op[3]
00210                         shufps  xmm0, xmm0, 0
00211                         shufps  xmm1, xmm1, 0
00212                         shufps  xmm2, xmm2, 0
00213                         // Mul each vector with 3 Matrix column
00214                         mulps   xmm0, [ebx]this.a11
00215                         mulps   xmm1, [ebx]this.a12
00216                         mulps   xmm2, [ebx]this.a13
00217                         // Add each column vector.
00218                         addps   xmm0, xmm1
00219                         addps   xmm0, xmm2
00220                         // Add Matrix translate column vector
00221                         addps   xmm0, [ebx]this.a14
00222 
00223                         // write the result.
00224                         movss   [edi]vout.x, xmm0
00225                         shufps  xmm0, xmm0, 33
00226                         movss   [edi]vout.y, xmm0
00227                         movhlps xmm0, xmm0
00228                         movss   [edi]vout.z, xmm0
00229                 }
00230         #elif !defined (NL_DebugSSENoSkin)
00231                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
00232                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
00233                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
00234         #else
00235                 vout= vin;
00236         #endif
00237         }
00238 
00239 
00240         // mulSetvector. NB: vin should be different as v!! (else don't work).
00241         void    mulSetVector(const CVector &vin, float scale, CVector &vout)
00242         {
00243         #ifndef NL_DebugSSE
00244                 __asm
00245                 {
00246                         mov             eax, vin
00247                         mov             ebx, this
00248                         mov             edi, vout
00249                         // Load in vector in op[0]
00250                         movss   xmm0, [eax]vin.x
00251                         movss   xmm1, [eax]vin.y
00252                         movss   xmm2, [eax]vin.z
00253                         // Load scale in op[0]
00254                         movss   xmm3, scale
00255                         // Expand op[0] to op[1], op[2], op[3]
00256                         shufps  xmm0, xmm0, 0
00257                         shufps  xmm1, xmm1, 0
00258                         shufps  xmm2, xmm2, 0
00259                         shufps  xmm3, xmm3, 0
00260                         // Store vertex column in other regs.
00261                         movaps  xmm5, xmm0
00262                         movaps  xmm6, xmm1
00263                         movaps  xmm7, xmm2
00264                         // Mul each vector with 3 Matrix column
00265                         mulps   xmm0, [ebx]this.a11
00266                         mulps   xmm1, [ebx]this.a12
00267                         mulps   xmm2, [ebx]this.a13
00268                         // Add each column vector.
00269                         addps   xmm0, xmm1
00270                         addps   xmm0, xmm2
00271 
00272                         // mul final result with scale
00273                         mulps   xmm0, xmm3
00274 
00275                         // store it in xmm4 for future use.
00276                         movaps  xmm4, xmm0
00277                 }
00278         #elif !defined (NL_DebugSSENoSkin)
00279                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z) * scale;
00280                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z) * scale;
00281                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z) * scale;
00282         #else
00283                 vout= vin;
00284         #endif
00285         }
00286         // mulSetpoint. NB: vin should be different as v!! (else don't work).
00287         void    mulSetPoint(const CVector &vin, float scale, CVector &vout)
00288         {
00289         #ifndef NL_DebugSSE
00290                 __asm
00291                 {
00292                         mov             eax, vin
00293                         mov             ebx, this
00294                         mov             edi, vout
00295                         // Load in vector in op[0]
00296                         movss   xmm0, [eax]vin.x
00297                         movss   xmm1, [eax]vin.y
00298                         movss   xmm2, [eax]vin.z
00299                         // Load scale in op[0]
00300                         movss   xmm3, scale
00301                         // Expand op[0] to op[1], op[2], op[3]
00302                         shufps  xmm0, xmm0, 0
00303                         shufps  xmm1, xmm1, 0
00304                         shufps  xmm2, xmm2, 0
00305                         shufps  xmm3, xmm3, 0
00306                         // Store vertex column in other regs.
00307                         movaps  xmm5, xmm0
00308                         movaps  xmm6, xmm1
00309                         movaps  xmm7, xmm2
00310                         // Mul each vector with 3 Matrix column
00311                         mulps   xmm0, [ebx]this.a11
00312                         mulps   xmm1, [ebx]this.a12
00313                         mulps   xmm2, [ebx]this.a13
00314                         // Add each column vector.
00315                         addps   xmm0, xmm1
00316                         addps   xmm0, xmm2
00317                         // Add Matrix translate column vector
00318                         addps   xmm0, [ebx]this.a14
00319 
00320                         // mul final result with scale
00321                         mulps   xmm0, xmm3
00322 
00323                         // store it in xmm4 for future use.
00324                         movaps  xmm4, xmm0
00325                 }
00326         #elif !defined (NL_DebugSSENoSkin)
00327                 vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14) * scale;
00328                 vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24) * scale;
00329                 vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34) * scale;
00330         #else
00331                 vout= vin;
00332         #endif
00333         }
00334 
00335 
00336         // mulAddvector. NB: vin should be different as v!! (else don't work).
00337         void    mulAddVector(const CVector &vin, float scale, CVector &vout)
00338         {
00339         #ifndef NL_DebugSSE
00340                 __asm
00341                 {
00342                         mov             ebx, this
00343                         mov             edi, vout
00344                         // Load vin vector loaded in mulSetVector
00345                         movaps  xmm0, xmm5
00346                         movaps  xmm1, xmm6
00347                         movaps  xmm2, xmm7
00348                         // Load scale in op[0]
00349                         movss   xmm3, scale
00350                         // Expand op[0] to op[1], op[2], op[3]
00351                         shufps  xmm3, xmm3, 0
00352                         // Mul each vector with 3 Matrix column
00353                         mulps   xmm0, [ebx]this.a11
00354                         mulps   xmm1, [ebx]this.a12
00355                         mulps   xmm2, [ebx]this.a13
00356                         // Add each column vector.
00357                         addps   xmm0, xmm1
00358                         addps   xmm0, xmm2
00359 
00360                         // mul final result with scale
00361                         mulps   xmm0, xmm3
00362 
00363                         // Add result, with prec sum.
00364                         addps   xmm0, xmm4
00365 
00366                         // store it in xmm4 for future use.
00367                         movaps  xmm4, xmm0
00368 
00369                         // write the result.
00370                         movss   [edi]vout.x, xmm0
00371                         shufps  xmm0, xmm0, 33
00372                         movss   [edi]vout.y, xmm0
00373                         movhlps xmm0, xmm0
00374                         movss   [edi]vout.z, xmm0
00375                 }
00376         #elif !defined (NL_DebugSSENoSkin)
00377                 vout.x+= (a11*vin.x + a12*vin.y + a13*vin.z) * scale;
00378                 vout.y+= (a21*vin.x + a22*vin.y + a23*vin.z) * scale;
00379                 vout.z+= (a31*vin.x + a32*vin.y + a33*vin.z) * scale;
00380         #else
00381                 vout= vin;
00382         #endif
00383         }
00384         // mulAddpoint. NB: vin should be different as v!! (else don't work).
00385         void    mulAddPoint(const CVector &vin, float scale, CVector &vout)
00386         {
00387         #ifndef NL_DebugSSE
00388                 __asm
00389                 {
00390                         mov             ebx, this
00391                         mov             edi, vout
00392                         // Load vin vector loaded in mulSetPoint
00393                         movaps  xmm0, xmm5
00394                         movaps  xmm1, xmm6
00395                         movaps  xmm2, xmm7
00396                         // Load scale in op[0]
00397                         movss   xmm3, scale
00398                         // Expand op[0] to op[1], op[2], op[3]
00399                         shufps  xmm3, xmm3, 0
00400                         // Mul each vector with 3 Matrix column
00401                         mulps   xmm0, [ebx]this.a11
00402                         mulps   xmm1, [ebx]this.a12
00403                         mulps   xmm2, [ebx]this.a13
00404                         // Add each column vector.
00405                         addps   xmm0, xmm1
00406                         addps   xmm0, xmm2
00407                         // Add Matrix translate column vector
00408                         addps   xmm0, [ebx]this.a14
00409 
00410                         // mul final result with scale
00411                         mulps   xmm0, xmm3
00412 
00413                         // Add result, with prec sum.
00414                         addps   xmm0, xmm4
00415 
00416                         // store it in xmm4 for future use.
00417                         movaps  xmm4, xmm0
00418 
00419                         // write the result.
00420                         movss   [edi]vout.x, xmm0
00421                         shufps  xmm0, xmm0, 33
00422                         movss   [edi]vout.y, xmm0
00423                         movhlps xmm0, xmm0
00424                         movss   [edi]vout.z, xmm0
00425                 }
00426         #elif !defined (NL_DebugSSENoSkin)
00427                 vout.x+= (a11*vin.x + a12*vin.y + a13*vin.z + a14) * scale;
00428                 vout.y+= (a21*vin.x + a22*vin.y + a23*vin.z + a24) * scale;
00429                 vout.z+= (a31*vin.x + a32*vin.y + a33*vin.z + a34) * scale;
00430         #else
00431                 vout= vin;
00432         #endif
00433         }
00434 
00435 };
00436 
00437 #else // NL_OS_WINDOWS
00438 
00439 class CMatrix3x4SSE : public  CMatrix3x4 { };
00440 #endif
00441 
00442 
00443 
00444 } // NL3D
00445 
00446 
00447 #endif // NL_MATRIX_3X4_H
00448 
00449 /* End of matrix_3x4.h */