# Home    # nevrax.com   
Nevrax
Nevrax.org
#News
#Mailing-list
#Documentation
#CVS
#Bugs
#License
Docs
 
Documentation  
Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages   Search  

fasthls_modifier.cpp

Go to the documentation of this file.
00001 
00007 /* Copyright, 2000-2002 Nevrax Ltd.
00008  *
00009  * This file is part of NEVRAX NEL.
00010  * NEVRAX NEL is free software; you can redistribute it and/or modify
00011  * it under the terms of the GNU General Public License as published by
00012  * the Free Software Foundation; either version 2, or (at your option)
00013  * any later version.
00014 
00015  * NEVRAX NEL is distributed in the hope that it will be useful, but
00016  * WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00018  * General Public License for more details.
00019 
00020  * You should have received a copy of the GNU General Public License
00021  * along with NEVRAX NEL; see the file COPYING. If not, write to the
00022  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00023  * MA 02111-1307, USA.
00024  */
00025 
00026 
00027 #include "std3d.h"
00028 #include "3d/fasthls_modifier.h"
00029 #include "3d/fast_floor.h"
00030 #include "nel/misc/bitmap.h"
00031 #include "nel/misc/system_info.h"
00032 #include "nel/misc/algo.h"
00033 
00034 
00035 using   namespace std;
00036 using   namespace NLMISC;
00037 
00038 
00039 namespace NL3D 
00040 {
00041 
00042 // ***************************************************************************
00043 CFastHLSModifier        *CFastHLSModifier::_Instance= NULL;
00044 
00045 
00046 // ***************************************************************************
00047 CFastHLSModifier::CFastHLSModifier()
00048 {
00049         uint i;
00050         // build the HueTable.
00051         for(i=0;i<HueTableSize;i++)
00052         {
00053                 _HueTable[i].buildFromHLS(360.0f*i/HueTableSize, 0.5f, 1);
00054         }
00055         // build conversion from uint16 to HLS.
00056         for(i=0;i<65536;i++)
00057         {
00058                 CRGBA   col;
00059                 col.set565(i);
00060                 float   h,l,s;
00061                 col.convertToHLS(h,l,s);
00062                 h= (float)floor(255*(h/360.f)+0.5f);
00063                 l= (float)floor(255*l+0.5f);
00064                 s= (float)floor(255*s+0.5f);
00065                 clamp(h,0,255);
00066                 clamp(l,0,255);
00067                 clamp(s,0,255);
00068                 _Color16ToHLS[i].H= (uint8)h;
00069                 _Color16ToHLS[i].L= (uint8)l;
00070                 _Color16ToHLS[i].S= (uint8)s;
00071                 _Color16ToHLS[i].A= 255;
00072         }
00073 }
00074 
00075 
00076 // ***************************************************************************
00077 CFastHLSModifier        &CFastHLSModifier::getInstance()
00078 {
00079         if(!_Instance)
00080                 _Instance= new CFastHLSModifier;
00081         return *_Instance;
00082 }
00083 
00084 
00085 // ***************************************************************************
00086 CRGBA           CFastHLSModifier::convert(uint H, uint L, uint S)
00087 {
00088         static  CRGBA   gray(128,128,128);
00089         L+= L>>7;
00090         S+= S>>7;
00091         // H.
00092         CRGBA   col= _HueTable[H];
00093         // S.
00094         col.blendFromuiRGBOnly(gray, col, S);
00095         // L.
00096         if(L<=128)
00097         {
00098                 col.modulateFromuiRGBOnly(col, L*2);
00099         }
00100         else
00101         {
00102                 col.blendFromuiRGBOnly(col, CRGBA::White, (L-128)*2 );
00103         }
00104 
00105         return col;
00106 }
00107 
00108 
00109 // ***************************************************************************
// Apply a hue / luminance / saturation offset to one 16-bit 565 colour.
//
// colorIn : source colour; it is used directly as an index into _Color16ToHLS.
// dHue    : added to the 8-bit hue with natural 8-bit wrap-around.
// dLum    : luminance offset, pre-biased by the caller as 0xFFFFFF00 + signed delta.
// dSat    : saturation offset, pre-biased the same way as dLum.
// Returns the modified colour packed back to 565.
//
// Two implementations are selected below: an MMX inline-asm path (only under
// NL_OS_WINDOWS and when CSystemInfo::hasMMX() is true) and a C++ path that goes
// through convert(). The asm path contains no emms instruction; the callers in
// this file issue emms once their loops are finished.
// NOTE(review): the asm path stores intermediate colours into the function-static
// mmInterpBufer on every call -- confirm this function is never run concurrently.
00110 uint16          CFastHLSModifier::applyHLSMod(uint16 colorIn, uint8 dHue, uint dLum, uint dSat)
00111 {
00112         static  uint64  mmBlank = 0;
00113         static  uint64  mmOne   = 0x00FF00FF00FF00FF;
00114         static  uint64  mmGray  = 0x0080008000800080;
// Four 64-bit slots: [0]= black, [1] and [2]= saturated colour (written by the asm), [3]= white.
00115         static  uint64  mmInterpBufer[4]= {0,0,0,0x00FF00FF00FF00FF};
00116 
00117         /*
00118                 dLum is actually 0xFFFFFF00 + realDLum
00119                 dSat is actually 0xFFFFFF00 + realDSat
00120         */
00121 
00122         uint16  retVal;
00123 
00124 #ifdef NL_OS_WINDOWS
00125         if(CSystemInfo::hasMMX())
00126         {
00127                 __asm
00128                 {
00129                         mov                     edi, offset mmInterpBufer
00130                         mov                     ecx, this
00131 
00132                         // get HLS in edx.
00133                         mov                     eax, 0
00134                         mov                     ebx, 0
00135                         lea                     esi, [ecx]this._Color16ToHLS
00136                         mov                     ax, colorIn
00137                         mov                     edx, [esi+ eax*4]
00138 
00139                         // apply dh to H (ie dl!). Auto-wrap.
00140                         add                     dl, dHue
00141                         // get the color into mm0
00142                         mov                     bl, dl
00143                         lea                     esi, [ecx]this._HueTable
00144                         movd            mm0, [esi+ ebx*4]
00145                         punpcklbw       mm0, mmBlank
00146 
00147                         // get L into eax and S into ebx
00148                         mov                     eax, edx
00149                         mov                     ebx, edx
00150                         shr                     eax, 8
00151                         shr                     ebx, 16
00152                         and                     eax, 255
00153                         and                     ebx, 255
00154                         // add dLum/dSat and clamp to 1.
                        // (the 0xFFFFFF00 bias makes the 32-bit add carry exactly when value+delta exceeds 255)
00155                         add                     eax, dLum
00156                         sbb                     ecx, ecx        // ecx= FFFFFFFF if carry.
00157                         add                     ebx, dSat
00158                         sbb                     edx, edx
00159                         or                      eax, ecx        // eax= FFFFFFFF if carry was set
00160                         or                      ebx, edx
00161                         // add Magic delta, and clamp to 0.
                        // (adding 256 removes the bias; no carry here means value+delta was negative)
00162                         add                     eax, 256
00163                         sbb                     ecx, ecx        // ecx= 0 if carry not set => result below 0.
00164                         add                     ebx, 256
00165                         sbb                     edx, edx
00166                         and                     eax, ecx        // eax= 0 if result was below 0
00167                         and                     ebx, edx
00168 
00169                         // Load Sat/(1-Sat) into MMX
00170                         movd            mm2, ebx
00171                         movq            mm3, mmOne
00172                         punpckldq       mm2, mm2        // mm2= 0000 00AA 0000 00AA
00173                         packssdw        mm2, mm2        // mm2= 00AA 00AA 00AA 00AA
00174                         movq            mm1, mmGray
00175                         psubusw         mm3, mm2                // mm3= 1-sat.
00176                         // combine Color and Sat
00177                         pmullw          mm0, mm2        // mm0= color*sat
00178                         pmullw          mm1, mm3        // mm1= gray*(1-sat)
00179                         paddusw         mm0, mm1        // mm0= color saturated
00180                         // shift and store into the buffer for Luminance interpolation
00181                         psrlw       mm0, 8
00182                         movq            [edi+ 8], mm0
00183                         movq            [edi+ 16], mm0
00184 
00185                         // use edx as index for luminance: 0: L=0 to 127. 1: L=128 to 255.
00186                         mov                     edx, eax
00187                         shl                     eax, 1
00188                         shr                     edx, 7
00189                         and                     eax, 255                // 0-127 and 128-255 transform auto to 0-254
00190                         // expand 0-254 to 0-255
00191                         mov                     ecx, eax
00192                         shl                     edx, 4
00193                         shr                     ecx, 7
00194                         add                     eax, ecx
00195 
00196                         // Combine color and Luminance into MMX. interpolate 0->col or col->white according to edx.
00197                         // Load Lum/(1-Lum) into MMX
00198                         movd            mm2, eax
00199                         movq            mm3, mmOne
00200                         punpckldq       mm2, mm2        // mm2= 0000 00AA 0000 00AA
00201                         packssdw        mm2, mm2        // mm2= 00AA 00AA 00AA 00AA
00202                         psubusw         mm3, mm2        // mm3= 1-lum.
00203                         // Load the two interpolation end-points: edx is 0 (black,col) or 16 (col,white)
00204                         movq            mm0, [edi+ edx]
00205                         movq            mm1, [edi+ edx + 8]
00206                         pmullw          mm0, mm3        // mm0= color0*(1-lum)
00207                         pmullw          mm1, mm2        // mm1= color1*lum
00208                         paddusw         mm0, mm1        // mm0= final color
00209 
00210                         // shift and unpack
00211                         psrlw       mm0, 8
00212                         packuswb    mm0, mm0
00213                         movd            eax, mm0
00214 
00215                         // pack to 16bits.
00216                         mov                     ebx, eax
00217                         mov                     ecx, eax
00218                         shl                     eax, 8          // Red
00219                         shr                     ebx, 5          // Green
00220                         shr                     ecx, 19         // Blue
00221                         and                     eax, 0xF800
00222                         and                     ebx, 0x07E0
00223                         and                     ecx, 0x001F
00224                         or                      eax, ebx
00225                         or                      eax, ecx
00226 
00227                         mov                     retVal, ax
00228                 }
00229         }
00230         else
00231 #endif  // NL_OS_WINDOWS
00232         {
00233                 CHLSA   hls= _Color16ToHLS[colorIn];
00234                 // apply (C version) Dhue, dLum and dSat
00235                 hls.H= (uint8)(hls.H + dHue);
                // subtracting 0xFFFFFF00 removes the bias and recovers the signed delta
00236                 sint    v= (sint)hls.L + (sint)(dLum-0xFFFFFF00);
00237                 fastClamp8(v);
00238                 hls.L= v;
00239                 v= (sint)hls.S + (sint)(dSat-0xFFFFFF00);
00240                 fastClamp8(v);
00241                 hls.S= v;
00242 
00243                 CRGBA   ret= convert(hls.H, hls.L, hls.S);
00244                 retVal= ret.get565();
00245         }
00246 
00247         return retVal;
00248 }
00249 
00250 
00251 // ***************************************************************************
00252 void            CFastHLSModifier::convertDDSBitmapDXTC1Or1A(CBitmap &dst, const CBitmap &src, uint8 dh, uint dLum, uint dSat)
00253 {
00254         uint    W= src.getWidth();
00255         uint    H= src.getHeight();
00256 
00257         const uint8     *srcPix= &(src.getPixels()[0]);
00258         uint8           *dstPix= &(dst.getPixels()[0]);
00259         uint    numBlock= (W*H)/16;
00260 
00261         /*
00262                 need to swap color and bits for DXTC1 or DXTC1A.
00263         */
00264 
00265         static uint32   bitLUT[8]= { 
00266                 1,0,3,2,                        // reverse std order
00267                 1,0,2,3,                        // reverse order for "special 0/black packing"
00268         };
00269 
00270         // Do not use alpha mask for now.
00271         for(;numBlock>0;numBlock--)
00272         {
00273                 uint16  srcCol0= ((uint16*)srcPix)[0];
00274                 uint16  srcCol1= ((uint16*)srcPix)[1];
00275                 bool    srcSign= srcCol0>srcCol1;
00276                 // apply modifiers for 2 colors.
00277                 uint16  dstCol0= applyHLSMod(srcCol0, dh,dLum,dSat);
00278                 uint16  dstCol1= applyHLSMod(srcCol1, dh,dLum,dSat);
00279                 bool    dstSign= dstCol0>dstCol1;
00280                 if((uint)dstSign!=(uint)srcSign)
00281                 {
00282                         swap(dstCol0,dstCol1);
00283                         // must change bits too!
00284                         uint32  srcBits= ((uint32*)srcPix)[1];
00285                         uint32  dstBits= 0;
00286                         // take correct lut according to original sign
00287                         uint32  *lut;
00288                         if(srcCol0>srcCol1)
00289                                 lut= bitLUT;
00290                         else
00291                                 lut= bitLUT+4;
00292 
00293                         // for all bits, transpose with lut.
00294 #ifdef NL_OS_WINDOWS
00295                         __asm
00296                         {
00297                                 mov             eax, srcBits
00298                                 mov             esi, lut
00299                                 mov             edx, 0
00300                                 mov             ecx, 16
00301                                 // prepare 1st.
00302                                 rol             eax, 2
00303                                 mov             ebx, eax
00304                                 and             ebx, 2
00305                                 // do it 16 times.
00306                         myLoop:
00307                                 or              edx, [esi+ebx*4]
00308                                 rol             eax, 2
00309                                 rol             edx, 2
00310                                 mov             ebx, eax
00311                                 and             ebx, 2
00312                                 dec             ecx
00313                                 jnz             myLoop
00314 
00315                                 ror             edx, 2
00316                                 mov             dstBits, edx
00317                         }
00318 #else
00319                         for(uint n=16;n>0;n--)
00320                         {
00321                                 // transform the id.
00322                                 uint    id= srcBits&3;
00323                                 id= lut[id];
00324                                 // write.
00325                                 dstBits|= id<<30;
00326                                 // don't decal last
00327                                 if(n>1)
00328                                         dstBits>>=2;
00329                         }
00330 #endif
00331 
00332                         // store 
00333                         ((uint32*)dstPix)[1]= dstBits;
00334                 }
00335                 else
00336                         // just copy bits
00337                         ((uint32*)dstPix)[1]= ((uint32*)srcPix)[3];
00338                 ((uint16*)dstPix)[0]= dstCol0;
00339                 ((uint16*)dstPix)[1]= dstCol1;
00340                 // skip.
00341                 srcPix+= 8;
00342                 dstPix+= 8;
00343         }
00344 
00345         // Must end MMX, for applyHLSMod()
00346 #ifdef NL_OS_WINDOWS
00347         if(CSystemInfo::hasMMX())
00348                 _asm    emms;
00349 #endif
00350 
00351 }
00352 
00353 // ***************************************************************************
00354 void            CFastHLSModifier::convertDDSBitmapDXTC3Or5(CBitmap &dst, const CBitmap &src, uint8 dh, uint dLum, uint dSat)
00355 {
00356         uint    W= src.getWidth();
00357         uint    H= src.getHeight();
00358 
00359         const uint8     *srcPix= &(src.getPixels()[0]);
00360         uint8           *dstPix= &(dst.getPixels()[0]);
00361         uint    numBlock= (W*H)/16;
00362 
00363         /*
00364                 NB: don't need to swap color and bits for DXTC3 or DXTC5.
00365         */
00366 
00367         // Do not use alpha mask for now.
00368         for(;numBlock>0;numBlock--)
00369         {
00370                 uint16  srcCol0= ((uint16*)srcPix)[4];
00371                 uint16  srcCol1= ((uint16*)srcPix)[5];
00372                 // apply modifiers for 2 colors.
00373                 ((uint16*)dstPix)[4]= applyHLSMod(srcCol0, dh,dLum,dSat);
00374                 ((uint16*)dstPix)[5]= applyHLSMod(srcCol1, dh,dLum,dSat);
00375                 // just copy bits
00376                 ((uint32*)dstPix)[3]= ((uint32*)srcPix)[3];
00377                 // copy alpha part.
00378                 ((uint32*)dstPix)[0]= ((uint32*)srcPix)[0];
00379                 ((uint32*)dstPix)[1]= ((uint32*)srcPix)[1];
00380                 // skip bits and alpha part.
00381                 srcPix+= 16;
00382                 dstPix+= 16;
00383         }
00384 
00385         // Must end MMX, for applyHLSMod()
00386 #ifdef NL_OS_WINDOWS
00387         if(CSystemInfo::hasMMX())
00388                 _asm    emms;
00389 #endif
00390 }
00391 
00392 // ***************************************************************************
00393 void            CFastHLSModifier::convertDDSBitmap(CBitmap &dst, const CBitmap &src, uint8 dh, sint dl, sint ds)
00394 {
00395         nlassert(src.getPixelFormat()==dst.getPixelFormat());
00396         nlassert(src.getWidth()==dst.getWidth() && src.getHeight()==dst.getHeight());
00397 
00398         // Magic add clamp.
00399         uint    dLum= 0xFFFFFF00 + dl;
00400         uint    dSat= 0xFFFFFF00 + ds;
00401 
00402         if(src.getPixelFormat()==CBitmap::DXTC1 || src.getPixelFormat()==CBitmap::DXTC1Alpha)
00403                 convertDDSBitmapDXTC1Or1A(dst, src, dh, dLum, dSat);
00404         else if(src.getPixelFormat()==CBitmap::DXTC3 || src.getPixelFormat()==CBitmap::DXTC5)
00405                 convertDDSBitmapDXTC3Or5(dst, src, dh, dLum, dSat);
00406         else
00407         {
00408                 nlstop;
00409         }
00410 }
00411 
00412 
00413 // ***************************************************************************
00414 void            CFastHLSModifier::convertRGBABitmap(CBitmap &dst, const CBitmap &src, uint8 dh, sint dl, sint ds)
00415 {
00416         nlassert(src.getPixelFormat()==dst.getPixelFormat());
00417         nlassert(src.getPixelFormat()==CBitmap::RGBA);
00418 
00419         uint    W= src.getWidth();
00420         uint    H= src.getHeight();
00421 
00422         const CRGBA     *srcPix= (const CRGBA*)&(src.getPixels()[0]);
00423         CRGBA           *dstPix= (CRGBA*)&(dst.getPixels()[0]);
00424         uint    numPix= W*H;
00425 
00426         // Do not use alpha mask for now.
00427         for(;numPix>0;numPix--)
00428         {
00429                 float   H,L,S;
00430                 srcPix->convertToHLS(H,L,S);
00431                 H*= 256.f/360.f;
00432                 L*= 255.f;
00433                 S*= 255.f;
00434                 H+= dh+0.5f;
00435                 L+= dl+0.5f;
00436                 S+= ds+0.5f;
00437                 clamp(H, 0, 255);
00438                 clamp(L, 0, 255);
00439                 clamp(S, 0, 255);
00440                 uint8   H8= (uint8)OptFastFloor(H);
00441                 uint8   L8= (uint8)OptFastFloor(L);
00442                 uint8   S8= (uint8)OptFastFloor(S);
00443                 *dstPix= convert(H8, L8, S8);
00444                 srcPix++;
00445                 dstPix++;
00446         }
00447 }
00448 
00449 
00450 } // NL3D