NeL: fasthls_modifier.cpp Source File

00001 
+00007 /* Copyright, 2000-2002 Nevrax Ltd.
+00008  *
+00009  * This file is part of NEVRAX NEL.
+00010  * NEVRAX NEL is free software; you can redistribute it and/or modify
+00011  * it under the terms of the GNU General Public License as published by
+00012  * the Free Software Foundation; either version 2, or (at your option)
+00013  * any later version.
+00014 
+00015  * NEVRAX NEL is distributed in the hope that it will be useful, but
+00016  * WITHOUT ANY WARRANTY; without even the implied warranty of
+00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+00018  * General Public License for more details.
+00019 
+00020  * You should have received a copy of the GNU General Public License
+00021  * along with NEVRAX NEL; see the file COPYING. If not, write to the
+00022  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+00023  * MA 02111-1307, USA.
+00024  */
+00025 
+00026 
+00027 #include "std3d.h"
+00028 #include "3d/fasthls_modifier.h"
+00029 #include "nel/misc/fast_floor.h"
+00030 #include "nel/misc/bitmap.h"
+00031 #include "nel/misc/system_info.h"
+00032 #include "nel/misc/algo.h"
+00033 
+00034 
+00035 using   namespace std;
+00036 using   namespace NLMISC;
+00037 
+00038 
+00039 namespace NL3D 
+00040 {
+00041 
+00042 // ***************************************************************************
+00043 CFastHLSModifier        *CFastHLSModifier::_Instance= NULL;
+00044 
+00045 
+00046 // ***************************************************************************
+00047 CFastHLSModifier::CFastHLSModifier()
+00048 {
+00049         uint i;
+00050         // build the HueTable.
+00051         for(i=0;i<HueTableSize;i++)
+00052         {
+00053                 _HueTable[i].buildFromHLS(360.0f*i/HueTableSize, 0.5f, 1);
+00054         }
+00055         // build conversion from uint16 to HLS.
+00056         for(i=0;i<65536;i++)
+00057         {
+00058                 CRGBA   col;
+00059                 col.set565(i);
+00060                 float   h,l,s;
+00061                 col.convertToHLS(h,l,s);
+00062                 h= (float)floor(255*(h/360.f)+0.5f);
+00063                 l= (float)floor(255*l+0.5f);
+00064                 s= (float)floor(255*s+0.5f);
+00065                 clamp(h,0,255);
+00066                 clamp(l,0,255);
+00067                 clamp(s,0,255);
+00068                 _Color16ToHLS[i].H= (uint8)h;
+00069                 _Color16ToHLS[i].L= (uint8)l;
+00070                 _Color16ToHLS[i].S= (uint8)s;
+00071                 _Color16ToHLS[i].A= 255;
+00072         }
+00073 }
+00074 
+00075 
+00076 // ***************************************************************************
+00077 CFastHLSModifier        &CFastHLSModifier::getInstance()
+00078 {
+00079         if(!_Instance)
+00080                 _Instance= new CFastHLSModifier;
+00081         return *_Instance;
+00082 }
+00083 
+00084 
+00085 // ***************************************************************************
+00086 CRGBA           CFastHLSModifier::convert(uint H, uint L, uint S)
+00087 {
+00088         static  CRGBA   gray(128,128,128);
+00089         L+= L>>7;
+00090         S+= S>>7;
+00091         // H.
+00092         CRGBA   col= _HueTable[H];
+00093         // S.
+00094         col.blendFromuiRGBOnly(gray, col, S);
+00095         // L.
+00096         if(L<=128)
+00097         {
+00098                 col.modulateFromuiRGBOnly(col, L*2);
+00099         }
+00100         else
+00101         {
+00102                 col.blendFromuiRGBOnly(col, CRGBA::White, (L-128)*2 );
+00103         }
+00104 
+00105         return col;
+00106 }
+00107 
+00108 
+00109 // ***************************************************************************
+00110 uint16          CFastHLSModifier::applyHLSMod(uint16 colorIn, uint8 dHue, uint dLum, uint dSat)
+00111 {
+00112         static  uint64  mmBlank = 0;
+00113         static  uint64  mmOne   = INT64_CONSTANT(0x00FF00FF00FF00FF);
+00114         static  uint64  mmGray  = INT64_CONSTANT(0x0080008000800080);
+00115         static  uint64  mmInterpBufer[4]= {0,0,0,INT64_CONSTANT(0x00FF00FF00FF00FF)};
+00116 
+00117         /*
+00118                 dLum is actually 0xFFFFFF00 + realDLum
+00119                 dSat is actually 0xFFFFFF00 + realDSat
+00120         */
+00121 
+00122         uint16  retVal;
+00123 
+00124 #ifdef NL_OS_WINDOWS
+00125         if(CSystemInfo::hasMMX())
+00126         {
+00127                 __asm
+00128                 {
+00129                         mov                     edi, offset mmInterpBufer
+00130                         mov                     ecx, this
+00131 
+00132                         // get HLS in edx.
+00133                         mov                     eax, 0
+00134                         mov                     ebx, 0
+00135                         lea                     esi, [ecx]this._Color16ToHLS
+00136                         mov                     ax, colorIn
+00137                         mov                     edx, [esi+ eax*4]
+00138 
+00139                         // apply dh to H (ie dl!). Auto-wrap.
+00140                         add                     dl, dHue
+00141                         // get the color into mm0
+00142                         mov                     bl, dl
+00143                         lea                     esi, [ecx]this._HueTable
+00144                         movd            mm0, [esi+ ebx*4]
+00145                         punpcklbw       mm0, mmBlank
+00146 
+00147                         // get L into eax and S into ebx
+00148                         mov                     eax, edx
+00149                         mov                     ebx, edx
+00150                         shr                     eax, 8
+00151                         shr                     ebx, 16
+00152                         and                     eax, 255
+00153                         and                     ebx, 255
+00154                         // add dLum/dSat and clamp to 1.
+00155                         add                     eax, dLum
+00156                         sbb                     ecx, ecx        // ecx= FFFFFFFF if carry.
+00157                         add                     ebx, dSat
+00158                         sbb                     edx, edx
+00159                         or                      eax, ecx        // eax= FFFFFFFF if carry was set
+00160                         or                      ebx, edx
+00161                         // add Magic delta, and clamp to 0.
+00162                         add                     eax, 256
+00163                         sbb                     ecx, ecx        // ecx= 0 if carry not set => result below 0.
+00164                         add                     ebx, 256
+00165                         sbb                     edx, edx
+00166                         and                     eax, ecx        // eax= 0 if result was below 0
+00167                         and                     ebx, edx
+00168 
+00169                         // Load Sat/(1-Sat) into MMX
+00170                         movd            mm2, ebx
+00171                         movq            mm3, mmOne
+00172                         punpckldq       mm2, mm2        // mm2= 0000 00AA 0000 00AA
+00173                         packssdw        mm2, mm2        // mm2= 00AA 00AA 00AA 00AA
+00174                         movq            mm1, mmGray
+00175                         psubusw         mm3, mm2                // mm3= 1-sat.
+00176                         // combine Color and Sat
+00177                         pmullw          mm0, mm2        // mm0= color*sat
+00178                         pmullw          mm1, mm3        // mm1= gray*(1-sat)
+00179                         paddusw         mm0, mm1        // mm0= color saturated
+00180                         // shift and store into the buffer for Luminance interpolation
+00181                         psrlw       mm0, 8
+00182                         movq            [edi+ 8], mm0
+00183                         movq            [edi+ 16], mm0
+00184 
+00185                         // use edx as index for luminance: 0: L=0 to 127. 1: L=128 to 255.
+00186                         mov                     edx, eax
+00187                         shl                     eax, 1
+00188                         shr                     edx, 7
+00189                         and                     eax, 255                // 0-127 and 128-255 transform auto to 0-254
+00190                         // expand 0-254 to 0-255
+00191                         mov                     ecx, eax
+00192                         shl                     edx, 4
+00193                         shr                     ecx, 7
+00194                         add                     eax, ecx
+00195 
+00196                         // Combine color and Luminance into MMX. interpolate 0->col or col->white according to edx.
+00197                         // Load Lum/(1-Lum) into MMX
+00198                         movd            mm2, eax
+00199                         movq            mm3, mmOne
+00200                         punpckldq       mm2, mm2        // mm2= 0000 00AA 0000 00AA
+00201                         packssdw        mm2, mm2        // mm2= 00AA 00AA 00AA 00AA
+00202                         psubusw         mm3, mm2        // mm3= 1-lum.
+00203                         // Combine color and Sat into MMX
+00204                         movq            mm0, [edi+ edx]
+00205                         movq            mm1, [edi+ edx + 8]
+00206                         pmullw          mm0, mm3        // mm0= color0*(1-lum)
+00207                         pmullw          mm1, mm2        // mm1= color1*lum
+00208                         paddusw         mm0, mm1        // mm0= final color
+00209 
+00210                         // shift and unpack
+00211                         psrlw       mm0, 8
+00212                         packuswb    mm0, mm0
+00213                         movd            eax, mm0
+00214 
+00215                         // pack to 16bits.
+00216                         mov                     ebx, eax
+00217                         mov                     ecx, eax
+00218                         shl                     eax, 8          // Red
+00219                         shr                     ebx, 5          // Green
+00220                         shr                     ecx, 19         // Blue
+00221                         and                     eax, 0xF800
+00222                         and                     ebx, 0x07E0
+00223                         and                     ecx, 0x001F
+00224                         or                      eax, ebx
+00225                         or                      eax, ecx
+00226 
+00227                         mov                     retVal, ax
+00228                 }
+00229         }
+00230         else
+00231 #endif  // NL_OS_WINDOWS
+00232         {
+00233                 CHLSA   hls= _Color16ToHLS[colorIn];
+00234                 // apply (C version) Dhue, dLum and dSat
+00235                 hls.H= (uint8)(hls.H + dHue);
+00236                 sint    v= (sint)hls.L + (sint)(dLum-0xFFFFFF00);
+00237                 fastClamp8(v);
+00238                 hls.L= v;
+00239                 v= (sint)hls.S + (sint)(dSat-0xFFFFFF00);
+00240                 fastClamp8(v);
+00241                 hls.S= v;
+00242 
+00243                 CRGBA   ret= convert(hls.H, hls.L, hls.S);
+00244                 retVal= ret.get565();
+00245         }
+00246 
+00247         return retVal;
+00248 }
+00249 
+00250 
+00251 // ***************************************************************************
+00252 void            CFastHLSModifier::convertDDSBitmapDXTC1Or1A(CBitmap &dst, const CBitmap &src, uint8 dh, uint dLum, uint dSat)
+00253 {
+00254         uint    W= src.getWidth();
+00255         uint    H= src.getHeight();
+00256 
+00257         const uint8     *srcPix= &(src.getPixels()[0]);
+00258         uint8           *dstPix= &(dst.getPixels()[0]);
+00259         uint    numBlock= (W*H)/16;
+00260 
+00261         /*
+00262                 need to swap color and bits for DXTC1 or DXTC1A.
+00263         */
+00264 
+00265         static uint32   bitLUT[8]= { 
+00266                 1,0,3,2,                        // reverse std order
+00267                 1,0,2,3,                        // reverse order for "special 0/black packing"
+00268         };
+00269 
+00270         // Do not use alpha mask for now.
+00271         for(;numBlock>0;numBlock--)
+00272         {
+00273                 uint16  srcCol0= ((uint16*)srcPix)[0];
+00274                 uint16  srcCol1= ((uint16*)srcPix)[1];
+00275                 bool    srcSign= srcCol0>srcCol1;
+00276                 // apply modifiers for 2 colors.
+00277                 uint16  dstCol0= applyHLSMod(srcCol0, dh,dLum,dSat);
+00278                 uint16  dstCol1= applyHLSMod(srcCol1, dh,dLum,dSat);
+00279                 bool    dstSign= dstCol0>dstCol1;
+00280                 if((uint)dstSign!=(uint)srcSign)
+00281                 {
+00282                         swap(dstCol0,dstCol1);
+00283                         // must change bits too!
+00284                         uint32  srcBits= ((uint32*)srcPix)[1];
+00285                         uint32  dstBits= 0;
+00286                         // take correct lut according to original sign
+00287                         uint32  *lut;
+00288                         if(srcCol0>srcCol1)
+00289                                 lut= bitLUT;
+00290                         else
+00291                                 lut= bitLUT+4;
+00292 
+00293                         // for all bits, transpose with lut.
+00294 #ifdef NL_OS_WINDOWS
+00295                         __asm
+00296                         {
+00297                                 mov             eax, srcBits
+00298                                 mov             esi, lut
+00299                                 mov             edx, 0
+00300                                 mov             ecx, 16
+00301                                 // prepare 1st.
+00302                                 rol             eax, 2
+00303                                 mov             ebx, eax
+00304                                 and             ebx, 2
+00305                                 // do it 16 times.
+00306                         myLoop:
+00307                                 or              edx, [esi+ebx*4]
+00308                                 rol             eax, 2
+00309                                 rol             edx, 2
+00310                                 mov             ebx, eax
+00311                                 and             ebx, 2
+00312                                 dec             ecx
+00313                                 jnz             myLoop
+00314 
+00315                                 ror             edx, 2
+00316                                 mov             dstBits, edx
+00317                         }
+00318 #else
+00319                         for(uint n=16;n>0;n--)
+00320                         {
+00321                                 // transform the id.
+00322                                 uint    id= srcBits&3;
+00323                                 id= lut[id];
+00324                                 // write.
+00325                                 dstBits|= id<<30;
+00326                                 // don't decal last
+00327                                 if(n>1)
+00328                                         dstBits>>=2;
+00329                         }
+00330 #endif
+00331 
+00332                         // store 
+00333                         ((uint32*)dstPix)[1]= dstBits;
+00334                 }
+00335                 else
+00336                         // just copy bits
+00337                         ((uint32*)dstPix)[1]= ((uint32*)srcPix)[3];
+00338                 ((uint16*)dstPix)[0]= dstCol0;
+00339                 ((uint16*)dstPix)[1]= dstCol1;
+00340                 // skip.
+00341                 srcPix+= 8;
+00342                 dstPix+= 8;
+00343         }
+00344 
+00345         // Must end MMX, for applyHLSMod()
+00346 #ifdef NL_OS_WINDOWS
+00347         if(CSystemInfo::hasMMX())
+00348                 _asm    emms;
+00349 #endif
+00350 
+00351 }
+00352 
+00353 // ***************************************************************************
+00354 void            CFastHLSModifier::convertDDSBitmapDXTC3Or5(CBitmap &dst, const CBitmap &src, uint8 dh, uint dLum, uint dSat)
+00355 {
+00356         uint    W= src.getWidth();
+00357         uint    H= src.getHeight();
+00358 
+00359         const uint8     *srcPix= &(src.getPixels()[0]);
+00360         uint8           *dstPix= &(dst.getPixels()[0]);
+00361         uint    numBlock= (W*H)/16;
+00362 
+00363         /*
+00364                 NB: don't need to swap color and bits for DXTC3 or DXTC5.
+00365         */
+00366 
+00367         // Do not use alpha mask for now.
+00368         for(;numBlock>0;numBlock--)
+00369         {
+00370                 uint16  srcCol0= ((uint16*)srcPix)[4];
+00371                 uint16  srcCol1= ((uint16*)srcPix)[5];
+00372                 // apply modifiers for 2 colors.
+00373                 ((uint16*)dstPix)[4]= applyHLSMod(srcCol0, dh,dLum,dSat);
+00374                 ((uint16*)dstPix)[5]= applyHLSMod(srcCol1, dh,dLum,dSat);
+00375                 // just copy bits
+00376                 ((uint32*)dstPix)[3]= ((uint32*)srcPix)[3];
+00377                 // copy alpha part.
+00378                 ((uint32*)dstPix)[0]= ((uint32*)srcPix)[0];
+00379                 ((uint32*)dstPix)[1]= ((uint32*)srcPix)[1];
+00380                 // skip bits and alpha part.
+00381                 srcPix+= 16;
+00382                 dstPix+= 16;
+00383         }
+00384 
+00385         // Must end MMX, for applyHLSMod()
+00386 #ifdef NL_OS_WINDOWS
+00387         if(CSystemInfo::hasMMX())
+00388                 _asm    emms;
+00389 #endif
+00390 }
+00391 
+00392 // ***************************************************************************
+00393 void            CFastHLSModifier::convertDDSBitmap(CBitmap &dst, const CBitmap &src, uint8 dh, sint dl, sint ds)
+00394 {
+00395         nlassert(src.getPixelFormat()==dst.getPixelFormat());
+00396         nlassert(src.getWidth()==dst.getWidth() && src.getHeight()==dst.getHeight());
+00397 
+00398         // Magic add clamp.
+00399         uint    dLum= 0xFFFFFF00 + dl;
+00400         uint    dSat= 0xFFFFFF00 + ds;
+00401 
+00402         if(src.getPixelFormat()==CBitmap::DXTC1 || src.getPixelFormat()==CBitmap::DXTC1Alpha)
+00403                 convertDDSBitmapDXTC1Or1A(dst, src, dh, dLum, dSat);
+00404         else if(src.getPixelFormat()==CBitmap::DXTC3 || src.getPixelFormat()==CBitmap::DXTC5)
+00405                 convertDDSBitmapDXTC3Or5(dst, src, dh, dLum, dSat);
+00406         else
+00407         {
+00408                 nlstop;
+00409         }
+00410 }
+00411 
+00412 
+00413 // ***************************************************************************
+00414 void            CFastHLSModifier::convertRGBABitmap(CBitmap &dst, const CBitmap &src, uint8 dh, sint dl, sint ds)
+00415 {
+00416         nlassert(src.getPixelFormat()==dst.getPixelFormat());
+00417         nlassert(src.getPixelFormat()==CBitmap::RGBA);
+00418 
+00419         uint    W= src.getWidth();
+00420         uint    H= src.getHeight();
+00421 
+00422         const CRGBA     *srcPix= (const CRGBA*)&(src.getPixels()[0]);
+00423         CRGBA           *dstPix= (CRGBA*)&(dst.getPixels()[0]);
+00424         uint    numPix= W*H;
+00425 
+00426         // Do not use alpha mask for now.
+00427         for(;numPix>0;numPix--)
+00428         {
+00429                 float   H,L,S;
+00430                 srcPix->convertToHLS(H,L,S);
+00431                 H*= 256.f/360.f;
+00432                 L*= 255.f;
+00433                 S*= 255.f;
+00434                 H+= dh+0.5f;
+00435                 L+= dl+0.5f;
+00436                 S+= ds+0.5f;
+00437                 clamp(H, 0, 255);
+00438                 clamp(L, 0, 255);
+00439                 clamp(S, 0, 255);
+00440                 uint8   H8= (uint8)NLMISC::OptFastFloor(H);
+00441                 uint8   L8= (uint8)NLMISC::OptFastFloor(L);
+00442                 uint8   S8= (uint8)NLMISC::OptFastFloor(S);
+00443                 *dstPix= convert(H8, L8, S8);
+00444                 srcPix++;
+00445                 dstPix++;
+00446         }
+00447 }
+00448 
+00449 
+00450 } // NL3D
+