nevrax.org : docs

00001 
+00007 /* Copyright, 2000-2002 Nevrax Ltd.
+00008  *
+00009  * This file is part of NEVRAX NEL.
+00010  * NEVRAX NEL is free software; you can redistribute it and/or modify
+00011  * it under the terms of the GNU General Public License as published by
+00012  * the Free Software Foundation; either version 2, or (at your option)
+00013  * any later version.
+00014 
+00015  * NEVRAX NEL is distributed in the hope that it will be useful, but
+00016  * WITHOUT ANY WARRANTY; without even the implied warranty of
+00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+00018  * General Public License for more details.
+00019 
+00020  * You should have received a copy of the GNU General Public License
+00021  * along with NEVRAX NEL; see the file COPYING. If not, write to the
+00022  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+00023  * MA 02111-1307, USA.
+00024  */
+00025 
+00026 #include "std3d.h"
+00027 #include "3d/hls_color_texture.h"
+00028 #include "3d/fast_floor.h"
+00029 #include "3d/fasthls_modifier.h"
+00030 #include "nel/misc/stream.h"
+00031 #include "nel/misc/bitmap.h"
+00032 #include "nel/misc/system_info.h"
+00033 #include "nel/misc/algo.h"
+00034 
+00035 
+00036 using   namespace std;
+00037 using   namespace NLMISC;
+00038 
+00039 namespace NL3D 
+00040 {
+00041 
+00042 
// Number of pixels held by one 4x4 block.
#define BLOCK_NUM_PIXEL		16
// Size, in bytes, of one DXTC5 block as stored in _Texture.
#define BLOCK_DXTC_SIZE		16
// Size, in bytes, of the alpha data stored per mixt block in a mask (one uint8 per pixel).
#define BLOCK_ALPHA_SIZE	16
+00046 
+00047 
+00048 // ***************************************************************************
+00049 void    CHLSColorDelta::serial(NLMISC::IStream &f)
+00050 {
+00051         f.serialVersion(0);
+00052         f.serial(DHue, DLum, DSat);
+00053 }
+00054 
+00055 
+00056 // ***************************************************************************
+00057 void                    CHLSColorTexture::CMask::serial(NLMISC::IStream &f)
+00058 {
+00059         f.serialVersion(0);
+00060 
+00061         f.serial(FullBlockIndex);
+00062         f.serial(MixtBlockIndex);
+00063         f.serialCont(Data);
+00064 }
+00065 
+00066 
+00067 // ***************************************************************************
+00068 void                    CHLSColorTexture::CMask::setBit(uint bitId)
+00069 {
+00070         uint8   &b= Data[bitId/8];
+00071         b|= 1<<(bitId&7);
+00072 }
+00073 
+00074 
+00075 // ***************************************************************************
+00076 CHLSColorTexture::CHLSColorTexture()
+00077 {
+00078         reset();
+00079 }
+00080 
+00081 // ***************************************************************************
+00082 void                    CHLSColorTexture::reset()
+00083 {
+00084         _Width= 0;
+00085         _Height= 0;
+00086         _NumMipMap= 0;
+00087         contReset(_Texture);
+00088         contReset(_Masks);
+00089 }
+00090 
+00091 // ***************************************************************************
+00092 void                    CHLSColorTexture::setBitmap(const NLMISC::CBitmap &bmp)
+00093 {
+00094         nlassert(bmp.getPixelFormat()==CBitmap::DXTC5);
+00095         uint    width= bmp.getWidth();
+00096         uint    height= bmp.getHeight();
+00097         uint    mmCount= bmp.getMipMapCount();
+00098         nlassert(width>=1 && height>=1);
+00099         nlassert(mmCount>1 || width*height==1);
+00100 
+00101         // restart
+00102         reset();
+00103 
+00104         // resize.
+00105         uint    m;
+00106         uint    pixelSize= 0;
+00107         uint    numTotalBlock= 0;
+00108         for(m=0;m<mmCount;m++)
+00109         {
+00110                 pixelSize+= bmp.getPixels(m).size();
+00111                 uint    mmWidth= bmp.getWidth(m);
+00112                 uint    mmHeight= bmp.getHeight(m);
+00113                 uint    wBlock= (mmWidth+3)/4;
+00114                 uint    hBlock= (mmHeight+3)/4;
+00115                 numTotalBlock+= wBlock*hBlock;
+00116         }
+00117         // add the info for the "Block to compress"
+00118         uint    blockToCompressSize= 4*((numTotalBlock+31)/32);
+00119         // allocate good size, and reset to 0 => no block to re-compress.
+00120         _Texture.resize(pixelSize+blockToCompressSize, 0);
+00121 
+00122         // fill texture
+00123         uint8   *ptr= &_Texture[0];
+00124         for(m=0;m<mmCount;m++)
+00125         {
+00126                 uint    mSize= bmp.getPixels(m).size();
+00127                 memcpy(ptr, &bmp.getPixels(m)[0], mSize);
+00128                 ptr+= mSize;
+00129         }
+00130 
+00131         // header
+00132         _BlockToCompressIndex= pixelSize;
+00133         _Width= width;
+00134         _Height= height;
+00135         _NumMipMap= mmCount;
+00136 }
+00137 
+00138 
+00139 // ***************************************************************************
+00140 #define MASK_BLOCK_EMPTY        0
+00141 #define MASK_BLOCK_FULL         1
+00142 #define MASK_BLOCK_MIXT         2
+00143 struct  CMaskInfo
+00144 {
+00145         // list of block
+00146         uint                            WBlock, HBlock;
+00147         uint                            NumBlock;
+00148         vector<uint8>           Blocks; // 0 empty, 1. Full. 2. Mixt.
+00149 };
+00150 
+00151 // ***************************************************************************
+00152 void                    CHLSColorTexture::addMask(const NLMISC::CBitmap &bmpIn, uint threshold)
+00153 {
+00154         // copy the bitmap and set RGBA/mipmaps.
+00155         CBitmap         bmp= bmpIn;
+00156         bmp.convertToType(CBitmap::RGBA);
+00157         bmp.buildMipMaps();
+00158 
+00159         // verify widht...
+00160         nlassert(bmp.getWidth()== _Width);
+00161         nlassert(bmp.getHeight()== _Height);
+00162         nlassert(bmp.getMipMapCount()== _NumMipMap);
+00163 
+00164         // ***** build the information for all mipmaps
+00165         vector<CMaskInfo>       masks;
+00166         masks.resize(_NumMipMap);
+00167         uint    m;
+00168         uint    numMixtBlock= 0;
+00169         uint    numTotalBlock= 0;
+00170         for(m=0;m<_NumMipMap;m++)
+00171         {
+00172                 CMaskInfo       &mask= masks[m];
+00173                 uint    mmWidth= bmp.getWidth(m);
+00174                 uint    mmHeight= bmp.getHeight(m);
+00175                 mask.WBlock= (mmWidth+3)/4;
+00176                 mask.HBlock= (mmHeight+3)/4;
+00177                 mask.NumBlock= mask.WBlock*mask.HBlock;
+00178                 mask.Blocks.resize(mask.NumBlock);
+00179 
+00180                 numTotalBlock+= mask.NumBlock;
+00181 
+00182                 CRGBA   *src= (CRGBA*)(&bmp.getPixels(m)[0]);
+00183 
+00184                 for(uint yB=0;yB<mask.HBlock;yB++)
+00185                 {
+00186                         for(uint xB=0;xB<mask.WBlock;xB++)
+00187                         {
+00188                                 uint    accum= 0;
+00189                                 uint    w= min(mmWidth, 4U);
+00190                                 uint    h= min(mmHeight, 4U);
+00191                                 for(uint y= 0;y< h;y++)
+00192                                 {
+00193                                         for(uint x= 0;x< w;x++)
+00194                                         {
+00195                                                 uint    yPix= yB*4+y;
+00196                                                 uint    xPix= xB*4+x;
+00197                                                 // read the color
+00198                                                 uint8   alphaMask = src[yPix*mmWidth+xPix].R;
+00199                                                 // remove some dummy precision.
+00200                                                 if(alphaMask<threshold)
+00201                                                         alphaMask= 0;
+00202                                                 if(alphaMask>255-threshold)
+00203                                                         alphaMask= 255;
+00204                                                 // Add to the accum
+00205                                                 accum+= alphaMask;
+00206                                         }
+00207                                 }
+00208 
+00209                                 // full black?
+00210                                 if(accum==0)
+00211                                         mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_EMPTY;
+00212                                 else if(accum==w*h*255)
+00213                                         mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_FULL;
+00214                                 // if not full white or full black, mixt block
+00215                                 else
+00216                                 {
+00217                                         mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_MIXT;
+00218                                         numMixtBlock++;
+00219                                 }
+00220                         }
+00221                 }
+00222         }
+00223 
+00224         // ***** compress into CMask
+00225         CMask           newMask;
+00226         uint            newMaskDataSize= 0;
+00227 
+00228         // add the mixt block data size (16*uint8 per block)
+00229         newMaskDataSize+= numMixtBlock*BLOCK_ALPHA_SIZE;
+00230         // compute the bit size. NB: use uint32 to blocks bits. => data is aligned.
+00231         uint    bitDataSize= 4*((numTotalBlock+31)/32);
+00232         // add fullBlock bits
+00233         newMask.FullBlockIndex= newMaskDataSize;
+00234         newMaskDataSize+= bitDataSize;
+00235         // add mixtBlock bits
+00236         newMask.MixtBlockIndex= newMaskDataSize;
+00237         newMaskDataSize+= bitDataSize;
+00238 
+00239         // allocate. Fill with 0 to initialize bits per default EMPTY value
+00240         newMask.Data.resize(newMaskDataSize, 0);
+00241 
+00242         // compress each mipMaps from bigger to smaller
+00243         uint    bitId= 0;
+00244         uint    mixtBlockId= 0;
+00245         for(m=0;m<_NumMipMap;m++)
+00246         {
+00247                 CMaskInfo       &mask= masks[m];
+00248 
+00249                 // ---- build the mixtBlock alpha Mask
+00250                 for(uint yB=0;yB<mask.HBlock;yB++)
+00251                 {
+00252                         for(uint xB=0;xB<mask.WBlock;xB++)
+00253                         {
+00254                                 uint    id= yB*mask.WBlock+xB;
+00255                                 // if mixt block
+00256                                 if(mask.Blocks[id]==MASK_BLOCK_MIXT)
+00257                                 {
+00258                                         nlassert(mixtBlockId<numMixtBlock);
+00259                                         // Fill Alpha data.
+00260                                         uint8   *dst= &newMask.Data[mixtBlockId*BLOCK_ALPHA_SIZE];
+00261                                         uint    mmWidth= bmp.getWidth(m);
+00262                                         uint    mmHeight= bmp.getHeight(m);
+00263                                         // point to the src alpha color
+00264                                         CRGBA   *src= (CRGBA*)(&bmp.getPixels(m)[0]);
+00265                                         src= src + yB*4*mmWidth + xB*4;
+00266 
+00267                                         // for the 4*4 pixels
+00268                                         uint    w= min(mmWidth, 4U);
+00269                                         uint    h= min(mmHeight, 4U);
+00270                                         for(uint y=0;y<h;y++)
+00271                                         {
+00272                                                 for(uint x=0;x<w;x++)
+00273                                                 {
+00274                                                         dst[y*4+x]= src[y*mmWidth+x].R;
+00275                                                 }
+00276                                         }
+00277 
+00278                                         // inc
+00279                                         mixtBlockId++;
+00280                                 }
+00281                         }
+00282                 }
+00283 
+00284                 // ---- build the fullBlock and mixtBlocks bits.
+00285                 for(uint i=0; i<mask.NumBlock; i++)
+00286                 {
+00287                         nlassert(bitId<numTotalBlock);
+00288 
+00289                         // fill bits
+00290                         if(mask.Blocks[i]==MASK_BLOCK_FULL)
+00291                                 newMask.setBit(newMask.FullBlockIndex*8 + bitId);
+00292                         else if(mask.Blocks[i]==MASK_BLOCK_MIXT)
+00293                                 newMask.setBit(newMask.MixtBlockIndex*8 + bitId);
+00294 
+00295                         // inc
+00296                         bitId++;
+00297                 }
+00298         }
+00299 
+00300         // ***** Add the CMask
+00301         _Masks.push_back(newMask);
+00302 
+00303         // Or the BlockToCompress info with the MixtBlocks bits.
+00304         nlassert(bitDataSize==_Texture.size()-_BlockToCompressIndex);
+00305         for(uint i=0;i<bitDataSize;i++)
+00306         {
+00307                 _Texture[_BlockToCompressIndex+i]|= newMask.Data[newMask.MixtBlockIndex+i];
+00308         }
+00309 }
+00310 
+00311 
+00312 // ***************************************************************************
+00313 void                    CHLSColorTexture::serial(NLMISC::IStream &f)
+00314 {
+00315         f.serialVersion(0);
+00316 
+00317         f.serial(_Width, _Height, _NumMipMap, _BlockToCompressIndex);
+00318         f.serialCont(_Texture);
+00319         f.serialCont(_Masks);
+00320 }
+00321 
+00322 
+00323 // ***************************************************************************
+00324 static inline   void    getBitPack(uint32 *bitPtr, uint32 &bitMask)
+00325 {
+00326 #ifdef NL_LITTLE_ENDIAN
+00327         bitMask= *bitPtr;
+00328 #else
+00329         bitMask = ((uint8*)bitPtr)[0];
+00330         bitMask+= ((uint8*)bitPtr)[1]<<8;
+00331         bitMask+= ((uint8*)bitPtr)[2]<<16;
+00332         bitMask+= ((uint8*)bitPtr)[3]<<24;
+00333 #endif
+00334 }
+00335 
+00336 // ***************************************************************************
+00337 void                    CHLSColorTexture::buildColorVersion(const CHLSColorDelta *colDeltaList, NLMISC::CBitmap &out)
+00338 {
+00339         // static to avoid realloc
+00340         static  vector<uint8>   dstTexture;
+00341         static  vector<CRGBA>   dstUnCompTexture;
+00342         uint32  *bitPtr;
+00343         uint8   *srcPtr;
+00344         uint8   *dstPtr;
+00345         CRGBA   *dstUnCompPtr;
+00346         uint32  bitMask;
+00347 
+00348         // **** prepare Data
+00349 
+00350         // count number of DXTC5 block in _Texture.
+00351         uint    numBlocks= _BlockToCompressIndex/BLOCK_DXTC_SIZE;
+00352 
+00353         // create a tmp compressed block array, copy of Texture.
+00354         dstTexture.resize(numBlocks*BLOCK_DXTC_SIZE);
+00355         // copy from texture (to have non colored version already copied, and also ALPHA ok)
+00356         memcpy(&dstTexture[0], &_Texture[0], dstTexture.size());
+00357 
+00358         // create a tmp uncompressed block array, which will receive coloring of mixt blocks
+00359         dstUnCompTexture.resize(numBlocks*BLOCK_NUM_PIXEL);
+00360 
+00361         // For all blockToCompress, uncompress them in dstUnCompTexture, because they will blend with future mask coloring
+00362         uint    n= numBlocks;
+00363         bitPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
+00364         dstUnCompPtr= &dstUnCompTexture[0];
+00365         srcPtr= &_Texture[0];
+00366         while(n>0)
+00367         {
+00368                 uint    nBits= min(n, 32U);
+00369                 getBitPack(bitPtr, bitMask);
+00370                 n-= nBits;
+00371                 bitPtr++;
+00372                 for(;nBits>0;nBits--)
+00373                 {
+00374                         // need to compress/uncompress ??
+00375                         if(bitMask&1)
+00376                         {
+00377                                 // uncompress this block. ignore alpha
+00378                                 uncompressBlockRGB(srcPtr, dstUnCompPtr);
+00379                         }
+00380                         bitMask>>=1;
+00381                         dstUnCompPtr+= BLOCK_NUM_PIXEL;
+00382                         srcPtr+= BLOCK_DXTC_SIZE;
+00383                 }
+00384         }
+00385 
+00386         // **** build the color version for all masks.
+00387 
+00388         for(uint maskId= 0; maskId<_Masks.size();maskId++)
+00389         {
+00390                 CMask                   &mask= _Masks[maskId];
+00391                 // unpack colDelta, and prepare for use with CFastHLSModifier.
+00392                 uint8                   dHue= colDeltaList[maskId].DHue;
+00393                 uint                    dLum= 0xFFFFFF00 + colDeltaList[maskId].DLum*2;
+00394                 uint                    dSat= 0xFFFFFF00 + colDeltaList[maskId].DSat*2;
+00395 
+00396                 // get a ptr on alpha of mixt block.
+00397                 uint8                   *alphaMixtBlock= &mask.Data[0];
+00398 
+00399 
+00400                 // ---- for all Fullblock ot this mask, color and store in dstTexture
+00401                 // start at full Block bits desc
+00402                 bitPtr= (uint32*)(&mask.Data[mask.FullBlockIndex]);
+00403                 uint32  *bitCompPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
+00404                 srcPtr= &_Texture[0];
+00405                 dstPtr= &dstTexture[0];
+00406                 dstUnCompPtr= &dstUnCompTexture[0];
+00407                 n= numBlocks;
+00408                 // run all blocks.
+00409                 while(n>0)
+00410                 {
+00411                         uint    nBits= min(n, 32U);
+00412                         // get Full block mask.
+00413                         getBitPack(bitPtr, bitMask);
+00414                         n-= nBits;
+00415                         bitPtr++;
+00416                         // get Compress mask.
+00417                         uint32  bitCompMask;
+00418                         getBitPack(bitCompPtr, bitCompMask);
+00419                         bitCompPtr++;
+00420                         // for all bits
+00421                         for(;nBits>0;nBits--)
+00422                         {
+00423                                 // need to colorize??
+00424                                 if(bitMask&1)
+00425                                 {
+00426                                         // colorize this block. ignore alpha
+00427                                         colorizeDXTCBlockRGB(srcPtr, dstPtr, dHue, dLum, dSat);
+00428                                         // If this block is "a block to recompress", then must uncompress it in dstUnCompPtr
+00429                                         uncompressBlockRGB(dstPtr, dstUnCompPtr);
+00430                                 }
+00431                                 bitMask>>=1;
+00432                                 bitCompMask>>=1;
+00433                                 srcPtr+= BLOCK_DXTC_SIZE;
+00434                                 dstPtr+= BLOCK_DXTC_SIZE;
+00435                                 dstUnCompPtr+= BLOCK_NUM_PIXEL;
+00436                         }
+00437                 }
+00438 
+00439                 // ---- for all mixtblock ot this mask, color, uncompress and blend in store in dstUnCompTexture
+00440                 static  uint8   tmpColoredBlockDXTC[BLOCK_NUM_PIXEL];
+00441                 static  CRGBA   tmpColoredBlockRGBA[BLOCK_NUM_PIXEL];
+00442                 // start at mixt Block bits desc
+00443                 bitPtr= (uint32*)(&mask.Data[mask.MixtBlockIndex]);
+00444                 srcPtr= &_Texture[0];
+00445                 dstUnCompPtr= &dstUnCompTexture[0];
+00446                 n= numBlocks;
+00447                 // run all blocks.
+00448                 while(n>0)
+00449                 {
+00450                         uint    nBits= min(n, 32U);
+00451                         getBitPack(bitPtr, bitMask);
+00452                         n-= nBits;
+00453                         bitPtr++;
+00454                         for(;nBits>0;nBits--)
+00455                         {
+00456                                 // need to colorize??
+00457                                 if(bitMask&1)
+00458                                 {
+00459                                         // colorize this block. store 2 colors in tmp
+00460                                         colorizeDXTCBlockRGB(srcPtr, tmpColoredBlockDXTC, dHue, dLum, dSat);
+00461                                         // copy RGB bits from src to tmp
+00462                                         ((uint32*)tmpColoredBlockDXTC)[3]= ((uint32*)srcPtr)[3];
+00463 
+00464                                         // uncompress the block.
+00465                                         uncompressBlockRGB(tmpColoredBlockDXTC, tmpColoredBlockRGBA);
+00466 
+00467                                         // blend tmpColoredBlockRGBA into dstUnCompPtr, according to alphaMixtBlock.
+00468                                         for(uint i=0;i<16;i++)
+00469                                         {
+00470                                                 dstUnCompPtr[i].blendFromuiRGBOnly(dstUnCompPtr[i], tmpColoredBlockRGBA[i], *alphaMixtBlock);
+00471                                                 // next pixel
+00472                                                 alphaMixtBlock++;
+00473                                         }
+00474                                 }
+00475                                 bitMask>>=1;
+00476                                 srcPtr+= BLOCK_DXTC_SIZE;
+00477                                 dstUnCompPtr+= BLOCK_NUM_PIXEL;
+00478                         }
+00479                 }
+00480 
+00481         }
+00482 
+00483 
+00484         // Since colorizeDXTCBlockRGB() use MMX, must end with emms.
+00485 #ifdef NL_OS_WINDOWS
+00486         if(CSystemInfo::hasMMX())
+00487                 _asm    emms;
+00488 #endif
+00489 
+00490 
+00491         // **** compress needed blocks
+00492         n= numBlocks;
+00493         bitPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
+00494         dstUnCompPtr= &dstUnCompTexture[0];
+00495         dstPtr= &dstTexture[0];
+00496         while(n>0)
+00497         {
+00498                 uint    nBits= min(n, 32U);
+00499                 getBitPack(bitPtr, bitMask);
+00500                 n-= nBits;
+00501                 bitPtr++;
+00502                 for(;nBits>0;nBits--)
+00503                 {
+00504                         // need to compress ??
+00505                         if(bitMask&1)
+00506                         {
+00507                                 // uncompress this block. ignore alpha
+00508                                 compressBlockRGB(dstUnCompPtr, dstPtr);
+00509                         }
+00510                         bitMask>>=1;
+00511                         dstUnCompPtr+= BLOCK_NUM_PIXEL;
+00512                         dstPtr+= BLOCK_DXTC_SIZE;
+00513                 }
+00514         }
+00515 
+00516         // **** format bitmap out with dstTexture.
+00517         out.reset(CBitmap::DXTC5);
+00518         out.resize(_Width, _Height, CBitmap::DXTC5);
+00519 
+00520         // create and fill all the mipMaps
+00521         uint    w= _Width, h=_Height;
+00522         dstPtr= &dstTexture[0];
+00523         for(uint m=0;m<_NumMipMap;m++)
+00524         {
+00525                 // allocate.
+00526                 out.resizeMipMap(m, w, h);
+00527                 // get the size of this DXTC5 level.
+00528                 uint    size= out.getPixels(m).size();
+00529                 // fill
+00530                 memcpy(&out.getPixels(m)[0], dstPtr, size);
+00531                 // next mipmap
+00532                 dstPtr+= size;
+00533                 w= (w+1)/2;
+00534                 h= (h+1)/2;
+00535         }
+00536         // verify all filled
+00537         nlassert( dstPtr== (&dstTexture[0] + dstTexture.size()) );
+00538 
+00539         // set the correct num of mipmap
+00540         out.setMipMapCount(_NumMipMap);
+00541 }
+00542 
+00543 
+00544 // ***************************************************************************
+00545 void                    CHLSColorTexture::colorizeDXTCBlockRGB(const uint8 *srcPtr, uint8 *dstPtr, uint8 dHue, uint dLum, uint dSat)
+00546 {
+00547         // get modifier.
+00548         CFastHLSModifier        &fastHLS= CFastHLSModifier::getInstance();
+00549 
+00550         // apply the color on the 2 DXTC colors
+00551         *(uint16*)(dstPtr+8 )= fastHLS.applyHLSMod(*(uint16*)(srcPtr+8 ) , dHue, dLum, dSat);
+00552         *(uint16*)(dstPtr+10)= fastHLS.applyHLSMod(*(uint16*)(srcPtr+10) , dHue, dLum, dSat);
+00553 }
+00554 
+00555 
+00556 // ***************************************************************************
+00557 void                    CHLSColorTexture::uncompressBlockRGB(const uint8* srcDXTC, CRGBA *dstRGBA)
+00558 {
+00559         CRGBA   c[4];
+00560 
+00561         uint16 color0;
+00562         uint16 color1;
+00563         uint32 bits;
+00564         color0= *(uint16*)(srcDXTC+8);
+00565         color1= *(uint16*)(srcDXTC+10);
+00566         bits=   *(uint32*)(srcDXTC+12);
+00567 
+00568         c[0].set565(color0);
+00569         c[1].set565(color1);
+00570         
+00571         // ignore color0>color1 for DXT3 and DXT5.
+00572         c[2].blendFromui(c[0],c[1],85);
+00573         c[3].blendFromui(c[0],c[1],171);        
+00574 
+00575         // bits to color (ignore alpha result)
+00576         for(uint n= 16;n>0;n--)
+00577         {
+00578                 *dstRGBA= c[bits&3];
+00579                 bits>>=2;
+00580                 dstRGBA++;
+00581         }
+00582 }
+00583 
+00584 
+00585 // ***************************************************************************
+00586 void            CHLSColorTexture::computeMinMax(sint *diffBlock, CVectorInt &v, sint mean[3], sint rgb0[3], sint rgb1[3])
+00587 {
+00588         // compute the min and max distance along the axis v.
+00589         sint    mind= INT_MAX;
+00590         sint    maxd= INT_MIN;
+00591         sint    *srcDiff= diffBlock;
+00592         // for the 16 pixels
+00593         for(uint n=16;n>0;n--,srcDiff+=3)
+00594         {
+00595                 sint    R= srcDiff[0];
+00596                 sint    G= srcDiff[1];
+00597                 sint    B= srcDiff[2];
+00598                 sint    d= R*v.x + G*v.y + B*v.z;
+00599                 if(d<mind)
+00600                         mind= d;
+00601                 if(d>maxd)
+00602                         maxd= d;
+00603         }
+00604 
+00605         // avoid overflow. here, Higher possible bit is 16+8+2 (add of 3 values=> *4) == 26
+00606         // 26-12= 14. 14+16=30 => ok.
+00607         mind>>= 12;
+00608         maxd>>= 12;
+00609 
+00610         // compute the 2 colors: rgb0 on the min, and rgb1 on the max
+00611         rgb0[0]= mean[0]+ (mind*v.x>>20);
+00612         rgb0[1]= mean[1]+ (mind*v.y>>20);
+00613         rgb0[2]= mean[2]+ (mind*v.z>>20);
+00614         rgb1[0]= mean[0]+ (maxd*v.x>>20);
+00615         rgb1[1]= mean[1]+ (maxd*v.y>>20);
+00616         rgb1[2]= mean[2]+ (maxd*v.z>>20);
+00617         // clamp to 0..255
+00618         fastClamp8(rgb0[0]);
+00619         fastClamp8(rgb0[1]);
+00620         fastClamp8(rgb0[2]);
+00621         fastClamp8(rgb1[0]);
+00622         fastClamp8(rgb1[1]);
+00623         fastClamp8(rgb1[2]);
+00624 }
+00625 
+00626 
+00627 // ***************************************************************************
+00628 void                    CHLSColorTexture::compressBlockRGB(CRGBA *srcRGBA, uint8* dstDXTC)
+00629 {
+00630         // skip alpha part.
+00631         uint8   *dstBlock= dstDXTC+8;
+00632 
+00633 
+00634         // **** compute RGB0 and RGB1.
+00635         uint    i,j,n;
+00636         
+00637         // compute the mean color of 16 pixels
+00638         sint    mean[3];
+00639         mean[0]= 0;
+00640         mean[1]= 0;
+00641         mean[2]= 0;
+00642         CRGBA   *src= srcRGBA;
+00643         for(n=16;n>0;n--,src++)
+00644         {
+00645                 mean[0]+= src->R;
+00646                 mean[1]+= src->G;
+00647                 mean[2]+= src->B;
+00648                 // at same time, setup alpha to 0. Important for "compute bits" part (see MMX)!!
+00649                 src->A= 0;
+00650         }
+00651         mean[0]>>= 4;
+00652         mean[1]>>= 4;
+00653         mean[2]>>= 4;
+00654 
+00655         // compute col-mean
+00656         sint    diffBlock[16*3];
+00657         src= srcRGBA;
+00658         sint    *srcDiff= diffBlock;
+00659         for(n=16;n>0;n--,src++,srcDiff+=3)
+00660         {
+00661                 srcDiff[0]= (sint)src->R - mean[0];
+00662                 srcDiff[1]= (sint)src->G - mean[1];
+00663                 srcDiff[2]= (sint)src->B - mean[2];
+00664         }
+00665 
+00666 
+00667         // compute the covariant matrix.
+00668         sint    coMat[3][3];
+00669         // Apply std RGB factor (0.3, 0.56, 0.14) to choose the best Axis. This give far much best results.
+00670         sint    rgbFact[3]= {77, 143, 36};
+00671         for(i=0;i<3;i++)
+00672         {
+00673                 // OPTIMIZE SINCE SYMETRIX MATRIX
+00674                 for(j=i;j<3;j++)
+00675                 {
+00676                         sint32  factor= 0;
+00677                         // divide / 16 to avoid overflow sint32
+00678                         uint    colFactor= (rgbFact[i]*rgbFact[j]) >> 4;
+00679                         // run all 16 pixels.
+00680                         sint    *srcDiff= diffBlock;
+00681                         for(n=16;n>0;n--,srcDiff+=3)
+00682                         {
+00683                                 factor+= srcDiff[i] * srcDiff[j] * colFactor;
+00684                         }
+00685                         coMat[i][j]= factor;
+00686                 }
+00687         }
+00688         // Fill symetrix matrix
+00689         coMat[1][0]= coMat[0][1];
+00690         coMat[2][0]= coMat[0][2];
+00691         coMat[2][1]= coMat[1][2];
+00692 
+00693 
+00694         // take the bigger vector
+00695         sint    maxSize= 0;
+00696         uint    axis= 0;
+00697         for(i=0;i<3;i++)
+00698         {
+00699                 // Use abs since sqr fails because all sint32 range may be used.
+00700                 sint    size= abs(coMat[i][0]) + abs(coMat[i][1]) + abs(coMat[i][2]);
+00701                 if(size>maxSize)
+00702                 {
+00703                         maxSize= size;
+00704                         axis= i;
+00705                 }
+00706         }
+00707 
+00708         // normalize this vector
+00709         CVector v;
+00710         // remove some rgb factor...
+00711         v.x= (float)coMat[axis][0]/rgbFact[0];
+00712         v.y= (float)coMat[axis][1]/rgbFact[1];
+00713         v.z= (float)coMat[axis][2]/rgbFact[2];
+00714         v.normalize();
+00715         // set a Fixed 16:16.
+00716         CVectorInt      vInt;
+00717         // don't bother if OptFastFloorBegin() has been called. 16:16 precision is sufficient.
+00718         vInt.x= OptFastFloor(v.x*65536);
+00719         vInt.y= OptFastFloor(v.y*65536);
+00720         vInt.z= OptFastFloor(v.z*65536);
+00721 
+00722 
+00723         // For all pixels, choose the 2 colors along the axis
+00724         sint    rgb0[3];
+00725         sint    rgb1[3];
+00726         computeMinMax(diffBlock, vInt, mean, rgb0, rgb1);
+00727 
+00728         // Average to 16 bits. NB: correctly encode 0..255 to 0..31 or 0..63.
+00729         uint    R,G,B;
+00730         R= ((rgb0[0]*7967+32768)>>16);
+00731         G= ((rgb0[1]*16191+32768)>>16);
+00732         B= ((rgb0[2]*7967+32768)>>16);
+00733         uint16  rgb016= (R<<11) + (G<<5) + (B);
+00734         R= ((rgb1[0]*7967+32768)>>16);
+00735         G= ((rgb1[1]*16191+32768)>>16);
+00736         B= ((rgb1[2]*7967+32768)>>16);
+00737         uint16  rgb116= (R<<11) + (G<<5) + (B);
+00738         // copy to block
+00739         ((uint16*)dstBlock)[0]= rgb016;
+00740         ((uint16*)dstBlock)[1]= rgb116;
+00741 
+00742 
+00743         // **** compute bits
+00744         CRGBA   c[4];
+00745         c[0].set565(rgb016);
+00746         c[1].set565(rgb116);
+00747         c[2].blendFromui(c[0],c[1],85);
+00748         c[3].blendFromui(c[0],c[1],171);
+00749         // it is important that c[] and src Alpha are set to 0, because of "pmaddwd" use in MMX code...
+00750         c[0].A= 0;
+00751         c[1].A= 0;
+00752         c[2].A= 0;
+00753         c[3].A= 0;
+00754         CRGBA   *cPtr= c;
+00755 
+00756         // result.
+00757         uint32  bits= 0;
+00758 
+00759 #ifdef NL_OS_WINDOWS
+00760         if(CSystemInfo::hasMMX())
+00761         {
+00762                 // prepare mmx
+00763                 uint64  blank= 0;
+00764                 __asm
+00765                 {       
+00766                         movq            mm7, blank
+00767                 }
+00768 
+00769                 // for 16 pixels
+00770                 src= srcRGBA;
+00771                 for(n=16;n>0;n--,src++)
+00772                 {
+00773                         /* // C Version (+ little asm).
+00774                         uint    minDist= 0xFFFFFFFF;
+00775                         uint    id= 0;
+00776                         for(i=0;i<4;i++)
+00777                         {
+00778                                 // applying factors such *23, *80, *6 gives better results, but slower (in MMX).
+00779                                 uint    dist= sqr((sint)src->R-(sint)c[i].R);
+00780                                 dist+= sqr((sint)src->G-(sint)c[i].G);
+00781                                 dist+= sqr((sint)src->B-(sint)c[i].B);
+00782                                 if(dist<minDist)
+00783                                 {
+00784                                         minDist= dist;
+00785                                         id= i;
+00786                                 }
+00787                         }
+00788                         bits|=id;
+00789                         __asm
+00790                         {
+00791                                 mov     eax, bits
+00792                                 ror eax, 2
+00793                                 mov bits, eax
+00794                         }*/
+00795                         __asm
+00796                         {
+00797                                 mov                     esi, src
+00798                                 mov                     edi, cPtr
+00799 
+00800                                 mov                     ecx, 4
+00801                                 mov                     edx, 0xFFFFFFFF // edx= minDist
+00802 
+00803                                 movd            mm0, [esi]
+00804                                 punpcklbw       mm0, mm7
+00805 
+00806                                 mov                     esi, 4                  // esi= id MinDist (inverted)
+00807 
+00808                                 // compare 4 cases.
+00809                         myLoop:
+00810                                 movd            mm1, [edi]
+00811                                 punpcklbw       mm1, mm7
+00812                                 psubsw          mm1, mm0
+00813                                 pmaddwd         mm1, mm1
+00814                                 movd            eax, mm1
+00815                                 psrlq       mm1, 32
+00816                                 movd            ebx, mm1
+00817                                 add                     eax, ebx
+00818 
+00819                                 // take smaller of A and B. here: eax= A, edx= B
+00820                                 sub                     eax, edx                // eax= A-B
+00821                                 sbb                     ebx, ebx                // ebx= FF if A<B.
+00822                                 and                     eax, ebx                // eax= A-B if A<B
+00823                                 add                     edx, eax                // if A<B, edx= B+A-B= A, else, edx= B. => minimum
+00824                                 // setup the "smaller" id. here esi= iB, ecx= iA
+00825                                 not                     ebx                             // ebx= 0 if A<B, FF else
+00826                                 sub                     esi, ecx                // esi= iB-iA
+00827                                 and                     esi, ebx                // esi= 0 if A<B, iB-iA else
+00828                                 add                     esi, ecx                // esi= 0+iA= iA if A<B, else esi= iB-iA+iA= iB
+00829                                                         
+00830                                 add                     edi, 4
+00831                                 dec                     ecx
+00832                                 jnz                     myLoop
+00833 
+00834                                 // reverse id
+00835                                 mov                     edx, 4
+00836                                 mov                     eax, bits
+00837                                 sub                     edx, esi
+00838                                 // and store into bits
+00839                                 or                      eax, edx
+00840                                 ror                     eax, 2
+00841                                 mov                     bits, eax
+00842                         }
+00843                 }
+00844 
+00845 
+00846                 // end MMX block.
+00847                 __asm   emms;
+00848         }
+00849         else
+00850 #endif  // NL_OS_WINDOWS
+00851         {
+00852                 src= srcRGBA;
+00853                 for(n=16;n>0;n--,src++)
+00854                 {
+00855                         // C Version (portable fallback, no asm).
+00856                         uint    minDist= 0xFFFFFFFF;
+00857                         uint    id= 0;
+00858                         for(i=0;i<4;i++)
+00859                         {
+00860                                 // applying factors such *23, *80, *6 gives better results, but slower (in MMX).
+00861                                 uint    dist= sqr((sint)src->R-(sint)c[i].R);
+00862                                 dist+= sqr((sint)src->G-(sint)c[i].G);
+00863                                 dist+= sqr((sint)src->B-(sint)c[i].B);
+00864                                 if(dist<minDist)
+00865                                 {
+00866                                         minDist= dist;
+00867                                         id= i;
+00868                                 }
+00869                         }
+00870                         // a ror is faster, but full C version
+00871                         bits|= id<<30;
+00872                         // don't do it for the last.
+00873                         if(n>1)
+00874                                 bits>>=2;
+00875                 }
+00876         }
+00877 
+00878         // copy
+00879         ((uint32*)dstBlock)[1]= bits;
+00880 }
+00881 
+00882 
+00883 } // NL3D
+
hls_color_texture.cpp