# Home    # nevrax.com   
Nevrax
Nevrax.org
#News
#Mailing-list
#Documentation
#CVS
#Bugs
#License
Docs
 
Documentation  
Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages   Search  

fast_mem.cpp

Go to the documentation of this file.
00001 
00007 /* Copyright, 2000-2002 Nevrax Ltd.
00008  *
00009  * This file is part of NEVRAX NEL.
00010  * NEVRAX NEL is free software; you can redistribute it and/or modify
00011  * it under the terms of the GNU General Public License as published by
00012  * the Free Software Foundation; either version 2, or (at your option)
00013  * any later version.
00014 
00015  * NEVRAX NEL is distributed in the hope that it will be useful, but
00016  * WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00018  * General Public License for more details.
00019 
00020  * You should have received a copy of the GNU General Public License
00021  * along with NEVRAX NEL; see the file COPYING. If not, write to the
00022  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00023  * MA 02111-1307, USA.
00024  */
00025 
00026 #include "stdmisc.h"
00027 
00028 #include "nel/misc/fast_mem.h"
00029 #include "nel/misc/system_info.h"
00030 
00031 
00032 namespace NLMISC
00033 {
00034 
00035 #ifdef NL_OS_WINDOWS
00036 
00037 
00038 // ***************************************************************************
      // Copies nbytes bytes from src to dest and returns dest.
      //
      // The byte count is split into nbytes/64 whole 64-byte packets (ebx) and a
      // tail of nbytes%64 bytes (edx). Packets are processed in chunks of at most
      // 64 packets (4096 bytes), each chunk in two passes:
      //   1. loopMemToL1: the chunk is read once with prefetchnta hints; the loaded
      //      values are discarded (going by the label, the aim is to get the chunk
      //      into the L1 cache).
      //   2. loopL1ToMem: the chunk is read again and written to dest with
      //      non-temporal movntq stores.
      // The tail is then copied byte by byte with rep movsb.
      //
      // NOTE(review): no sfence is issued after the movntq stores -- confirm that no
      // caller needs them ordered before later stores observed by another processor.
      // NOTE(review): nbytes is moved into the 32-bit register ebx, so this presumably
      // targets 32-bit builds only -- TODO confirm.
00039 void            *CFastMem::memcpySSE(void *dest, const void *src, size_t nbytes)
00040 {
00041         _asm 
00042         {
                              // esi = read pointer, edi = write pointer, ebx = byte count.
00043                         mov esi, src 
00044                         mov edi, dest 
00045                         mov ebx, nbytes 
00046 
00047                         // edx = nbytes % 64: tail bytes left over after the 64-byte packets.
00048                         mov     edx, ebx
00049                         and edx, 63
00050 
00051                         // ebx = nbytes / 64: number of whole 64-byte packets.
00052                         shr     ebx, 6
                              // No whole packet: skip the MMX part and only copy the tail.
00053                         jz      byteCopy
00054 
00055 
00056         loop4k: // Process the next chunk of at most 64 packets (4096 bytes).
                              // Save the chunk start; the second pass re-reads from it.
00057                         push esi 
00058                         mov ecx, ebx
00059                         // ecx = min(ebx, 64) packets, so a chunk never exceeds 64*64 = 4096 bytes.
00060                         cmp ecx, 64
00061                         jle     skipMiniMize
00062                         mov     ecx, 64
00063         skipMiniMize:
00064                         // eax keeps the number of 64-byte packets in this chunk.
00065                         mov eax, ecx
00066 
              // First pass: read the chunk; nothing is stored.
00067         loopMemToL1: 
00068                         prefetchnta 64[ESI] // Prefetch next loop, non-temporal 
00069                         prefetchnta 96[ESI] 
00070 
00071                         movq mm1,  0[ESI] // Read in source data (values unused in this pass)
00072                         movq mm2,  8[ESI] 
00073                         movq mm3, 16[ESI] 
00074                         movq mm4, 24[ESI] 
00075                         movq mm5, 32[ESI] 
00076                         movq mm6, 40[ESI] 
00077                         movq mm7, 48[ESI] 
00078                         movq mm0, 56[ESI] 
00079 
00080                         add esi, 64 
00081                         dec ecx 
00082                         jnz loopMemToL1 
00083 
00084                         pop esi // Rewind to the chunk start, then copy the chunk to dest
00085                         mov ecx, eax
00086 
              // Second pass: re-read the chunk and stream it to the destination.
00087         loopL1ToMem: 
00088                         movq mm1, 0[ESI] // Read in source data from L1 
00089                         movq mm2, 8[ESI] 
00090                         movq mm3, 16[ESI] 
00091                         movq mm4, 24[ESI] 
00092                         movq mm5, 32[ESI] 
00093                         movq mm6, 40[ESI] 
00094                         movq mm7, 48[ESI] 
00095                         movq mm0, 56[ESI] 
00096 
00097                         movntq 0[EDI], mm1 // Non-temporal stores 
00098                         movntq 8[EDI], mm2 
00099                         movntq 16[EDI], mm3 
00100                         movntq 24[EDI], mm4 
00101                         movntq 32[EDI], mm5 
00102                         movntq 40[EDI], mm6 
00103                         movntq 48[EDI], mm7 
00104                         movntq 56[EDI], mm0 
00105 
00106                         add esi, 64 
00107                         add edi, 64 
00108                         dec ecx 
00109                         jnz loopL1ToMem
00110 
00111                         // Do next 4k block: eax packets are done, loop while some remain.
00112                         sub ebx, eax
00113                         jnz loop4k 
00114 
                              // Empty the MMX state. The early jump to byteCopy skips this,
                              // which is fine because no MMX instruction ran on that path.
00115                         emms
00116 
00117         byteCopy:
00118                         // Copy the remaining edx (< 64) bytes; esi/edi already point
                              // just past the last packet copied.
00119                         mov     ecx, edx
00120                         rep movsb
00121         }
00122         return dest;
00123 }
00124 
00125 // ***************************************************************************
      // Reads the first nbytes/64 whole 64-byte packets starting at src through the
      // MMX registers, issuing prefetchnta hints 64 and 96 bytes ahead of the packet
      // being read. Nothing is written and the loaded values are discarded; going by
      // the label name (loopMemToL1) the aim is to get the data into the cache.
      //
      // The trailing nbytes%64 bytes are not read. On the last iteration the two
      // prefetch hints address memory just past the last packet read.
00126 void            CFastMem::precacheSSE(const void *src, uint nbytes)
00127 {
00128         _asm 
00129         { 
                              // esi = read pointer, ecx = byte count.
00130                         mov esi, src 
00131                         mov ecx, nbytes
00132                         // 64 bytes per pass: ecx becomes the number of whole packets.
00133                         shr ecx, 6 
                              // Fewer than 64 bytes: nothing to do (emms is skipped, no MMX was used).
00134                         jz endLabel
00135 
00136         loopMemToL1: 
00137                         prefetchnta 64[ESI] // Prefetch next loop, non-temporal 
00138                         prefetchnta 96[ESI] 
00139 
00140                         movq mm1,  0[ESI] // Read in source data (values are not used)
00141                         movq mm2,  8[ESI] 
00142                         movq mm3, 16[ESI] 
00143                         movq mm4, 24[ESI] 
00144                         movq mm5, 32[ESI] 
00145                         movq mm6, 40[ESI] 
00146                         movq mm7, 48[ESI] 
00147                         movq mm0, 56[ESI]
00148 
                              // Advance to the next packet and loop until all packets are read.
00149                         add esi, 64 
00150                         dec ecx 
00151                         jnz loopMemToL1 
00152 
                              // Empty the MMX state once the reads are done.
00153                         emms
00154 
00155         endLabel:
00156         }
00157 }
00158 
00159 // ***************************************************************************
      // Reads the first nbytes/64 whole 64-byte packets starting at src through the
      // MMX registers. Nothing is written and the loaded values are discarded; going
      // by the label name (loopMemToL1) the aim is to get the data into the cache.
      // Same loop as precacheSSE() but without the prefetchnta hints.
      //
      // The trailing nbytes%64 bytes are not read.
00160 void            CFastMem::precacheMMX(const void *src, uint nbytes)
00161 {
00162         _asm 
00163         { 
                              // esi = read pointer, ecx = byte count.
00164                         mov esi, src 
00165                         mov ecx, nbytes
00166                         // 64 bytes per pass: ecx becomes the number of whole packets.
00167                         shr ecx, 6 
                              // Fewer than 64 bytes: nothing to do (emms is skipped, no MMX was used).
00168                         jz endLabel
00169 
00170         loopMemToL1: 
00171                         movq mm1,  0[ESI] // Read in source data (values are not used)
00172                         movq mm2,  8[ESI] 
00173                         movq mm3, 16[ESI] 
00174                         movq mm4, 24[ESI] 
00175                         movq mm5, 32[ESI] 
00176                         movq mm6, 40[ESI] 
00177                         movq mm7, 48[ESI] 
00178                         movq mm0, 56[ESI]
00179 
                              // Advance to the next packet and loop until all packets are read.
00180                         add esi, 64 
00181                         dec ecx 
00182                         jnz loopMemToL1 
00183 
                              // Empty the MMX state once the reads are done.
00184                         emms
00185 
00186         endLabel:
00187         }
00188 }
00189 
00190 
00191 // ***************************************************************************
00192 void            CFastMem::precache(const void *src, uint nbytes)
00193 {
00194         if(NLMISC::CSystemInfo::hasSSE())
00195                 precacheSSE(src, nbytes);
00196         else if(NLMISC::CSystemInfo::hasMMX())
00197                 precacheMMX(src, nbytes);
00198 }
00199 
00200 
00201 #else
00202 
00203 
00204 // ***************************************************************************
00205 void            *CFastMem::memcpySSE(void *dst, const void *src, size_t nbytes)
00206 {
00207         // Use std memcpy.
00208         return memcpy(dst, src, nbytes);
00209 }
00210 void            CFastMem::precacheSSE(const void *src, uint nbytes)
00211 {
00212         // no-op.
00213 }
00214 void            CFastMem::precacheMMX(const void *src, uint nbytes)
00215 {
00216         // no-op.
00217 }
00218 void            CFastMem::precache(const void *src, uint nbytes)
00219 {
00220         // no-op.
00221 }
00222 
00223 #endif
00224 
/// Signature shared by the C library memcpy and CFastMem::memcpySSE.
typedef void *(*memcpyPtr)(void *dst, const void *src, size_t nbytes);

/**
 * Selects the copy routine that CFastMem::memcpy will point to.
 *
 * \return CFastMem::memcpySSE on Windows builds when CSystemInfo reports SSE,
 *         the C library ::memcpy in every other case.
 */
static memcpyPtr findBestmemcpy ()
{
        // The C library routine is the default on every platform.
        memcpyPtr best = ::memcpy;

#ifdef NL_OS_WINDOWS
        // Only Windows builds carry the assembly implementation of memcpySSE.
        if (CSystemInfo::hasSSE ())
                best = CFastMem::memcpySSE;
#endif // NL_OS_WINDOWS

        return best;
}
00238 
00239 void  *(*CFastMem::memcpy)(void *dts, const void *src, size_t nbytes) = findBestmemcpy ();
00240 
00241 } // NLMISC