From 0ea5fc66924303d1bf73ba283a383e2aadee02f2 Mon Sep 17 00:00:00 2001 From: neodarz Date: Sat, 11 Aug 2018 20:21:34 +0200 Subject: Initial commit --- docs/doxygen/nel/fast__mem_8cpp-source.html | 310 ++++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 docs/doxygen/nel/fast__mem_8cpp-source.html (limited to 'docs/doxygen/nel/fast__mem_8cpp-source.html') diff --git a/docs/doxygen/nel/fast__mem_8cpp-source.html b/docs/doxygen/nel/fast__mem_8cpp-source.html new file mode 100644 index 00000000..8a4fa927 --- /dev/null +++ b/docs/doxygen/nel/fast__mem_8cpp-source.html @@ -0,0 +1,310 @@ + + + + nevrax.org : docs + + + + + + + + + + + + + + +
# Home   # nevrax.com   
+ + + + +
Nevrax
+ + + + + + + + + + +
+ + +
+ Nevrax.org
+ + + + + + + +
#News
#Mailing-list
#Documentation
#CVS
#Bugs
#License
+
+ + +
+ + +
+Docs + +
+  + + + + + +
Documentation 
+ +
+Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages   Search  
+

fast_mem.cpp

Go to the documentation of this file.
00001 
+00007 /* Copyright, 2000-2002 Nevrax Ltd.
+00008  *
+00009  * This file is part of NEVRAX NEL.
+00010  * NEVRAX NEL is free software; you can redistribute it and/or modify
+00011  * it under the terms of the GNU General Public License as published by
+00012  * the Free Software Foundation; either version 2, or (at your option)
+00013  * any later version.
+00014 
+00015  * NEVRAX NEL is distributed in the hope that it will be useful, but
+00016  * WITHOUT ANY WARRANTY; without even the implied warranty of
+00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+00018  * General Public License for more details.
+00019 
+00020  * You should have received a copy of the GNU General Public License
+00021  * along with NEVRAX NEL; see the file COPYING. If not, write to the
+00022  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+00023  * MA 02111-1307, USA.
+00024  */
+00025 
+00026 #include "stdmisc.h"
+00027 
+00028 #include "nel/misc/fast_mem.h"
+00029 #include "nel/misc/system_info.h"
+00030 
+00031 
+00032 namespace NLMISC
+00033 {
+00034 
+00035 #ifdef NL_OS_WINDOWS
+00036 // Copies nbytes bytes from src to dest and returns dest. Whole 64-byte packets go through MMX loads and non-temporal movntq stores; the nbytes%64 tail uses rep movsb.
+00037 // Packets are handled in chunks of at most 64 packets (4096 bytes): each chunk is first read once (loopMemToL1), then read again and stored (loopL1ToMem).
+00038 // ***************************************************************************
+00039 void            *CFastMem::memcpySSE(void *dest, const void *src, size_t nbytes)
+00040 {
+00041         _asm 
+00042         {
+00043                         mov esi, src 
+00044                         mov edi, dest 
+00045                         mov ebx, nbytes 
+00046 
+00047                         // edx takes number of bytes%64
+00048                         mov     edx, ebx
+00049                         and edx, 63
+00050 
+00051                         // ebx takes number of bytes/64
+00052                         shr     ebx, 6
+00053                         jz      byteCopy // fewer than 64 bytes: no MMX instruction is executed on this path
+00054 
+00055 
+00056         loop4k: // one chunk per iteration: pre-read up to 4k of source, then copy it out
+00057                         push esi // remember the chunk start for the second pass
+00058                         mov ecx, ebx
+00059                         // copy per block of 64 bytes; a chunk must not exceed 64*64 = 4096 bytes.
+00060                         cmp ecx, 64
+00061                         jle     skipMiniMize
+00062                         mov     ecx, 64
+00063         skipMiniMize:
+00064                         // eax takes the number of 64bytes packet for this block.
+00065                         mov eax, ecx
+00066 
+00067         loopMemToL1: 
+00068                         prefetchnta 64[ESI] // Prefetch next loop, non-temporal 
+00069                         prefetchnta 96[ESI] 
+00070 
+00071                         movq mm1,  0[ESI] // Read in source data (values are discarded; the second pass reloads them)
+00072                         movq mm2,  8[ESI] 
+00073                         movq mm3, 16[ESI] 
+00074                         movq mm4, 24[ESI] 
+00075                         movq mm5, 32[ESI] 
+00076                         movq mm6, 40[ESI] 
+00077                         movq mm7, 48[ESI] 
+00078                         movq mm0, 56[ESI] 
+00079 
+00080                         add esi, 64 
+00081                         dec ecx 
+00082                         jnz loopMemToL1 
+00083 
+00084                         pop esi // Now copy from L1 to system memory 
+00085                         mov ecx, eax // same packet count as the first pass
+00086 
+00087         loopL1ToMem: 
+00088                         movq mm1, 0[ESI] // Read in source data from L1 
+00089                         movq mm2, 8[ESI] 
+00090                         movq mm3, 16[ESI] 
+00091                         movq mm4, 24[ESI] 
+00092                         movq mm5, 32[ESI] 
+00093                         movq mm6, 40[ESI] 
+00094                         movq mm7, 48[ESI] 
+00095                         movq mm0, 56[ESI] 
+00096 
+00097                         movntq 0[EDI], mm1 // Non-temporal stores 
+00098                         movntq 8[EDI], mm2 
+00099                         movntq 16[EDI], mm3 
+00100                         movntq 24[EDI], mm4 
+00101                         movntq 32[EDI], mm5 
+00102                         movntq 40[EDI], mm6 
+00103                         movntq 48[EDI], mm7 
+00104                         movntq 56[EDI], mm0 
+00105 
+00106                         add esi, 64 
+00107                         add edi, 64 
+00108                         dec ecx 
+00109                         jnz loopL1ToMem
+00110 
+00111                         // Do next 4k block 
+00112                         sub ebx, eax // packets still to copy after this chunk
+00113                         jnz loop4k 
+00114 
+00115                         emms // empty the MMX state now that the packet loops are done
+00116 // NOTE(review): no sfence follows the movntq stores above - confirm whether callers need the stores ordered/visible on return.
+00117         byteCopy:
+00118                         // Copy the remaining nbytes%64 bytes (kept in edx); esi/edi already point just past whatever the packet loops copied.
+00119                         mov     ecx, edx
+00120                         rep movsb // assumes the direction flag is clear - TODO confirm
+00121         }
+00122         return dest;
+00123 }
+00124 // Reads src in nbytes/64 passes of 64 bytes each, issuing prefetchnta hints ahead of every pass; loaded values are discarded and the nbytes%64 tail is not read.
+00125 // ***************************************************************************
+00126 void            CFastMem::precacheSSE(const void *src, uint nbytes)
+00127 {
+00128         _asm 
+00129         { 
+00130                         mov esi, src 
+00131                         mov ecx, nbytes
+00132                         // 64 bytes per pass
+00133                         shr ecx, 6 
+00134                         jz endLabel // fewer than 64 bytes: nothing is read
+00135 
+00136         loopMemToL1: 
+00137                         prefetchnta 64[ESI] // Prefetch next loop, non-temporal 
+00138                         prefetchnta 96[ESI] // NOTE(review): on the last pass these hints address memory past the last whole packet
+00139 
+00140                         movq mm1,  0[ESI] // Read in source data 
+00141                         movq mm2,  8[ESI] 
+00142                         movq mm3, 16[ESI] 
+00143                         movq mm4, 24[ESI] 
+00144                         movq mm5, 32[ESI] 
+00145                         movq mm6, 40[ESI] 
+00146                         movq mm7, 48[ESI] 
+00147                         movq mm0, 56[ESI]
+00148 
+00149                         add esi, 64 
+00150                         dec ecx 
+00151                         jnz loopMemToL1 
+00152 
+00153                         emms // empty the MMX state after the movq loads
+00154 
+00155         endLabel:
+00156         }
+00157 }
+00158 // Same read loop as precacheSSE but without the prefetchnta hints: reads src in nbytes/64 passes of 64 bytes with movq, discarding the values; the nbytes%64 tail is not read.
+00159 // ***************************************************************************
+00160 void            CFastMem::precacheMMX(const void *src, uint nbytes)
+00161 {
+00162         _asm 
+00163         { 
+00164                         mov esi, src 
+00165                         mov ecx, nbytes
+00166                         // 64 bytes per pass
+00167                         shr ecx, 6 
+00168                         jz endLabel // fewer than 64 bytes: nothing is read
+00169 
+00170         loopMemToL1: 
+00171                         movq mm1,  0[ESI] // Read in source data 
+00172                         movq mm2,  8[ESI] 
+00173                         movq mm3, 16[ESI] 
+00174                         movq mm4, 24[ESI] 
+00175                         movq mm5, 32[ESI] 
+00176                         movq mm6, 40[ESI] 
+00177                         movq mm7, 48[ESI] 
+00178                         movq mm0, 56[ESI]
+00179 
+00180                         add esi, 64 
+00181                         dec ecx 
+00182                         jnz loopMemToL1 
+00183 
+00184                         emms // empty the MMX state after the movq loads
+00185 
+00186         endLabel:
+00187         }
+00188 }
+00189 // Dispatcher: calls precacheSSE when CSystemInfo::hasSSE() is true, otherwise precacheMMX when CSystemInfo::hasMMX() is true.
+00190 // When neither check succeeds the call does nothing.
+00191 // ***************************************************************************
+00192 void            CFastMem::precache(const void *src, uint nbytes)
+00193 {
+00194         if(NLMISC::CSystemInfo::hasSSE())
+00195                 precacheSSE(src, nbytes);
+00196         else if(NLMISC::CSystemInfo::hasMMX())
+00197                 precacheMMX(src, nbytes);
+00198 }
+00199 
+00200 
+00201 #else
+00202 // Build without NL_OS_WINDOWS: memcpySSE has no assembly implementation here.
+00203 // It forwards its three arguments to memcpy and returns that call's result.
+00204 // ***************************************************************************
+00205 void            *CFastMem::memcpySSE(void *dst, const void *src, size_t nbytes)
+00206 {
+00207         // Use std memcpy.
+00208         return memcpy(dst, src, nbytes);
+00209 }
+00210 void            CFastMem::precacheSSE(const void *src, uint nbytes)
+00211 {
+00212         // no-op: in a build without NL_OS_WINDOWS this function ignores src and nbytes.
+00213 }
+00214 void            CFastMem::precacheMMX(const void *src, uint nbytes)
+00215 {
+00216         // no-op: in a build without NL_OS_WINDOWS this function ignores src and nbytes.
+00217 }
+00218 void            CFastMem::precache(const void *src, uint nbytes)
+00219 {
+00220         // no-op: in a build without NL_OS_WINDOWS no precache variant is called; src and nbytes are ignored.
+00221 }
+00222 
+00223 #endif
+00224 // Pointer-to-function type with the memcpy signature; findBestmemcpy() returns either ::memcpy or CFastMem::memcpySSE through it.
+00225 typedef void  *(*memcpyPtr)(void *dts, const void *src, size_t nbytes);
+00226 // Picks the copy routine: CFastMem::memcpySSE when built with NL_OS_WINDOWS and CSystemInfo::hasSSE() is true, ::memcpy in every other case.
+00227 static memcpyPtr findBestmemcpy ()
+00228 {
+00229 #ifdef NL_OS_WINDOWS
+00230         if (CSystemInfo::hasSSE ())
+00231                 return CFastMem::memcpySSE;
+00232         else
+00233                 return ::memcpy;
+00234 #else // NL_OS_WINDOWS
+00235         return ::memcpy;
+00236 #endif // NL_OS_WINDOWS
+00237 }
+00238 // Definition of the CFastMem::memcpy function pointer; its initializer calls findBestmemcpy(). NOTE(review): on Windows that calls CSystemInfo::hasSSE() during static initialization - confirm it needs no other static to be initialized first.
+00239 void  *(*CFastMem::memcpy)(void *dts, const void *src, size_t nbytes) = findBestmemcpy ();
+00240 
+00241 } // NLMISC
+
+ + +
                                                                                                                                                                    +
+ + -- cgit v1.2.1