diff options
author | neodarz <neodarz@neodarz.net> | 2018-08-11 20:21:34 +0200 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2018-08-11 20:21:34 +0200 |
commit | 0ea5fc66924303d1bf73ba283a383e2aadee02f2 (patch) | |
tree | 2568e71a7ccc44ec23b8bb3f0ff97fb6bf2ed709 /docs/doxygen/nel/fast__mem_8cpp-source.html | |
download | nevrax-website-self-hostable-0ea5fc66924303d1bf73ba283a383e2aadee02f2.tar.xz nevrax-website-self-hostable-0ea5fc66924303d1bf73ba283a383e2aadee02f2.zip |
Initial commit
Diffstat (limited to '')
-rw-r--r-- | docs/doxygen/nel/fast__mem_8cpp-source.html | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/docs/doxygen/nel/fast__mem_8cpp-source.html b/docs/doxygen/nel/fast__mem_8cpp-source.html new file mode 100644 index 00000000..8a4fa927 --- /dev/null +++ b/docs/doxygen/nel/fast__mem_8cpp-source.html @@ -0,0 +1,310 @@ +<!doctype html public "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> +<HTML> +<HEAD> + <TITLE>nevrax.org : docs</TITLE> + <LINK REL=stylesheet TYPE="text/css" HREF="http://www.nevrax.org/inc/css/nevrax.css"> + <link href="doxygen.css" rel="stylesheet" type="text/css"> +</HEAD> +<BODY MARGINHEIGHT="0" MARGINWIDTH="0"> + +<!-- uplinks --> +<TABLE CELLSPACING=0 CELLPADDING=0 BORDER=0> + <TR> + <TD WIDTH=16><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD> + <TD WIDTH=140 BGCOLOR=#dddddd><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="140" HEIGHT="16" BORDER=0 ALT=""></TD> + <TD WIDTH=16><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD> + <TD><IMG width=6 height=14 SRC="http://www.nevrax.org/inc/img/reddots.gif" ALT="#" VSPACE=2 HSPACE=2 BORDER=0 ></TD><TD VALIGN=middle> <A CLASS=uplinks HREF=http://www.nevrax.org><b>Home</B></FONT></A> </TD> + <TD><IMG width=6 height=14 SRC="http://www.nevrax.org/inc/img/reddots.gif" ALT="#" VSPACE=2 HSPACE=2 BORDER=0 ></TD><TD VALIGN=middle> <A CLASS=uplinks HREF=http://www.nevrax.com><b>nevrax.com</B></FONT></A> </TD> + </TR> +</TABLE> + +<!-- banner Nevrax --> +<TABLE CELLSPACING=0 CELLPADDING=0 BORDER=0 WIDTH=100%> + <TR><TD BGCOLOR="#000000" BACKGROUND="http://www.nevrax.org/inc/img/black_banner.jpg"><A HREF="http://www.nevrax.org"><IMG SRC="http://www.nevrax.org/inc/img/nevrax.gif" WIDTH="170" HEIGHT="45" BORDER=0 ALT="Nevrax" ></A></TD></TR> +</TABLE> + +<!-- main table --> +<TABLE CELLSPACING=0 CELLPADDING=0 BORDER=0 height=100%> + <TR> + <TD WIDTH=16><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="16" HEIGHT="10" BORDER=0 ALT=""></TD> + <TD WIDTH=140 BGCOLOR=#dddddd VALIGN=TOP ALIGN=middle><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="140" HEIGHT="10" BORDER=0 ALT=""> + + <!------ Begin Box ------> + <TABLE BORDER=0 CELLSPACING=0 CELLPADDING=0 BGCOLOR=black><TR><TD><TABLE border=0 cellspacing=2 cellpadding=0 width=120><tr><TD ALIGN=middle bgcolor=black> + <FONT COLOR=white FACE="sans-serif"><B>Nevrax.org</B></FONT></TD></TR><tr><td colspan=2 bgcolor=#FFFFFF> + <TABLE cellspacing=0 cellpadding=1 border=0> + <tr><td ALIGN=middle><a class='linkbox' href="http://www.nevrax.org/news/" TITLE="Rubrique news"><img width=13 height=15 hspace=5 border=0 src=http://www.nevrax.org/inc/img/picto-news.gif ALT=#></A></td><td><a class='linkbox' href="http://www.nevrax.org/news/" TITLE="News">News</a></td></tr> + <tr><td ALIGN=middle><a class='linkbox' href="http://www.nevrax.org/mail/" TITLE="Rubrique mail"><img width=15 height=11 hspace=5 border=0 src=http://www.nevrax.org/inc/img/picto-mail.gif ALT=#></A></td><td><a class='linkbox' href="http://www.nevrax.org/mail/" TITLE="Mailing list archive">Mailing-list</a></td></tr> + <tr><td ALIGN=middle><a class='linkbox' href="http://www.nevrax.org/docs/" TITLE="Rubrique docs"><img width=14 height=16 hspace=5 border=0 src=http://www.nevrax.org/inc/img/picto-docs.gif ALT=#></A></td><td><a class='linkbox' href="http://www.nevrax.org/docs/" TITLE="Documentation">Documentation</a></td></tr> + <tr><td ALIGN=middle><a class='linkbox' href="http://www.nevrax.org/cvs/" TITLE="Rubrique cvs"><img width=13 height=17 hspace=5 border=0 src=http://www.nevrax.org/inc/img/picto-cvs.gif ALT=#></A></td><td><a class='linkbox' href="http://www.nevrax.org/cvs/" TITLE="CVS Web">CVS</a></td></tr> + <tr><td ALIGN=middle><a class='linkbox' href="http://www.nevrax.org/bugs/" TITLE="Rubrique bugs"><img width=20 height=16 hspace=5 border=0 src=http://www.nevrax.org/inc/img/picto-bugs.gif ALT=#></A></td><td><a class='linkbox' href="http://www.nevrax.org/bugs/" TITLE="Bugtracking">Bugs</a></td></tr> + <tr><td ALIGN=middle><a class='linkbox' href="http://www.nevrax.org/GPL.php3" TITLE="Rubrique license"><img width=18 height=12 hspace=5 border=0 src=http://www.nevrax.org/inc/img/picto-gpl.gif ALT=#></A></td><td><a class='linkbox' href="http://www.nevrax.org/GPL.php3" TITLE="License">License</a></td></tr> + </TABLE> + </TD></TR></TABLE></TD></TR></TABLE> + <!------ End Box ------> + + </TD> + <TD WIDTH=15><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD> + <TD ALIGN=left valign=top><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="140" HEIGHT="10" BORDER=0 ALT=""> + +<!-- title --> +<TABLE background="http://www.nevrax.org/inc/img/redline.gif" CELLSPACING=0 CELLPADDING=0 BORDER=0 width=100%><tr><td> +<A HREF="http://www.nevrax.org/docs/"><img src="http://www.nevrax.org/inc/img/t_docs.gif" ALT="Docs" HEIGHT=20 BORDER=0></A> +</td><td><IMG SRC="http://www.nevrax.org/inc/img/pixel.gif" WIDTH="1" HEIGHT="1" BORDER=0 ALT=""> +</td></tr></table> + + +<!-- block --> +<TABLE bgcolor="#dddddd" CELLSPACING=0 CELLPADDING=0 BORDER=0 width=100%><tr><td width=1% valign=middle><img width=6 height=14 hspace=2 vspace=2 src="http://www.nevrax.org/inc/img/reddots.gif"></TD> + <TD><B>Documentation</B></TD> + <TD ALIGN=RIGHT> </td> +</tr></table> +<!-- Generated by Doxygen 1.2.14 --> +<center> +<a class="qindex" href="index.html">Main Page</a> <a class="qindex" href="namespaces.html">Namespace List</a> <a class="qindex" href="hierarchy.html">Class Hierarchy</a> <a class="qindex" href="classes.html">Alphabetical List</a> <a class="qindex" href="annotated.html">Compound List</a> <a class="qindex" href="files.html">File List</a> <a class="qindex" href="namespacemembers.html">Namespace Members</a> <a class="qindex" href="functions.html">Compound Members</a> <a class="qindex" href="globals.html">File Members</a> <a class="qindex" href="pages.html">Related Pages</a> <a class="qindexRef" doxygen="_cgi:http://www.nevrax.org/cgi-bin/nel-search.cgi" href="http://www.nevrax.org/cgi-bin/nel-search.cgi">Search</a> </center> +<hr><h1>fast_mem.cpp</h1><a href="fast__mem_8cpp.html">Go to the documentation of this file.</a><div class="fragment"><pre>00001 +00007 <font class="comment">/* Copyright, 2000-2002 Nevrax Ltd.</font> +00008 <font class="comment"> *</font> +00009 <font class="comment"> * This file is part of NEVRAX NEL.</font> +00010 <font class="comment"> * NEVRAX NEL is free software; you can redistribute it and/or modify</font> +00011 <font class="comment"> * it under the terms of the GNU General Public License as published by</font> +00012 <font class="comment"> * the Free Software Foundation; either version 2, or (at your option)</font> +00013 <font class="comment"> * any later version.</font> +00014 <font class="comment"></font> +00015 <font class="comment"> * NEVRAX NEL is distributed in the hope that it will be useful, but</font> +00016 <font class="comment"> * WITHOUT ANY WARRANTY; without even the implied warranty of</font> +00017 <font class="comment"> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU</font> +00018 <font class="comment"> * General Public License for more details.</font> +00019 <font class="comment"></font> +00020 <font class="comment"> * You should have received a copy of the GNU General Public License</font> +00021 <font class="comment"> * along with NEVRAX NEL; see the file COPYING. If not, write to the</font> +00022 <font class="comment"> * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,</font> +00023 <font class="comment"> * MA 02111-1307, USA.</font> +00024 <font class="comment"> */</font> +00025 +00026 <font class="preprocessor">#include "<a class="code" href="stdmisc_8h.html">stdmisc.h</a>"</font> +00027 +00028 <font class="preprocessor">#include "<a class="code" href="fast__mem_8h.html">nel/misc/fast_mem.h</a>"</font> +00029 <font class="preprocessor">#include "<a class="code" href="system__info_8h.html">nel/misc/system_info.h</a>"</font> +00030 +00031 +00032 <font class="keyword">namespace </font>NLMISC +00033 { +00034 +00035 <font class="preprocessor">#ifdef NL_OS_WINDOWS</font> +00036 <font class="preprocessor"></font> +00037 +00038 <font class="comment">// ***************************************************************************</font> +00039 <font class="keywordtype">void</font> *CFastMem::memcpySSE(<font class="keywordtype">void</font> *dest, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes) +00040 { +00041 _asm +00042 { +00043 mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a> +00044 mov edi, dest +00045 mov ebx, nbytes +00046 +00047 <font class="comment">// edx takes number of bytes%64</font> +00048 mov edx, ebx +00049 and edx, 63 +00050 +00051 <font class="comment">// ebx takes number of bytes/64</font> +00052 shr ebx, 6 +00053 jz byteCopy +00054 +00055 +00056 loop4k: <font class="comment">// flush 4k into temporary buffer </font> +00057 push esi +00058 mov ecx, ebx +00059 <font class="comment">// copy per block of 64 bytes. Must not override 64*64= 4096 bytes.</font> +00060 cmp ecx, 64 +00061 jle skipMiniMize +00062 mov ecx, 64 +00063 skipMiniMize: +00064 <font class="comment">// eax takes the number of 64bytes packet for this block.</font> +00065 mov eax, ecx +00066 +00067 loopMemToL1: +00068 prefetchnta 64[ESI] <font class="comment">// Prefetch next loop, non-temporal </font> +00069 prefetchnta 96[ESI] +00070 +00071 movq mm1, 0[ESI] <font class="comment">// Read in source data </font> +00072 movq mm2, 8[ESI] +00073 movq mm3, 16[ESI] +00074 movq mm4, 24[ESI] +00075 movq mm5, 32[ESI] +00076 movq mm6, 40[ESI] +00077 movq mm7, 48[ESI] +00078 movq mm0, 56[ESI] +00079 +00080 add esi, 64 +00081 dec ecx +00082 jnz loopMemToL1 +00083 +00084 pop esi <font class="comment">// Now copy from L1 to system memory </font> +00085 mov ecx, eax +00086 +00087 loopL1ToMem: +00088 movq mm1, 0[ESI] <font class="comment">// Read in source data from L1 </font> +00089 movq mm2, 8[ESI] +00090 movq mm3, 16[ESI] +00091 movq mm4, 24[ESI] +00092 movq mm5, 32[ESI] +00093 movq mm6, 40[ESI] +00094 movq mm7, 48[ESI] +00095 movq mm0, 56[ESI] +00096 +00097 movntq 0[EDI], mm1 <font class="comment">// Non-temporal stores </font> +00098 movntq 8[EDI], mm2 +00099 movntq 16[EDI], mm3 +00100 movntq 24[EDI], mm4 +00101 movntq 32[EDI], mm5 +00102 movntq 40[EDI], mm6 +00103 movntq 48[EDI], mm7 +00104 movntq 56[EDI], mm0 +00105 +00106 add esi, 64 +00107 add edi, 64 +00108 dec ecx +00109 jnz loopL1ToMem +00110 +00111 <font class="comment">// Do next 4k block </font> +00112 sub ebx, eax +00113 jnz loop4k +00114 +00115 emms +00116 +00117 byteCopy: +00118 <font class="comment">// Do last bytes with std cpy</font> +00119 mov ecx, edx +00120 rep movsb +00121 } +00122 <font class="keywordflow">return</font> dest; +00123 } +00124 +00125 <font class="comment">// ***************************************************************************</font> +00126 <font class="keywordtype">void</font> CFastMem::precacheSSE(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes) +00127 { +00128 _asm +00129 { +00130 mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a> +00131 mov ecx, nbytes +00132 <font class="comment">// 64 bytes per pass</font> +00133 shr ecx, 6 +00134 jz endLabel +00135 +00136 loopMemToL1: +00137 prefetchnta 64[ESI] <font class="comment">// Prefetch next loop, non-temporal </font> +00138 prefetchnta 96[ESI] +00139 +00140 movq mm1, 0[ESI] <font class="comment">// Read in source data </font> +00141 movq mm2, 8[ESI] +00142 movq mm3, 16[ESI] +00143 movq mm4, 24[ESI] +00144 movq mm5, 32[ESI] +00145 movq mm6, 40[ESI] +00146 movq mm7, 48[ESI] +00147 movq mm0, 56[ESI] +00148 +00149 add esi, 64 +00150 dec ecx +00151 jnz loopMemToL1 +00152 +00153 emms +00154 +00155 endLabel: +00156 } +00157 } +00158 +00159 <font class="comment">// ***************************************************************************</font> +00160 <font class="keywordtype">void</font> CFastMem::precacheMMX(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes) +00161 { +00162 _asm +00163 { +00164 mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a> +00165 mov ecx, nbytes +00166 <font class="comment">// 64 bytes per pass</font> +00167 shr ecx, 6 +00168 jz endLabel +00169 +00170 loopMemToL1: +00171 movq mm1, 0[ESI] <font class="comment">// Read in source data </font> +00172 movq mm2, 8[ESI] +00173 movq mm3, 16[ESI] +00174 movq mm4, 24[ESI] +00175 movq mm5, 32[ESI] +00176 movq mm6, 40[ESI] +00177 movq mm7, 48[ESI] +00178 movq mm0, 56[ESI] +00179 +00180 add esi, 64 +00181 dec ecx +00182 jnz loopMemToL1 +00183 +00184 emms +00185 +00186 endLabel: +00187 } +00188 } +00189 +00190 +00191 <font class="comment">// ***************************************************************************</font> +00192 <font class="keywordtype">void</font> CFastMem::precache(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes) +00193 { +00194 <font class="keywordflow">if</font>(<a class="code" href="classNLMISC_1_1CSystemInfo.html#d5">NLMISC::CSystemInfo::hasSSE</a>()) +00195 precacheSSE(<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes); +00196 <font class="keywordflow">else</font> <font class="keywordflow">if</font>(<a class="code" href="classNLMISC_1_1CSystemInfo.html#d4">NLMISC::CSystemInfo::hasMMX</a>()) +00197 precacheMMX(<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes); +00198 } +00199 +00200 +00201 <font class="preprocessor">#else</font> +00202 <font class="preprocessor"></font> +00203 +00204 <font class="comment">// ***************************************************************************</font> +<a name="l00205"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d1">00205</a> <font class="keywordtype">void</font> *CFastMem::memcpySSE(<font class="keywordtype">void</font> *dst, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes) +00206 { +00207 <font class="comment">// Use std memcpy.</font> +00208 <font class="keywordflow">return</font> memcpy(dst, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes); +00209 } +<a name="l00210"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d2">00210</a> <font class="keywordtype">void</font> CFastMem::precacheSSE(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes) +00211 { +00212 <font class="comment">// no-op.</font> +00213 } +<a name="l00214"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d3">00214</a> <font class="keywordtype">void</font> CFastMem::precacheMMX(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes) +00215 { +00216 <font class="comment">// no-op.</font> +00217 } +<a name="l00218"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d0">00218</a> <font class="keywordtype">void</font> CFastMem::precache(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes) +00219 { +00220 <font class="comment">// no-op.</font> +00221 } +00222 +00223 <font class="preprocessor">#endif</font> +00224 <font class="preprocessor"></font> +00225 <font class="keyword">typedef</font> <font class="keywordtype">void</font> *(*memcpyPtr)(<font class="keywordtype">void</font> *dts, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes); +00226 +00227 <font class="keyword">static</font> <a class="code" href="namespaceNLMISC.html#a193">memcpyPtr</a> <a class="code" href="namespaceNLMISC.html#a295">findBestmemcpy</a> () +00228 { +00229 <font class="preprocessor">#ifdef NL_OS_WINDOWS</font> +00230 <font class="preprocessor"></font> <font class="keywordflow">if</font> (CSystemInfo::hasSSE ()) +00231 <font class="keywordflow">return</font> CFastMem::memcpySSE; +00232 <font class="keywordflow">else</font> +00233 return ::memcpy; +00234 <font class="preprocessor">#else // NL_OS_WINDOWS</font> +00235 <font class="preprocessor"></font> return ::memcpy; +00236 <font class="preprocessor">#endif // NL_OS_WINDOWS</font> +00237 <font class="preprocessor"></font>} +00238 +00239 <font class="keywordtype">void</font> *(*CFastMem::memcpy)(<font class="keywordtype">void</font> *dts, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes) = <a class="code" href="namespaceNLMISC.html#a295">findBestmemcpy</a> (); +00240 +00241 } <font class="comment">// NLMISC</font> +</pre></div> + +<!-- footer --> +<BR><FONT Size=+5> </FONT> +</TD> +<TD WIDTH=15><IMG SRC=http://www.nevrax.org/inc/img/pixel.gif WIDTH=15 HEIGHT=15 BORDER=0 ALT=""></TD> +</TR> +</TABLE> +</BODY> +</HTML> |