aboutsummaryrefslogtreecommitdiff
path: root/docs/doxygen/nel/fast__mem_8cpp-source.html
blob: 7fbe0e67ffbbe8d7927ec05917f13cddb6f86fa9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
<!doctype html public "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<HTML>
<HEAD>
	<TITLE>nevrax.org : docs</TITLE>
	<LINK REL=stylesheet TYPE="text/css" HREF="/inc/css/nevrax.css">
	<link href="doxygen.css" rel="stylesheet" type="text/css">
</HEAD>
<BODY MARGINHEIGHT="0" MARGINWIDTH="0">

<!-- uplinks -->
<TABLE CELLSPACING=0 CELLPADDING=0  BORDER=0>
 <TR>
        <TD WIDTH=16><IMG  SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD>
        <TD WIDTH=140 BGCOLOR=#dddddd><IMG  SRC="/inc/img/pixel.gif" WIDTH="140" HEIGHT="16" BORDER=0 ALT=""></TD>
        <TD WIDTH=16><IMG  SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD>
        <TD><IMG width=6 height=14 SRC="/inc/img/reddots.gif" ALT="#" VSPACE=2 HSPACE=2 BORDER=0 ></TD><TD VALIGN=middle>&nbsp;<A CLASS=uplinks HREF='/'><b>Home</B></A>&nbsp;&nbsp;&nbsp;</TD>
        <TD><IMG  width=6 height=14  SRC="/inc/img/reddots.gif" ALT="#" VSPACE=2 HSPACE=2 BORDER=0 ></TD><TD VALIGN=middle>&nbsp;<A CLASS=uplinks HREF='/'><b>nevrax.com</B></A>&nbsp;&nbsp;&nbsp;</TD>
 </TR>
</TABLE> 

<!-- banner Nevrax -->
<TABLE CELLSPACING=0 CELLPADDING=0  BORDER=0 WIDTH=100%>
 <TR><TD  BGCOLOR="#000000" BACKGROUND="/inc/img/black_banner.jpg"><A HREF=""><IMG  SRC="/inc/img/nevrax.gif" WIDTH="170" HEIGHT="45" BORDER=0 ALT="Nevrax" ></A></TD></TR>
</TABLE>

<!-- main table -->
<TABLE CELLSPACING=0 CELLPADDING=0  BORDER=0 height=100%>
 <TR>
	<TD WIDTH=16><IMG  SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="10" BORDER=0 ALT=""></TD>
	<TD WIDTH=140   BGCOLOR=#dddddd VALIGN=TOP ALIGN=middle><IMG  SRC="/inc/img/pixel.gif" WIDTH="140" HEIGHT="10" BORDER=0 ALT="">

		<!------ Begin Box ------>
		<TABLE BORDER=0 CELLSPACING=0 CELLPADDING=0 BGCOLOR=black><TR><TD><TABLE border=0  cellspacing=2 cellpadding=0 width=120><tr><TD ALIGN=middle bgcolor=black>
		<FONT COLOR=white FACE="sans-serif"><B>Nevrax.org</B></FONT></TD></TR><tr><td  colspan=2 bgcolor=#FFFFFF>
		<TABLE cellspacing=0 cellpadding=1 border=0>
			<tr><td ALIGN=middle><a  class='linkbox' href="/news/" TITLE="Rubrique news"><img width=13 height=15  hspace=5 border=0 src=/inc/img/picto-news.gif ALT=#></A></td><td><a  class='linkbox' href="/news/" TITLE="News">News</a></td></tr>
			<tr><td ALIGN=middle><a  class='linkbox' href="/mail/" TITLE="Rubrique mail"><img width=15 height=11  hspace=5 border=0 src=/inc/img/picto-mail.gif ALT=#></A></td><td><a  class='linkbox' href="/mail/" TITLE="Mailing list archive">Mailing-list</a></td></tr>
			<tr><td ALIGN=middle><a  class='linkbox' href="/docs/" TITLE="Rubrique docs"><img width=14 height=16  hspace=5 border=0 src=/inc/img/picto-docs.gif ALT=#></A></td><td><a  class='linkbox' href="/docs/" TITLE="Documentation">Documentation</a></td></tr>
			<tr><td ALIGN=middle><a  class='linkbox' href="/cvs/" TITLE="Rubrique cvs"><img width=13 height=17  hspace=5 border=0 src=/inc/img/picto-cvs.gif ALT=#></A></td><td><a  class='linkbox' href="/cvs/" TITLE="CVS Web">CVS</a></td></tr>
			<tr><td ALIGN=middle><a  class='linkbox' href="/bugs/" TITLE="Rubrique bugs"><img width=20 height=16  hspace=5 border=0 src=/inc/img/picto-bugs.gif ALT=#></A></td><td><a  class='linkbox' href="/bugs/" TITLE="Bugtracking">Bugs</a></td></tr>
			<tr><td ALIGN=middle><a  class='linkbox' href="/GPL.php3" TITLE="Rubrique license"><img  width=18 height=12   hspace=5 border=0 src=/inc/img/picto-gpl.gif ALT=#></A></td><td><a  class='linkbox' href="/GPL.php3" TITLE="License">License</a></td></tr>
		</TABLE>
		</TD></TR></TABLE></TD></TR></TABLE>
		<!------ End Box  ------>

	</TD>
	<TD WIDTH=15><IMG  SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD>
	<TD ALIGN=left valign=top><IMG  SRC="/inc/img/pixel.gif" WIDTH="140" HEIGHT="10" BORDER=0 ALT="">

<!-- title -->
<TABLE  background="/inc/img/redline.gif" CELLSPACING=0 CELLPADDING=0  BORDER=0 width=100%><tr><td>
<A HREF="/docs/"><img src="/inc/img/t_docs.gif" ALT="Docs" HEIGHT=20 BORDER=0></A>
</td><td><IMG  SRC="/inc/img/pixel.gif" WIDTH="1" HEIGHT="1" BORDER=0 ALT="">
</td></tr></table>
&nbsp;

<!-- block -->
<TABLE  bgcolor="#dddddd" CELLSPACING=0 CELLPADDING=0  BORDER=0 width=100%><tr><td width=1% valign=middle><img width=6 height=14 hspace=2 vspace=2 src="/inc/img/reddots.gif" ALT="#"></TD>
	<TD><B>Documentation</B></TD>
	<TD ALIGN=RIGHT>&nbsp;</td>
</tr></table>
<!-- Generated by Doxygen 1.2.14 -->
<center>
<a class="qindex" href="index.html">Main Page</a> &nbsp; <a class="qindex" href="namespaces.html">Namespace List</a> &nbsp; <a class="qindex" href="hierarchy.html">Class Hierarchy</a> &nbsp; <a class="qindex" href="classes.html">Alphabetical List</a> &nbsp; <a class="qindex" href="annotated.html">Compound List</a> &nbsp; <a class="qindex" href="files.html">File List</a> &nbsp; <a class="qindex" href="namespacemembers.html">Namespace Members</a> &nbsp; <a class="qindex" href="functions.html">Compound Members</a> &nbsp; <a class="qindex" href="globals.html">File Members</a> &nbsp; <a class="qindex" href="pages.html">Related Pages</a> &nbsp; <a class="qindexRef" doxygen="_cgi:/cgi-bin/nel-search.cgi" href="/cgi-bin/nel-search.cgi">Search</a> &nbsp; </center>
<hr><h1>fast_mem.cpp</h1><a href="fast__mem_8cpp.html">Go to the documentation of this file.</a><div class="fragment"><pre>00001 
00007 <font class="comment">/* Copyright, 2000-2002 Nevrax Ltd.</font>
00008 <font class="comment"> *</font>
00009 <font class="comment"> * This file is part of NEVRAX NEL.</font>
00010 <font class="comment"> * NEVRAX NEL is free software; you can redistribute it and/or modify</font>
00011 <font class="comment"> * it under the terms of the GNU General Public License as published by</font>
00012 <font class="comment"> * the Free Software Foundation; either version 2, or (at your option)</font>
00013 <font class="comment"> * any later version.</font>
00014 <font class="comment"></font>
00015 <font class="comment"> * NEVRAX NEL is distributed in the hope that it will be useful, but</font>
00016 <font class="comment"> * WITHOUT ANY WARRANTY; without even the implied warranty of</font>
00017 <font class="comment"> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU</font>
00018 <font class="comment"> * General Public License for more details.</font>
00019 <font class="comment"></font>
00020 <font class="comment"> * You should have received a copy of the GNU General Public License</font>
00021 <font class="comment"> * along with NEVRAX NEL; see the file COPYING. If not, write to the</font>
00022 <font class="comment"> * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,</font>
00023 <font class="comment"> * MA 02111-1307, USA.</font>
00024 <font class="comment"> */</font>
00025 
00026 <font class="preprocessor">#include "<a class="code" href="stdmisc_8h.html">stdmisc.h</a>"</font>
00027 
00028 <font class="preprocessor">#include "<a class="code" href="fast__mem_8h.html">nel/misc/fast_mem.h</a>"</font>
00029 <font class="preprocessor">#include "<a class="code" href="system__info_8h.html">nel/misc/system_info.h</a>"</font>
00030 
00031 
00032 <font class="keyword">namespace </font>NLMISC
00033 {
00034 
00035 <font class="preprocessor">#ifdef NL_OS_WINDOWS</font>
00036 <font class="preprocessor"></font>
00037 
00038 <font class="comment">// ***************************************************************************</font>
00039 <font class="keywordtype">void</font>            *CFastMem::memcpySSE(<font class="keywordtype">void</font> *dest, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes)
00040 {
00041         _asm 
00042         {
00043                         mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a> 
00044                         mov edi, dest 
00045                         mov ebx, nbytes 
00046 
00047                         <font class="comment">// edx takes number of bytes%64</font>
00048                         mov     edx, ebx
00049                         and edx, 63
00050 
00051                         <font class="comment">// ebx takes number of bytes/64</font>
00052                         shr     ebx, 6
00053                         jz      byteCopy
00054 
00055 
00056         loop4k: <font class="comment">// flush 4k into temporary buffer </font>
00057                         push esi 
00058                         mov ecx, ebx
00059                         <font class="comment">// copy per block of 64 bytes. Must not override 64*64= 4096 bytes.</font>
00060                         cmp ecx, 64
00061                         jle     skipMiniMize
00062                         mov     ecx, 64
00063         skipMiniMize:
00064                         <font class="comment">// eax takes the number of 64bytes packet for this block.</font>
00065                         mov eax, ecx
00066 
00067         loopMemToL1: 
00068                         prefetchnta 64[ESI] <font class="comment">// Prefetch next loop, non-temporal </font>
00069                         prefetchnta 96[ESI] 
00070 
00071                         movq mm1,  0[ESI] <font class="comment">// Read in source data </font>
00072                         movq mm2,  8[ESI] 
00073                         movq mm3, 16[ESI] 
00074                         movq mm4, 24[ESI] 
00075                         movq mm5, 32[ESI] 
00076                         movq mm6, 40[ESI] 
00077                         movq mm7, 48[ESI] 
00078                         movq mm0, 56[ESI] 
00079 
00080                         add esi, 64 
00081                         dec ecx 
00082                         jnz loopMemToL1 
00083 
00084                         pop esi <font class="comment">// Now copy from L1 to system memory </font>
00085                         mov ecx, eax
00086 
00087         loopL1ToMem: 
00088                         movq mm1, 0[ESI] <font class="comment">// Read in source data from L1 </font>
00089                         movq mm2, 8[ESI] 
00090                         movq mm3, 16[ESI] 
00091                         movq mm4, 24[ESI] 
00092                         movq mm5, 32[ESI] 
00093                         movq mm6, 40[ESI] 
00094                         movq mm7, 48[ESI] 
00095                         movq mm0, 56[ESI] 
00096 
00097                         movntq 0[EDI], mm1 <font class="comment">// Non-temporal stores </font>
00098                         movntq 8[EDI], mm2 
00099                         movntq 16[EDI], mm3 
00100                         movntq 24[EDI], mm4 
00101                         movntq 32[EDI], mm5 
00102                         movntq 40[EDI], mm6 
00103                         movntq 48[EDI], mm7 
00104                         movntq 56[EDI], mm0 
00105 
00106                         add esi, 64 
00107                         add edi, 64 
00108                         dec ecx 
00109                         jnz loopL1ToMem
00110 
00111                         <font class="comment">// Do next 4k block </font>
00112                         sub ebx, eax
00113                         jnz loop4k 
00114 
00115                         emms
00116 
00117         byteCopy:
00118                         <font class="comment">// Do last bytes with std cpy</font>
00119                         mov     ecx, edx
00120                         rep movsb
00121         }
00122         <font class="keywordflow">return</font> dest;
00123 }
00124 
00125 <font class="comment">// ***************************************************************************</font>
00126 <font class="keywordtype">void</font>            CFastMem::precacheSSE(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00127 {
00128         _asm 
00129         { 
00130                         mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a> 
00131                         mov ecx, nbytes
00132                         <font class="comment">// 64 bytes per pass</font>
00133                         shr ecx, 6 
00134                         jz endLabel
00135 
00136         loopMemToL1: 
00137                         prefetchnta 64[ESI] <font class="comment">// Prefetch next loop, non-temporal </font>
00138                         prefetchnta 96[ESI] 
00139 
00140                         movq mm1,  0[ESI] <font class="comment">// Read in source data </font>
00141                         movq mm2,  8[ESI] 
00142                         movq mm3, 16[ESI] 
00143                         movq mm4, 24[ESI] 
00144                         movq mm5, 32[ESI] 
00145                         movq mm6, 40[ESI] 
00146                         movq mm7, 48[ESI] 
00147                         movq mm0, 56[ESI]
00148 
00149                         add esi, 64 
00150                         dec ecx 
00151                         jnz loopMemToL1 
00152 
00153                         emms
00154 
00155         endLabel:
00156         }
00157 }
00158 
00159 <font class="comment">// ***************************************************************************</font>
00160 <font class="keywordtype">void</font>            CFastMem::precacheMMX(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00161 {
00162         _asm 
00163         { 
00164                         mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a> 
00165                         mov ecx, nbytes
00166                         <font class="comment">// 64 bytes per pass</font>
00167                         shr ecx, 6 
00168                         jz endLabel
00169 
00170         loopMemToL1: 
00171                         movq mm1,  0[ESI] <font class="comment">// Read in source data </font>
00172                         movq mm2,  8[ESI] 
00173                         movq mm3, 16[ESI] 
00174                         movq mm4, 24[ESI] 
00175                         movq mm5, 32[ESI] 
00176                         movq mm6, 40[ESI] 
00177                         movq mm7, 48[ESI] 
00178                         movq mm0, 56[ESI]
00179 
00180                         add esi, 64 
00181                         dec ecx 
00182                         jnz loopMemToL1 
00183 
00184                         emms
00185 
00186         endLabel:
00187         }
00188 }
00189 
00190 
00191 <font class="comment">// ***************************************************************************</font>
00192 <font class="keywordtype">void</font>            CFastMem::precache(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00193 {
00194         <font class="keywordflow">if</font>(<a class="code" href="classNLMISC_1_1CSystemInfo.html#d5">NLMISC::CSystemInfo::hasSSE</a>())
00195                 precacheSSE(<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes);
00196         <font class="keywordflow">else</font> <font class="keywordflow">if</font>(<a class="code" href="classNLMISC_1_1CSystemInfo.html#d4">NLMISC::CSystemInfo::hasMMX</a>())
00197                 precacheMMX(<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes);
00198 }
00199 
00200 
00201 <font class="preprocessor">#else</font>
00202 <font class="preprocessor"></font>
00203 
00204 <font class="comment">// ***************************************************************************</font>
<a name="l00205"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d1">00205</a> <font class="keywordtype">void</font>            *CFastMem::memcpySSE(<font class="keywordtype">void</font> *dst, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes)
00206 {
00207         <font class="comment">// Use std memcpy.</font>
00208         <font class="keywordflow">return</font> memcpy(dst, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes);
00209 }
<a name="l00210"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d2">00210</a> <font class="keywordtype">void</font>            CFastMem::precacheSSE(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00211 {
00212         <font class="comment">// no-op.</font>
00213 }
<a name="l00214"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d3">00214</a> <font class="keywordtype">void</font>            CFastMem::precacheMMX(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00215 {
00216         <font class="comment">// no-op.</font>
00217 }
<a name="l00218"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d0">00218</a> <font class="keywordtype">void</font>            CFastMem::precache(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00219 {
00220         <font class="comment">// no-op.</font>
00221 }
00222 
00223 <font class="preprocessor">#endif</font>
00224 <font class="preprocessor"></font>
00225 <font class="keyword">typedef</font> <font class="keywordtype">void</font>  *(*memcpyPtr)(<font class="keywordtype">void</font> *dts, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes);
00226 
00227 <font class="keyword">static</font> <a class="code" href="namespaceNLMISC.html#a193">memcpyPtr</a> <a class="code" href="namespaceNLMISC.html#a295">findBestmemcpy</a> ()
00228 {
00229 <font class="preprocessor">#ifdef NL_OS_WINDOWS</font>
00230 <font class="preprocessor"></font>        <font class="keywordflow">if</font> (CSystemInfo::hasSSE ())
00231                 <font class="keywordflow">return</font> CFastMem::memcpySSE;
00232         <font class="keywordflow">else</font>
00233                 return ::memcpy;
00234 <font class="preprocessor">#else // NL_OS_WINDOWS</font>
00235 <font class="preprocessor"></font>        return ::memcpy;
00236 <font class="preprocessor">#endif // NL_OS_WINDOWS</font>
00237 <font class="preprocessor"></font>}
00238 
00239 <font class="keywordtype">void</font>  *(*CFastMem::memcpy)(<font class="keywordtype">void</font> *dts, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes) = <a class="code" href="namespaceNLMISC.html#a295">findBestmemcpy</a> ();
00240 
00241 } <font class="comment">// NLMISC</font>
</pre></div>

<!-- footer -->
<BR><FONT Size=+5>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; </FONT>
</TD>
<TD WIDTH=15><IMG  SRC=/inc/img/pixel.gif WIDTH=15 HEIGHT=15 BORDER=0 ALT=""></TD>
</TR>
</TABLE>
</BODY>
</HTML>