1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
|
<!doctype html public "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<HTML>
<HEAD>
<TITLE>nevrax.org : docs</TITLE>
<LINK REL=stylesheet TYPE="text/css" HREF="/inc/css/nevrax.css">
<link href="doxygen.css" rel="stylesheet" type="text/css">
</HEAD>
<BODY MARGINHEIGHT="0" MARGINWIDTH="0">
<!-- uplinks -->
<TABLE CELLSPACING=0 CELLPADDING=0 BORDER=0>
<TR>
<TD WIDTH=16><IMG SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD>
<TD WIDTH=140 BGCOLOR=#dddddd><IMG SRC="/inc/img/pixel.gif" WIDTH="140" HEIGHT="16" BORDER=0 ALT=""></TD>
<TD WIDTH=16><IMG SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD>
<TD><IMG width=6 height=14 SRC="/inc/img/reddots.gif" ALT="#" VSPACE=2 HSPACE=2 BORDER=0 ></TD><TD VALIGN=middle> <A CLASS=uplinks HREF=""><b>Home</B></A> </TD>
<TD><IMG width=6 height=14 SRC="/inc/img/reddots.gif" ALT="#" VSPACE=2 HSPACE=2 BORDER=0 ></TD><TD VALIGN=middle> <A CLASS=uplinks HREF=""><b>nevrax.com</B></A> </TD>
</TR>
</TABLE>
<!-- banner Nevrax -->
<TABLE CELLSPACING=0 CELLPADDING=0 BORDER=0 WIDTH=100%>
<TR><TD BGCOLOR="#000000" BACKGROUND="/inc/img/black_banner.jpg"><A HREF=""><IMG SRC="/inc/img/nevrax.gif" WIDTH="170" HEIGHT="45" BORDER=0 ALT="Nevrax" ></A></TD></TR>
</TABLE>
<!-- main table -->
<TABLE CELLSPACING=0 CELLPADDING=0 BORDER=0 height=100%>
<TR>
<TD WIDTH=16><IMG SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="10" BORDER=0 ALT=""></TD>
<TD WIDTH=140 BGCOLOR=#dddddd VALIGN=TOP ALIGN=middle><IMG SRC="/inc/img/pixel.gif" WIDTH="140" HEIGHT="10" BORDER=0 ALT="">
<!-- Begin Box -->
<TABLE BORDER=0 CELLSPACING=0 CELLPADDING=0 BGCOLOR=black><TR><TD><TABLE border=0 cellspacing=2 cellpadding=0 width=120><tr><TD ALIGN=middle bgcolor=black>
<FONT COLOR=white FACE="sans-serif"><B>Nevrax.org</B></FONT></TD></TR><tr><td colspan=2 bgcolor=#FFFFFF>
<TABLE cellspacing=0 cellpadding=1 border=0>
<tr><td ALIGN=middle><a class='linkbox' href="/news/" TITLE="Rubrique news"><img width=13 height=15 hspace=5 border=0 src=/inc/img/picto-news.gif ALT=#></A></td><td><a class='linkbox' href="/news/" TITLE="News">News</a></td></tr>
<tr><td ALIGN=middle><a class='linkbox' href="/mail/" TITLE="Rubrique mail"><img width=15 height=11 hspace=5 border=0 src=/inc/img/picto-mail.gif ALT=#></A></td><td><a class='linkbox' href="/mail/" TITLE="Mailing list archive">Mailing-list</a></td></tr>
<tr><td ALIGN=middle><a class='linkbox' href="/docs/" TITLE="Rubrique docs"><img width=14 height=16 hspace=5 border=0 src=/inc/img/picto-docs.gif ALT=#></A></td><td><a class='linkbox' href="/docs/" TITLE="Documentation">Documentation</a></td></tr>
<tr><td ALIGN=middle><a class='linkbox' href="/cvs/" TITLE="Rubrique cvs"><img width=13 height=17 hspace=5 border=0 src=/inc/img/picto-cvs.gif ALT=#></A></td><td><a class='linkbox' href="/cvs/" TITLE="CVS Web">CVS</a></td></tr>
<tr><td ALIGN=middle><a class='linkbox' href="/bugs/" TITLE="Rubrique bugs"><img width=20 height=16 hspace=5 border=0 src=/inc/img/picto-bugs.gif ALT=#></A></td><td><a class='linkbox' href="/bugs/" TITLE="Bugtracking">Bugs</a></td></tr>
<tr><td ALIGN=middle><a class='linkbox' href="/GPL.php3" TITLE="Rubrique license"><img width=18 height=12 hspace=5 border=0 src=/inc/img/picto-gpl.gif ALT=#></A></td><td><a class='linkbox' href="/GPL.php3" TITLE="License">License</a></td></tr>
</TABLE>
</TD></TR></TABLE></TD></TR></TABLE>
<!-- End Box -->
</TD>
<TD WIDTH=15><IMG SRC="/inc/img/pixel.gif" WIDTH="16" HEIGHT="16" BORDER=0 ALT=""></TD>
<TD ALIGN=left valign=top><IMG SRC="/inc/img/pixel.gif" WIDTH="140" HEIGHT="10" BORDER=0 ALT="">
<!-- title -->
<TABLE background="/inc/img/redline.gif" CELLSPACING=0 CELLPADDING=0 BORDER=0 width=100%><tr><td>
<A HREF="/docs/"><img src="/inc/img/t_docs.gif" ALT="Docs" HEIGHT=20 BORDER=0></A>
</td><td><IMG SRC="/inc/img/pixel.gif" WIDTH="1" HEIGHT="1" BORDER=0 ALT="">
</td></tr></table>
<!-- block -->
<TABLE bgcolor="#dddddd" CELLSPACING=0 CELLPADDING=0 BORDER=0 width=100%><tr><td width=1% valign=middle><img width=6 height=14 hspace=2 vspace=2 src="/inc/img/reddots.gif" alt="#"></TD>
<TD><B>Documentation</B></TD>
<TD ALIGN=RIGHT> </td>
</tr></table>
<!-- Generated by Doxygen 1.2.14 -->
<center>
<a class="qindex" href="index.html">Main Page</a> <a class="qindex" href="namespaces.html">Namespace List</a> <a class="qindex" href="hierarchy.html">Class Hierarchy</a> <a class="qindex" href="classes.html">Alphabetical List</a> <a class="qindex" href="annotated.html">Compound List</a> <a class="qindex" href="files.html">File List</a> <a class="qindex" href="namespacemembers.html">Namespace Members</a> <a class="qindex" href="functions.html">Compound Members</a> <a class="qindex" href="globals.html">File Members</a> <a class="qindex" href="pages.html">Related Pages</a> <a class="qindexRef" doxygen="_cgi:/cgi-bin/nel-search.cgi" href="/cgi-bin/nel-search.cgi">Search</a> </center>
<hr><h1>fast_mem.cpp</h1><a href="fast__mem_8cpp.html">Go to the documentation of this file.</a><div class="fragment"><pre>00001
00007 <font class="comment">/* Copyright, 2000-2002 Nevrax Ltd.</font>
00008 <font class="comment"> *</font>
00009 <font class="comment"> * This file is part of NEVRAX NEL.</font>
00010 <font class="comment"> * NEVRAX NEL is free software; you can redistribute it and/or modify</font>
00011 <font class="comment"> * it under the terms of the GNU General Public License as published by</font>
00012 <font class="comment"> * the Free Software Foundation; either version 2, or (at your option)</font>
00013 <font class="comment"> * any later version.</font>
00014 <font class="comment"></font>
00015 <font class="comment"> * NEVRAX NEL is distributed in the hope that it will be useful, but</font>
00016 <font class="comment"> * WITHOUT ANY WARRANTY; without even the implied warranty of</font>
00017 <font class="comment"> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU</font>
00018 <font class="comment"> * General Public License for more details.</font>
00019 <font class="comment"></font>
00020 <font class="comment"> * You should have received a copy of the GNU General Public License</font>
00021 <font class="comment"> * along with NEVRAX NEL; see the file COPYING. If not, write to the</font>
00022 <font class="comment"> * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,</font>
00023 <font class="comment"> * MA 02111-1307, USA.</font>
00024 <font class="comment"> */</font>
00025
00026 <font class="preprocessor">#include "<a class="code" href="stdmisc_8h.html">stdmisc.h</a>"</font>
00027
00028 <font class="preprocessor">#include "<a class="code" href="fast__mem_8h.html">nel/misc/fast_mem.h</a>"</font>
00029 <font class="preprocessor">#include "<a class="code" href="system__info_8h.html">nel/misc/system_info.h</a>"</font>
00030
00031
00032 <font class="keyword">namespace </font>NLMISC
00033 {
00034
00035 <font class="preprocessor">#ifdef NL_OS_WINDOWS</font>
00036 <font class="preprocessor"></font>
00037
00038 <font class="comment">// ***************************************************************************</font>
00039 <font class="keywordtype">void</font> *CFastMem::memcpySSE(<font class="keywordtype">void</font> *dest, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes)
00040 {
00041 _asm
00042 {
00043 mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>
00044 mov edi, dest
00045 mov ebx, nbytes
00046
00047 <font class="comment">// edx takes number of bytes%64</font>
00048 mov edx, ebx
00049 and edx, 63
00050
00051 <font class="comment">// ebx takes number of bytes/64</font>
00052 shr ebx, 6
00053 jz byteCopy
00054
00055
00056 loop4k: <font class="comment">// flush 4k into temporary buffer </font>
00057 push esi
00058 mov ecx, ebx
00059 <font class="comment">// copy per block of 64 bytes. Must not override 64*64= 4096 bytes.</font>
00060 cmp ecx, 64
00061 jle skipMiniMize
00062 mov ecx, 64
00063 skipMiniMize:
00064 <font class="comment">// eax takes the number of 64bytes packet for this block.</font>
00065 mov eax, ecx
00066
00067 loopMemToL1:
00068 prefetchnta 64[ESI] <font class="comment">// Prefetch next loop, non-temporal </font>
00069 prefetchnta 96[ESI]
00070
00071 movq mm1, 0[ESI] <font class="comment">// Read in source data </font>
00072 movq mm2, 8[ESI]
00073 movq mm3, 16[ESI]
00074 movq mm4, 24[ESI]
00075 movq mm5, 32[ESI]
00076 movq mm6, 40[ESI]
00077 movq mm7, 48[ESI]
00078 movq mm0, 56[ESI]
00079
00080 add esi, 64
00081 dec ecx
00082 jnz loopMemToL1
00083
00084 pop esi <font class="comment">// Now copy from L1 to system memory </font>
00085 mov ecx, eax
00086
00087 loopL1ToMem:
00088 movq mm1, 0[ESI] <font class="comment">// Read in source data from L1 </font>
00089 movq mm2, 8[ESI]
00090 movq mm3, 16[ESI]
00091 movq mm4, 24[ESI]
00092 movq mm5, 32[ESI]
00093 movq mm6, 40[ESI]
00094 movq mm7, 48[ESI]
00095 movq mm0, 56[ESI]
00096
00097 movntq 0[EDI], mm1 <font class="comment">// Non-temporal stores </font>
00098 movntq 8[EDI], mm2
00099 movntq 16[EDI], mm3
00100 movntq 24[EDI], mm4
00101 movntq 32[EDI], mm5
00102 movntq 40[EDI], mm6
00103 movntq 48[EDI], mm7
00104 movntq 56[EDI], mm0
00105
00106 add esi, 64
00107 add edi, 64
00108 dec ecx
00109 jnz loopL1ToMem
00110
00111 <font class="comment">// Do next 4k block </font>
00112 sub ebx, eax
00113 jnz loop4k
00114
00115 emms
00116
00117 byteCopy:
00118 <font class="comment">// Do last bytes with std cpy</font>
00119 mov ecx, edx
00120 rep movsb
00121 }
00122 <font class="keywordflow">return</font> dest;
00123 }
00124
00125 <font class="comment">// ***************************************************************************</font>
00126 <font class="keywordtype">void</font> CFastMem::precacheSSE(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00127 {
00128 _asm
00129 {
00130 mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>
00131 mov ecx, nbytes
00132 <font class="comment">// 64 bytes per pass</font>
00133 shr ecx, 6
00134 jz endLabel
00135
00136 loopMemToL1:
00137 prefetchnta 64[ESI] <font class="comment">// Prefetch next loop, non-temporal </font>
00138 prefetchnta 96[ESI]
00139
00140 movq mm1, 0[ESI] <font class="comment">// Read in source data </font>
00141 movq mm2, 8[ESI]
00142 movq mm3, 16[ESI]
00143 movq mm4, 24[ESI]
00144 movq mm5, 32[ESI]
00145 movq mm6, 40[ESI]
00146 movq mm7, 48[ESI]
00147 movq mm0, 56[ESI]
00148
00149 add esi, 64
00150 dec ecx
00151 jnz loopMemToL1
00152
00153 emms
00154
00155 endLabel:
00156 }
00157 }
00158
00159 <font class="comment">// ***************************************************************************</font>
00160 <font class="keywordtype">void</font> CFastMem::precacheMMX(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00161 {
00162 _asm
00163 {
00164 mov esi, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>
00165 mov ecx, nbytes
00166 <font class="comment">// 64 bytes per pass</font>
00167 shr ecx, 6
00168 jz endLabel
00169
00170 loopMemToL1:
00171 movq mm1, 0[ESI] <font class="comment">// Read in source data </font>
00172 movq mm2, 8[ESI]
00173 movq mm3, 16[ESI]
00174 movq mm4, 24[ESI]
00175 movq mm5, 32[ESI]
00176 movq mm6, 40[ESI]
00177 movq mm7, 48[ESI]
00178 movq mm0, 56[ESI]
00179
00180 add esi, 64
00181 dec ecx
00182 jnz loopMemToL1
00183
00184 emms
00185
00186 endLabel:
00187 }
00188 }
00189
00190
00191 <font class="comment">// ***************************************************************************</font>
00192 <font class="keywordtype">void</font> CFastMem::precache(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00193 {
00194 <font class="keywordflow">if</font>(<a class="code" href="classNLMISC_1_1CSystemInfo.html#d5">NLMISC::CSystemInfo::hasSSE</a>())
00195 precacheSSE(<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes);
00196 <font class="keywordflow">else</font> <font class="keywordflow">if</font>(<a class="code" href="classNLMISC_1_1CSystemInfo.html#d4">NLMISC::CSystemInfo::hasMMX</a>())
00197 precacheMMX(<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes);
00198 }
00199
00200
00201 <font class="preprocessor">#else</font>
00202 <font class="preprocessor"></font>
00203
00204 <font class="comment">// ***************************************************************************</font>
<a name="l00205"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d1">00205</a> <font class="keywordtype">void</font> *CFastMem::memcpySSE(<font class="keywordtype">void</font> *dst, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes)
00206 {
00207 <font class="comment">// Use std memcpy.</font>
00208 <font class="keywordflow">return</font> memcpy(dst, <a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, nbytes);
00209 }
<a name="l00210"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d2">00210</a> <font class="keywordtype">void</font> CFastMem::precacheSSE(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00211 {
00212 <font class="comment">// no-op.</font>
00213 }
<a name="l00214"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d3">00214</a> <font class="keywordtype">void</font> CFastMem::precacheMMX(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00215 {
00216 <font class="comment">// no-op.</font>
00217 }
<a name="l00218"></a><a class="code" href="classNLMISC_1_1CFastMem.html#d0">00218</a> <font class="keywordtype">void</font> CFastMem::precache(<font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, uint nbytes)
00219 {
00220 <font class="comment">// no-op.</font>
00221 }
00222
00223 <font class="preprocessor">#endif</font>
00224 <font class="preprocessor"></font>
00225 <font class="keyword">typedef</font> <font class="keywordtype">void</font> *(*memcpyPtr)(<font class="keywordtype">void</font> *dts, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes);
00226
00227 <font class="keyword">static</font> <a class="code" href="namespaceNLMISC.html#a193">memcpyPtr</a> <a class="code" href="namespaceNLMISC.html#a295">findBestmemcpy</a> ()
00228 {
00229 <font class="preprocessor">#ifdef NL_OS_WINDOWS</font>
00230 <font class="preprocessor"></font> <font class="keywordflow">if</font> (CSystemInfo::hasSSE ())
00231 <font class="keywordflow">return</font> CFastMem::memcpySSE;
00232 <font class="keywordflow">else</font>
00233 <font class="keywordflow">return</font> ::memcpy;
00234 <font class="preprocessor">#else // NL_OS_WINDOWS</font>
00235 <font class="preprocessor"></font> <font class="keywordflow">return</font> ::memcpy;
00236 <font class="preprocessor">#endif // NL_OS_WINDOWS</font>
00237 <font class="preprocessor"></font>}
00238
00239 <font class="keywordtype">void</font> *(*CFastMem::memcpy)(<font class="keywordtype">void</font> *dts, <font class="keyword">const</font> <font class="keywordtype">void</font> *<a class="code" href="driver__opengl__extension__def_8h.html#a409">src</a>, size_t nbytes) = <a class="code" href="namespaceNLMISC.html#a295">findBestmemcpy</a> ();
00240
00241 } <font class="comment">// NLMISC</font>
</pre></div>
<!-- footer -->
<BR><FONT Size=+5> </FONT>
</TD>
<TD WIDTH=15><IMG SRC=/inc/img/pixel.gif WIDTH=15 HEIGHT=15 BORDER=0 ALT=""></TD>
</TR>
</TABLE>
</BODY>
</HTML>
|