562
|
1 /*
|
|
2 dct36_3dnowext: extended 3DNow optimized DCT36
|
|
3
|
|
4 copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
|
|
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
|
|
6
|
|
7 Transformed back into standalone asm, with help of
|
|
8 gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct36_3dnowext.{S,c}
|
|
9
|
|
10 MPlayer comment follows.
|
|
11 */
|
|
12
|
|
13 /*
|
|
14 * dct36_3dnow.c - 3DNow! optimized dct36()
|
|
15 *
|
|
16 * This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
|
|
17 * <squash@mb.kcom.ne.jp>, only two types of changes have been made:
|
|
18 *
|
|
19 * - removed PREFETCH instruction for speedup
|
|
20 * - changed function name to support 3DNow! automatic detection
|
|
21 *
|
|
22 * You can find Kashiyama's original 3dnow! support patch
|
|
23 * (for mpg123-0.59o) at
|
|
24 * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
|
|
25 *
|
|
26 * by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
|
|
27 * <kim@comtec.co.jp> - after 1.Apr.1999
|
|
28 *
|
|
29 * Modified for use with MPlayer, for details see the changelog at
|
|
30 * http://svn.mplayerhq.hu/mplayer/trunk/
|
|
31 * $Id: dct36_3dnow.c 18786 2006-06-22 13:34:00Z diego $
|
|
32 *
|
|
33 * Original disclaimer:
|
|
34 * The author of this program disclaim whole expressed or implied
|
|
35 * warranties with regard to this program, and in no event shall the
|
|
36 * author of this program liable to whatever resulted from the use of
|
|
37 * this program. Use it at your own risk.
|
|
38 *
|
|
39 * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
|
|
40 */
|
|
41
|
|
42 #include "mangle.h"
|
|
43
|
|
/* The routine is emitted into the text section.  ALIGN32 and ASM_NAME are
   macros from mangle.h (not visible here) -- presumably an alignment
   directive and symbol-name mangling; confirm there. */
44 .text
|
|
45 ALIGN32,,31
|
|
46 .globl ASM_NAME(dct36_3dnowext)
|
|
47 /* .type ASM_NAME(dct36_3dnowext), @function */
|
|
/*
 * dct36_3dnowext: the DCT36 routine named in the file header, written with
 * 3DNow! instructions plus the extended-3DNow! pswapd.
 *
 * Five 32-bit arguments are read from the stack at 8..24(%ebp).  The names
 * used in the comments below are assumptions borrowed from mpg123's C dct36
 * -- confirm against the C prototype:
 *   arg1 -> %eax  "in"   : floats 0..17, read AND modified in place (stages 1/2)
 *   arg2 -> %esi  "out1" : floats 0..17, read only, added into the ts results
 *   arg3 -> %ecx  "out2" : floats 0..17, written
 *   arg4 -> %edx  "w"    : floats 0..35, read only
 *   arg5 -> %ebx  "ts"   : written at byte offsets 128*k (float index 32*k),
 *                          k = 0..17
 * External tables, addressed absolutely: COS9 (floats 1..8 are read) and
 * tfcos36 (floats 0..8 are read).
 *
 * Data layout: a 64-bit MMX register holds two floats; after a movq from
 * in[2k] the low lane is the even element in[2k], the high lane the odd
 * element in[2k+1].  "in[a..b]" below means such a pair.
 *
 * Long-lived registers once the setup code has run:
 *   %mm0 = in[6..7]   * COS9[3]   (used by groups A, C and D)
 *   %mm1 = in[12..13] * COS9[6]   (used by groups A, C and D)
 *   %mm7 = { 1.0f, 0.0f }         (low-lane factor for the tfcos36 scaling)
 *   %mm2, %mm3 = the two partial sums of the current group
 *   %mm4, %mm5, %mm6 = scratch
 *
 * Output step, executed once per tfcos36 index v with t = %mm2 + %mm3 or
 * t = %mm3 - %mm2:
 *   t = t * { 1.0, tfcos36[v] }
 *   s = t.lo + t.hi          d = t.lo - t.hi
 *   out2[9+v] = s * w[27+v]  out2[8-v] = s * w[26-v]
 *   ts[32*(8-v)] = out1[8-v] + d * w[8-v]
 *   ts[32*(9+v)] = out1[9+v] + d * w[9+v]
 *
 * %esi and %ebx are pushed on entry and popped on exit; %eax, %ecx, %edx
 * and all MMX registers are overwritten.  femms is executed before the
 * epilogue.
 */
48 ASM_NAME(dct36_3dnowext):
|
|
/* Prologue: set up the frame pointer and preserve %esi / %ebx. */
49 pushl %ebp
|
|
50 movl %esp, %ebp
|
|
51 pushl %esi
|
|
52 pushl %ebx
|
|
/* Fetch the five stack arguments (see the header for their roles). */
53 movl 8(%ebp), %eax
|
|
54 movl 12(%ebp), %esi
|
|
55 movl 16(%ebp), %ecx
|
|
56 movl 20(%ebp), %edx
|
|
57 movl 24(%ebp), %ebx
|
|
58 /* APP */
|
|
/* Stage 1 (in place): in[i] += in[i-1] for i = 1..17, where in[i-1] is the
   value it had before this stage.  Two elements are produced per step; the
   pair just loaded is shifted down one lane (psrlq $32) and reused as the
   "previous element" of the next step, so no updated value is read back. */
59 movq (%eax),%mm0
|
|
60 movq 4(%eax),%mm1
|
|
61 pfadd %mm1,%mm0
|
|
62 movq %mm0,4(%eax)
|
|
63 psrlq $32,%mm1
|
|
64 movq 12(%eax),%mm2
|
|
65 punpckldq %mm2,%mm1
|
|
66 pfadd %mm2,%mm1
|
|
67 movq %mm1,12(%eax)
|
|
68 psrlq $32,%mm2
|
|
69 movq 20(%eax),%mm3
|
|
70 punpckldq %mm3,%mm2
|
|
71 pfadd %mm3,%mm2
|
|
72 movq %mm2,20(%eax)
|
|
73 psrlq $32,%mm3
|
|
74 movq 28(%eax),%mm4
|
|
75 punpckldq %mm4,%mm3
|
|
76 pfadd %mm4,%mm3
|
|
77 movq %mm3,28(%eax)
|
|
78 psrlq $32,%mm4
|
|
79 movq 36(%eax),%mm5
|
|
80 punpckldq %mm5,%mm4
|
|
81 pfadd %mm5,%mm4
|
|
82 movq %mm4,36(%eax)
|
|
83 psrlq $32,%mm5
|
|
84 movq 44(%eax),%mm6
|
|
85 punpckldq %mm6,%mm5
|
|
86 pfadd %mm6,%mm5
|
|
87 movq %mm5,44(%eax)
|
|
88 psrlq $32,%mm6
|
|
89 movq 52(%eax),%mm7
|
|
90 punpckldq %mm7,%mm6
|
|
91 pfadd %mm7,%mm6
|
|
92 movq %mm6,52(%eax)
|
|
93 psrlq $32,%mm7
|
|
94 movq 60(%eax),%mm0
|
|
95 punpckldq %mm0,%mm7
|
|
96 pfadd %mm0,%mm7
|
|
97 movq %mm7,60(%eax)
|
|
98 psrlq $32,%mm0
|
|
/* Last element of stage 1: in[17] += in[16], handled as a single float. */
99 movd 68(%eax),%mm1
|
|
100 pfadd %mm1,%mm0
|
|
101 movd %mm0,68(%eax)
|
|
/* Stage 2 (in place): in[i] += in[i-2] for odd i = 3..17, again using the
   value in[i-2] had before this stage.  Each step builds { in[i-2], in[i] }
   and { in[i], in[i+2] }, adds them, and stores the two sums separately. */
102 movd 4(%eax),%mm0
|
|
103 movd 12(%eax),%mm1
|
|
104 punpckldq %mm1,%mm0
|
|
105 punpckldq 20(%eax),%mm1
|
|
106 pfadd %mm1,%mm0
|
|
107 movd %mm0,12(%eax)
|
|
108 psrlq $32,%mm0
|
|
109 movd %mm0,20(%eax)
|
|
110 psrlq $32,%mm1
|
|
111 movd 28(%eax),%mm2
|
|
112 punpckldq %mm2,%mm1
|
|
113 punpckldq 36(%eax),%mm2
|
|
114 pfadd %mm2,%mm1
|
|
115 movd %mm1,28(%eax)
|
|
116 psrlq $32,%mm1
|
|
117 movd %mm1,36(%eax)
|
|
118 psrlq $32,%mm2
|
|
119 movd 44(%eax),%mm3
|
|
120 punpckldq %mm3,%mm2
|
|
121 punpckldq 52(%eax),%mm3
|
|
122 pfadd %mm3,%mm2
|
|
123 movd %mm2,44(%eax)
|
|
124 psrlq $32,%mm2
|
|
125 movd %mm2,52(%eax)
|
|
126 psrlq $32,%mm3
|
|
127 movd 60(%eax),%mm4
|
|
128 punpckldq %mm4,%mm3
|
|
129 punpckldq 68(%eax),%mm4
|
|
130 pfadd %mm4,%mm3
|
|
131 movd %mm3,60(%eax)
|
|
132 psrlq $32,%mm3
|
|
133 movd %mm3,68(%eax)
|
|
/* Setup of the long-lived products:
   %mm0 = in[6..7] * COS9[3],  %mm1 = in[12..13] * COS9[6].
   Each COS9 scalar is duplicated into both lanes with punpckldq. */
134 movq 24(%eax),%mm0
|
|
135 movq 48(%eax),%mm1
|
|
136 movd ASM_NAME(COS9)+12,%mm2
|
|
137 punpckldq %mm2,%mm2
|
|
138 movd ASM_NAME(COS9)+24,%mm3
|
|
139 punpckldq %mm3,%mm3
|
|
140 pfmul %mm2,%mm0
|
|
141 pfmul %mm3,%mm1
|
|
/* %mm7 = { 1.0f, 0.0f }: integer 1 in the low lane converted by pi2fd.
   %eax is borrowed for the constant and restored from the stack. */
142 pushl %eax
|
|
143 movl $1,%eax
|
|
144 movd %eax,%mm7
|
|
145 pi2fd %mm7,%mm7
|
|
146 popl %eax
|
|
/* Group A, first sum:
   %mm2 = in[2..3]*COS9[1] + %mm0 + in[10..11]*COS9[5] + in[14..15]*COS9[7] */
147 movq 8(%eax),%mm2
|
|
148 movd ASM_NAME(COS9)+4,%mm3
|
|
149 punpckldq %mm3,%mm3
|
|
150 pfmul %mm3,%mm2
|
|
151 pfadd %mm0,%mm2
|
|
152 movq 40(%eax),%mm3
|
|
153 movd ASM_NAME(COS9)+20,%mm4
|
|
154 punpckldq %mm4,%mm4
|
|
155 pfmul %mm4,%mm3
|
|
156 pfadd %mm3,%mm2
|
|
157 movq 56(%eax),%mm3
|
|
158 movd ASM_NAME(COS9)+28,%mm4
|
|
159 punpckldq %mm4,%mm4
|
|
160 pfmul %mm4,%mm3
|
|
161 pfadd %mm3,%mm2
|
|
/* Group A, second sum:
   %mm3 = in[0..1] + in[4..5]*COS9[2] + in[8..9]*COS9[4] + %mm1
          + in[16..17]*COS9[8] */
162 movq (%eax),%mm3
|
|
163 movq 16(%eax),%mm4
|
|
164 movd ASM_NAME(COS9)+8,%mm5
|
|
165 punpckldq %mm5,%mm5
|
|
166 pfmul %mm5,%mm4
|
|
167 pfadd %mm4,%mm3
|
|
168 movq 32(%eax),%mm4
|
|
169 movd ASM_NAME(COS9)+16,%mm5
|
|
170 punpckldq %mm5,%mm5
|
|
171 pfmul %mm5,%mm4
|
|
172 pfadd %mm4,%mm3
|
|
173 pfadd %mm1,%mm3
|
|
174 movq 64(%eax),%mm4
|
|
175 movd ASM_NAME(COS9)+32,%mm5
|
|
176 punpckldq %mm5,%mm5
|
|
177 pfmul %mm5,%mm4
|
|
178 pfadd %mm4,%mm3
|
|
/* Output step v = 0, t = %mm2 + %mm3.  Here the two s-products are swapped
   with pswapd so that one movq stores out2[8] and out2[9]; the later steps
   use two movd stores instead because their targets are not adjacent. */
179 movq %mm2,%mm4
|
|
180 pfadd %mm3,%mm4
|
|
181 movq %mm7,%mm5
|
|
182 punpckldq ASM_NAME(tfcos36)+0,%mm5
|
|
183 pfmul %mm5,%mm4
|
|
184 movq %mm4,%mm5
|
|
185 pfacc %mm5,%mm5
|
|
186 movd 108(%edx),%mm6
|
|
187 punpckldq 104(%edx),%mm6
|
|
188 pfmul %mm6,%mm5
|
|
189 pswapd %mm5,%mm5
|
|
190 movq %mm5,32(%ecx)
|
|
/* d = t.lo - t.hi: only the high lane of the pfsub result is meaningful;
   punpckhdq then copies it into both lanes. */
191 movq %mm4,%mm6
|
|
192 punpckldq %mm6,%mm5
|
|
193 pfsub %mm6,%mm5
|
|
194 punpckhdq %mm5,%mm5
|
|
195 movd 32(%edx),%mm6
|
|
196 punpckldq 36(%edx),%mm6
|
|
197 pfmul %mm6,%mm5
|
|
198 movd 32(%esi),%mm6
|
|
199 punpckldq 36(%esi),%mm6
|
|
200 pfadd %mm6,%mm5
|
|
201 movd %mm5,1024(%ebx)
|
|
202 psrlq $32,%mm5
|
|
203 movd %mm5,1152(%ebx)
|
|
/* Output step v = 8, t = %mm3 - %mm2 (same group A sums). */
204 movq %mm3,%mm4
|
|
205 pfsub %mm2,%mm4
|
|
206 movq %mm7,%mm5
|
|
207 punpckldq ASM_NAME(tfcos36)+32,%mm5
|
|
208 pfmul %mm5,%mm4
|
|
209 movq %mm4,%mm5
|
|
210 pfacc %mm5,%mm5
|
|
211 movd 140(%edx),%mm6
|
|
212 punpckldq 72(%edx),%mm6
|
|
213 pfmul %mm6,%mm5
|
|
214 movd %mm5,68(%ecx)
|
|
215 psrlq $32,%mm5
|
|
216 movd %mm5,0(%ecx)
|
|
217 movq %mm4,%mm6
|
|
218 punpckldq %mm6,%mm5
|
|
219 pfsub %mm6,%mm5
|
|
220 punpckhdq %mm5,%mm5
|
|
221 movd 0(%edx),%mm6
|
|
222 punpckldq 68(%edx),%mm6
|
|
223 pfmul %mm6,%mm5
|
|
224 movd 0(%esi),%mm6
|
|
225 punpckldq 68(%esi),%mm6
|
|
226 pfadd %mm6,%mm5
|
|
227 movd %mm5,0(%ebx)
|
|
228 psrlq $32,%mm5
|
|
229 movd %mm5,2176(%ebx)
|
|
/* Group B, first sum:
   %mm2 = (in[2..3] - in[10..11] - in[14..15]) * COS9[3] */
230 movq 8(%eax),%mm2
|
|
231 movq 40(%eax),%mm3
|
|
232 pfsub %mm3,%mm2
|
|
233 movq 56(%eax),%mm3
|
|
234 pfsub %mm3,%mm2
|
|
235 movd ASM_NAME(COS9)+12,%mm3
|
|
236 punpckldq %mm3,%mm3
|
|
237 pfmul %mm3,%mm2
|
|
/* Group B, second sum:
   %mm3 = (in[4..5] - in[8..9] - in[16..17]) * COS9[6] - in[12..13] + in[0..1] */
238 movq 16(%eax),%mm3
|
|
239 movq 32(%eax),%mm4
|
|
240 pfsub %mm4,%mm3
|
|
241 movq 64(%eax),%mm4
|
|
242 pfsub %mm4,%mm3
|
|
243 movd ASM_NAME(COS9)+24,%mm4
|
|
244 punpckldq %mm4,%mm4
|
|
245 pfmul %mm4,%mm3
|
|
246 movq 48(%eax),%mm4
|
|
247 pfsub %mm4,%mm3
|
|
248 movq (%eax),%mm4
|
|
249 pfadd %mm4,%mm3
|
|
/* Output step v = 1, t = %mm2 + %mm3. */
250 movq %mm2,%mm4
|
|
251 pfadd %mm3,%mm4
|
|
252 movq %mm7,%mm5
|
|
253 punpckldq ASM_NAME(tfcos36)+4,%mm5
|
|
254 pfmul %mm5,%mm4
|
|
255 movq %mm4,%mm5
|
|
256 pfacc %mm5,%mm5
|
|
257 movd 112(%edx),%mm6
|
|
258 punpckldq 100(%edx),%mm6
|
|
259 pfmul %mm6,%mm5
|
|
260 movd %mm5,40(%ecx)
|
|
261 psrlq $32,%mm5
|
|
262 movd %mm5,28(%ecx)
|
|
263 movq %mm4,%mm6
|
|
264 punpckldq %mm6,%mm5
|
|
265 pfsub %mm6,%mm5
|
|
266 punpckhdq %mm5,%mm5
|
|
267 movd 28(%edx),%mm6
|
|
268 punpckldq 40(%edx),%mm6
|
|
269 pfmul %mm6,%mm5
|
|
270 movd 28(%esi),%mm6
|
|
271 punpckldq 40(%esi),%mm6
|
|
272 pfadd %mm6,%mm5
|
|
273 movd %mm5,896(%ebx)
|
|
274 psrlq $32,%mm5
|
|
275 movd %mm5,1280(%ebx)
|
|
/* Output step v = 7, t = %mm3 - %mm2 (same group B sums). */
276 movq %mm3,%mm4
|
|
277 pfsub %mm2,%mm4
|
|
278 movq %mm7,%mm5
|
|
279 punpckldq ASM_NAME(tfcos36)+28,%mm5
|
|
280 pfmul %mm5,%mm4
|
|
281 movq %mm4,%mm5
|
|
282 pfacc %mm5,%mm5
|
|
283 movd 136(%edx),%mm6
|
|
284 punpckldq 76(%edx),%mm6
|
|
285 pfmul %mm6,%mm5
|
|
286 movd %mm5,64(%ecx)
|
|
287 psrlq $32,%mm5
|
|
288 movd %mm5,4(%ecx)
|
|
289 movq %mm4,%mm6
|
|
290 punpckldq %mm6,%mm5
|
|
291 pfsub %mm6,%mm5
|
|
292 punpckhdq %mm5,%mm5
|
|
293 movd 4(%edx),%mm6
|
|
294 punpckldq 64(%edx),%mm6
|
|
295 pfmul %mm6,%mm5
|
|
296 movd 4(%esi),%mm6
|
|
297 punpckldq 64(%esi),%mm6
|
|
298 pfadd %mm6,%mm5
|
|
299 movd %mm5,128(%ebx)
|
|
300 psrlq $32,%mm5
|
|
301 movd %mm5,2048(%ebx)
|
|
/* Group C, first sum:
   %mm2 = in[2..3]*COS9[5] - %mm0 - in[10..11]*COS9[7] + in[14..15]*COS9[1] */
302 movq 8(%eax),%mm2
|
|
303 movd ASM_NAME(COS9)+20,%mm3
|
|
304 punpckldq %mm3,%mm3
|
|
305 pfmul %mm3,%mm2
|
|
306 pfsub %mm0,%mm2
|
|
307 movq 40(%eax),%mm3
|
|
308 movd ASM_NAME(COS9)+28,%mm4
|
|
309 punpckldq %mm4,%mm4
|
|
310 pfmul %mm4,%mm3
|
|
311 pfsub %mm3,%mm2
|
|
312 movq 56(%eax),%mm3
|
|
313 movd ASM_NAME(COS9)+4,%mm4
|
|
314 punpckldq %mm4,%mm4
|
|
315 pfmul %mm4,%mm3
|
|
316 pfadd %mm3,%mm2
|
|
/* Group C, second sum:
   %mm3 = in[0..1] - in[4..5]*COS9[8] - in[8..9]*COS9[2] + %mm1
          + in[16..17]*COS9[4] */
317 movq (%eax),%mm3
|
|
318 movq 16(%eax),%mm4
|
|
319 movd ASM_NAME(COS9)+32,%mm5
|
|
320 punpckldq %mm5,%mm5
|
|
321 pfmul %mm5,%mm4
|
|
322 pfsub %mm4,%mm3
|
|
323 movq 32(%eax),%mm4
|
|
324 movd ASM_NAME(COS9)+8,%mm5
|
|
325 punpckldq %mm5,%mm5
|
|
326 pfmul %mm5,%mm4
|
|
327 pfsub %mm4,%mm3
|
|
328 pfadd %mm1,%mm3
|
|
329 movq 64(%eax),%mm4
|
|
330 movd ASM_NAME(COS9)+16,%mm5
|
|
331 punpckldq %mm5,%mm5
|
|
332 pfmul %mm5,%mm4
|
|
333 pfadd %mm4,%mm3
|
|
/* Output step v = 2, t = %mm2 + %mm3. */
334 movq %mm2,%mm4
|
|
335 pfadd %mm3,%mm4
|
|
336 movq %mm7,%mm5
|
|
337 punpckldq ASM_NAME(tfcos36)+8,%mm5
|
|
338 pfmul %mm5,%mm4
|
|
339 movq %mm4,%mm5
|
|
340 pfacc %mm5,%mm5
|
|
341 movd 116(%edx),%mm6
|
|
342 punpckldq 96(%edx),%mm6
|
|
343 pfmul %mm6,%mm5
|
|
344 movd %mm5,44(%ecx)
|
|
345 psrlq $32,%mm5
|
|
346 movd %mm5,24(%ecx)
|
|
347 movq %mm4,%mm6
|
|
348 punpckldq %mm6,%mm5
|
|
349 pfsub %mm6,%mm5
|
|
350 punpckhdq %mm5,%mm5
|
|
351 movd 24(%edx),%mm6
|
|
352 punpckldq 44(%edx),%mm6
|
|
353 pfmul %mm6,%mm5
|
|
354 movd 24(%esi),%mm6
|
|
355 punpckldq 44(%esi),%mm6
|
|
356 pfadd %mm6,%mm5
|
|
357 movd %mm5,768(%ebx)
|
|
358 psrlq $32,%mm5
|
|
359 movd %mm5,1408(%ebx)
|
|
/* Output step v = 6, t = %mm3 - %mm2 (same group C sums). */
360 movq %mm3,%mm4
|
|
361 pfsub %mm2,%mm4
|
|
362 movq %mm7,%mm5
|
|
363 punpckldq ASM_NAME(tfcos36)+24,%mm5
|
|
364 pfmul %mm5,%mm4
|
|
365 movq %mm4,%mm5
|
|
366 pfacc %mm5,%mm5
|
|
367 movd 132(%edx),%mm6
|
|
368 punpckldq 80(%edx),%mm6
|
|
369 pfmul %mm6,%mm5
|
|
370 movd %mm5,60(%ecx)
|
|
371 psrlq $32,%mm5
|
|
372 movd %mm5,8(%ecx)
|
|
373 movq %mm4,%mm6
|
|
374 punpckldq %mm6,%mm5
|
|
375 pfsub %mm6,%mm5
|
|
376 punpckhdq %mm5,%mm5
|
|
377 movd 8(%edx),%mm6
|
|
378 punpckldq 60(%edx),%mm6
|
|
379 pfmul %mm6,%mm5
|
|
380 movd 8(%esi),%mm6
|
|
381 punpckldq 60(%esi),%mm6
|
|
382 pfadd %mm6,%mm5
|
|
383 movd %mm5,256(%ebx)
|
|
384 psrlq $32,%mm5
|
|
385 movd %mm5,1920(%ebx)
|
|
/* Group D, first sum:
   %mm2 = in[2..3]*COS9[7] - %mm0 + in[10..11]*COS9[1] - in[14..15]*COS9[5] */
386 movq 8(%eax),%mm2
|
|
387 movd ASM_NAME(COS9)+28,%mm3
|
|
388 punpckldq %mm3,%mm3
|
|
389 pfmul %mm3,%mm2
|
|
390 pfsub %mm0,%mm2
|
|
391 movq 40(%eax),%mm3
|
|
392 movd ASM_NAME(COS9)+4,%mm4
|
|
393 punpckldq %mm4,%mm4
|
|
394 pfmul %mm4,%mm3
|
|
395 pfadd %mm3,%mm2
|
|
396 movq 56(%eax),%mm3
|
|
397 movd ASM_NAME(COS9)+20,%mm4
|
|
398 punpckldq %mm4,%mm4
|
|
399 pfmul %mm4,%mm3
|
|
400 pfsub %mm3,%mm2
|
|
/* Group D, second sum:
   %mm3 = in[0..1] - in[4..5]*COS9[4] + in[8..9]*COS9[8] + %mm1
          - in[16..17]*COS9[2] */
401 movq (%eax),%mm3
|
|
402 movq 16(%eax),%mm4
|
|
403 movd ASM_NAME(COS9)+16,%mm5
|
|
404 punpckldq %mm5,%mm5
|
|
405 pfmul %mm5,%mm4
|
|
406 pfsub %mm4,%mm3
|
|
407 movq 32(%eax),%mm4
|
|
408 movd ASM_NAME(COS9)+32,%mm5
|
|
409 punpckldq %mm5,%mm5
|
|
410 pfmul %mm5,%mm4
|
|
411 pfadd %mm4,%mm3
|
|
412 pfadd %mm1,%mm3
|
|
413 movq 64(%eax),%mm4
|
|
414 movd ASM_NAME(COS9)+8,%mm5
|
|
415 punpckldq %mm5,%mm5
|
|
416 pfmul %mm5,%mm4
|
|
417 pfsub %mm4,%mm3
|
|
/* Output step v = 3, t = %mm2 + %mm3. */
418 movq %mm2,%mm4
|
|
419 pfadd %mm3,%mm4
|
|
420 movq %mm7,%mm5
|
|
421 punpckldq ASM_NAME(tfcos36)+12,%mm5
|
|
422 pfmul %mm5,%mm4
|
|
423 movq %mm4,%mm5
|
|
424 pfacc %mm5,%mm5
|
|
425 movd 120(%edx),%mm6
|
|
426 punpckldq 92(%edx),%mm6
|
|
427 pfmul %mm6,%mm5
|
|
428 movd %mm5,48(%ecx)
|
|
429 psrlq $32,%mm5
|
|
430 movd %mm5,20(%ecx)
|
|
431 movq %mm4,%mm6
|
|
432 punpckldq %mm6,%mm5
|
|
433 pfsub %mm6,%mm5
|
|
434 punpckhdq %mm5,%mm5
|
|
435 movd 20(%edx),%mm6
|
|
436 punpckldq 48(%edx),%mm6
|
|
437 pfmul %mm6,%mm5
|
|
438 movd 20(%esi),%mm6
|
|
439 punpckldq 48(%esi),%mm6
|
|
440 pfadd %mm6,%mm5
|
|
441 movd %mm5,640(%ebx)
|
|
442 psrlq $32,%mm5
|
|
443 movd %mm5,1536(%ebx)
|
|
/* Output step v = 5, t = %mm3 - %mm2 (same group D sums). */
444 movq %mm3,%mm4
|
|
445 pfsub %mm2,%mm4
|
|
446 movq %mm7,%mm5
|
|
447 punpckldq ASM_NAME(tfcos36)+20,%mm5
|
|
448 pfmul %mm5,%mm4
|
|
449 movq %mm4,%mm5
|
|
450 pfacc %mm5,%mm5
|
|
451 movd 128(%edx),%mm6
|
|
452 punpckldq 84(%edx),%mm6
|
|
453 pfmul %mm6,%mm5
|
|
454 movd %mm5,56(%ecx)
|
|
455 psrlq $32,%mm5
|
|
456 movd %mm5,12(%ecx)
|
|
457 movq %mm4,%mm6
|
|
458 punpckldq %mm6,%mm5
|
|
459 pfsub %mm6,%mm5
|
|
460 punpckhdq %mm5,%mm5
|
|
461 movd 12(%edx),%mm6
|
|
462 punpckldq 56(%edx),%mm6
|
|
463 pfmul %mm6,%mm5
|
|
464 movd 12(%esi),%mm6
|
|
465 punpckldq 56(%esi),%mm6
|
|
466 pfadd %mm6,%mm5
|
|
467 movd %mm5,384(%ebx)
|
|
468 psrlq $32,%mm5
|
|
469 movd %mm5,1792(%ebx)
|
|
/* Group E (single output step, v = 4):
   t = in[0..1] - in[4..5] + in[8..9] - in[12..13] + in[16..17],
   built directly in %mm4 without COS9 factors. */
470 movq (%eax),%mm4
|
|
471 movq 16(%eax),%mm3
|
|
472 pfsub %mm3,%mm4
|
|
473 movq 32(%eax),%mm3
|
|
474 pfadd %mm3,%mm4
|
|
475 movq 48(%eax),%mm3
|
|
476 pfsub %mm3,%mm4
|
|
477 movq 64(%eax),%mm3
|
|
478 pfadd %mm3,%mm4
|
|
479 movq %mm7,%mm5
|
|
480 punpckldq ASM_NAME(tfcos36)+16,%mm5
|
|
481 pfmul %mm5,%mm4
|
|
482 movq %mm4,%mm5
|
|
483 pfacc %mm5,%mm5
|
|
484 movd 124(%edx),%mm6
|
|
485 punpckldq 88(%edx),%mm6
|
|
486 pfmul %mm6,%mm5
|
|
487 movd %mm5,52(%ecx)
|
|
488 psrlq $32,%mm5
|
|
489 movd %mm5,16(%ecx)
|
|
490 movq %mm4,%mm6
|
|
491 punpckldq %mm6,%mm5
|
|
492 pfsub %mm6,%mm5
|
|
493 punpckhdq %mm5,%mm5
|
|
494 movd 16(%edx),%mm6
|
|
495 punpckldq 52(%edx),%mm6
|
|
496 pfmul %mm6,%mm5
|
|
497 movd 16(%esi),%mm6
|
|
498 punpckldq 52(%esi),%mm6
|
|
499 pfadd %mm6,%mm5
|
|
500 movd %mm5,512(%ebx)
|
|
501 psrlq $32,%mm5
|
|
502 movd %mm5,1664(%ebx)
|
|
/* Leave MMX/3DNow! state before returning to the caller. */
503 femms
|
|
504
|
|
505 /* NO_APP */
|
|
/* Epilogue: restore %ebx and %esi in reverse push order, drop the frame. */
506 popl %ebx
|
|
507 popl %esi
|
|
508 leave
|
|
509 ret
|
|
510 /* .size ASM_NAME(dct36_3dnowext), .-ASM_NAME(dct36_3dnowext) */
|
|
511
|
|
512 /* Mark non-executable stack: on Linux/ELF builds only, emit an empty .note.GNU-stack section (no flags, %progbits) for this object. */
|
|
513 #if defined(__linux__) && defined(__ELF__)
|
|
514 .section .note.GNU-stack,"",%progbits
|
|
515 #endif
|