Mercurial > SDL_sound_CoreAudio
comparison decoders/libmpg123/dct64_3dnow.S @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems with MP3s not seeking, or most modern MP3s not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
561:f2985e08589c | 562:7e08477b0fc1 |
---|---|
1 /* | |
2 dct64_3dnow.s: Replacement of dct64() with AMD's 3DNow! SIMD operations support | |
3 | |
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 | |
5 see COPYING and AUTHORS files in distribution or http://mpg123.org | |
6 initially written by Syuuhei Kashiyama | |
7 | |
8 Original "license" statement: | |
9 The author of this program disclaim whole expressed or implied | |
10 warranties with regard to this program, and in no event shall the | |
11 author of this program liable to whatever resulted from the use of | |
12 this program. Use it at your own risk. | |
13 */ | |
14 | |
15 #include "mangle.h" | |
16 | |
17 .globl ASM_NAME(dct64_3dnow) /* export the entry point; ASM_NAME is presumably supplied by mangle.h for symbol decoration */ | |
18 /* .type ASM_NAME(dct64_3dnow),@function */ | |
19 ASM_NAME(dct64_3dnow): /* takes three pointer arguments on the stack: 1st and 2nd are output buffers, 3rd is an input of 32 floats. NOTE(review): C prototype is presumably (float *out0, float *out1, float *samples) -- confirm against the C declaration */ | |
20 subl $256,%esp /* reserve 256 bytes of stack scratch, used as two 32-float work buffers */ | |
21 pushl %ebp /* save the four registers this routine overwrites (16 bytes in total) */ | |
22 pushl %edi | |
23 pushl %esi | |
24 pushl %ebx | |
25 leal 16(%esp),%ebx /* ebx = first work buffer, just above the four saved registers */ | |
26 movl 284(%esp),%edi /* edi = 3rd argument (input); arguments start at 16 saved + 256 scratch + 4 return address = 276 */ | |
27 movl 276(%esp),%ebp /* ebp = 1st argument (first output buffer) */ | |
28 movl 280(%esp),%edx /* edx = 2nd argument (second output buffer) */ | |
29 leal 128(%ebx),%esi /* esi = second work buffer, 128 bytes after the first */ | |
30 | |
31 /* femms -- left disabled, so the MMX/3DNow! state is not reset on entry; presumably the caller takes care of it (TODO confirm) */ | |
32 | |
33 /* 1: 32-point butterfly, input (edi) -> first work buffer (ebx). For i = 0..15: ebx[i] = in[i] + in[31-i] and ebx[31-i] = (in[i] - in[31-i]) * tab[i]. Each group below handles two values of i. */ | |
34 movl ASM_NAME(pnts),%eax /* eax = tab = first pointer stored at pnts */ | |
35 movq 0(%edi),%mm0 /* i = 0,1: mm0 = (in[0], in[1]) */ | |
36 movq %mm0,%mm1 /* keep a copy for the difference */ | |
37 movd 124(%edi),%mm2 /* mm2 low lane = in[31] */ | |
38 punpckldq 120(%edi),%mm2 /* mm2 = (in[31], in[30]): the mirrored pair */ | |
39 movq 0(%eax),%mm3 /* mm3 = (tab[0], tab[1]) */ | |
40 pfadd %mm2,%mm0 | |
41 movq %mm0,0(%ebx) /* sums are stored in forward order */ | |
42 pfsub %mm2,%mm1 | |
43 pfmul %mm3,%mm1 | |
44 movd %mm1,124(%ebx) /* scaled differences are stored mirrored: low lane -> ebx[31] */ | |
45 psrlq $32,%mm1 /* bring the high lane down */ | |
46 movd %mm1,120(%ebx) /* high lane -> ebx[30] */ | |
47 movq 8(%edi),%mm4 /* i = 2,3 (same pattern, using mm4..mm7) */ | |
48 movq %mm4,%mm5 | |
49 movd 116(%edi),%mm6 | |
50 punpckldq 112(%edi),%mm6 | |
51 movq 8(%eax),%mm7 | |
52 pfadd %mm6,%mm4 | |
53 movq %mm4,8(%ebx) | |
54 pfsub %mm6,%mm5 | |
55 pfmul %mm7,%mm5 | |
56 movd %mm5,116(%ebx) | |
57 psrlq $32,%mm5 | |
58 movd %mm5,112(%ebx) | |
59 movq 16(%edi),%mm0 /* i = 4,5 */ | |
60 movq %mm0,%mm1 | |
61 movd 108(%edi),%mm2 | |
62 punpckldq 104(%edi),%mm2 | |
63 movq 16(%eax),%mm3 | |
64 pfadd %mm2,%mm0 | |
65 movq %mm0,16(%ebx) | |
66 pfsub %mm2,%mm1 | |
67 pfmul %mm3,%mm1 | |
68 movd %mm1,108(%ebx) | |
69 psrlq $32,%mm1 | |
70 movd %mm1,104(%ebx) | |
71 movq 24(%edi),%mm4 /* i = 6,7 */ | |
72 movq %mm4,%mm5 | |
73 movd 100(%edi),%mm6 | |
74 punpckldq 96(%edi),%mm6 | |
75 movq 24(%eax),%mm7 | |
76 pfadd %mm6,%mm4 | |
77 movq %mm4,24(%ebx) | |
78 pfsub %mm6,%mm5 | |
79 pfmul %mm7,%mm5 | |
80 movd %mm5,100(%ebx) | |
81 psrlq $32,%mm5 | |
82 movd %mm5,96(%ebx) | |
83 movq 32(%edi),%mm0 /* i = 8,9 */ | |
84 movq %mm0,%mm1 | |
85 movd 92(%edi),%mm2 | |
86 punpckldq 88(%edi),%mm2 | |
87 movq 32(%eax),%mm3 | |
88 pfadd %mm2,%mm0 | |
89 movq %mm0,32(%ebx) | |
90 pfsub %mm2,%mm1 | |
91 pfmul %mm3,%mm1 | |
92 movd %mm1,92(%ebx) | |
93 psrlq $32,%mm1 | |
94 movd %mm1,88(%ebx) | |
95 movq 40(%edi),%mm4 /* i = 10,11 */ | |
96 movq %mm4,%mm5 | |
97 movd 84(%edi),%mm6 | |
98 punpckldq 80(%edi),%mm6 | |
99 movq 40(%eax),%mm7 | |
100 pfadd %mm6,%mm4 | |
101 movq %mm4,40(%ebx) | |
102 pfsub %mm6,%mm5 | |
103 pfmul %mm7,%mm5 | |
104 movd %mm5,84(%ebx) | |
105 psrlq $32,%mm5 | |
106 movd %mm5,80(%ebx) | |
107 movq 48(%edi),%mm0 /* i = 12,13 */ | |
108 movq %mm0,%mm1 | |
109 movd 76(%edi),%mm2 | |
110 punpckldq 72(%edi),%mm2 | |
111 movq 48(%eax),%mm3 | |
112 pfadd %mm2,%mm0 | |
113 movq %mm0,48(%ebx) | |
114 pfsub %mm2,%mm1 | |
115 pfmul %mm3,%mm1 | |
116 movd %mm1,76(%ebx) | |
117 psrlq $32,%mm1 | |
118 movd %mm1,72(%ebx) | |
119 movq 56(%edi),%mm4 /* i = 14,15 */ | |
120 movq %mm4,%mm5 | |
121 movd 68(%edi),%mm6 | |
122 punpckldq 64(%edi),%mm6 | |
123 movq 56(%eax),%mm7 | |
124 pfadd %mm6,%mm4 | |
125 movq %mm4,56(%ebx) | |
126 pfsub %mm6,%mm5 | |
127 pfmul %mm7,%mm5 | |
128 movd %mm5,68(%ebx) | |
129 psrlq $32,%mm5 | |
130 movd %mm5,64(%ebx) | |
131 | |
132 /* 2: two 16-point butterflies, first work buffer (ebx) -> second work buffer (esi). Sums are stored forward and scaled differences mirrored; groups in the upper half (elements 16..31) use pfsubr, which reverses the subtraction. */ | |
133 movl ASM_NAME(pnts)+4,%eax /* eax = tab = second pointer stored at pnts */ | |
134 /* 0,14: elements 0,1 paired with 15,14 */ | |
135 movq 0(%ebx),%mm0 | |
136 movq %mm0,%mm1 | |
137 movd 60(%ebx),%mm2 | |
138 punpckldq 56(%ebx),%mm2 /* mm2 = (ebx[15], ebx[14]) */ | |
139 movq 0(%eax),%mm3 /* mm3 = (tab[0], tab[1]); also used by the 16,30 group */ | |
140 pfadd %mm2,%mm0 | |
141 movq %mm0,0(%esi) | |
142 pfsub %mm2,%mm1 | |
143 pfmul %mm3,%mm1 | |
144 movd %mm1,60(%esi) | |
145 psrlq $32,%mm1 | |
146 movd %mm1,56(%esi) | |
147 /* 16,30: elements 16,17 paired with 31,30; mm3 still holds tab[0..1] */ | |
148 movq 64(%ebx),%mm0 | |
149 movq %mm0,%mm1 | |
150 movd 124(%ebx),%mm2 | |
151 punpckldq 120(%ebx),%mm2 | |
152 pfadd %mm2,%mm0 | |
153 movq %mm0,64(%esi) | |
154 pfsubr %mm2,%mm1 /* reversed: mm1 = mm2 - mm1 */ | |
155 pfmul %mm3,%mm1 | |
156 movd %mm1,124(%esi) | |
157 psrlq $32,%mm1 | |
158 movd %mm1,120(%esi) | |
159 /* 2,12: elements 2,3 paired with 13,12 */ | |
160 movq 8(%ebx),%mm4 | |
161 movq %mm4,%mm5 | |
162 movd 52(%ebx),%mm6 | |
163 punpckldq 48(%ebx),%mm6 | |
164 movq 8(%eax),%mm7 /* mm7 = (tab[2], tab[3]); also used by the 18,28 group */ | |
165 pfadd %mm6,%mm4 | |
166 movq %mm4,8(%esi) | |
167 pfsub %mm6,%mm5 | |
168 pfmul %mm7,%mm5 | |
169 movd %mm5,52(%esi) | |
170 psrlq $32,%mm5 | |
171 movd %mm5,48(%esi) | |
172 /* 18,28: elements 18,19 paired with 29,28; mm7 still holds tab[2..3] */ | |
173 movq 72(%ebx),%mm4 | |
174 movq %mm4,%mm5 | |
175 movd 116(%ebx),%mm6 | |
176 punpckldq 112(%ebx),%mm6 | |
177 pfadd %mm6,%mm4 | |
178 movq %mm4,72(%esi) | |
179 pfsubr %mm6,%mm5 | |
180 pfmul %mm7,%mm5 | |
181 movd %mm5,116(%esi) | |
182 psrlq $32,%mm5 | |
183 movd %mm5,112(%esi) | |
184 /* 4,10: elements 4,5 paired with 11,10 */ | |
185 movq 16(%ebx),%mm0 | |
186 movq %mm0,%mm1 | |
187 movd 44(%ebx),%mm2 | |
188 punpckldq 40(%ebx),%mm2 | |
189 movq 16(%eax),%mm3 /* mm3 = (tab[4], tab[5]); also used by the 20,26 group */ | |
190 pfadd %mm2,%mm0 | |
191 movq %mm0,16(%esi) | |
192 pfsub %mm2,%mm1 | |
193 pfmul %mm3,%mm1 | |
194 movd %mm1,44(%esi) | |
195 psrlq $32,%mm1 | |
196 movd %mm1,40(%esi) | |
197 /* 20,26: elements 20,21 paired with 27,26; mm3 still holds tab[4..5] */ | |
198 movq 80(%ebx),%mm0 | |
199 movq %mm0,%mm1 | |
200 movd 108(%ebx),%mm2 | |
201 punpckldq 104(%ebx),%mm2 | |
202 pfadd %mm2,%mm0 | |
203 movq %mm0,80(%esi) | |
204 pfsubr %mm2,%mm1 | |
205 pfmul %mm3,%mm1 | |
206 movd %mm1,108(%esi) | |
207 psrlq $32,%mm1 | |
208 movd %mm1,104(%esi) | |
209 /* 6,8: elements 6,7 paired with 9,8 */ | |
210 movq 24(%ebx),%mm4 | |
211 movq %mm4,%mm5 | |
212 movd 36(%ebx),%mm6 | |
213 punpckldq 32(%ebx),%mm6 | |
214 movq 24(%eax),%mm7 /* mm7 = (tab[6], tab[7]); also used by the 22,24 group */ | |
215 pfadd %mm6,%mm4 | |
216 movq %mm4,24(%esi) | |
217 pfsub %mm6,%mm5 | |
218 pfmul %mm7,%mm5 | |
219 movd %mm5,36(%esi) | |
220 psrlq $32,%mm5 | |
221 movd %mm5,32(%esi) | |
222 /* 22,24: elements 22,23 paired with 25,24; mm7 still holds tab[6..7] */ | |
223 movq 88(%ebx),%mm4 | |
224 movq %mm4,%mm5 | |
225 movd 100(%ebx),%mm6 | |
226 punpckldq 96(%ebx),%mm6 | |
227 pfadd %mm6,%mm4 | |
228 movq %mm4,88(%esi) | |
229 pfsubr %mm6,%mm5 | |
230 pfmul %mm7,%mm5 | |
231 movd %mm5,100(%esi) | |
232 psrlq $32,%mm5 | |
233 movd %mm5,96(%esi) | |
234 | |
235 /* 3: four 8-point butterflies, second work buffer (esi) -> first work buffer (ebx). Groups starting at elements 0 and 16 use pfsub; groups starting at 8 and 24 use pfsubr (reversed subtraction). */ | |
236 movl ASM_NAME(pnts)+8,%eax /* eax = tab = third pointer stored at pnts */ | |
237 movq 0(%eax),%mm0 /* mm0 = (tab[0], tab[1]), kept for the whole stage */ | |
238 movq 8(%eax),%mm1 /* mm1 = (tab[2], tab[3]), kept for the whole stage */ | |
239 /* 0,6: elements 0,1 paired with 7,6 */ | |
240 movq 0(%esi),%mm2 | |
241 movq %mm2,%mm3 | |
242 movd 28(%esi),%mm4 | |
243 punpckldq 24(%esi),%mm4 /* mm4 = (esi[7], esi[6]) */ | |
244 pfadd %mm4,%mm2 | |
245 pfsub %mm4,%mm3 | |
246 pfmul %mm0,%mm3 | |
247 movq %mm2,0(%ebx) /* sums stored forward */ | |
248 movd %mm3,28(%ebx) /* scaled differences stored mirrored */ | |
249 psrlq $32,%mm3 | |
250 movd %mm3,24(%ebx) | |
251 /* 2,4: elements 2,3 paired with 5,4 */ | |
252 movq 8(%esi),%mm5 | |
253 movq %mm5,%mm6 | |
254 movd 20(%esi),%mm7 | |
255 punpckldq 16(%esi),%mm7 | |
256 pfadd %mm7,%mm5 | |
257 pfsub %mm7,%mm6 | |
258 pfmul %mm1,%mm6 | |
259 movq %mm5,8(%ebx) | |
260 movd %mm6,20(%ebx) | |
261 psrlq $32,%mm6 | |
262 movd %mm6,16(%ebx) | |
263 /* 8,14: elements 8,9 paired with 15,14 (reversed subtraction) */ | |
264 movq 32(%esi),%mm2 | |
265 movq %mm2,%mm3 | |
266 movd 60(%esi),%mm4 | |
267 punpckldq 56(%esi),%mm4 | |
268 pfadd %mm4,%mm2 | |
269 pfsubr %mm4,%mm3 /* mm3 = mm4 - mm3 */ | |
270 pfmul %mm0,%mm3 | |
271 movq %mm2,32(%ebx) | |
272 movd %mm3,60(%ebx) | |
273 psrlq $32,%mm3 | |
274 movd %mm3,56(%ebx) | |
275 /* 10,12: elements 10,11 paired with 13,12 (reversed subtraction) */ | |
276 movq 40(%esi),%mm5 | |
277 movq %mm5,%mm6 | |
278 movd 52(%esi),%mm7 | |
279 punpckldq 48(%esi),%mm7 | |
280 pfadd %mm7,%mm5 | |
281 pfsubr %mm7,%mm6 | |
282 pfmul %mm1,%mm6 | |
283 movq %mm5,40(%ebx) | |
284 movd %mm6,52(%ebx) | |
285 psrlq $32,%mm6 | |
286 movd %mm6,48(%ebx) | |
287 /* 16,22: elements 16,17 paired with 23,22 */ | |
288 movq 64(%esi),%mm2 | |
289 movq %mm2,%mm3 | |
290 movd 92(%esi),%mm4 | |
291 punpckldq 88(%esi),%mm4 | |
292 pfadd %mm4,%mm2 | |
293 pfsub %mm4,%mm3 | |
294 pfmul %mm0,%mm3 | |
295 movq %mm2,64(%ebx) | |
296 movd %mm3,92(%ebx) | |
297 psrlq $32,%mm3 | |
298 movd %mm3,88(%ebx) | |
299 /* 18,20: elements 18,19 paired with 21,20 */ | |
300 movq 72(%esi),%mm5 | |
301 movq %mm5,%mm6 | |
302 movd 84(%esi),%mm7 | |
303 punpckldq 80(%esi),%mm7 | |
304 pfadd %mm7,%mm5 | |
305 pfsub %mm7,%mm6 | |
306 pfmul %mm1,%mm6 | |
307 movq %mm5,72(%ebx) | |
308 movd %mm6,84(%ebx) | |
309 psrlq $32,%mm6 | |
310 movd %mm6,80(%ebx) | |
311 /* 24,30: elements 24,25 paired with 31,30 (reversed subtraction) */ | |
312 movq 96(%esi),%mm2 | |
313 movq %mm2,%mm3 | |
314 movd 124(%esi),%mm4 | |
315 punpckldq 120(%esi),%mm4 | |
316 pfadd %mm4,%mm2 | |
317 pfsubr %mm4,%mm3 | |
318 pfmul %mm0,%mm3 | |
319 movq %mm2,96(%ebx) | |
320 movd %mm3,124(%ebx) | |
321 psrlq $32,%mm3 | |
322 movd %mm3,120(%ebx) | |
323 /* 26,28: elements 26,27 paired with 29,28 (reversed subtraction) */ | |
324 movq 104(%esi),%mm5 | |
325 movq %mm5,%mm6 | |
326 movd 116(%esi),%mm7 | |
327 punpckldq 112(%esi),%mm7 | |
328 pfadd %mm7,%mm5 | |
329 pfsubr %mm7,%mm6 | |
330 pfmul %mm1,%mm6 | |
331 movq %mm5,104(%ebx) | |
332 movd %mm6,116(%ebx) | |
333 psrlq $32,%mm6 | |
334 movd %mm6,112(%ebx) | |
335 | |
336 /* 4: eight 4-point butterflies, first work buffer (ebx) -> second work buffer (esi). Each group pairs elements g,g+1 with g+3,g+2; groups alternate between pfsub (g = 0, 8, 16, 24) and pfsubr (g = 4, 12, 20, 28). */ | |
337 movl ASM_NAME(pnts)+12,%eax /* eax = tab = fourth pointer stored at pnts */ | |
338 movq 0(%eax),%mm0 /* mm0 = (tab[0], tab[1]), the only factors used in this stage */ | |
339 /* 0: group starting at element 0 */ | |
340 movq 0(%ebx),%mm1 | |
341 movq %mm1,%mm2 | |
342 movd 12(%ebx),%mm3 | |
343 punpckldq 8(%ebx),%mm3 /* mm3 = (ebx[3], ebx[2]) */ | |
344 pfadd %mm3,%mm1 | |
345 pfsub %mm3,%mm2 | |
346 pfmul %mm0,%mm2 | |
347 movq %mm1,0(%esi) /* sums stored forward */ | |
348 movd %mm2,12(%esi) /* scaled differences stored mirrored */ | |
349 psrlq $32,%mm2 | |
350 movd %mm2,8(%esi) | |
351 /* 4: group starting at element 4 (reversed subtraction) */ | |
352 movq 16(%ebx),%mm4 | |
353 movq %mm4,%mm5 | |
354 movd 28(%ebx),%mm6 | |
355 punpckldq 24(%ebx),%mm6 | |
356 pfadd %mm6,%mm4 | |
357 pfsubr %mm6,%mm5 /* mm5 = mm6 - mm5 */ | |
358 pfmul %mm0,%mm5 | |
359 movq %mm4,16(%esi) | |
360 movd %mm5,28(%esi) | |
361 psrlq $32,%mm5 | |
362 movd %mm5,24(%esi) | |
363 /* 8: group starting at element 8 */ | |
364 movq 32(%ebx),%mm1 | |
365 movq %mm1,%mm2 | |
366 movd 44(%ebx),%mm3 | |
367 punpckldq 40(%ebx),%mm3 | |
368 pfadd %mm3,%mm1 | |
369 pfsub %mm3,%mm2 | |
370 pfmul %mm0,%mm2 | |
371 movq %mm1,32(%esi) | |
372 movd %mm2,44(%esi) | |
373 psrlq $32,%mm2 | |
374 movd %mm2,40(%esi) | |
375 /* 12: group starting at element 12 (reversed subtraction) */ | |
376 movq 48(%ebx),%mm4 | |
377 movq %mm4,%mm5 | |
378 movd 60(%ebx),%mm6 | |
379 punpckldq 56(%ebx),%mm6 | |
380 pfadd %mm6,%mm4 | |
381 pfsubr %mm6,%mm5 | |
382 pfmul %mm0,%mm5 | |
383 movq %mm4,48(%esi) | |
384 movd %mm5,60(%esi) | |
385 psrlq $32,%mm5 | |
386 movd %mm5,56(%esi) | |
387 /* 16: group starting at element 16 */ | |
388 movq 64(%ebx),%mm1 | |
389 movq %mm1,%mm2 | |
390 movd 76(%ebx),%mm3 | |
391 punpckldq 72(%ebx),%mm3 | |
392 pfadd %mm3,%mm1 | |
393 pfsub %mm3,%mm2 | |
394 pfmul %mm0,%mm2 | |
395 movq %mm1,64(%esi) | |
396 movd %mm2,76(%esi) | |
397 psrlq $32,%mm2 | |
398 movd %mm2,72(%esi) | |
399 /* 20: group starting at element 20 (reversed subtraction) */ | |
400 movq 80(%ebx),%mm4 | |
401 movq %mm4,%mm5 | |
402 movd 92(%ebx),%mm6 | |
403 punpckldq 88(%ebx),%mm6 | |
404 pfadd %mm6,%mm4 | |
405 pfsubr %mm6,%mm5 | |
406 pfmul %mm0,%mm5 | |
407 movq %mm4,80(%esi) | |
408 movd %mm5,92(%esi) | |
409 psrlq $32,%mm5 | |
410 movd %mm5,88(%esi) | |
411 /* 24: group starting at element 24 */ | |
412 movq 96(%ebx),%mm1 | |
413 movq %mm1,%mm2 | |
414 movd 108(%ebx),%mm3 | |
415 punpckldq 104(%ebx),%mm3 | |
416 pfadd %mm3,%mm1 | |
417 pfsub %mm3,%mm2 | |
418 pfmul %mm0,%mm2 | |
419 movq %mm1,96(%esi) | |
420 movd %mm2,108(%esi) | |
421 psrlq $32,%mm2 | |
422 movd %mm2,104(%esi) | |
423 /* 28: group starting at element 28 (reversed subtraction) */ | |
424 movq 112(%ebx),%mm4 | |
425 movq %mm4,%mm5 | |
426 movd 124(%ebx),%mm6 | |
427 punpckldq 120(%ebx),%mm6 | |
428 pfadd %mm6,%mm4 | |
429 pfsubr %mm6,%mm5 | |
430 pfmul %mm0,%mm5 | |
431 movq %mm4,112(%esi) | |
432 movd %mm5,124(%esi) | |
433 psrlq $32,%mm5 | |
434 movd %mm5,120(%esi) | |
435 | |
436 /* 5: final 4-element step, second work buffer (esi) -> first work buffer (ebx). With a group (a, b, c, d) and k = first float of the table at pnts+16: p0 = a + b, p1 = (a - b) * k, q1 = (d - c) * k, q0 = (c + d) + q1. Groups 0, 8, 16, 24 store (p0, p1, q0, q1); groups 4, 12, 20, 28 store (p0 + q0, p1 + q1, q0 + p1, q1). */ | |
437 movl $-1,%eax | |
438 movd %eax,%mm1 /* mm1 low lane = integer -1 */ | |
439 movl $1,%eax | |
440 /* L | H: lanes are written low | high in the comments below */ | |
441 movd %eax,%mm0 | |
442 punpckldq %mm1,%mm0 /* mm0 = integer (1, -1) */ | |
443 /* 1.0 | -1.0: after the integer-to-float conversion mm0 is the sign mask */ | |
444 pi2fd %mm0,%mm0 | |
445 movd %eax,%mm1 | |
446 pi2fd %mm1,%mm1 /* mm1 low lane = 1.0 */ | |
447 movl ASM_NAME(pnts)+16,%eax /* eax = fifth pointer stored at pnts */ | |
448 movd 0(%eax),%mm2 | |
449 /* 1.0 | cos0: mm1 = (1.0, first table entry), scales only the high lane */ | |
450 punpckldq %mm2,%mm1 | |
451 /* 0: group starting at element 0, stored as (p0, p1, q0, q1) */ | |
452 movq 0(%esi),%mm2 /* mm2 = (a, b) */ | |
453 movq %mm2,%mm3 | |
454 pfmul %mm0,%mm3 /* mm3 = (a, -b) */ | |
455 pfacc %mm3,%mm2 /* mm2 = (a + b, a - b) */ | |
456 pfmul %mm1,%mm2 /* mm2 = (p0, p1) */ | |
457 movq %mm2,0(%ebx) | |
458 movq 8(%esi),%mm4 /* mm4 = (c, d) */ | |
459 movq %mm4,%mm5 | |
460 pfmul %mm0,%mm5 /* mm5 = (c, -d) */ | |
461 pfacc %mm5,%mm4 /* mm4 = (c + d, c - d) */ | |
462 pfmul %mm0,%mm4 /* negate the high lane: (c + d, d - c) */ | |
463 pfmul %mm1,%mm4 /* mm4 = (c + d, q1) */ | |
464 movq %mm4,%mm5 | |
465 psrlq $32,%mm5 /* mm5 = (q1, 0) */ | |
466 pfacc %mm5,%mm4 /* mm4 = (q0, q1) */ | |
467 movq %mm4,8(%ebx) | |
468 /* 4: group starting at element 4, stored as (p0 + q0, p1 + q1, q0 + p1, q1) */ | |
469 movq 16(%esi),%mm2 | |
470 movq %mm2,%mm3 | |
471 pfmul %mm0,%mm3 | |
472 pfacc %mm3,%mm2 | |
473 pfmul %mm1,%mm2 /* mm2 = (p0, p1) */ | |
474 movq 24(%esi),%mm4 | |
475 movq %mm4,%mm5 | |
476 pfmul %mm0,%mm5 | |
477 pfacc %mm5,%mm4 | |
478 pfmul %mm0,%mm4 | |
479 pfmul %mm1,%mm4 | |
480 movq %mm4,%mm5 | |
481 psrlq $32,%mm5 | |
482 pfacc %mm5,%mm4 /* mm4 = (q0, q1) */ | |
483 movq %mm2,%mm3 | |
484 psrlq $32,%mm3 /* mm3 = (p1, 0), saved before mm2 is overwritten */ | |
485 pfadd %mm4,%mm2 /* mm2 = (p0 + q0, p1 + q1) */ | |
486 pfadd %mm3,%mm4 /* mm4 = (q0 + p1, q1) */ | |
487 movq %mm2,16(%ebx) | |
488 movq %mm4,24(%ebx) | |
489 /* 8: same pattern as group 0 */ | |
490 movq 32(%esi),%mm2 | |
491 movq %mm2,%mm3 | |
492 pfmul %mm0,%mm3 | |
493 pfacc %mm3,%mm2 | |
494 pfmul %mm1,%mm2 | |
495 movq %mm2,32(%ebx) | |
496 movq 40(%esi),%mm4 | |
497 movq %mm4,%mm5 | |
498 pfmul %mm0,%mm5 | |
499 pfacc %mm5,%mm4 | |
500 pfmul %mm0,%mm4 | |
501 pfmul %mm1,%mm4 | |
502 movq %mm4,%mm5 | |
503 psrlq $32,%mm5 | |
504 pfacc %mm5,%mm4 | |
505 movq %mm4,40(%ebx) | |
506 /* 12: same pattern as group 4 */ | |
507 movq 48(%esi),%mm2 | |
508 movq %mm2,%mm3 | |
509 pfmul %mm0,%mm3 | |
510 pfacc %mm3,%mm2 | |
511 pfmul %mm1,%mm2 | |
512 movq 56(%esi),%mm4 | |
513 movq %mm4,%mm5 | |
514 pfmul %mm0,%mm5 | |
515 pfacc %mm5,%mm4 | |
516 pfmul %mm0,%mm4 | |
517 pfmul %mm1,%mm4 | |
518 movq %mm4,%mm5 | |
519 psrlq $32,%mm5 | |
520 pfacc %mm5,%mm4 | |
521 movq %mm2,%mm3 | |
522 psrlq $32,%mm3 | |
523 pfadd %mm4,%mm2 | |
524 pfadd %mm3,%mm4 | |
525 movq %mm2,48(%ebx) | |
526 movq %mm4,56(%ebx) | |
527 /* 16: same pattern as group 0 */ | |
528 movq 64(%esi),%mm2 | |
529 movq %mm2,%mm3 | |
530 pfmul %mm0,%mm3 | |
531 pfacc %mm3,%mm2 | |
532 pfmul %mm1,%mm2 | |
533 movq %mm2,64(%ebx) | |
534 movq 72(%esi),%mm4 | |
535 movq %mm4,%mm5 | |
536 pfmul %mm0,%mm5 | |
537 pfacc %mm5,%mm4 | |
538 pfmul %mm0,%mm4 | |
539 pfmul %mm1,%mm4 | |
540 movq %mm4,%mm5 | |
541 psrlq $32,%mm5 | |
542 pfacc %mm5,%mm4 | |
543 movq %mm4,72(%ebx) | |
544 /* 20: same pattern as group 4 */ | |
545 movq 80(%esi),%mm2 | |
546 movq %mm2,%mm3 | |
547 pfmul %mm0,%mm3 | |
548 pfacc %mm3,%mm2 | |
549 pfmul %mm1,%mm2 | |
550 movq 88(%esi),%mm4 | |
551 movq %mm4,%mm5 | |
552 pfmul %mm0,%mm5 | |
553 pfacc %mm5,%mm4 | |
554 pfmul %mm0,%mm4 | |
555 pfmul %mm1,%mm4 | |
556 movq %mm4,%mm5 | |
557 psrlq $32,%mm5 | |
558 pfacc %mm5,%mm4 | |
559 movq %mm2,%mm3 | |
560 psrlq $32,%mm3 | |
561 pfadd %mm4,%mm2 | |
562 pfadd %mm3,%mm4 | |
563 movq %mm2,80(%ebx) | |
564 movq %mm4,88(%ebx) | |
565 /* 24: same pattern as group 0 */ | |
566 movq 96(%esi),%mm2 | |
567 movq %mm2,%mm3 | |
568 pfmul %mm0,%mm3 | |
569 pfacc %mm3,%mm2 | |
570 pfmul %mm1,%mm2 | |
571 movq %mm2,96(%ebx) | |
572 movq 104(%esi),%mm4 | |
573 movq %mm4,%mm5 | |
574 pfmul %mm0,%mm5 | |
575 pfacc %mm5,%mm4 | |
576 pfmul %mm0,%mm4 | |
577 pfmul %mm1,%mm4 | |
578 movq %mm4,%mm5 | |
579 psrlq $32,%mm5 | |
580 pfacc %mm5,%mm4 | |
581 movq %mm4,104(%ebx) | |
582 /* 28: same pattern as group 4 */ | |
583 movq 112(%esi),%mm2 | |
584 movq %mm2,%mm3 | |
585 pfmul %mm0,%mm3 | |
586 pfacc %mm3,%mm2 | |
587 pfmul %mm1,%mm2 | |
588 movq 120(%esi),%mm4 | |
589 movq %mm4,%mm5 | |
590 pfmul %mm0,%mm5 | |
591 pfacc %mm5,%mm4 | |
592 pfmul %mm0,%mm4 | |
593 pfmul %mm1,%mm4 | |
594 movq %mm4,%mm5 | |
595 psrlq $32,%mm5 | |
596 pfacc %mm5,%mm4 | |
597 movq %mm2,%mm3 | |
598 psrlq $32,%mm3 | |
599 pfadd %mm4,%mm2 | |
600 pfadd %mm3,%mm4 | |
601 movq %mm2,112(%ebx) | |
602 movq %mm4,120(%ebx) | |
603 | |
604 /* Phase6: scatter the results in the first work buffer (ebx) to the two output buffers. Low-lane values go to ebp-relative slots and high-lane values to edx-relative slots; all slot offsets are multiples of 64 bytes (16 floats). The first eight results are copied as-is, the rest are sums of stage-5 results. */ | |
605 movl 0(%ebx),%eax | |
606 movl %eax,1024(%ebp) /* ebx[0] goes to the highest ebp slot (byte offset 1024) */ | |
607 movl 4(%ebx),%eax | |
608 movl %eax,0(%ebp) /* ebx[1] is written to slot 0 of both outputs */ | |
609 movl %eax,0(%edx) | |
610 movl 8(%ebx),%eax | |
611 movl %eax,512(%ebp) | |
612 movl 12(%ebx),%eax | |
613 movl %eax,512(%edx) | |
614 | |
615 movl 16(%ebx),%eax | |
616 movl %eax,768(%ebp) | |
617 movl 20(%ebx),%eax | |
618 movl %eax,256(%edx) | |
619 | |
620 movl 24(%ebx),%eax | |
621 movl %eax,256(%ebp) | |
622 movl 28(%ebx),%eax | |
623 movl %eax,768(%edx) | |
624 /* elements 8..15: each output is a sum of two results, one register pair at a time */ | |
625 movq 32(%ebx),%mm0 | |
626 movq 48(%ebx),%mm1 | |
627 pfadd %mm1,%mm0 /* mm0 = (ebx[8] + ebx[12], ebx[9] + ebx[13]) */ | |
628 movd %mm0,896(%ebp) | |
629 psrlq $32,%mm0 | |
630 movd %mm0,128(%edx) | |
631 movq 40(%ebx),%mm2 | |
632 pfadd %mm2,%mm1 /* mm1 = (ebx[12] + ebx[10], ebx[13] + ebx[11]) */ | |
633 movd %mm1,640(%ebp) | |
634 psrlq $32,%mm1 | |
635 movd %mm1,384(%edx) | |
636 | |
637 movq 56(%ebx),%mm3 | |
638 pfadd %mm3,%mm2 /* mm2 = (ebx[10] + ebx[14], ebx[11] + ebx[15]) */ | |
639 movd %mm2,384(%ebp) | |
640 psrlq $32,%mm2 | |
641 movd %mm2,640(%edx) | |
642 | |
643 movd 36(%ebx),%mm4 /* only ebx[9] is loaded; the high lane of mm4 is zero */ | |
644 pfadd %mm4,%mm3 /* mm3 = (ebx[14] + ebx[9], ebx[15]) */ | |
645 movd %mm3,128(%ebp) | |
646 psrlq $32,%mm3 | |
647 movd %mm3,896(%edx) | |
648 movq 96(%ebx),%mm0 /* elements 16..31: outputs are sums of two or three results */ | |
649 movq 64(%ebx),%mm1 | |
650 | |
651 movq 112(%ebx),%mm2 | |
652 pfadd %mm2,%mm0 /* mm0 = (ebx[24] + ebx[28], ebx[25] + ebx[29]) */ | |
653 movq %mm0,%mm3 | |
654 pfadd %mm1,%mm3 /* add (ebx[16], ebx[17]) to a copy; mm0 itself is reused below */ | |
655 movd %mm3,960(%ebp) | |
656 psrlq $32,%mm3 | |
657 movd %mm3,64(%edx) | |
658 movq 80(%ebx),%mm1 | |
659 pfadd %mm1,%mm0 /* same partial sum plus (ebx[20], ebx[21]) */ | |
660 movd %mm0,832(%ebp) | |
661 psrlq $32,%mm0 | |
662 movd %mm0,192(%edx) | |
663 movq 104(%ebx),%mm3 | |
664 pfadd %mm3,%mm2 /* mm2 = (ebx[28] + ebx[26], ebx[29] + ebx[27]) */ | |
665 movq %mm2,%mm4 | |
666 pfadd %mm1,%mm4 /* plus (ebx[20], ebx[21]) */ | |
667 movd %mm4,704(%ebp) | |
668 psrlq $32,%mm4 | |
669 movd %mm4,320(%edx) | |
670 movq 72(%ebx),%mm1 | |
671 pfadd %mm1,%mm2 /* plus (ebx[18], ebx[19]) */ | |
672 movd %mm2,576(%ebp) | |
673 psrlq $32,%mm2 | |
674 movd %mm2,448(%edx) | |
675 | |
676 movq 120(%ebx),%mm4 | |
677 pfadd %mm4,%mm3 /* mm3 = (ebx[26] + ebx[30], ebx[27] + ebx[31]) */ | |
678 movq %mm3,%mm5 | |
679 pfadd %mm1,%mm5 /* plus (ebx[18], ebx[19]) */ | |
680 movd %mm5,448(%ebp) | |
681 psrlq $32,%mm5 | |
682 movd %mm5,576(%edx) | |
683 movq 88(%ebx),%mm1 | |
684 pfadd %mm1,%mm3 /* plus (ebx[22], ebx[23]) */ | |
685 movd %mm3,320(%ebp) | |
686 psrlq $32,%mm3 | |
687 movd %mm3,704(%edx) | |
688 | |
689 movd 100(%ebx),%mm5 /* only ebx[25] is loaded; the high lane of mm5 is zero */ | |
690 pfadd %mm5,%mm4 /* mm4 = (ebx[30] + ebx[25], ebx[31]) */ | |
691 movq %mm4,%mm6 | |
692 pfadd %mm1,%mm6 /* plus (ebx[22], ebx[23]) */ | |
693 movd %mm6,192(%ebp) | |
694 psrlq $32,%mm6 | |
695 movd %mm6,832(%edx) | |
696 movd 68(%ebx),%mm1 /* only ebx[17] is loaded; the high lane of mm1 is zero */ | |
697 pfadd %mm1,%mm4 /* mm4 = (ebx[30] + ebx[25] + ebx[17], ebx[31]) */ | |
698 movd %mm4,64(%ebp) | |
699 psrlq $32,%mm4 | |
700 movd %mm4,960(%edx) | |
701 | |
702 /* femms -- left disabled, so the MMX/3DNow! state is still active on return; presumably the caller issues femms itself (TODO confirm) */ | |
703 | |
704 popl %ebx /* restore the saved registers in reverse push order */ | |
705 popl %esi | |
706 popl %edi | |
707 popl %ebp | |
708 addl $256,%esp /* release the 256-byte scratch area */ | |
709 | |
710 ret /* nothing is placed in eax as a return value; results are in the two output buffers */ | |
711 | |
712 /* Mark non-executable stack: on Linux/ELF, emit an empty .note.GNU-stack section so the linker does not have to assume this object needs an executable stack. */ | |
713 #if defined(__linux__) && defined(__ELF__) | |
714 .section .note.GNU-stack,"",%progbits | |
715 #endif |