Mercurial > SDL_sound_CoreAudio
comparison decoders/libmpg123/dct64_3dnowext.S @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems with MP3s not seeking, or most modern MP3s not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
561:f2985e08589c | 562:7e08477b0fc1 |
---|---|
1 /* | |
2 dct64_3dnowext: extended 3DNow optimized DCT64 | |
3 | |
4 copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1 | |
5 see COPYING and AUTHORS files in distribution or http://mpg123.org | |
6 | |
7 Transformed back into standalone asm, with help of | |
8 gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_3dnowext.{S,c} | |
9 | |
10 MPlayer comment follows. | |
11 */ | |
12 | |
13 /* | |
14 * This code was taken from http://www.mpg123.org | |
15 * See ChangeLog of mpg123-0.59s-pre.1 for detail | |
16 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | |
17 * Partial 3dnowex-DSP! optimization by Nick Kurshev | |
18 * | |
19 * TODO: optimize scalar 3dnow! code | |
20 * Warning: Phases 7 & 8 are not tested | |
21 */ | |
22 | |
23 #include "mangle.h" | |
24 | |
25 .data | |
26 ALIGN4 | |
27 /* plus_1f: a single 32-bit word holding the bit pattern of the float 1.0f. | |
28 (.type plus_1f, @object and .size plus_1f, 4 are intentionally left disabled.) */ | |
29 plus_1f: | |
30 .long 0x3f800000 | |
31 ALIGN8 | |
32 /* x_plus_minus_3dnow: 64-bit XOR mask; low dword is zero, high dword is only the sign bit. | |
33 (.type x_plus_minus_3dnow, @object and .size x_plus_minus_3dnow, 8 are intentionally left disabled.) */ | |
34 x_plus_minus_3dnow: | |
35 .long 0x00000000 | |
36 .long 0x80000000 | |
37 | |
38 .text | |
39 ALIGN32,,31 | |
40 .globl ASM_NAME(dct64_3dnowext) | |
41 /* .type ASM_NAME(dct64_3dnowext), @function */ | |
42 ASM_NAME(dct64_3dnowext): /* Entry point. Reads three 32-bit stack arguments from 8/12/16(%ebp); uses 3DNow! plus the extended instructions pswapd and pf2iw. */ | |
43 pushl %ebp | |
44 movl %esp, %ebp | |
45 pushl %edi /* preserve edi, esi and ebx; all three are overwritten below */ | |
46 pushl %esi | |
47 pushl %ebx | |
48 subl $256, %esp /* 256 bytes of scratch at -268(%ebp)..-13(%ebp); this is the last EFLAGS-writing instruction before the jnz further down */ | |
49 /* APP */ | |
50 movl 16(%ebp),%eax /* eax = third argument: input block, read at byte offsets 0..127 */ | |
51 leal 128+-268(%ebp),%edx /* edx = upper 128-byte half of the scratch area (ebp-140) */ | |
52 movl 8(%ebp),%esi /* esi = first argument: first output base used by the store sections */ | |
53 movl 12(%ebp),%edi /* edi = second argument: second output base used by the store sections */ | |
54 movl $ASM_NAME(costab_mmxsse),%ebx /* ebx = coefficient table, defined outside this file */ | |
55 leal -268(%ebp),%ecx /* ecx = lower 128-byte half of the scratch area */ | |
56 movq (%eax), %mm0 /* Pass 1 (eax -> edx): each pair is combined with its mirrored pair (pswapd swaps the two floats); sums go to edx 0..63, table-scaled differences (costab bytes 0..63) go swapped to edx 64..127 */ | |
57 movq 8(%eax), %mm4 | |
58 movq %mm0, %mm3 | |
59 movq %mm4, %mm7 | |
60 pswapd 120(%eax), %mm1 | |
61 pswapd 112(%eax), %mm5 | |
62 pfadd %mm1, %mm0 | |
63 pfadd %mm5, %mm4 | |
64 movq %mm0, (%edx) | |
65 movq %mm4, 8(%edx) | |
66 pfsub %mm1, %mm3 | |
67 pfsub %mm5, %mm7 | |
68 pfmul (%ebx), %mm3 | |
69 pfmul 8(%ebx), %mm7 | |
70 pswapd %mm3, %mm3 | |
71 pswapd %mm7, %mm7 | |
72 movq %mm3, 120(%edx) | |
73 movq %mm7, 112(%edx) | |
74 movq 16(%eax), %mm0 | |
75 movq 24(%eax), %mm4 | |
76 movq %mm0, %mm3 | |
77 movq %mm4, %mm7 | |
78 pswapd 104(%eax), %mm1 | |
79 pswapd 96(%eax), %mm5 | |
80 pfadd %mm1, %mm0 | |
81 pfadd %mm5, %mm4 | |
82 movq %mm0, 16(%edx) | |
83 movq %mm4, 24(%edx) | |
84 pfsub %mm1, %mm3 | |
85 pfsub %mm5, %mm7 | |
86 pfmul 16(%ebx), %mm3 | |
87 pfmul 24(%ebx), %mm7 | |
88 pswapd %mm3, %mm3 | |
89 pswapd %mm7, %mm7 | |
90 movq %mm3, 104(%edx) | |
91 movq %mm7, 96(%edx) | |
92 movq 32(%eax), %mm0 | |
93 movq 40(%eax), %mm4 | |
94 movq %mm0, %mm3 | |
95 movq %mm4, %mm7 | |
96 pswapd 88(%eax), %mm1 | |
97 pswapd 80(%eax), %mm5 | |
98 pfadd %mm1, %mm0 | |
99 pfadd %mm5, %mm4 | |
100 movq %mm0, 32(%edx) | |
101 movq %mm4, 40(%edx) | |
102 pfsub %mm1, %mm3 | |
103 pfsub %mm5, %mm7 | |
104 pfmul 32(%ebx), %mm3 | |
105 pfmul 40(%ebx), %mm7 | |
106 pswapd %mm3, %mm3 | |
107 pswapd %mm7, %mm7 | |
108 movq %mm3, 88(%edx) | |
109 movq %mm7, 80(%edx) | |
110 movq 48(%eax), %mm0 | |
111 movq 56(%eax), %mm4 | |
112 movq %mm0, %mm3 | |
113 movq %mm4, %mm7 | |
114 pswapd 72(%eax), %mm1 | |
115 pswapd 64(%eax), %mm5 | |
116 pfadd %mm1, %mm0 | |
117 pfadd %mm5, %mm4 | |
118 movq %mm0, 48(%edx) | |
119 movq %mm4, 56(%edx) | |
120 pfsub %mm1, %mm3 | |
121 pfsub %mm5, %mm7 | |
122 pfmul 48(%ebx), %mm3 | |
123 pfmul 56(%ebx), %mm7 | |
124 pswapd %mm3, %mm3 | |
125 pswapd %mm7, %mm7 | |
126 movq %mm3, 72(%edx) | |
127 movq %mm7, 64(%edx) | |
128 movq (%edx), %mm0 /* Pass 2 (edx -> ecx): same sum/difference pattern within each 64-byte half, scaled by costab bytes 64..95 */ | |
129 movq 8(%edx), %mm4 | |
130 movq %mm0, %mm3 | |
131 movq %mm4, %mm7 | |
132 pswapd 56(%edx), %mm1 | |
133 pswapd 48(%edx), %mm5 | |
134 pfadd %mm1, %mm0 | |
135 pfadd %mm5, %mm4 | |
136 movq %mm0, (%ecx) | |
137 movq %mm4, 8(%ecx) | |
138 pfsub %mm1, %mm3 | |
139 pfsub %mm5, %mm7 | |
140 pfmul 64(%ebx), %mm3 | |
141 pfmul 72(%ebx), %mm7 | |
142 pswapd %mm3, %mm3 | |
143 pswapd %mm7, %mm7 | |
144 movq %mm3, 56(%ecx) | |
145 movq %mm7, 48(%ecx) | |
146 movq 16(%edx), %mm0 | |
147 movq 24(%edx), %mm4 | |
148 movq %mm0, %mm3 | |
149 movq %mm4, %mm7 | |
150 pswapd 40(%edx), %mm1 | |
151 pswapd 32(%edx), %mm5 | |
152 pfadd %mm1, %mm0 | |
153 pfadd %mm5, %mm4 | |
154 movq %mm0, 16(%ecx) | |
155 movq %mm4, 24(%ecx) | |
156 pfsub %mm1, %mm3 | |
157 pfsub %mm5, %mm7 | |
158 pfmul 80(%ebx), %mm3 | |
159 pfmul 88(%ebx), %mm7 | |
160 pswapd %mm3, %mm3 | |
161 pswapd %mm7, %mm7 | |
162 movq %mm3, 40(%ecx) | |
163 movq %mm7, 32(%ecx) | |
164 movq 64(%edx), %mm0 /* upper half of pass 2: differences use pfsubr, i.e. the operands are subtracted in the reverse order */ | |
165 movq 72(%edx), %mm4 | |
166 movq %mm0, %mm3 | |
167 movq %mm4, %mm7 | |
168 pswapd 120(%edx), %mm1 | |
169 pswapd 112(%edx), %mm5 | |
170 pfadd %mm1, %mm0 | |
171 pfadd %mm5, %mm4 | |
172 movq %mm0, 64(%ecx) | |
173 movq %mm4, 72(%ecx) | |
174 pfsubr %mm1, %mm3 | |
175 pfsubr %mm5, %mm7 | |
176 pfmul 64(%ebx), %mm3 | |
177 pfmul 72(%ebx), %mm7 | |
178 pswapd %mm3, %mm3 | |
179 pswapd %mm7, %mm7 | |
180 movq %mm3, 120(%ecx) | |
181 movq %mm7, 112(%ecx) | |
182 movq 80(%edx), %mm0 | |
183 movq 88(%edx), %mm4 | |
184 movq %mm0, %mm3 | |
185 movq %mm4, %mm7 | |
186 pswapd 104(%edx), %mm1 | |
187 pswapd 96(%edx), %mm5 | |
188 pfadd %mm1, %mm0 | |
189 pfadd %mm5, %mm4 | |
190 movq %mm0, 80(%ecx) | |
191 movq %mm4, 88(%ecx) | |
192 pfsubr %mm1, %mm3 | |
193 pfsubr %mm5, %mm7 | |
194 pfmul 80(%ebx), %mm3 | |
195 pfmul 88(%ebx), %mm7 | |
196 pswapd %mm3, %mm3 | |
197 pswapd %mm7, %mm7 | |
198 movq %mm3, 104(%ecx) | |
199 movq %mm7, 96(%ecx) | |
200 movq 96(%ebx), %mm2 /* Pass 3 (ecx -> edx): 32-byte groups; the four coefficients at costab bytes 96..111 stay in mm2/mm6 for the whole pass */ | |
201 movq 104(%ebx), %mm6 | |
202 movq (%ecx), %mm0 | |
203 movq 8(%ecx), %mm4 | |
204 movq %mm0, %mm3 | |
205 movq %mm4, %mm7 | |
206 pswapd 24(%ecx), %mm1 | |
207 pswapd 16(%ecx), %mm5 | |
208 pfadd %mm1, %mm0 | |
209 pfadd %mm5, %mm4 | |
210 movq %mm0, (%edx) | |
211 movq %mm4, 8(%edx) | |
212 pfsub %mm1, %mm3 | |
213 pfsub %mm5, %mm7 | |
214 pfmul %mm2, %mm3 | |
215 pfmul %mm6, %mm7 | |
216 pswapd %mm3, %mm3 | |
217 pswapd %mm7, %mm7 | |
218 movq %mm3, 24(%edx) | |
219 movq %mm7, 16(%edx) | |
220 movq 32(%ecx), %mm0 | |
221 movq 40(%ecx), %mm4 | |
222 movq %mm0, %mm3 | |
223 movq %mm4, %mm7 | |
224 pswapd 56(%ecx), %mm1 | |
225 pswapd 48(%ecx), %mm5 | |
226 pfadd %mm1, %mm0 | |
227 pfadd %mm5, %mm4 | |
228 movq %mm0, 32(%edx) | |
229 movq %mm4, 40(%edx) | |
230 pfsubr %mm1, %mm3 | |
231 pfsubr %mm5, %mm7 | |
232 pfmul %mm2, %mm3 | |
233 pfmul %mm6, %mm7 | |
234 pswapd %mm3, %mm3 | |
235 pswapd %mm7, %mm7 | |
236 movq %mm3, 56(%edx) | |
237 movq %mm7, 48(%edx) | |
238 movq 64(%ecx), %mm0 | |
239 movq 72(%ecx), %mm4 | |
240 movq %mm0, %mm3 | |
241 movq %mm4, %mm7 | |
242 pswapd 88(%ecx), %mm1 | |
243 pswapd 80(%ecx), %mm5 | |
244 pfadd %mm1, %mm0 | |
245 pfadd %mm5, %mm4 | |
246 movq %mm0, 64(%edx) | |
247 movq %mm4, 72(%edx) | |
248 pfsub %mm1, %mm3 | |
249 pfsub %mm5, %mm7 | |
250 pfmul %mm2, %mm3 | |
251 pfmul %mm6, %mm7 | |
252 pswapd %mm3, %mm3 | |
253 pswapd %mm7, %mm7 | |
254 movq %mm3, 88(%edx) | |
255 movq %mm7, 80(%edx) | |
256 movq 96(%ecx), %mm0 | |
257 movq 104(%ecx), %mm4 | |
258 movq %mm0, %mm3 | |
259 movq %mm4, %mm7 | |
260 pswapd 120(%ecx), %mm1 | |
261 pswapd 112(%ecx), %mm5 | |
262 pfadd %mm1, %mm0 | |
263 pfadd %mm5, %mm4 | |
264 movq %mm0, 96(%edx) | |
265 movq %mm4, 104(%edx) | |
266 pfsubr %mm1, %mm3 | |
267 pfsubr %mm5, %mm7 | |
268 pfmul %mm2, %mm3 | |
269 pfmul %mm6, %mm7 | |
270 pswapd %mm3, %mm3 | |
271 pswapd %mm7, %mm7 | |
272 movq %mm3, 120(%edx) | |
273 movq %mm7, 112(%edx) | |
274 movq 112(%ebx), %mm2 /* Pass 4 (edx -> ecx): 16-byte groups; the coefficient pair at costab byte 112 stays in mm2; mm3 uses pfsub, mm7 uses pfsubr */ | |
275 movq (%edx), %mm0 | |
276 movq 16(%edx), %mm4 | |
277 movq %mm0, %mm3 | |
278 movq %mm4, %mm7 | |
279 pswapd 8(%edx), %mm1 | |
280 pswapd 24(%edx), %mm5 | |
281 pfadd %mm1, %mm0 | |
282 pfadd %mm5, %mm4 | |
283 movq %mm0, (%ecx) | |
284 movq %mm4, 16(%ecx) | |
285 pfsub %mm1, %mm3 | |
286 pfsubr %mm5, %mm7 | |
287 pfmul %mm2, %mm3 | |
288 pfmul %mm2, %mm7 | |
289 pswapd %mm3, %mm3 | |
290 pswapd %mm7, %mm7 | |
291 movq %mm3, 8(%ecx) | |
292 movq %mm7, 24(%ecx) | |
293 movq 32(%edx), %mm0 | |
294 movq 48(%edx), %mm4 | |
295 movq %mm0, %mm3 | |
296 movq %mm4, %mm7 | |
297 pswapd 40(%edx), %mm1 | |
298 pswapd 56(%edx), %mm5 | |
299 pfadd %mm1, %mm0 | |
300 pfadd %mm5, %mm4 | |
301 movq %mm0, 32(%ecx) | |
302 movq %mm4, 48(%ecx) | |
303 pfsub %mm1, %mm3 | |
304 pfsubr %mm5, %mm7 | |
305 pfmul %mm2, %mm3 | |
306 pfmul %mm2, %mm7 | |
307 pswapd %mm3, %mm3 | |
308 pswapd %mm7, %mm7 | |
309 movq %mm3, 40(%ecx) | |
310 movq %mm7, 56(%ecx) | |
311 movq 64(%edx), %mm0 | |
312 movq 80(%edx), %mm4 | |
313 movq %mm0, %mm3 | |
314 movq %mm4, %mm7 | |
315 pswapd 72(%edx), %mm1 | |
316 pswapd 88(%edx), %mm5 | |
317 pfadd %mm1, %mm0 | |
318 pfadd %mm5, %mm4 | |
319 movq %mm0, 64(%ecx) | |
320 movq %mm4, 80(%ecx) | |
321 pfsub %mm1, %mm3 | |
322 pfsubr %mm5, %mm7 | |
323 pfmul %mm2, %mm3 | |
324 pfmul %mm2, %mm7 | |
325 pswapd %mm3, %mm3 | |
326 pswapd %mm7, %mm7 | |
327 movq %mm3, 72(%ecx) | |
328 movq %mm7, 88(%ecx) | |
329 movq 96(%edx), %mm0 | |
330 movq 112(%edx), %mm4 | |
331 movq %mm0, %mm3 | |
332 movq %mm4, %mm7 | |
333 pswapd 104(%edx), %mm1 | |
334 pswapd 120(%edx), %mm5 | |
335 pfadd %mm1, %mm0 | |
336 pfadd %mm5, %mm4 | |
337 movq %mm0, 96(%ecx) | |
338 movq %mm4, 112(%ecx) | |
339 pfsub %mm1, %mm3 | |
340 pfsubr %mm5, %mm7 | |
341 pfmul %mm2, %mm3 | |
342 pfmul %mm2, %mm7 | |
343 pswapd %mm3, %mm3 | |
344 pswapd %mm7, %mm7 | |
345 movq %mm3, 104(%ecx) | |
346 movq %mm7, 120(%ecx) | |
347 movd plus_1f, %mm6 /* Pass 5 (ecx 32..127 -> edx 32..127): build mm6 = { 1.0f, dword at costab byte 120 } */ | |
348 punpckldq 120(%ebx), %mm6 | |
349 movq x_plus_minus_3dnow, %mm7 /* mm7 = sign-bit mask for the high float; pxor with it negates that element */ | |
350 movq 32(%ecx), %mm0 | |
351 movq 64(%ecx), %mm2 | |
352 movq %mm0, %mm1 | |
353 movq %mm2, %mm3 | |
354 pxor %mm7, %mm1 | |
355 pxor %mm7, %mm3 | |
356 pfacc %mm1, %mm0 /* mm0 = { lo+hi, lo-hi }, because mm1 is the same pair with its high float negated */ | |
357 pfacc %mm3, %mm2 | |
358 pfmul %mm6, %mm0 /* sum is multiplied by 1.0f, difference by the table coefficient */ | |
359 pfmul %mm6, %mm2 | |
360 movq %mm0, 32(%edx) | |
361 movq %mm2, 64(%edx) | |
362 movd 44(%ecx), %mm0 | |
363 movd 40(%ecx), %mm2 | |
364 movd 120(%ebx), %mm3 | |
365 punpckldq 76(%ecx), %mm0 | |
366 punpckldq 72(%ecx), %mm2 | |
367 punpckldq %mm3, %mm3 /* duplicate the coefficient into both halves of mm3 */ | |
368 movq %mm0, %mm4 | |
369 movq %mm2, %mm5 | |
370 pfsub %mm2, %mm0 | |
371 pfmul %mm3, %mm0 | |
372 movq %mm0, %mm1 | |
373 pfadd %mm5, %mm0 | |
374 pfadd %mm4, %mm0 | |
375 movq %mm0, %mm2 | |
376 punpckldq %mm1, %mm0 | |
377 punpckhdq %mm1, %mm2 | |
378 movq %mm0, 40(%edx) | |
379 movq %mm2, 72(%edx) | |
380 movd 48(%ecx), %mm3 | |
381 movd 60(%ecx), %mm2 | |
382 pfsub 52(%ecx), %mm3 | |
383 pfsub 56(%ecx), %mm2 | |
384 pfmul 120(%ebx), %mm3 | |
385 pfmul 120(%ebx), %mm2 | |
386 movq %mm2, %mm1 | |
387 pfadd 56(%ecx), %mm1 | |
388 pfadd 60(%ecx), %mm1 | |
389 movq %mm1, %mm0 | |
390 pfadd 48(%ecx), %mm0 | |
391 pfadd 52(%ecx), %mm0 | |
392 pfadd %mm3, %mm1 | |
393 punpckldq %mm2, %mm1 | |
394 pfadd %mm3, %mm2 | |
395 punpckldq %mm2, %mm0 | |
396 movq %mm1, 56(%edx) | |
397 movq %mm0, 48(%edx) | |
398 movd 92(%ecx), %mm1 | |
399 pfsub 88(%ecx), %mm1 | |
400 pfmul 120(%ebx), %mm1 | |
401 movd %mm1, 92(%edx) | |
402 pfadd 92(%ecx), %mm1 | |
403 pfadd 88(%ecx), %mm1 | |
404 movq %mm1, %mm0 | |
405 pfadd 80(%ecx), %mm0 | |
406 pfadd 84(%ecx), %mm0 | |
407 movd %mm0, 80(%edx) | |
408 movd 80(%ecx), %mm0 | |
409 pfsub 84(%ecx), %mm0 | |
410 pfmul 120(%ebx), %mm0 | |
411 pfadd %mm0, %mm1 | |
412 pfadd 92(%edx), %mm0 | |
413 punpckldq %mm1, %mm0 | |
414 movq %mm0, 84(%edx) | |
415 movq 96(%ecx), %mm0 | |
416 movq %mm0, %mm1 | |
417 pxor %mm7, %mm1 | |
418 pfacc %mm1, %mm0 | |
419 pfmul %mm6, %mm0 | |
420 movq %mm0, 96(%edx) | |
421 movd 108(%ecx), %mm0 | |
422 pfsub 104(%ecx), %mm0 | |
423 pfmul 120(%ebx), %mm0 | |
424 movd %mm0, 108(%edx) | |
425 pfadd 104(%ecx), %mm0 | |
426 pfadd 108(%ecx), %mm0 | |
427 movd %mm0, 104(%edx) | |
428 movd 124(%ecx), %mm1 | |
429 pfsub 120(%ecx), %mm1 | |
430 pfmul 120(%ebx), %mm1 | |
431 movd %mm1, 124(%edx) | |
432 pfadd 120(%ecx), %mm1 | |
433 pfadd 124(%ecx), %mm1 | |
434 movq %mm1, %mm0 | |
435 pfadd 112(%ecx), %mm0 | |
436 pfadd 116(%ecx), %mm0 | |
437 movd %mm0, 112(%edx) | |
438 movd 112(%ecx), %mm0 | |
439 pfsub 116(%ecx), %mm0 | |
440 pfmul 120(%ebx), %mm0 | |
441 pfadd %mm0,%mm1 | |
442 pfadd 124(%edx), %mm0 | |
443 punpckldq %mm1, %mm0 | |
444 movq %mm0, 116(%edx) | |
445 jnz .L01 /* NOTE(review): nothing since "subl $256, %esp" writes EFLAGS (mov/lea and the MMX/3DNow! instructions in between leave them alone), so this appears to test the ZF left by that subl and would always branch to .L01 - confirm the intended condition */ | |
446 movd (%ecx), %mm0 /* Fall-through store path: 32-bit results from ecx 0..31 and edx 32..127 are written to esi/edi at offsets that are multiples of 64 bytes */ | |
447 pfadd 4(%ecx), %mm0 | |
448 movd %mm0, 1024(%esi) | |
449 movd (%ecx), %mm0 | |
450 pfsub 4(%ecx), %mm0 | |
451 pfmul 120(%ebx), %mm0 | |
452 movd %mm0, (%esi) | |
453 movd %mm0, (%edi) /* the same value is stored at offset 0 of both outputs */ | |
454 movd 12(%ecx), %mm0 | |
455 pfsub 8(%ecx), %mm0 | |
456 pfmul 120(%ebx), %mm0 | |
457 movd %mm0, 512(%edi) | |
458 pfadd 12(%ecx), %mm0 | |
459 pfadd 8(%ecx), %mm0 | |
460 movd %mm0, 512(%esi) | |
461 movd 16(%ecx), %mm0 | |
462 pfsub 20(%ecx), %mm0 | |
463 pfmul 120(%ebx), %mm0 | |
464 movq %mm0, %mm3 | |
465 movd 28(%ecx), %mm0 | |
466 pfsub 24(%ecx), %mm0 | |
467 pfmul 120(%ebx), %mm0 | |
468 movd %mm0, 768(%edi) | |
469 movq %mm0, %mm2 | |
470 pfadd 24(%ecx), %mm0 | |
471 pfadd 28(%ecx), %mm0 | |
472 movq %mm0, %mm1 | |
473 pfadd 16(%ecx), %mm0 | |
474 pfadd 20(%ecx), %mm0 | |
475 movd %mm0, 768(%esi) | |
476 pfadd %mm3, %mm1 | |
477 movd %mm1, 256(%esi) | |
478 pfadd %mm3, %mm2 | |
479 movd %mm2, 256(%edi) | |
480 movq 32(%edx), %mm0 | |
481 movq 48(%edx), %mm1 | |
482 pfadd 48(%edx), %mm0 | |
483 pfadd 40(%edx), %mm1 | |
484 movd %mm0, 896(%esi) /* low float goes to esi; psrlq $32 below exposes the high float, which goes to edi */ | |
485 movd %mm1, 640(%esi) | |
486 psrlq $32, %mm0 | |
487 psrlq $32, %mm1 | |
488 movd %mm0, 128(%edi) | |
489 movd %mm1, 384(%edi) | |
490 movd 40(%edx), %mm0 | |
491 pfadd 56(%edx), %mm0 | |
492 movd %mm0, 384(%esi) | |
493 movd 56(%edx), %mm0 | |
494 pfadd 36(%edx), %mm0 | |
495 movd %mm0, 128(%esi) | |
496 movd 60(%edx), %mm0 | |
497 movd %mm0, 896(%edi) | |
498 pfadd 44(%edx), %mm0 | |
499 movd %mm0, 640(%edi) | |
500 movq 96(%edx), %mm0 | |
501 movq 112(%edx), %mm2 | |
502 movq 104(%edx), %mm4 | |
503 pfadd 112(%edx), %mm0 | |
504 pfadd 104(%edx), %mm2 | |
505 pfadd 120(%edx), %mm4 | |
506 movq %mm0, %mm1 | |
507 movq %mm2, %mm3 | |
508 movq %mm4, %mm5 | |
509 pfadd 64(%edx), %mm0 | |
510 pfadd 80(%edx), %mm2 | |
511 pfadd 72(%edx), %mm4 | |
512 movd %mm0, 960(%esi) | |
513 movd %mm2, 704(%esi) | |
514 movd %mm4, 448(%esi) | |
515 psrlq $32, %mm0 | |
516 psrlq $32, %mm2 | |
517 psrlq $32, %mm4 | |
518 movd %mm0, 64(%edi) | |
519 movd %mm2, 320(%edi) | |
520 movd %mm4, 576(%edi) | |
521 pfadd 80(%edx), %mm1 | |
522 pfadd 72(%edx), %mm3 | |
523 pfadd 88(%edx), %mm5 | |
524 movd %mm1, 832(%esi) | |
525 movd %mm3, 576(%esi) | |
526 movd %mm5, 320(%esi) | |
527 psrlq $32, %mm1 | |
528 psrlq $32, %mm3 | |
529 psrlq $32, %mm5 | |
530 movd %mm1, 192(%edi) | |
531 movd %mm3, 448(%edi) | |
532 movd %mm5, 704(%edi) | |
533 movd 120(%edx), %mm0 | |
534 pfadd 100(%edx), %mm0 | |
535 movq %mm0, %mm1 | |
536 pfadd 88(%edx), %mm0 | |
537 movd %mm0, 192(%esi) | |
538 pfadd 68(%edx), %mm1 | |
539 movd %mm1, 64(%esi) | |
540 movd 124(%edx), %mm0 | |
541 movd %mm0, 960(%edi) | |
542 pfadd 92(%edx), %mm0 | |
543 movd %mm0, 832(%edi) | |
544 jmp .L_bye | |
545 .L01: /* Branch-taken store path: results are converted with pf2iw (packed float to integer) and the low 16 bits are stored with movw to esi/edi at offsets that are multiples of 32 bytes */ | |
546 movq (%ecx), %mm0 | |
547 movq %mm0, %mm1 | |
548 pxor %mm7, %mm1 | |
549 pfacc %mm1, %mm0 | |
550 pfmul %mm6, %mm0 | |
551 pf2iw %mm0, %mm0 | |
552 movd %mm0, %eax /* eax no longer holds the input pointer; it is integer scratch from here on */ | |
553 movw %ax, 512(%esi) | |
554 psrlq $32, %mm0 | |
555 movd %mm0, %eax | |
556 movw %ax, (%esi) | |
557 movd 12(%ecx), %mm0 | |
558 pfsub 8(%ecx), %mm0 | |
559 pfmul 120(%ebx), %mm0 | |
560 pf2iw %mm0, %mm7 /* overwrites the sign mask in mm7, which is not needed again on this path */ | |
561 movd %mm7, %eax | |
562 movw %ax, 256(%edi) | |
563 pfadd 12(%ecx), %mm0 | |
564 pfadd 8(%ecx), %mm0 | |
565 pf2iw %mm0, %mm0 | |
566 movd %mm0, %eax | |
567 movw %ax, 256(%esi) | |
568 movd 16(%ecx), %mm3 | |
569 pfsub 20(%ecx), %mm3 | |
570 pfmul 120(%ebx), %mm3 | |
571 movq %mm3, %mm2 | |
572 movd 28(%ecx), %mm2 /* replaces the copy made on the previous line */ | |
573 pfsub 24(%ecx), %mm2 | |
574 pfmul 120(%ebx), %mm2 | |
575 movq %mm2, %mm1 | |
576 pf2iw %mm2, %mm7 | |
577 movd %mm7, %eax | |
578 movw %ax, 384(%edi) | |
579 pfadd 24(%ecx), %mm1 | |
580 pfadd 28(%ecx), %mm1 | |
581 movq %mm1, %mm0 | |
582 pfadd 16(%ecx), %mm0 | |
583 pfadd 20(%ecx), %mm0 | |
584 pf2iw %mm0, %mm0 | |
585 movd %mm0, %eax | |
586 movw %ax, 384(%esi) | |
587 pfadd %mm3, %mm1 | |
588 pf2iw %mm1, %mm1 | |
589 movd %mm1, %eax | |
590 movw %ax, 128(%esi) | |
591 pfadd %mm3, %mm2 | |
592 pf2iw %mm2, %mm2 | |
593 movd %mm2, %eax | |
594 movw %ax, 128(%edi) | |
595 movq 32(%edx), %mm0 | |
596 movq 48(%edx), %mm1 | |
597 pfadd 48(%edx), %mm0 | |
598 pfadd 40(%edx), %mm1 | |
599 pf2iw %mm0, %mm0 | |
600 pf2iw %mm1, %mm1 | |
601 movd %mm0, %eax | |
602 movd %mm1, %ecx /* ecx becomes integer scratch; the lower scratch buffer is not read again after this point */ | |
603 movw %ax, 448(%esi) | |
604 movw %cx, 320(%esi) | |
605 psrlq $32, %mm0 | |
606 psrlq $32, %mm1 | |
607 movd %mm0, %eax | |
608 movd %mm1, %ecx | |
609 movw %ax, 64(%edi) | |
610 movw %cx, 192(%edi) | |
611 movd 40(%edx), %mm3 | |
612 movd 56(%edx), %mm4 | |
613 movd 60(%edx), %mm0 | |
614 movd 44(%edx), %mm2 | |
615 movd 120(%edx), %mm5 | |
616 punpckldq %mm4, %mm3 | |
617 punpckldq 124(%edx), %mm0 | |
618 pfadd 100(%edx), %mm5 | |
619 punpckldq 36(%edx), %mm4 | |
620 punpckldq 92(%edx), %mm2 | |
621 movq %mm5, %mm6 | |
622 pfadd %mm4, %mm3 | |
623 pf2iw %mm0, %mm1 | |
624 pf2iw %mm3, %mm3 | |
625 pfadd 88(%edx), %mm5 | |
626 movd %mm1, %eax | |
627 movd %mm3, %ecx | |
628 movw %ax, 448(%edi) | |
629 movw %cx, 192(%esi) | |
630 pf2iw %mm5, %mm5 | |
631 psrlq $32, %mm1 | |
632 psrlq $32, %mm3 | |
633 movd %mm5, %ebx /* ebx becomes integer scratch; the coefficient table is not read again after this point */ | |
634 movd %mm1, %eax | |
635 movd %mm3, %ecx | |
636 movw %bx, 96(%esi) | |
637 movw %ax, 480(%edi) | |
638 movw %cx, 64(%esi) | |
639 pfadd %mm2, %mm0 | |
640 pf2iw %mm0, %mm0 | |
641 movd %mm0, %eax | |
642 pfadd 68(%edx), %mm6 | |
643 movw %ax, 320(%edi) | |
644 psrlq $32, %mm0 | |
645 pf2iw %mm6, %mm6 | |
646 movd %mm0, %eax | |
647 movd %mm6, %ebx | |
648 movw %ax, 416(%edi) | |
649 movw %bx, 32(%esi) | |
650 movq 96(%edx), %mm0 | |
651 movq 112(%edx), %mm2 | |
652 movq 104(%edx), %mm4 | |
653 pfadd %mm2, %mm0 | |
654 pfadd %mm4, %mm2 | |
655 pfadd 120(%edx), %mm4 | |
656 movq %mm0, %mm1 | |
657 movq %mm2, %mm3 | |
658 movq %mm4, %mm5 | |
659 pfadd 64(%edx), %mm0 | |
660 pfadd 80(%edx), %mm2 | |
661 pfadd 72(%edx), %mm4 | |
662 pf2iw %mm0, %mm0 | |
663 pf2iw %mm2, %mm2 | |
664 pf2iw %mm4, %mm4 | |
665 movd %mm0, %eax | |
666 movd %mm2, %ecx | |
667 movd %mm4, %ebx | |
668 movw %ax, 480(%esi) | |
669 movw %cx, 352(%esi) | |
670 movw %bx, 224(%esi) | |
671 psrlq $32, %mm0 | |
672 psrlq $32, %mm2 | |
673 psrlq $32, %mm4 | |
674 movd %mm0, %eax | |
675 movd %mm2, %ecx | |
676 movd %mm4, %ebx | |
677 movw %ax, 32(%edi) | |
678 movw %cx, 160(%edi) | |
679 movw %bx, 288(%edi) | |
680 pfadd 80(%edx), %mm1 | |
681 pfadd 72(%edx), %mm3 | |
682 pfadd 88(%edx), %mm5 | |
683 pf2iw %mm1, %mm1 | |
684 pf2iw %mm3, %mm3 | |
685 pf2iw %mm5, %mm5 | |
686 movd %mm1, %eax | |
687 movd %mm3, %ecx | |
688 movd %mm5, %ebx | |
689 movw %ax, 416(%esi) | |
690 movw %cx, 288(%esi) | |
691 movw %bx, 160(%esi) | |
692 psrlq $32, %mm1 | |
693 psrlq $32, %mm3 | |
694 psrlq $32, %mm5 | |
695 movd %mm1, %eax | |
696 movd %mm3, %ecx | |
697 movd %mm5, %ebx | |
698 movw %ax, 96(%edi) | |
699 movw %cx, 224(%edi) | |
700 movw %bx, 352(%edi) | |
701 movsw /* string move: copies the 16-bit word at (%esi) to (%edi) and steps both registers by 2 (direction depends on DF) */ | |
702 .L_bye: /* common exit for both store paths */ | |
703 femms /* leave MMX/3DNow! state so the registers can be used as x87 again */ | |
704 | |
705 /* NO_APP */ | |
706 addl $256, %esp /* release the scratch area */ | |
707 popl %ebx /* restore the registers saved in the prologue, in reverse order */ | |
708 popl %esi | |
709 popl %edi | |
710 leave | |
711 ret | |
712 /* .size ASM_NAME(dct64_3dnowext), .-ASM_NAME(dct64_3dnowext) */ | |
713 | |
714 /* Mark non-executable stack: on Linux/ELF builds, emit an empty .note.GNU-stack section for this object. */ | |
715 #if defined(__linux__) && defined(__ELF__) | |
716 .section .note.GNU-stack,"",%progbits | |
717 #endif |