comparison decoders/libmpg123/dct36_3dnowext.S @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems about MP3's not seeking, or most modern MP3's not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123...there are MMX, 3DNow, SSE, Altivec, etc decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
comparison
equal deleted inserted replaced
561:f2985e08589c 562:7e08477b0fc1
1 /*
2 dct36_3dnowext: extended 3DNow optimized DCT36
3
4 copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6
7 Transformed back into standalone asm, with help of
8 gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct36_3dnowext.{S,c}
9
10 MPlayer comment follows.
11 */
12
13 /*
14 * dct36_3dnow.c - 3DNow! optimized dct36()
15 *
16 * This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
17 * <squash@mb.kcom.ne.jp>, only two types of changes have been made:
18 *
19 * - removed PREFETCH instruction for speedup
20 * - changed function name to support 3DNow! automatic detection
21 *
22 * You can find Kashiyama's original 3dnow! support patch
23 * (for mpg123-0.59o) at
24 * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
25 *
26 * by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
27 * <kim@comtec.co.jp> - after 1.Apr.1999
28 *
29 * Modified for use with MPlayer, for details see the changelog at
30 * http://svn.mplayerhq.hu/mplayer/trunk/
31 * $Id: dct36_3dnow.c 18786 2006-06-22 13:34:00Z diego $
32 *
33 * Original disclaimer:
34 * The author of this program disclaim whole expressed or implied
35 * warranties with regard to this program, and in no event shall the
36 * author of this program liable to whatever resulted from the use of
37 * this program. Use it at your own risk.
38 *
39 * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
40 */
41
42 #include "mangle.h"
43
44 .text
45 ALIGN32,,31
46 .globl ASM_NAME(dct36_3dnowext)
47 /* .type ASM_NAME(dct36_3dnowext), @function */
/*
 * dct36_3dnowext -- 32-bit x86 routine (AT&T syntax) taking five pointer-sized
 * stack arguments at 8..24(%ebp).  All arithmetic is done with 3DNow!
 * instructions, i.e. on pairs of 32-bit floats held in the MMX registers.
 *
 * Shorthand used in the comments below (array index = byte offset / 4):
 *   in = arg1 in %eax : 18 dwords at offsets 0..68, read and rewritten in place
 *   o1 = arg2 in %esi : 18 dwords at offsets 0..68, read only
 *   o2 = arg3 in %ecx : 18 dwords at offsets 0..68, written only
 *   w  = arg4 in %edx : 36 dwords at offsets 0..140, read only
 *   ts = arg5 in %ebx : written at 18 "slots", slot k = byte offset k*128
 * NOTE(review): these presumably correspond to inbuf, o1, o2, wintab and tsbuf
 * of the C dct36() this file was generated from -- confirm against the caller.
 *
 * The external tables COS9 and tfcos36 are read through absolute addresses.
 *
 * Lane notation: (lo, hi) are the low and high dword of an MMX register;
 * "in[2..3]" means the register pair (in[2], in[3]) loaded with one movq.
 *
 * Values that stay live for the whole computation:
 *   %mm0 = in[6..7]   * COS9[3]
 *   %mm1 = in[12..13] * COS9[6]
 *   %mm7 = (1.0, 0.0)
 *
 * %ebp, %esi and %ebx are saved on entry and restored on exit; %eax, %ecx and
 * %edx are left holding the argument pointers.  No return value is set up.
 * The MMX state is cleared with femms before returning.
 */
48 ASM_NAME(dct36_3dnowext):
49 pushl %ebp
50 movl %esp, %ebp
51 pushl %esi
52 pushl %ebx
/* Load the five arguments into registers (see the register table above). */
53 movl 8(%ebp), %eax
54 movl 12(%ebp), %esi
55 movl 16(%ebp), %ecx
56 movl 20(%ebp), %edx
57 movl 24(%ebp), %ebx
58 /* APP */
/*
 * Pre-pass 1: in[i] = in[i-1] + in[i] for i = 1..17, each sum using the values
 * from before this pass.  Two results are produced per pfadd; psrlq moves the
 * previous pair's upper element down and punpckldq pairs it with the next one.
 */
59 movq (%eax),%mm0
60 movq 4(%eax),%mm1
61 pfadd %mm1,%mm0
62 movq %mm0,4(%eax)
63 psrlq $32,%mm1
64 movq 12(%eax),%mm2
65 punpckldq %mm2,%mm1
66 pfadd %mm2,%mm1
67 movq %mm1,12(%eax)
68 psrlq $32,%mm2
69 movq 20(%eax),%mm3
70 punpckldq %mm3,%mm2
71 pfadd %mm3,%mm2
72 movq %mm2,20(%eax)
73 psrlq $32,%mm3
74 movq 28(%eax),%mm4
75 punpckldq %mm4,%mm3
76 pfadd %mm4,%mm3
77 movq %mm3,28(%eax)
78 psrlq $32,%mm4
79 movq 36(%eax),%mm5
80 punpckldq %mm5,%mm4
81 pfadd %mm5,%mm4
82 movq %mm4,36(%eax)
83 psrlq $32,%mm5
84 movq 44(%eax),%mm6
85 punpckldq %mm6,%mm5
86 pfadd %mm6,%mm5
87 movq %mm5,44(%eax)
88 psrlq $32,%mm6
89 movq 52(%eax),%mm7
90 punpckldq %mm7,%mm6
91 pfadd %mm7,%mm6
92 movq %mm6,52(%eax)
93 psrlq $32,%mm7
94 movq 60(%eax),%mm0
95 punpckldq %mm0,%mm7
96 pfadd %mm0,%mm7
97 movq %mm7,60(%eax)
/* Last element is handled as a single dword: in[17] = in[16] + in[17]. */
98 psrlq $32,%mm0
99 movd 68(%eax),%mm1
100 pfadd %mm1,%mm0
101 movd %mm0,68(%eax)
/*
 * Pre-pass 2: in[i] = in[i-2] + in[i] for odd i = 3..17, again using the values
 * from before this pass.  Each pfadd yields two consecutive odd elements,
 * which are stored one dword at a time.
 */
102 movd 4(%eax),%mm0
103 movd 12(%eax),%mm1
104 punpckldq %mm1,%mm0
105 punpckldq 20(%eax),%mm1
106 pfadd %mm1,%mm0
107 movd %mm0,12(%eax)
108 psrlq $32,%mm0
109 movd %mm0,20(%eax)
110 psrlq $32,%mm1
111 movd 28(%eax),%mm2
112 punpckldq %mm2,%mm1
113 punpckldq 36(%eax),%mm2
114 pfadd %mm2,%mm1
115 movd %mm1,28(%eax)
116 psrlq $32,%mm1
117 movd %mm1,36(%eax)
118 psrlq $32,%mm2
119 movd 44(%eax),%mm3
120 punpckldq %mm3,%mm2
121 punpckldq 52(%eax),%mm3
122 pfadd %mm3,%mm2
123 movd %mm2,44(%eax)
124 psrlq $32,%mm2
125 movd %mm2,52(%eax)
126 psrlq $32,%mm3
127 movd 60(%eax),%mm4
128 punpckldq %mm4,%mm3
129 punpckldq 68(%eax),%mm4
130 pfadd %mm4,%mm3
131 movd %mm3,60(%eax)
132 psrlq $32,%mm3
133 movd %mm3,68(%eax)
/*
 * Products reused by several groups below; %mm0 and %mm1 are not written
 * again after this point:
 *   mm0 = in[6..7] * COS9[3],  mm1 = in[12..13] * COS9[6]
 * (movd + punpckldq reg,reg broadcasts one table entry to both lanes.)
 */
134 movq 24(%eax),%mm0
135 movq 48(%eax),%mm1
136 movd ASM_NAME(COS9)+12,%mm2
137 punpckldq %mm2,%mm2
138 movd ASM_NAME(COS9)+24,%mm3
139 punpckldq %mm3,%mm3
140 pfmul %mm2,%mm0
141 pfmul %mm3,%mm1
/*
 * mm7 = (1.0, 0.0): the integer pair (1, 0) converted to float by pi2fd.
 * %eax is borrowed for the constant and restored via the stack.
 */
142 pushl %eax
143 movl $1,%eax
144 movd %eax,%mm7
145 pi2fd %mm7,%mm7
146 popl %eax
/*
 * Group A inputs:
 *   t1 (mm2) = in[2..3]*COS9[1] + mm0 + in[10..11]*COS9[5] + in[14..15]*COS9[7]
 *   t2 (mm3) = in[0..1] + in[4..5]*COS9[2] + in[8..9]*COS9[4] + mm1
 *              + in[16..17]*COS9[8]
 */
147 movq 8(%eax),%mm2
148 movd ASM_NAME(COS9)+4,%mm3
149 punpckldq %mm3,%mm3
150 pfmul %mm3,%mm2
151 pfadd %mm0,%mm2
152 movq 40(%eax),%mm3
153 movd ASM_NAME(COS9)+20,%mm4
154 punpckldq %mm4,%mm4
155 pfmul %mm4,%mm3
156 pfadd %mm3,%mm2
157 movq 56(%eax),%mm3
158 movd ASM_NAME(COS9)+28,%mm4
159 punpckldq %mm4,%mm4
160 pfmul %mm4,%mm3
161 pfadd %mm3,%mm2
162 movq (%eax),%mm3
163 movq 16(%eax),%mm4
164 movd ASM_NAME(COS9)+8,%mm5
165 punpckldq %mm5,%mm5
166 pfmul %mm5,%mm4
167 pfadd %mm4,%mm3
168 movq 32(%eax),%mm4
169 movd ASM_NAME(COS9)+16,%mm5
170 punpckldq %mm5,%mm5
171 pfmul %mm5,%mm4
172 pfadd %mm4,%mm3
173 pfadd %mm1,%mm3
174 movq 64(%eax),%mm4
175 movd ASM_NAME(COS9)+32,%mm5
176 punpckldq %mm5,%mm5
177 pfmul %mm5,%mm4
178 pfadd %mm4,%mm3
/*
 * Output step, sum form, tfcos36[0].  The same instruction pattern is used
 * for every output pair below; only the table indices change.
 *   (s0, s1) = (t1 + t2) * (1.0, tfcos36[0])      -> mm4
 *   tmp = s0 + s1 (pfacc puts it in both lanes)   -> mm5
 *   o2[8] = tmp * w[26],  o2[9] = tmp * w[27]
 *   d = s0 - s1
 *   ts slot 8 = o1[8] + d * w[8],  ts slot 9 = o1[9] + d * w[9]
 */
179 movq %mm2,%mm4
180 pfadd %mm3,%mm4
181 movq %mm7,%mm5
182 punpckldq ASM_NAME(tfcos36)+0,%mm5
183 pfmul %mm5,%mm4
184 movq %mm4,%mm5
185 pfacc %mm5,%mm5
186 movd 108(%edx),%mm6
187 punpckldq 104(%edx),%mm6
188 pfmul %mm6,%mm5
/* Swap lanes so the two adjacent outputs can be written with a single movq. */
189 pswapd %mm5,%mm5
190 movq %mm5,32(%ecx)
/*
 * After punpckldq/pfsub the high lane of mm5 is s0 - s1 (the low lane is
 * scratch); punpckhdq then copies that difference into both lanes.
 */
191 movq %mm4,%mm6
192 punpckldq %mm6,%mm5
193 pfsub %mm6,%mm5
194 punpckhdq %mm5,%mm5
195 movd 32(%edx),%mm6
196 punpckldq 36(%edx),%mm6
197 pfmul %mm6,%mm5
198 movd 32(%esi),%mm6
199 punpckldq 36(%esi),%mm6
200 pfadd %mm6,%mm5
201 movd %mm5,1024(%ebx)
202 psrlq $32,%mm5
203 movd %mm5,1152(%ebx)
/*
 * Output step, difference form, tfcos36[8] (group A inputs):
 *   (s0, s1) = (t2 - t1) * (1.0, tfcos36[8]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[17] = tmp * w[35],  o2[0] = tmp * w[18]
 *   ts slot 0 = o1[0] + d * w[0],  ts slot 17 = o1[17] + d * w[17]
 */
204 movq %mm3,%mm4
205 pfsub %mm2,%mm4
206 movq %mm7,%mm5
207 punpckldq ASM_NAME(tfcos36)+32,%mm5
208 pfmul %mm5,%mm4
209 movq %mm4,%mm5
210 pfacc %mm5,%mm5
211 movd 140(%edx),%mm6
212 punpckldq 72(%edx),%mm6
213 pfmul %mm6,%mm5
214 movd %mm5,68(%ecx)
215 psrlq $32,%mm5
216 movd %mm5,0(%ecx)
217 movq %mm4,%mm6
218 punpckldq %mm6,%mm5
219 pfsub %mm6,%mm5
220 punpckhdq %mm5,%mm5
221 movd 0(%edx),%mm6
222 punpckldq 68(%edx),%mm6
223 pfmul %mm6,%mm5
224 movd 0(%esi),%mm6
225 punpckldq 68(%esi),%mm6
226 pfadd %mm6,%mm5
227 movd %mm5,0(%ebx)
228 psrlq $32,%mm5
229 movd %mm5,2176(%ebx)
/*
 * Group B inputs:
 *   t1 (mm2) = (in[2..3] - in[10..11] - in[14..15]) * COS9[3]
 *   t2 (mm3) = (in[4..5] - in[8..9] - in[16..17]) * COS9[6]
 *              - in[12..13] + in[0..1]
 */
230 movq 8(%eax),%mm2
231 movq 40(%eax),%mm3
232 pfsub %mm3,%mm2
233 movq 56(%eax),%mm3
234 pfsub %mm3,%mm2
235 movd ASM_NAME(COS9)+12,%mm3
236 punpckldq %mm3,%mm3
237 pfmul %mm3,%mm2
238 movq 16(%eax),%mm3
239 movq 32(%eax),%mm4
240 pfsub %mm4,%mm3
241 movq 64(%eax),%mm4
242 pfsub %mm4,%mm3
243 movd ASM_NAME(COS9)+24,%mm4
244 punpckldq %mm4,%mm4
245 pfmul %mm4,%mm3
246 movq 48(%eax),%mm4
247 pfsub %mm4,%mm3
248 movq (%eax),%mm4
249 pfadd %mm4,%mm3
/*
 * Output step, sum form, tfcos36[1]:
 *   (s0, s1) = (t1 + t2) * (1.0, tfcos36[1]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[10] = tmp * w[28],  o2[7] = tmp * w[25]
 *   ts slot 7 = o1[7] + d * w[7],  ts slot 10 = o1[10] + d * w[10]
 */
250 movq %mm2,%mm4
251 pfadd %mm3,%mm4
252 movq %mm7,%mm5
253 punpckldq ASM_NAME(tfcos36)+4,%mm5
254 pfmul %mm5,%mm4
255 movq %mm4,%mm5
256 pfacc %mm5,%mm5
257 movd 112(%edx),%mm6
258 punpckldq 100(%edx),%mm6
259 pfmul %mm6,%mm5
260 movd %mm5,40(%ecx)
261 psrlq $32,%mm5
262 movd %mm5,28(%ecx)
263 movq %mm4,%mm6
264 punpckldq %mm6,%mm5
265 pfsub %mm6,%mm5
266 punpckhdq %mm5,%mm5
267 movd 28(%edx),%mm6
268 punpckldq 40(%edx),%mm6
269 pfmul %mm6,%mm5
270 movd 28(%esi),%mm6
271 punpckldq 40(%esi),%mm6
272 pfadd %mm6,%mm5
273 movd %mm5,896(%ebx)
274 psrlq $32,%mm5
275 movd %mm5,1280(%ebx)
/*
 * Output step, difference form, tfcos36[7] (group B inputs):
 *   (s0, s1) = (t2 - t1) * (1.0, tfcos36[7]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[16] = tmp * w[34],  o2[1] = tmp * w[19]
 *   ts slot 1 = o1[1] + d * w[1],  ts slot 16 = o1[16] + d * w[16]
 */
276 movq %mm3,%mm4
277 pfsub %mm2,%mm4
278 movq %mm7,%mm5
279 punpckldq ASM_NAME(tfcos36)+28,%mm5
280 pfmul %mm5,%mm4
281 movq %mm4,%mm5
282 pfacc %mm5,%mm5
283 movd 136(%edx),%mm6
284 punpckldq 76(%edx),%mm6
285 pfmul %mm6,%mm5
286 movd %mm5,64(%ecx)
287 psrlq $32,%mm5
288 movd %mm5,4(%ecx)
289 movq %mm4,%mm6
290 punpckldq %mm6,%mm5
291 pfsub %mm6,%mm5
292 punpckhdq %mm5,%mm5
293 movd 4(%edx),%mm6
294 punpckldq 64(%edx),%mm6
295 pfmul %mm6,%mm5
296 movd 4(%esi),%mm6
297 punpckldq 64(%esi),%mm6
298 pfadd %mm6,%mm5
299 movd %mm5,128(%ebx)
300 psrlq $32,%mm5
301 movd %mm5,2048(%ebx)
/*
 * Group C inputs:
 *   t1 (mm2) = in[2..3]*COS9[5] - mm0 - in[10..11]*COS9[7] + in[14..15]*COS9[1]
 *   t2 (mm3) = in[0..1] - in[4..5]*COS9[8] - in[8..9]*COS9[2] + mm1
 *              + in[16..17]*COS9[4]
 */
302 movq 8(%eax),%mm2
303 movd ASM_NAME(COS9)+20,%mm3
304 punpckldq %mm3,%mm3
305 pfmul %mm3,%mm2
306 pfsub %mm0,%mm2
307 movq 40(%eax),%mm3
308 movd ASM_NAME(COS9)+28,%mm4
309 punpckldq %mm4,%mm4
310 pfmul %mm4,%mm3
311 pfsub %mm3,%mm2
312 movq 56(%eax),%mm3
313 movd ASM_NAME(COS9)+4,%mm4
314 punpckldq %mm4,%mm4
315 pfmul %mm4,%mm3
316 pfadd %mm3,%mm2
317 movq (%eax),%mm3
318 movq 16(%eax),%mm4
319 movd ASM_NAME(COS9)+32,%mm5
320 punpckldq %mm5,%mm5
321 pfmul %mm5,%mm4
322 pfsub %mm4,%mm3
323 movq 32(%eax),%mm4
324 movd ASM_NAME(COS9)+8,%mm5
325 punpckldq %mm5,%mm5
326 pfmul %mm5,%mm4
327 pfsub %mm4,%mm3
328 pfadd %mm1,%mm3
329 movq 64(%eax),%mm4
330 movd ASM_NAME(COS9)+16,%mm5
331 punpckldq %mm5,%mm5
332 pfmul %mm5,%mm4
333 pfadd %mm4,%mm3
/*
 * Output step, sum form, tfcos36[2]:
 *   (s0, s1) = (t1 + t2) * (1.0, tfcos36[2]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[11] = tmp * w[29],  o2[6] = tmp * w[24]
 *   ts slot 6 = o1[6] + d * w[6],  ts slot 11 = o1[11] + d * w[11]
 */
334 movq %mm2,%mm4
335 pfadd %mm3,%mm4
336 movq %mm7,%mm5
337 punpckldq ASM_NAME(tfcos36)+8,%mm5
338 pfmul %mm5,%mm4
339 movq %mm4,%mm5
340 pfacc %mm5,%mm5
341 movd 116(%edx),%mm6
342 punpckldq 96(%edx),%mm6
343 pfmul %mm6,%mm5
344 movd %mm5,44(%ecx)
345 psrlq $32,%mm5
346 movd %mm5,24(%ecx)
347 movq %mm4,%mm6
348 punpckldq %mm6,%mm5
349 pfsub %mm6,%mm5
350 punpckhdq %mm5,%mm5
351 movd 24(%edx),%mm6
352 punpckldq 44(%edx),%mm6
353 pfmul %mm6,%mm5
354 movd 24(%esi),%mm6
355 punpckldq 44(%esi),%mm6
356 pfadd %mm6,%mm5
357 movd %mm5,768(%ebx)
358 psrlq $32,%mm5
359 movd %mm5,1408(%ebx)
/*
 * Output step, difference form, tfcos36[6] (group C inputs):
 *   (s0, s1) = (t2 - t1) * (1.0, tfcos36[6]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[15] = tmp * w[33],  o2[2] = tmp * w[20]
 *   ts slot 2 = o1[2] + d * w[2],  ts slot 15 = o1[15] + d * w[15]
 */
360 movq %mm3,%mm4
361 pfsub %mm2,%mm4
362 movq %mm7,%mm5
363 punpckldq ASM_NAME(tfcos36)+24,%mm5
364 pfmul %mm5,%mm4
365 movq %mm4,%mm5
366 pfacc %mm5,%mm5
367 movd 132(%edx),%mm6
368 punpckldq 80(%edx),%mm6
369 pfmul %mm6,%mm5
370 movd %mm5,60(%ecx)
371 psrlq $32,%mm5
372 movd %mm5,8(%ecx)
373 movq %mm4,%mm6
374 punpckldq %mm6,%mm5
375 pfsub %mm6,%mm5
376 punpckhdq %mm5,%mm5
377 movd 8(%edx),%mm6
378 punpckldq 60(%edx),%mm6
379 pfmul %mm6,%mm5
380 movd 8(%esi),%mm6
381 punpckldq 60(%esi),%mm6
382 pfadd %mm6,%mm5
383 movd %mm5,256(%ebx)
384 psrlq $32,%mm5
385 movd %mm5,1920(%ebx)
/*
 * Group D inputs:
 *   t1 (mm2) = in[2..3]*COS9[7] - mm0 + in[10..11]*COS9[1] - in[14..15]*COS9[5]
 *   t2 (mm3) = in[0..1] - in[4..5]*COS9[4] + in[8..9]*COS9[8] + mm1
 *              - in[16..17]*COS9[2]
 */
386 movq 8(%eax),%mm2
387 movd ASM_NAME(COS9)+28,%mm3
388 punpckldq %mm3,%mm3
389 pfmul %mm3,%mm2
390 pfsub %mm0,%mm2
391 movq 40(%eax),%mm3
392 movd ASM_NAME(COS9)+4,%mm4
393 punpckldq %mm4,%mm4
394 pfmul %mm4,%mm3
395 pfadd %mm3,%mm2
396 movq 56(%eax),%mm3
397 movd ASM_NAME(COS9)+20,%mm4
398 punpckldq %mm4,%mm4
399 pfmul %mm4,%mm3
400 pfsub %mm3,%mm2
401 movq (%eax),%mm3
402 movq 16(%eax),%mm4
403 movd ASM_NAME(COS9)+16,%mm5
404 punpckldq %mm5,%mm5
405 pfmul %mm5,%mm4
406 pfsub %mm4,%mm3
407 movq 32(%eax),%mm4
408 movd ASM_NAME(COS9)+32,%mm5
409 punpckldq %mm5,%mm5
410 pfmul %mm5,%mm4
411 pfadd %mm4,%mm3
412 pfadd %mm1,%mm3
413 movq 64(%eax),%mm4
414 movd ASM_NAME(COS9)+8,%mm5
415 punpckldq %mm5,%mm5
416 pfmul %mm5,%mm4
417 pfsub %mm4,%mm3
/*
 * Output step, sum form, tfcos36[3]:
 *   (s0, s1) = (t1 + t2) * (1.0, tfcos36[3]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[12] = tmp * w[30],  o2[5] = tmp * w[23]
 *   ts slot 5 = o1[5] + d * w[5],  ts slot 12 = o1[12] + d * w[12]
 */
418 movq %mm2,%mm4
419 pfadd %mm3,%mm4
420 movq %mm7,%mm5
421 punpckldq ASM_NAME(tfcos36)+12,%mm5
422 pfmul %mm5,%mm4
423 movq %mm4,%mm5
424 pfacc %mm5,%mm5
425 movd 120(%edx),%mm6
426 punpckldq 92(%edx),%mm6
427 pfmul %mm6,%mm5
428 movd %mm5,48(%ecx)
429 psrlq $32,%mm5
430 movd %mm5,20(%ecx)
431 movq %mm4,%mm6
432 punpckldq %mm6,%mm5
433 pfsub %mm6,%mm5
434 punpckhdq %mm5,%mm5
435 movd 20(%edx),%mm6
436 punpckldq 48(%edx),%mm6
437 pfmul %mm6,%mm5
438 movd 20(%esi),%mm6
439 punpckldq 48(%esi),%mm6
440 pfadd %mm6,%mm5
441 movd %mm5,640(%ebx)
442 psrlq $32,%mm5
443 movd %mm5,1536(%ebx)
/*
 * Output step, difference form, tfcos36[5] (group D inputs):
 *   (s0, s1) = (t2 - t1) * (1.0, tfcos36[5]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[14] = tmp * w[32],  o2[3] = tmp * w[21]
 *   ts slot 3 = o1[3] + d * w[3],  ts slot 14 = o1[14] + d * w[14]
 */
444 movq %mm3,%mm4
445 pfsub %mm2,%mm4
446 movq %mm7,%mm5
447 punpckldq ASM_NAME(tfcos36)+20,%mm5
448 pfmul %mm5,%mm4
449 movq %mm4,%mm5
450 pfacc %mm5,%mm5
451 movd 128(%edx),%mm6
452 punpckldq 84(%edx),%mm6
453 pfmul %mm6,%mm5
454 movd %mm5,56(%ecx)
455 psrlq $32,%mm5
456 movd %mm5,12(%ecx)
457 movq %mm4,%mm6
458 punpckldq %mm6,%mm5
459 pfsub %mm6,%mm5
460 punpckhdq %mm5,%mm5
461 movd 12(%edx),%mm6
462 punpckldq 56(%edx),%mm6
463 pfmul %mm6,%mm5
464 movd 12(%esi),%mm6
465 punpckldq 56(%esi),%mm6
466 pfadd %mm6,%mm5
467 movd %mm5,384(%ebx)
468 psrlq $32,%mm5
469 movd %mm5,1792(%ebx)
/*
 * Final pair, tfcos36[4].  A single combined input is used here:
 *   mm4 = in[0..1] - in[4..5] + in[8..9] - in[12..13] + in[16..17]
 *   (s0, s1) = mm4 * (1.0, tfcos36[4]);  tmp = s0 + s1;  d = s0 - s1
 *   o2[13] = tmp * w[31],  o2[4] = tmp * w[22]
 *   ts slot 4 = o1[4] + d * w[4],  ts slot 13 = o1[13] + d * w[13]
 */
470 movq (%eax),%mm4
471 movq 16(%eax),%mm3
472 pfsub %mm3,%mm4
473 movq 32(%eax),%mm3
474 pfadd %mm3,%mm4
475 movq 48(%eax),%mm3
476 pfsub %mm3,%mm4
477 movq 64(%eax),%mm3
478 pfadd %mm3,%mm4
479 movq %mm7,%mm5
480 punpckldq ASM_NAME(tfcos36)+16,%mm5
481 pfmul %mm5,%mm4
482 movq %mm4,%mm5
483 pfacc %mm5,%mm5
484 movd 124(%edx),%mm6
485 punpckldq 88(%edx),%mm6
486 pfmul %mm6,%mm5
487 movd %mm5,52(%ecx)
488 psrlq $32,%mm5
489 movd %mm5,16(%ecx)
490 movq %mm4,%mm6
491 punpckldq %mm6,%mm5
492 pfsub %mm6,%mm5
493 punpckhdq %mm5,%mm5
494 movd 16(%edx),%mm6
495 punpckldq 52(%edx),%mm6
496 pfmul %mm6,%mm5
497 movd 16(%esi),%mm6
498 punpckldq 52(%esi),%mm6
499 pfadd %mm6,%mm5
500 movd %mm5,512(%ebx)
501 psrlq $32,%mm5
502 movd %mm5,1664(%ebx)
/* Leave MMX/3DNow! state so x87 code can run again after return. */
503 femms
504
505 /* NO_APP */
/* Restore the registers saved in the prologue, in reverse order. */
506 popl %ebx
507 popl %esi
508 leave
509 ret
510 /* .size ASM_NAME(dct36_3dnowext), .-ASM_NAME(dct36_3dnowext) */
511
512 /* Mark non-executable stack. */
/*
 * On Linux ELF targets only: emit an empty .note.GNU-stack section, the GNU
 * toolchain's marker that this object does not need an executable stack.
 */
513 #if defined(__linux__) && defined(__ELF__)
514 .section .note.GNU-stack,"",%progbits
515 #endif