diff decoders/libmpg123/decode_mmx.S @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems about MP3s not seeking, or most modern MP3s not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123...there are MMX, 3DNow, SSE, Altivec, etc. decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/decoders/libmpg123/decode_mmx.S	Fri Jan 30 02:44:47 2009 -0500
@@ -0,0 +1,129 @@
+/*
+	decode_MMX.s: MMX optimized synth
+
+	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
+	see COPYING and AUTHORS files in distribution or http://mpg123.org
+	initially written by the mysterious higway (apparently)
+
+ Thomas' words about a note:
+ Initially, I found the note "this code comes under GPL" in this file.
+ After asking Michael about legal status of the MMX files, he said that he got them without any comment and thus I believe that the GPL comment was made by Michael, since he made mpg123 GPL at some time - and marked some files that way, but not all.
+ Based on that thought, I now consider this file along with the other parts of higway's MMX optimization to be licensed under LGPL 2.1 by Michael's decision.
+*/
+
+#include "mangle.h"
+
+.text
+
+.globl ASM_NAME(synth_1to1_MMX)
+/* int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins); -- 32-bit x86, all arguments on the stack. Calls dct64_MMX on bandPtr, then stores 17 + 15 = 32 saturated 16-bit samples, one every 4 bytes, starting at out + 2*channel bytes. */
+ASM_NAME(synth_1to1_MMX): /* NOTE(review): %eax is never set explicitly; at ret it holds the last movd result. decwins is declared float * but is read with pmaddwd (packed 16-bit words) -- confirm against the caller. */
+        pushl %ebp /* save ebp/edi/esi/ebx; all four are restored before ret */
+        pushl %edi
+        pushl %esi
+        pushl %ebx
+/* stack (byte offsets from %esp after the four pushes): 0=ebx, 4=esi, 8=edi, 12=ebp, 16=return address, 20=bandPtr, 24=channel, 28=out, 32=buffs, 36=bo, 40=decwins */
+        movl 24(%esp),%ecx /* ecx = channel */
+        movl 28(%esp),%edi /* edi = out */
+        movl $15,%ebx /* ebx = 15, used below as the wrap mask for the index read from *bo */
+        movl 36(%esp),%edx /* edx = bo */
+        leal (%edi,%ecx,2),%edi /* edi = out + 2*channel bytes */
+	decl %ecx /* ecx = channel - 1, zero only when channel == 1 */
+        movl 32(%esp),%esi /* esi = buffs */
+        movl (%edx),%eax /* eax = *bo */
+        jecxz .L1 /* channel == 1: keep *bo and esi unchanged */
+        decl %eax /* channel != 1: step the index back by one ... */
+        andl %ebx,%eax /* ... wrapping it into 0..15 */
+        leal 1088(%esi),%esi /* esi = buffs + 1088 bytes */
+        movl %eax,(%edx) /* *bo = updated index */
+.L1:
+        leal (%esi,%eax,2),%edx /* edx = esi + 2*index */
+        movl %eax,%ebp /* ebp = index, needed after the call for the decwins offset */
+        incl %eax
+        pushl 20(%esp) /* bandPtr: pushed first, so it is the last stack argument of dct64_MMX */
+        andl %ebx,%eax /* eax = (index + 1) & 15 */
+        leal 544(%esi,%eax,2),%ecx /* ecx = esi + 544 + 2*((index + 1) & 15) */
+        incl %ebx /* ebx = 16 */
+	testl $1, %eax /* parity of (index + 1) & 15 */
+	jnz .L2 /* odd: keep edx, ecx, ebp and esi as computed */
+        xchgl %edx,%ecx /* even: swap the two pointers passed to dct64_MMX ... */
+	incl %ebp /* ... use index + 1 for the decwins offset ... */
+        leal 544(%esi),%esi /* ... and move esi forward by 544 bytes */
+.L2:
+        pushl %edx /* second stack argument */
+        pushl %ecx /* first stack argument */
+        call ASM_NAME(dct64_MMX) /* NOTE(review): the code below relies on ebx, ebp, esi and edi surviving this call */
+        addl $12,%esp /* drop the three arguments */
+/* stack like before: three arguments were pushed and removed again, so the offsets listed at the top apply once more */
+	leal 1(%ebx), %ecx /* ecx = ebx + 1 = 17 iterations for .L3 (with ebx still 16) */
+        subl %ebp,%ebx /* ebx = 16 - ebp */
+	pushl %eax /* eax is saved around the load below; this shifts stack offsets by 4 */
+	movl 44(%esp),%eax /* decwins (offset 40 + 4 for the push above) */
+	leal (%eax,%ebx,2), %edx /* edx = decwins + 2*(16 - ebp) bytes */
+	popl %eax
+.L3: /* first loop: each pass sums 16 word products of (edx) and (esi) into one output sample */
+        movq  (%edx),%mm0 /* four 16-bit words from edx ... */
+        pmaddwd (%esi),%mm0 /* ... multiplied by four words from esi, added pairwise into two dwords */
+        movq  8(%edx),%mm1
+        pmaddwd 8(%esi),%mm1
+        movq  16(%edx),%mm2
+        pmaddwd 16(%esi),%mm2
+        movq  24(%edx),%mm3
+        pmaddwd 24(%esi),%mm3
+        paddd %mm1,%mm0 /* accumulate the four partial results in mm0 */
+        paddd %mm2,%mm0
+        paddd %mm3,%mm0
+        movq  %mm0,%mm1
+        psrlq $32,%mm1 /* mm1 low dword = mm0 high dword */
+        paddd %mm1,%mm0 /* mm0 low dword = sum of all 16 products */
+        psrad $13,%mm0 /* arithmetic shift right by 13 bits */
+        packssdw %mm0,%mm0 /* saturate to signed 16 bits */
+        movd %mm0,%eax
+	movw %ax, (%edi) /* store one 16-bit sample */
+
+        leal 32(%esi),%esi /* esi advances 32 bytes per sample */
+        leal 64(%edx),%edx /* edx advances 64 bytes per sample */
+        leal 4(%edi),%edi /* output advances 4 bytes per sample */
+        loop .L3 /* decrement ecx, repeat while it is non-zero */
+
+
+        subl $64,%esi /* move esi back by 64 bytes before the second loop */
+        movl $15,%ecx /* 15 iterations for .L4 */
+.L4: /* second loop: same 16-product sum, but the result is negated and esi walks backwards */
+        movq  (%edx),%mm0
+        pmaddwd (%esi),%mm0
+        movq  8(%edx),%mm1
+        pmaddwd 8(%esi),%mm1
+        movq  16(%edx),%mm2
+        pmaddwd 16(%esi),%mm2
+        movq  24(%edx),%mm3
+        pmaddwd 24(%esi),%mm3
+        paddd %mm1,%mm0
+        paddd %mm2,%mm0
+        paddd %mm3,%mm0
+        movq  %mm0,%mm1
+        psrlq $32,%mm1 /* mm1 low dword = mm0 high dword */
+        paddd %mm0,%mm1 /* mm1 low dword = sum of all 16 products */
+        psrad $13,%mm1 /* arithmetic shift right by 13 bits */
+        packssdw %mm1,%mm1 /* saturate to signed 16 bits */
+        psubd %mm0,%mm0 /* mm0 = 0 */
+        psubsw %mm1,%mm0 /* mm0 = 0 - mm1 with signed 16-bit saturation */
+        movd %mm0,%eax
+	movw %ax,(%edi) /* store one 16-bit sample */
+
+        subl $32,%esi /* esi steps back 32 bytes per sample */
+        addl $64,%edx /* edx advances 64 bytes per sample */
+        leal 4(%edi),%edi /* output advances 4 bytes per sample */
+        loop .L4 /* decrement ecx, repeat while it is non-zero */
+	emms /* clear MMX state before returning */
+        popl %ebx /* restore the registers saved on entry, in reverse order */
+        popl %esi
+        popl %edi
+        popl %ebp
+        ret
+
+/* Mark non-executable stack. The empty .note.GNU-stack section is only emitted when both __linux__ and __ELF__ are defined. */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+