Mercurial > SDL_sound_CoreAudio
diff decoders/libmpg123/decode_sse3d.h @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems about MP3's not seeking, or most modern MP3's not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/decoders/libmpg123/decode_sse3d.h Fri Jan 30 02:44:47 2009 -0500 @@ -0,0 +1,247 @@ +/* + decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic) + + copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by the mysterious higway for MMX (apparently) + then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec + Both have agreed to distribution under LGPL 2.1 . + + Transformed back into standalone asm, with help of + gcc -S -DHAVE_CONFIG_H -I. -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c} + + The difference between SSE and 3DNowExt is the dct64 function and the synth function name. + This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S... + That's not memory efficient since there's doubled code, but it's easier than giving another function pointer. + Maybe I'll change it in future, but now I need something that works. + + Original comment from MPlayer source follows: +*/ + +/* + * this code comes under GPL + * This code was taken from http://www.mpg123.org + * See ChangeLog of mpg123-0.59s-pre.1 for detail + * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> + * + * Local ChangeLog: + * - Partial loops unrolling and removing MOVW insn from loops +*/ + +#include "mangle.h" + + .data + ALIGN8 +one_null: + .long -65536 + .long -65536 + ALIGN8 +null_one: + .long 65535 + .long 65535 + + .text + ALIGN16,,15 + /* void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins) */ +.globl SYNTH_NAME +SYNTH_NAME: + pushl %ebp +/* stack:0=ebp 4=back 8=bandptr 12=channel 16=samples 20=buffs 24=bo 28=decwins */ + movl %esp, %ebp +/* Now the old stack addresses are preserved via %epb. */ + subl $4,%esp /* What has been called temp before. */ + pushl %edi + pushl %esi + pushl %ebx +#define TEMP 12(%esp) +#APP + movl 12(%ebp),%ecx + movl 16(%ebp),%edi + movl $15,%ebx + movl 24(%ebp),%edx + leal (%edi,%ecx,2),%edi + decl %ecx + movl 20(%ebp),%esi + movl (%edx),%eax + jecxz .L01 + decl %eax + andl %ebx,%eax + leal 1088(%esi),%esi + movl %eax,(%edx) + .L01: + leal (%esi,%eax,2),%edx + movl %eax,TEMP + incl %eax + andl %ebx,%eax + leal 544(%esi,%eax,2),%ecx + incl %ebx + testl $1, %eax + jnz .L02 + xchgl %edx,%ecx + incl TEMP + leal 544(%esi),%esi + .L02: + emms + pushl 8(%ebp) + pushl %edx + pushl %ecx + call MPL_DCT64 + addl $12, %esp + leal 1(%ebx), %ecx + subl TEMP,%ebx + pushl %ecx + /* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */ + movl 28(%ebp),%ecx + leal (%ecx,%ebx,2), %edx + movl (%esp),%ecx /* restore, but leave value on stack */ + shrl $1, %ecx + ALIGN16 + .L03: + movq (%edx),%mm0 + movq 64(%edx),%mm4 + pmaddwd (%esi),%mm0 + pmaddwd 32(%esi),%mm4 + movq 8(%edx),%mm1 + movq 72(%edx),%mm5 + pmaddwd 8(%esi),%mm1 + pmaddwd 40(%esi),%mm5 + movq 16(%edx),%mm2 + movq 80(%edx),%mm6 + pmaddwd 16(%esi),%mm2 + pmaddwd 48(%esi),%mm6 + movq 24(%edx),%mm3 + movq 88(%edx),%mm7 + pmaddwd 24(%esi),%mm3 + pmaddwd 56(%esi),%mm7 + paddd %mm1,%mm0 + paddd %mm5,%mm4 + paddd %mm2,%mm0 + paddd %mm6,%mm4 + paddd %mm3,%mm0 + paddd %mm7,%mm4 + movq %mm0,%mm1 + movq %mm4,%mm5 + psrlq $32,%mm1 + psrlq $32,%mm5 + paddd %mm1,%mm0 + paddd %mm5,%mm4 + psrad $13,%mm0 + psrad $13,%mm4 + packssdw %mm0,%mm0 + packssdw %mm4,%mm4 + movq (%edi), %mm1 + punpckldq %mm4, %mm0 + pand one_null, %mm1 + pand null_one, %mm0 + por %mm0, %mm1 + movq %mm1,(%edi) + leal 64(%esi),%esi + leal 128(%edx),%edx + leal 8(%edi),%edi + decl %ecx + jnz .L03 + popl %ecx + andl $1, %ecx + jecxz .next_loop + movq (%edx),%mm0 + pmaddwd (%esi),%mm0 + movq 8(%edx),%mm1 + pmaddwd 8(%esi),%mm1 + movq 16(%edx),%mm2 + pmaddwd 16(%esi),%mm2 + movq 24(%edx),%mm3 + pmaddwd 24(%esi),%mm3 + paddd %mm1,%mm0 + paddd %mm2,%mm0 + paddd %mm3,%mm0 + movq %mm0,%mm1 + psrlq $32,%mm1 + paddd %mm1,%mm0 + psrad $13,%mm0 + packssdw %mm0,%mm0 + movd %mm0,%eax + movw %ax, (%edi) + leal 32(%esi),%esi + leal 64(%edx),%edx + leal 4(%edi),%edi + .next_loop: + subl $64,%esi + movl $7,%ecx + ALIGN16 + .L04: + movq (%edx),%mm0 + movq 64(%edx),%mm4 + pmaddwd (%esi),%mm0 + pmaddwd -32(%esi),%mm4 + movq 8(%edx),%mm1 + movq 72(%edx),%mm5 + pmaddwd 8(%esi),%mm1 + pmaddwd -24(%esi),%mm5 + movq 16(%edx),%mm2 + movq 80(%edx),%mm6 + pmaddwd 16(%esi),%mm2 + pmaddwd -16(%esi),%mm6 + movq 24(%edx),%mm3 + movq 88(%edx),%mm7 + pmaddwd 24(%esi),%mm3 + pmaddwd -8(%esi),%mm7 + paddd %mm1,%mm0 + paddd %mm5,%mm4 + paddd %mm2,%mm0 + paddd %mm6,%mm4 + paddd %mm3,%mm0 + paddd %mm7,%mm4 + movq %mm0,%mm1 + movq %mm4,%mm5 + psrlq $32,%mm1 + psrlq $32,%mm5 + paddd %mm0,%mm1 + paddd %mm4,%mm5 + psrad $13,%mm1 + psrad $13,%mm5 + packssdw %mm1,%mm1 + packssdw %mm5,%mm5 + psubd %mm0,%mm0 + psubd %mm4,%mm4 + psubsw %mm1,%mm0 + psubsw %mm5,%mm4 + movq (%edi), %mm1 + punpckldq %mm4, %mm0 + pand one_null, %mm1 + pand null_one, %mm0 + por %mm0, %mm1 + movq %mm1,(%edi) + subl $64,%esi + addl $128,%edx + leal 8(%edi),%edi + decl %ecx + jnz .L04 + movq (%edx),%mm0 + pmaddwd (%esi),%mm0 + movq 8(%edx),%mm1 + pmaddwd 8(%esi),%mm1 + movq 16(%edx),%mm2 + pmaddwd 16(%esi),%mm2 + movq 24(%edx),%mm3 + pmaddwd 24(%esi),%mm3 + paddd %mm1,%mm0 + paddd %mm2,%mm0 + paddd %mm3,%mm0 + movq %mm0,%mm1 + psrlq $32,%mm1 + paddd %mm0,%mm1 + psrad $13,%mm1 + packssdw %mm1,%mm1 + psubd %mm0,%mm0 + psubsw %mm1,%mm0 + movd %mm0,%eax + movw %ax,(%edi) + emms + +#NO_APP + popl %ebx + popl %esi + popl %edi + addl $4,%esp + popl %ebp + ret