diff decoders/libmpg123/decode_i586_dither.S @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems about MP3's not seeking, or most modern MP3's not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123...there are MMX, 3DNow, SSE, Altivec, etc decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/decoders/libmpg123/decode_i586_dither.S	Fri Jan 30 02:44:47 2009 -0500
@@ -0,0 +1,372 @@
+/*
+	decode_i586_dither: asm synth with dither noise
+
+	copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
+	see COPYING and AUTHORS files in distribution or http://mpg123.org
+	initially written by Stefan Bieschewski as decode_i586.s without dither
+
+	This version uses "circular" 64k dither noise.
+	(Patch by Adrian <adrian.bacon@xs4all.nl>)
+
+	Thomas learned something about assembler and the stack while making this one thread safe (removing static data).
+*/
+
+#include "mangle.h"
+
+.data
+#ifndef __APPLE__
+		.section	.rodata
+#endif
+	ALIGN8
+.LC0:
+	.long 0x0,0x40dfffc0
+	ALIGN8
+.LC1:
+	.long 0x0,0xc0e00000
+	ALIGN8
+.text
+/* int synth_1to1_i586_asm_dither(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int bo_and_ditherindex[2], real *decwin); */
+.globl ASM_NAME(synth_1to1_i586_asm_dither)
+ASM_NAME(synth_1to1_i586_asm_dither):
+	subl $16,%esp
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %ebx
+/* stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo 56=decwin */
+#define BANDPTR 36(%esp)
+#define CHANNEL 40(%esp)
+#define OUT     44(%esp)
+#define BUFFS   48(%esp)
+#define BO      52(%esp)
+#define DECWIN  56(%esp)
+#define LOC0    16(%esp)
+#define LOC1    20(%esp)
+#define LOC2    24(%esp)
+#define DITHERINDEX  28(%esp)
+	movl BANDPTR,%eax
+	movl OUT,%esi
+	movl BO, %ebx
+	movl (%ebx),%ebp    /* get bo value */
+	movl 4(%ebx),%edi;  /* get the ditherindex behind bo */
+	movl %edi,DITHERINDEX
+	xorl %edi,%edi
+	cmpl %edi,CHANNEL
+	jne .L48
+	decl %ebp
+	andl $15,%ebp
+	movl %ebp,(%ebx)   /* save bo back */
+	movl BUFFS,%ecx
+	jmp .L49
+.L48:
+/*       In stereo mode , "rewind" dither pointer 32 samples , so 2nd channel */
+/*       has same dither values. Tested OK for mono and stereo MP2 and MP3 */
+	subl $128,DITHERINDEX /* better move to %edi for the two calculations? */
+	andl $0x0003fffc,DITHERINDEX
+	addl $2,%esi
+	movl BUFFS,%ecx
+	addl $2176,%ecx
+.L49:
+/* now the call of dct64 is prepared, stuff pushed to the stack, but soon after it's removed again */
+	testl $1,%ebp
+	je .L50
+	movl %ecx,%ebx
+	movl %ebp,LOC0
+	pushl %eax
+	movl LOC1,%edx
+	leal (%ebx,%edx,4),%eax
+	pushl %eax
+	movl LOC2,%eax
+	incl %eax
+	andl $15,%eax
+	leal 1088(,%eax,4),%eax
+	addl %ebx,%eax
+	jmp .L74
+.L50:
+	leal 1088(%ecx),%ebx
+	leal 1(%ebp),%edx
+	movl %edx,LOC0
+	pushl %eax
+	leal 1092(%ecx,%ebp,4),%eax
+	pushl %eax
+	leal (%ecx,%ebp,4),%eax
+.L74:
+	pushl %eax
+	call ASM_NAME(dct64_i386)
+	addl $12,%esp
+/* Now removed the parameters.
+   stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo */
+	movl LOC0,%edx
+	leal 0(,%edx,4),%edx
+	/* movl $ASM_NAME(decwin)+64,%eax */
+	movl DECWIN,%eax
+	addl $64,%eax
+	movl %eax,%ecx
+	subl %edx,%ecx
+	movl $16,%ebp
+.L55:
+	flds (%ecx)
+	fmuls (%ebx)
+	flds 4(%ecx)
+	fmuls 4(%ebx)
+	fxch %st(1)
+	flds 8(%ecx)
+	fmuls 8(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 12(%ecx)
+	fmuls 12(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 16(%ecx)
+	fmuls 16(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 20(%ecx)
+	fmuls 20(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 24(%ecx)
+	fmuls 24(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 28(%ecx)
+	fmuls 28(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 32(%ecx)
+	fmuls 32(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 36(%ecx)
+	fmuls 36(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 40(%ecx)
+	fmuls 40(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 44(%ecx)
+	fmuls 44(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 48(%ecx)
+	fmuls 48(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 52(%ecx)
+	fmuls 52(%ebx)
+	fxch %st(2)         
+	faddp %st,%st(1)
+	flds 56(%ecx)
+	fmuls 56(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 60(%ecx)
+	fmuls 60(%ebx)
+	fxch %st(2)
+	subl $4,%esp
+	faddp %st,%st(1)
+	fxch %st(1)
+	fsubrp %st,%st(1)
+
+	addl $4,DITHERINDEX
+	andl $0x0003fffc,DITHERINDEX
+	movl $ASM_NAME(dithernoise),%edi
+	addl DITHERINDEX,%edi	
+
+	fadd (%edi)
+
+/* fistpl and popl as a unit keep the stack unchanged */
+	fistpl (%esp)
+	popl %eax
+	cmpl $32767,%eax
+	jg 1f
+	cmpl $-32768,%eax
+	jl 2f
+	movw %ax,(%esi)
+	jmp 4f
+1:	movw $32767,(%esi)
+	jmp 3f
+2:	movw $-32768,(%esi)
+3:
+/*	incl %edi */
+4:
+.L54:
+	addl $64,%ebx
+	subl $-128,%ecx
+	addl $4,%esi
+	decl %ebp
+	jnz .L55
+	flds (%ecx)
+	fmuls (%ebx)
+	flds 8(%ecx)
+	fmuls 8(%ebx)
+	flds 16(%ecx)
+	fmuls 16(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 24(%ecx)
+	fmuls 24(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 32(%ecx)
+	fmuls 32(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 40(%ecx)
+	fmuls 40(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 48(%ecx)
+	fmuls 48(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 56(%ecx)
+	fmuls 56(%ebx)
+	fxch %st(2)
+	subl $4,%esp
+	faddp %st,%st(1)
+	fxch %st(1)
+	faddp %st,%st(1)
+
+	addl $4,DITHERINDEX
+	andl $0x0003fffc,DITHERINDEX
+	movl $ASM_NAME(dithernoise),%edi
+	addl DITHERINDEX,%edi	
+
+	fadd (%edi)
+/* fistpl and popl as a unit keep the stack unchanged */
+	fistpl (%esp)
+	popl %eax
+	cmpl $32767,%eax
+	jg 1f
+	cmpl $-32768,%eax
+	jl 2f
+	movw %ax,(%esi)
+	jmp 4f
+1:	movw $32767,(%esi)
+	jmp 3f
+2:	movw $-32768,(%esi)
+3:
+/*	incl %edi */
+4:
+.L62:
+	addl $-64,%ebx
+	addl $4,%esi
+	movl LOC0,%edx
+	leal -128(%ecx,%edx,8),%ecx
+	movl $15,%ebp
+.L68:
+	flds -4(%ecx)
+	fchs
+	fmuls (%ebx)
+	flds -8(%ecx)
+	fmuls 4(%ebx)
+	fxch %st(1)
+	flds -12(%ecx)
+	fmuls 8(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -16(%ecx)
+	fmuls 12(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -20(%ecx)
+	fmuls 16(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -24(%ecx)
+	fmuls 20(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -28(%ecx)
+	fmuls 24(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -32(%ecx)
+	fmuls 28(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -36(%ecx)
+	fmuls 32(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -40(%ecx)
+	fmuls 36(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -44(%ecx)
+	fmuls 40(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -48(%ecx)
+	fmuls 44(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -52(%ecx)
+	fmuls 48(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -56(%ecx)
+	fmuls 52(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -60(%ecx)
+	fmuls 56(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds (%ecx)
+	fmuls 60(%ebx)
+	fxch %st(2)
+	subl $4,%esp
+	fsubrp %st,%st(1)
+	fxch %st(1)
+	fsubrp %st,%st(1)
+
+	addl $4,DITHERINDEX
+	andl $0x0003fffc,DITHERINDEX
+	movl $ASM_NAME(dithernoise),%edi
+	addl DITHERINDEX,%edi	
+
+	fadd (%edi)
+/* fistpl and popl as a unit keep the stack unchanged */
+	fistpl (%esp)
+	popl %eax
+	cmpl $32767,%eax
+	jg 1f
+	cmpl $-32768,%eax
+	jl 2f
+	movw %ax,(%esi)
+	jmp 4f
+1:	movw $32767,(%esi)
+	jmp 3f
+2:	movw $-32768,(%esi)
+3:
+/*	incl %edi */
+4:
+.L67:
+	addl $-64,%ebx
+	addl $-128,%ecx
+	addl $4,%esi
+	decl %ebp
+	jnz .L68
+/* return ipv edi 0 in eax */
+	movl $0,%eax
+/* save ditherindex */
+	movl BO,%ebx
+	movl DITHERINDEX,%esi
+	movl %esi,4(%ebx);
+/* stack: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo */
+	popl %ebx
+	popl %esi
+	popl %edi
+	popl %ebp
+	addl $16,%esp
+/* The stack must be now: 0=back 4=bandptr 8=channel 12=out 16=buffs 20=bo */
+	ret
+
+/* Mark non-executable stack. */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif