comparison decoders/libmpg123/decode_i586.S @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, and replaced them with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems with MP3s not seeking, or most modern MP3s not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123: there are MMX, 3DNow, SSE, Altivec, etc. decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
comparison
equal deleted inserted replaced
561:f2985e08589c 562:7e08477b0fc1
1 /*
2 decode_i586: asm synth
3
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Stefan Bieschewski
7
8 synth_1to1 works the same way as the c version of this
9 file. only two types of changes have been made:
10 - reordered floating point instructions to
11 prevent pipeline stalls
12 - made WRITE_SAMPLE use integer instead of
13 (slower) floating point
14 all kinds of x86 processors should benefit from these
15 modifications.
16
17 useful sources of information on optimizing x86 code include:
18
19 Intel Architecture Optimization Manual
20 http://www.intel.com/design/pentium/manuals/242816.htm
21
22 Cyrix 6x86 Instruction Set Summary
23 ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
24
25 AMD-K5 Processor Software Development
26 http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
27
28 Stefan Bieschewski <stb@acm.org>
29
30 $Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $
31 */
32
33 #include "mangle.h"
34
35 .data
36 #ifndef __APPLE__
37 .section .rodata /* on non-Apple targets the constants below go into .rodata instead of .data */
38 #endif
39 ALIGN8 /* alignment macro, not defined in this file (presumably from mangle.h) */
40 .LC0:
41 .long 0x0,0x40dfffc0 /* low word, high word: read as a little-endian IEEE-754 double this is 32767.0; not referenced by the code visible in this file */
42 ALIGN8
43 .LC1:
44 .long 0x0,0xc0e00000 /* low word, high word: read as a little-endian IEEE-754 double this is -32768.0; not referenced by the code visible in this file */
45 ALIGN8
46 .text /* everything from here on is code */
47 /* int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);

   Purpose: calls dct64_i386 on bandPtr, then multiplies its output with the
   window table decwin and writes 32 signed 16-bit samples to out
   (16 in loop .L55, 1 in the straight-line block after it, 15 in loop .L68).

   Arguments: all six are read from the stack at positive offsets from %esp
     (see the stack map below the pushes).
     bandPtr  passed unchanged as the third argument of dct64_i386
     channel  0: *bo is decremented (mod 16) and written back, buffs is used as is
              non-zero: *bo is left alone, out is advanced by 2 bytes and
              buffs+2176 is used
     out      samples are stored with 16-bit writes, 4 bytes apart
     buffs    work buffer handed to dct64_i386 and read back by the window loops
     bo       pointer to the buffer offset; only the low 4 bits are kept on update
     decwin   window coefficients, read as 32-bit floats (flds)

   Returns (%eax): the number of samples that had to be clamped to
     32767 or -32768.

   Register roles after the setup code:
     %esi  output cursor            %edi  clamped-sample counter
     %ebx  dct64 output cursor      %ecx  window cursor
     %ebp  loop counter             %eax  scratch / converted sample
   %ebx, %esi, %edi and %ebp are saved on entry and restored before ret.
*/
48 .globl ASM_NAME(synth_1to1_i586_asm)
49 ASM_NAME(synth_1to1_i586_asm):
50 subl $12,%esp /* reserve 12 bytes: three 32-bit local slots (only the one at 16(%esp) is used below) */
51 pushl %ebp
52 pushl %edi
53 pushl %esi
54 pushl %ebx
55 /* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=return address, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */
56 movl 32(%esp),%eax /* *bandPtr */
57 movl 40(%esp),%esi /* *out */
58 movl 48(%esp),%edi /* *bo */
59 movl (%edi),%ebp /* store bo value in ebp */
60 xorl %edi,%edi /* edi = 0: used as the zero for the channel test, later as the clamp counter */
61 cmpl %edi,36(%esp) /* channel == 0 ? */
62 jne .L48 /* taken when channel != 0; the fall-through path is the if(!channel) case */
63 decl %ebp /* bo-- */
64 andl $15,%ebp /* bo &= 0xf */
65 movl 48(%esp), %edi /* *bo */
66 movl %ebp,(%edi) /* write back bo */
67 xorl %edi,%edi /* restore %edi to 0; it's used later */
68 movl 44(%esp),%ecx /* use buffs */
69 jmp .L49
70 .L48: /* if(channel) use buffs+2176 */
71 addl $2,%esi /* out += 2 bytes, i.e. one 16-bit sample (presumably the second channel of an interleaved pair - confirm with caller) */
72 movl 44(%esp),%ecx /* *buffs */
73 addl $2176,%ecx /* 2176 = 2 * 1088 bytes */
74 .L49: /* here: eax = bandPtr, ecx = buffer base ("buf"), ebp = bo, esi = out cursor, edi = 0 */
75 testl $1,%ebp /* is bo odd? */
76 je .L50 /* even bo is handled at .L50 */
77 movl %ecx,%ebx /* odd bo: the window loops read from buf */
78 movl %ebp,16(%esp) /* local = bo */
79 pushl %eax /* dct64 argument 3: bandPtr (esp drops by 4, so the local is now at 20(%esp)) */
80 movl 20(%esp),%edx /* edx = local = bo */
81 leal (%ebx,%edx,4),%eax /* buf + bo*4 */
82 pushl %eax /* dct64 argument 2 (the local is now at 24(%esp)) */
83 movl 24(%esp),%eax /* eax = local = bo */
84 incl %eax
85 andl $15,%eax /* (bo + 1) & 0xf */
86 leal 1088(,%eax,4),%eax /* 1088 + ((bo + 1) & 0xf)*4 */
87 addl %ebx,%eax /* + buf: dct64 argument 1, pushed at .L74 */
88 jmp .L74
89 .L50:
90 leal 1088(%ecx),%ebx /* even bo: the window loops read from buf + 1088 */
91 leal 1(%ebp),%edx
92 movl %edx,16(%esp) /* local = bo + 1 */
93 pushl %eax /* dct64 argument 3: bandPtr */
94 leal 1092(%ecx,%ebp,4),%eax /* buf + 1088 + (bo + 1)*4 */
95 pushl %eax /* dct64 argument 2 */
96 leal (%ecx,%ebp,4),%eax /* buf + bo*4: dct64 argument 1 */
97 .L74:
98 pushl %eax /* dct64 argument 1 (pushed last, so lowest on the stack at the call) */
99 call ASM_NAME(dct64_i386) /* defined elsewhere; the code below reloads edx, eax, ecx and ebp afterwards and relies on ebx, esi, edi surviving the call */
100 addl $12,%esp /* drop the three pushed arguments */
101 /* stack now back on track */
102 movl 16(%esp),%edx /* edx = local (bo if bo was odd, bo + 1 if even) */
103 leal 0(,%edx,4),%edx /* scale to a byte offset (4-byte elements) */
104 movl 52(%esp),%eax /* decwin */
105 addl $64,%eax
106 movl %eax,%ecx
107 subl %edx,%ecx /* ecx = decwin + 64 - 4*local: window cursor */
108 movl $16,%ebp /* first loop produces 16 samples */
109 .L55: /* per iteration: sum of 16 products ecx[i]*ebx[i], combined by alternating fsubrp/faddp (p0 - p1 + p2 - ... - p15 under GAS AT&T operand semantics); fxch keeps independent products in flight */
110 flds (%ecx)
111 fmuls (%ebx)
112 flds 4(%ecx)
113 fmuls 4(%ebx)
114 fxch %st(1)
115 flds 8(%ecx)
116 fmuls 8(%ebx)
117 fxch %st(2)
118 fsubrp %st,%st(1)
119 flds 12(%ecx)
120 fmuls 12(%ebx)
121 fxch %st(2)
122 faddp %st,%st(1)
123 flds 16(%ecx)
124 fmuls 16(%ebx)
125 fxch %st(2)
126 fsubrp %st,%st(1)
127 flds 20(%ecx)
128 fmuls 20(%ebx)
129 fxch %st(2)
130 faddp %st,%st(1)
131 flds 24(%ecx)
132 fmuls 24(%ebx)
133 fxch %st(2)
134 fsubrp %st,%st(1)
135 flds 28(%ecx)
136 fmuls 28(%ebx)
137 fxch %st(2)
138 faddp %st,%st(1)
139 flds 32(%ecx)
140 fmuls 32(%ebx)
141 fxch %st(2)
142 fsubrp %st,%st(1)
143 flds 36(%ecx)
144 fmuls 36(%ebx)
145 fxch %st(2)
146 faddp %st,%st(1)
147 flds 40(%ecx)
148 fmuls 40(%ebx)
149 fxch %st(2)
150 fsubrp %st,%st(1)
151 flds 44(%ecx)
152 fmuls 44(%ebx)
153 fxch %st(2)
154 faddp %st,%st(1)
155 flds 48(%ecx)
156 fmuls 48(%ebx)
157 fxch %st(2)
158 fsubrp %st,%st(1)
159 flds 52(%ecx)
160 fmuls 52(%ebx)
161 fxch %st(2)
162 faddp %st,%st(1)
163 flds 56(%ecx)
164 fmuls 56(%ebx)
165 fxch %st(2)
166 fsubrp %st,%st(1)
167 flds 60(%ecx)
168 fmuls 60(%ebx)
169 fxch %st(2)
170 subl $4,%esp /* open a 4-byte stack temporary for the integer conversion (placed between FPU instructions) */
171 faddp %st,%st(1)
172 fxch %st(1)
173 fsubrp %st,%st(1)
174 fistpl (%esp) /* store the sum as a 32-bit integer into the temporary and pop it off the FPU stack */
175 popl %eax /* eax = converted sample; the temporary is released */
176 cmpl $32767,%eax
177 jg 1f /* above the 16-bit maximum: clamp high */
178 cmpl $-32768,%eax
179 jl 2f /* below the 16-bit minimum: clamp low */
180 movw %ax,(%esi) /* in range: store the low 16 bits */
181 jmp 4f /* skip the clamp counter */
182 1: movw $32767,(%esi)
183 jmp 3f
184 2: movw $-32768,(%esi)
185 3: incl %edi /* count one clamped sample */
186 4:
187 .L54:
188 addl $64,%ebx /* dct64 output cursor: forward 64 bytes (16 floats) */
189 subl $-128,%ecx /* window cursor: forward 128 bytes (presumably written as sub -128 because -128 fits an 8-bit immediate and +128 does not) */
190 addl $4,%esi /* output cursor: forward 4 bytes per 2-byte sample */
191 decl %ebp
192 jnz .L55
193 flds (%ecx) /* single middle sample: sum of the 8 products at byte offsets 0,8,...,56 (every second element), all added */
194 fmuls (%ebx)
195 flds 8(%ecx)
196 fmuls 8(%ebx)
197 flds 16(%ecx)
198 fmuls 16(%ebx)
199 fxch %st(2)
200 faddp %st,%st(1)
201 flds 24(%ecx)
202 fmuls 24(%ebx)
203 fxch %st(2)
204 faddp %st,%st(1)
205 flds 32(%ecx)
206 fmuls 32(%ebx)
207 fxch %st(2)
208 faddp %st,%st(1)
209 flds 40(%ecx)
210 fmuls 40(%ebx)
211 fxch %st(2)
212 faddp %st,%st(1)
213 flds 48(%ecx)
214 fmuls 48(%ebx)
215 fxch %st(2)
216 faddp %st,%st(1)
217 flds 56(%ecx)
218 fmuls 56(%ebx)
219 fxch %st(2)
220 subl $4,%esp /* stack temporary for the integer conversion */
221 faddp %st,%st(1)
222 fxch %st(1)
223 faddp %st,%st(1)
224 fistpl (%esp) /* sum -> 32-bit integer in the temporary */
225 popl %eax /* eax = converted sample */
226 cmpl $32767,%eax
227 jg 1f /* clamp high */
228 cmpl $-32768,%eax
229 jl 2f /* clamp low */
230 movw %ax,(%esi) /* in range: store as is */
231 jmp 4f
232 1: movw $32767,(%esi)
233 jmp 3f
234 2: movw $-32768,(%esi)
235 3: incl %edi /* count one clamped sample */
236 4:
237 .L62:
238 addl $-64,%ebx /* dct64 output cursor: step back 64 bytes; the second loop walks it downwards */
239 addl $4,%esi /* output cursor past the middle sample */
240 movl 16(%esp),%edx /* edx = local (bo or bo + 1, as stored before the dct64 call) */
241 leal -128(%ecx,%edx,8),%ecx /* window cursor = ecx - 128 + 8*local */
242 movl $15,%ebp /* second loop produces 15 samples */
243 .L68: /* per iteration: window read downwards (-4,-8,...,-60, then 0) against ebx read upwards (0..60); first product is negated (fchs), all others are subtracted */
244 flds -4(%ecx)
245 fchs
246 fmuls (%ebx)
247 flds -8(%ecx)
248 fmuls 4(%ebx)
249 fxch %st(1)
250 flds -12(%ecx)
251 fmuls 8(%ebx)
252 fxch %st(2)
253 fsubrp %st,%st(1)
254 flds -16(%ecx)
255 fmuls 12(%ebx)
256 fxch %st(2)
257 fsubrp %st,%st(1)
258 flds -20(%ecx)
259 fmuls 16(%ebx)
260 fxch %st(2)
261 fsubrp %st,%st(1)
262 flds -24(%ecx)
263 fmuls 20(%ebx)
264 fxch %st(2)
265 fsubrp %st,%st(1)
266 flds -28(%ecx)
267 fmuls 24(%ebx)
268 fxch %st(2)
269 fsubrp %st,%st(1)
270 flds -32(%ecx)
271 fmuls 28(%ebx)
272 fxch %st(2)
273 fsubrp %st,%st(1)
274 flds -36(%ecx)
275 fmuls 32(%ebx)
276 fxch %st(2)
277 fsubrp %st,%st(1)
278 flds -40(%ecx)
279 fmuls 36(%ebx)
280 fxch %st(2)
281 fsubrp %st,%st(1)
282 flds -44(%ecx)
283 fmuls 40(%ebx)
284 fxch %st(2)
285 fsubrp %st,%st(1)
286 flds -48(%ecx)
287 fmuls 44(%ebx)
288 fxch %st(2)
289 fsubrp %st,%st(1)
290 flds -52(%ecx)
291 fmuls 48(%ebx)
292 fxch %st(2)
293 fsubrp %st,%st(1)
294 flds -56(%ecx)
295 fmuls 52(%ebx)
296 fxch %st(2)
297 fsubrp %st,%st(1)
298 flds -60(%ecx)
299 fmuls 56(%ebx)
300 fxch %st(2)
301 fsubrp %st,%st(1)
302 flds (%ecx) /* the last window element is read at offset 0, not -64 */
303 fmuls 60(%ebx)
304 fxch %st(2)
305 subl $4,%esp /* stack temporary for the integer conversion */
306 fsubrp %st,%st(1)
307 fxch %st(1)
308 fsubrp %st,%st(1)
309 fistpl (%esp) /* sum -> 32-bit integer in the temporary */
310 popl %eax /* eax = converted sample */
311 cmpl $32767,%eax
312 jg 1f /* clamp high */
313 cmpl $-32768,%eax
314 jl 2f /* clamp low */
315 movw %ax,(%esi) /* in range: store as is */
316 jmp 4f
317 1: movw $32767,(%esi)
318 jmp 3f
319 2: movw $-32768,(%esi)
320 3: incl %edi /* count one clamped sample */
321 4:
322 .L67:
323 addl $-64,%ebx /* dct64 output cursor: back 64 bytes */
324 addl $-128,%ecx /* window cursor: back 128 bytes */
325 addl $4,%esi /* output cursor: forward 4 bytes */
326 decl %ebp
327 jnz .L68
328 movl %edi,%eax /* return value: number of clamped samples */
329 popl %ebx /* restore saved registers in reverse push order */
330 popl %esi
331 popl %edi
332 popl %ebp
333 addl $12,%esp /* release the local slots reserved on entry */
334 ret
335
336 /* Mark non-executable stack: on Linux/ELF builds, emit an empty .note.GNU-stack section (no flags) for this object. */
337 #if defined(__linux__) && defined(__ELF__)
338 .section .note.GNU-stack,"",%progbits
339 #endif