Mercurial > SDL_sound_CoreAudio
comparison decoders/libmpg123/decode_i586.S @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems with MP3s not seeking, or most modern MP3s not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
561:f2985e08589c | 562:7e08477b0fc1 |
---|---|
1 /* | |
2 decode_i586: asm synth | |
3 | |
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 | |
5 see COPYING and AUTHORS files in distribution or http://mpg123.org | |
6 initially written by Stefan Bieschewski | |
7 | |
8 synth_1to1 works the same way as the c version of this | |
9 file. only two types of changes have been made: | |
10 - reordered floating point instructions to | |
11 prevent pipeline stalls | |
12 - made WRITE_SAMPLE use integer instead of | |
13 (slower) floating point | |
14 all kinds of x86 processors should benefit from these | |
15 modifications. | |
16 | |
17 useful sources of information on optimizing x86 code include: | |
18 | |
19 Intel Architecture Optimization Manual | |
20 http://www.intel.com/design/pentium/manuals/242816.htm | |
21 | |
22 Cyrix 6x86 Instruction Set Summary | |
23 ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf | |
24 | |
25 AMD-K5 Processor Software Development | |
26 http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf | |
27 | |
28 Stefan Bieschewski <stb@acm.org> | |
29 | |
30 $Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $ | |
31 */ | |
32 | |
33 #include "mangle.h" | |
34 | |
35 .data /* start in .data; on non-Apple targets the .section directive below switches to .rodata instead */ | |
36 #ifndef __APPLE__ | |
37 .section .rodata /* read-only data section for the constants that follow */ | |
38 #endif | |
39 ALIGN8 /* alignment macro, not defined in this file (presumably supplied by mangle.h) */ | |
40 .LC0: /* NOTE(review): no instruction in the code shown in this file references .LC0 */ | |
41 .long 0x0,0x40dfffc0 /* low dword, then high dword: read as a little-endian IEEE-754 double this is 32767.0 */ | |
42 ALIGN8 | |
43 .LC1: /* NOTE(review): likewise not referenced by the code shown in this file */ | |
44 .long 0x0,0xc0e00000 /* low dword, then high dword: read as a little-endian IEEE-754 double this is -32768.0 */ | |
45 ALIGN8 | |
46 .text /* code follows */ | |
47 /* int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); All arguments are read from the stack. Calls dct64_i386 once, then writes 32 clipped 16-bit samples to out with a 4-byte stride (starting 2 bytes in when channel != 0). *bo is updated to (*bo - 1) & 15 only when channel == 0. Returns in %eax the number of samples that had to be clipped. */ | |
48 .globl ASM_NAME(synth_1to1_i586_asm) | |
49 ASM_NAME(synth_1to1_i586_asm): | |
50 subl $12,%esp /* reserve 12 bytes of locals; only the slot at 16(%esp) is used below */ | |
51 pushl %ebp /* save the four registers this routine overwrites; restored before ret */ | |
52 pushl %edi | |
53 pushl %esi | |
54 pushl %ebx | |
55 /* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=back, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */ | |
56 movl 32(%esp),%eax /* *bandPtr */ | |
57 movl 40(%esp),%esi /* *out */ | |
58 movl 48(%esp),%edi /* *bo */ | |
59 movl (%edi),%ebp /* store bo value in ebp */ | |
60 xorl %edi,%edi /* %edi = 0: from here on it counts clipped samples */ | |
61 cmpl %edi,36(%esp) /* compare channel with 0 */ | |
62 jne .L48 /* if(!channel) */ | |
63 decl %ebp /* bo-- */ | |
64 andl $15,%ebp /* bo &= 0xf */ | |
65 movl 48(%esp), %edi /* *bo */ | |
66 movl %ebp,(%edi) /* write back bo */ | |
67 xorl %edi,%edi /* restore %edi to 0; it's used later */ | |
68 movl 44(%esp),%ecx /* use buffs */ | |
69 jmp .L49 | |
70 .L48: /* if(channel) use buffs+2176 */ | |
71 addl $2,%esi /* out += 2 bytes: begin at the second 16-bit slot of each 4-byte output stride */ | |
72 movl 44(%esp),%ecx /* *buffs */ | |
73 addl $2176,%ecx /* %ecx = buffs + 2176 (2176 = 2 * 1088) */ | |
74 .L49: /* here: %eax = bandPtr, %ecx = buffer base, %ebp = bo, %esi = output pointer, %edi = 0 */ | |
75 testl $1,%ebp /* is bo odd? */ | |
76 je .L50 /* even bo is handled at .L50 */ | |
77 movl %ecx,%ebx /* odd bo: %ebx = base; the window loops below read from it */ | |
78 movl %ebp,16(%esp) /* local = bo */ | |
79 pushl %eax /* dct64_i386 arg 3: bandPtr */ | |
80 movl 20(%esp),%edx /* reload the local (its offset is +4 after the push) */ | |
81 leal (%ebx,%edx,4),%eax /* base + 4*bo */ | |
82 pushl %eax /* dct64_i386 arg 2 */ | |
83 movl 24(%esp),%eax /* the local again (offset is +8 after two pushes) */ | |
84 incl %eax | |
85 andl $15,%eax /* (bo + 1) & 15 */ | |
86 leal 1088(,%eax,4),%eax | |
87 addl %ebx,%eax /* base + 1088 + 4*((bo + 1) & 15) */ | |
88 jmp .L74 /* shared tail pushes arg 1 and makes the call */ | |
89 .L50: /* even bo */ | |
90 leal 1088(%ecx),%ebx /* %ebx = base + 1088; the window loops below read from it */ | |
91 leal 1(%ebp),%edx | |
92 movl %edx,16(%esp) /* local = bo + 1 */ | |
93 pushl %eax /* dct64_i386 arg 3: bandPtr */ | |
94 leal 1092(%ecx,%ebp,4),%eax /* base + 1088 + 4*(bo + 1) */ | |
95 pushl %eax /* dct64_i386 arg 2 */ | |
96 leal (%ecx,%ebp,4),%eax /* base + 4*bo */ | |
97 .L74: | |
98 pushl %eax /* dct64_i386 arg 1 */ | |
99 call ASM_NAME(dct64_i386) /* defined outside this file; %ebx, %esi and %edi are used afterwards without being reloaded, so the callee must preserve them */ | |
100 addl $12,%esp /* drop the three pushed arguments */ | |
101 /* stack now back on track */ | |
102 movl 16(%esp),%edx /* local slot: bo (odd case) or bo + 1 (even case) */ | |
103 leal 0(,%edx,4),%edx /* scale to a byte offset (4 bytes per element) */ | |
104 movl 52(%esp),%eax /* decwin */ | |
105 addl $64,%eax | |
106 movl %eax,%ecx | |
107 subl %edx,%ecx /* %ecx = decwin + 64 - 4*local: window pointer w */ | |
108 movl $16,%ebp /* first loop produces 16 output samples */ | |
109 .L55: /* sum = w[0]*b[0] - w[1]*b[1] + w[2]*b[2] - ... + w[14]*b[14] - w[15]*b[15], w = %ecx, b = %ebx; each product is folded in one step late so the multiply overlaps the next load */ | |
110 flds (%ecx) | |
111 fmuls (%ebx) | |
112 flds 4(%ecx) | |
113 fmuls 4(%ebx) | |
114 fxch %st(1) | |
115 flds 8(%ecx) | |
116 fmuls 8(%ebx) | |
117 fxch %st(2) | |
118 fsubrp %st,%st(1) /* NOTE(review): relies on GNU as AT&T operand conventions, under which this computes st(1) - st(0) and pops - confirm against the assembler manual */ | |
119 flds 12(%ecx) | |
120 fmuls 12(%ebx) | |
121 fxch %st(2) | |
122 faddp %st,%st(1) | |
123 flds 16(%ecx) | |
124 fmuls 16(%ebx) | |
125 fxch %st(2) | |
126 fsubrp %st,%st(1) | |
127 flds 20(%ecx) | |
128 fmuls 20(%ebx) | |
129 fxch %st(2) | |
130 faddp %st,%st(1) | |
131 flds 24(%ecx) | |
132 fmuls 24(%ebx) | |
133 fxch %st(2) | |
134 fsubrp %st,%st(1) | |
135 flds 28(%ecx) | |
136 fmuls 28(%ebx) | |
137 fxch %st(2) | |
138 faddp %st,%st(1) | |
139 flds 32(%ecx) | |
140 fmuls 32(%ebx) | |
141 fxch %st(2) | |
142 fsubrp %st,%st(1) | |
143 flds 36(%ecx) | |
144 fmuls 36(%ebx) | |
145 fxch %st(2) | |
146 faddp %st,%st(1) | |
147 flds 40(%ecx) | |
148 fmuls 40(%ebx) | |
149 fxch %st(2) | |
150 fsubrp %st,%st(1) | |
151 flds 44(%ecx) | |
152 fmuls 44(%ebx) | |
153 fxch %st(2) | |
154 faddp %st,%st(1) | |
155 flds 48(%ecx) | |
156 fmuls 48(%ebx) | |
157 fxch %st(2) | |
158 fsubrp %st,%st(1) | |
159 flds 52(%ecx) | |
160 fmuls 52(%ebx) | |
161 fxch %st(2) | |
162 faddp %st,%st(1) | |
163 flds 56(%ecx) | |
164 fmuls 56(%ebx) | |
165 fxch %st(2) | |
166 fsubrp %st,%st(1) | |
167 flds 60(%ecx) | |
168 fmuls 60(%ebx) | |
169 fxch %st(2) | |
170 subl $4,%esp /* scratch dword on the stack for the integer conversion */ | |
171 faddp %st,%st(1) | |
172 fxch %st(1) | |
173 fsubrp %st,%st(1) /* last term folded in: the x87 stack now holds only the finished sum */ | |
174 fistpl (%esp) /* store the sum as a 32-bit integer (current FPU rounding mode) and pop it */ | |
175 popl %eax /* %eax = integer sample; also releases the scratch dword */ | |
176 cmpl $32767,%eax | |
177 jg 1f /* above 32767: clip high */ | |
178 cmpl $-32768,%eax | |
179 jl 2f /* below -32768: clip low */ | |
180 movw %ax,(%esi) /* in range: store the low 16 bits as the sample */ | |
181 jmp 4f | |
182 1: movw $32767,(%esi) | |
183 jmp 3f | |
184 2: movw $-32768,(%esi) | |
185 3: incl %edi /* one more clipped sample */ | |
186 4: | |
187 .L54: | |
188 addl $64,%ebx /* b += 64 bytes (16 elements) */ | |
189 subl $-128,%ecx /* w += 128 bytes (32 elements); -128 fits a sign-extended 8-bit immediate, +128 does not */ | |
190 addl $4,%esi /* output pointer += 4 bytes */ | |
191 decl %ebp | |
192 jnz .L55 | |
193 flds (%ecx) /* sample 17 of 32: sum = w[0]*b[0] + w[2]*b[2] + ... + w[14]*b[14] (even indices only, all added) */ | |
194 fmuls (%ebx) | |
195 flds 8(%ecx) | |
196 fmuls 8(%ebx) | |
197 flds 16(%ecx) | |
198 fmuls 16(%ebx) | |
199 fxch %st(2) | |
200 faddp %st,%st(1) | |
201 flds 24(%ecx) | |
202 fmuls 24(%ebx) | |
203 fxch %st(2) | |
204 faddp %st,%st(1) | |
205 flds 32(%ecx) | |
206 fmuls 32(%ebx) | |
207 fxch %st(2) | |
208 faddp %st,%st(1) | |
209 flds 40(%ecx) | |
210 fmuls 40(%ebx) | |
211 fxch %st(2) | |
212 faddp %st,%st(1) | |
213 flds 48(%ecx) | |
214 fmuls 48(%ebx) | |
215 fxch %st(2) | |
216 faddp %st,%st(1) | |
217 flds 56(%ecx) | |
218 fmuls 56(%ebx) | |
219 fxch %st(2) | |
220 subl $4,%esp /* scratch dword for the integer conversion */ | |
221 faddp %st,%st(1) | |
222 fxch %st(1) | |
223 faddp %st,%st(1) | |
224 fistpl (%esp) /* store the sum as a 32-bit integer and pop it */ | |
225 popl %eax | |
226 cmpl $32767,%eax /* same clip-and-store sequence as in the first loop */ | |
227 jg 1f | |
228 cmpl $-32768,%eax | |
229 jl 2f | |
230 movw %ax,(%esi) | |
231 jmp 4f | |
232 1: movw $32767,(%esi) | |
233 jmp 3f | |
234 2: movw $-32768,(%esi) | |
235 3: incl %edi /* one more clipped sample */ | |
236 4: | |
237 .L62: | |
238 addl $-64,%ebx /* b -= 64 bytes: the second loop walks b backwards */ | |
239 addl $4,%esi /* output pointer += 4 bytes */ | |
240 movl 16(%esp),%edx /* local slot again (bo or bo + 1) */ | |
241 leal -128(%ecx,%edx,8),%ecx /* w = w - 128 + 8*local (byte offsets) */ | |
242 movl $15,%ebp /* second loop produces the remaining 15 output samples */ | |
243 .L68: /* sum = -(w[-1]*b[0] + w[-2]*b[1] + ... + w[-15]*b[14] + w[0]*b[15]): first product negated by fchs, every later one subtracted */ | |
244 flds -4(%ecx) | |
245 fchs /* negate w[-1] so the first product enters the sum with a minus sign */ | |
246 fmuls (%ebx) | |
247 flds -8(%ecx) | |
248 fmuls 4(%ebx) | |
249 fxch %st(1) | |
250 flds -12(%ecx) | |
251 fmuls 8(%ebx) | |
252 fxch %st(2) | |
253 fsubrp %st,%st(1) | |
254 flds -16(%ecx) | |
255 fmuls 12(%ebx) | |
256 fxch %st(2) | |
257 fsubrp %st,%st(1) | |
258 flds -20(%ecx) | |
259 fmuls 16(%ebx) | |
260 fxch %st(2) | |
261 fsubrp %st,%st(1) | |
262 flds -24(%ecx) | |
263 fmuls 20(%ebx) | |
264 fxch %st(2) | |
265 fsubrp %st,%st(1) | |
266 flds -28(%ecx) | |
267 fmuls 24(%ebx) | |
268 fxch %st(2) | |
269 fsubrp %st,%st(1) | |
270 flds -32(%ecx) | |
271 fmuls 28(%ebx) | |
272 fxch %st(2) | |
273 fsubrp %st,%st(1) | |
274 flds -36(%ecx) | |
275 fmuls 32(%ebx) | |
276 fxch %st(2) | |
277 fsubrp %st,%st(1) | |
278 flds -40(%ecx) | |
279 fmuls 36(%ebx) | |
280 fxch %st(2) | |
281 fsubrp %st,%st(1) | |
282 flds -44(%ecx) | |
283 fmuls 40(%ebx) | |
284 fxch %st(2) | |
285 fsubrp %st,%st(1) | |
286 flds -48(%ecx) | |
287 fmuls 44(%ebx) | |
288 fxch %st(2) | |
289 fsubrp %st,%st(1) | |
290 flds -52(%ecx) | |
291 fmuls 48(%ebx) | |
292 fxch %st(2) | |
293 fsubrp %st,%st(1) | |
294 flds -56(%ecx) | |
295 fmuls 52(%ebx) | |
296 fxch %st(2) | |
297 fsubrp %st,%st(1) | |
298 flds -60(%ecx) | |
299 fmuls 56(%ebx) | |
300 fxch %st(2) | |
301 fsubrp %st,%st(1) | |
302 flds (%ecx) /* the last term pairs w[0] (not w[-16]) with b[15] */ | |
303 fmuls 60(%ebx) | |
304 fxch %st(2) | |
305 subl $4,%esp /* scratch dword for the integer conversion */ | |
306 fsubrp %st,%st(1) | |
307 fxch %st(1) | |
308 fsubrp %st,%st(1) | |
309 fistpl (%esp) /* store the sum as a 32-bit integer and pop it */ | |
310 popl %eax | |
311 cmpl $32767,%eax /* same clip-and-store sequence as in the first loop */ | |
312 jg 1f | |
313 cmpl $-32768,%eax | |
314 jl 2f | |
315 movw %ax,(%esi) | |
316 jmp 4f | |
317 1: movw $32767,(%esi) | |
318 jmp 3f | |
319 2: movw $-32768,(%esi) | |
320 3: incl %edi /* one more clipped sample */ | |
321 4: | |
322 .L67: | |
323 addl $-64,%ebx /* b -= 64 bytes (16 elements) */ | |
324 addl $-128,%ecx /* w -= 128 bytes (32 elements) */ | |
325 addl $4,%esi /* output pointer += 4 bytes */ | |
326 decl %ebp | |
327 jnz .L68 | |
328 movl %edi,%eax /* return value: number of clipped samples */ | |
329 popl %ebx /* restore saved registers in reverse push order */ | |
330 popl %esi | |
331 popl %edi | |
332 popl %ebp | |
333 addl $12,%esp /* release the 12 bytes of locals */ | |
334 ret | |
335 | |
336 /* Mark non-executable stack: on Linux ELF builds, emit an empty .note.GNU-stack section (no flags, %progbits) so this object does not request an executable stack. */ | |
337 #if defined(__linux__) && defined(__ELF__) | |
338 .section .note.GNU-stack,"",%progbits | |
339 #endif | |