562
|
1 /*
|
|
2 decode_i586: asm synth
|
|
3
|
|
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
|
|
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
|
|
6 initially written by Stefan Bieschewski
|
|
7
|
|
8 synth_1to1 works the same way as the c version of this
|
|
9 file. only two types of changes have been made:
|
|
10 - reordered floating point instructions to
|
|
11 prevent pipeline stalls
|
|
12 - made WRITE_SAMPLE use integer instead of
|
|
13 (slower) floating point
|
|
14 all kinds of x86 processors should benefit from these
|
|
15 modifications.
|
|
16
|
|
17 useful sources of information on optimizing x86 code include:
|
|
18
|
|
19 Intel Architecture Optimization Manual
|
|
20 http://www.intel.com/design/pentium/manuals/242816.htm
|
|
21
|
|
22 Cyrix 6x86 Instruction Set Summary
|
|
23 ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
|
|
24
|
|
25 AMD-K5 Processor Software Development
|
|
26 http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
|
|
27
|
|
28 Stefan Bieschewski <stb@acm.org>
|
|
29
|
|
30 $Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $
|
|
31 */
|
|
32
|
|
33 #include "mangle.h"
|
|
34
|
|
35 .data /* constants go to .data on Apple targets; elsewhere the .section below overrides this */
|
|
36 #ifndef __APPLE__
|
|
37 .section .rodata
|
|
38 #endif
|
|
39 ALIGN8 /* presumably an 8-byte alignment macro supplied by mangle.h -- confirm */
|
|
40 .LC0: /* not referenced by synth_1to1_i586_asm in this file */
|
|
41 .long 0x0,0x40dfffc0 /* low dword, high dword: read as a little-endian IEEE-754 double this is 32767.0 */
|
|
42 ALIGN8
|
|
43 .LC1: /* not referenced by synth_1to1_i586_asm in this file */
|
|
44 .long 0x0,0xc0e00000 /* low dword, high dword: read as a little-endian IEEE-754 double this is -32768.0 */
|
|
45 ALIGN8
|
|
46 .text /* everything after this point is code */
|
|
47 /* int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); */
|
|
/*
 * synth_1to1_i586_asm
 *
 * Calls dct64_i386 with bandPtr and two pointers into the buffs work area, then
 * multiplies the work area with the decwin table to produce 32 clamped 16-bit samples.
 *
 * Arguments: all six are read from the stack; offsets are listed in the "stack:" comment below.
 * Output:    samples are stored with movw starting at out (out+2 when channel != 0), and the
 *            write pointer advances 4 bytes per sample; 16 + 1 + 15 = 32 stores in total.
 *            NOTE(review): the 4-byte stride with 2-byte samples looks like interleaved stereo -- confirm.
 * Return:    %eax = number of samples that were clamped to the range -32768..32767.
 * Side effect: when channel == 0, *bo is replaced by (*bo - 1) & 15.
 * Preserved: %ebx, %esi, %edi, %ebp are saved and restored. %eax, %ecx, %edx and the x87
 *            register stack are used as scratch (each sample's sum is popped by fistpl).
 *
 * Register roles once setup is done:
 *   %ebx      pointer into the buffs half that gets multiplied with decwin
 *   %ecx      pointer into decwin
 *   %esi      output write pointer
 *   %edi      clip counter
 *   %ebp      loop counter
 *   16(%esp)  local "bo1" (bo when bo is odd, bo+1 when bo is even)
 *
 * %ebx, %esi and %edi are not reloaded after the call, so dct64_i386 is relied on to preserve them.
 *
 * NOTE(review): flds/fmuls operate on 4-byte floats, so 1088 and 2176 bytes would be 0x110 and
 * 0x220 floats -- confirm against the C declaration of buffs.
 */
48 .globl ASM_NAME(synth_1to1_i586_asm)
|
|
49 ASM_NAME(synth_1to1_i586_asm):
|
|
50 subl $12,%esp /* reserve 12 bytes of locals; only the slot at 16(%esp) ("bo1") is used below */
|
|
51 pushl %ebp
|
|
52 pushl %edi
|
|
53 pushl %esi
|
|
54 pushl %ebx
|
|
55 /* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=back, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */
|
|
56 movl 32(%esp),%eax /* *bandPtr */
|
|
57 movl 40(%esp),%esi /* *out */
|
|
58 movl 48(%esp),%edi /* *bo */
|
|
59 movl (%edi),%ebp /* store bo value in ebp */
|
|
60 xorl %edi,%edi /* edi = 0: used as the zero for the channel test, later as the clip counter */
|
|
61 cmpl %edi,36(%esp) /* channel == 0 ? */
|
|
62 jne .L48 /* channel != 0: skip the if(!channel) body that follows */
|
|
63 decl %ebp /* bo-- */
|
|
64 andl $15,%ebp /* bo &= 0xf */
|
|
65 movl 48(%esp), %edi /* *bo */
|
|
66 movl %ebp,(%edi) /* write back bo */
|
|
67 xorl %edi,%edi /* restore %edi to 0; it's used later */
|
|
68 movl 44(%esp),%ecx /* use buffs */
|
|
69 jmp .L49
|
|
70 .L48: /* if(channel) use buffs+2176 */
|
|
71 addl $2,%esi /* start one 16-bit slot further into out */
|
|
72 movl 44(%esp),%ecx /* *buffs */
|
|
73 addl $2176,%ecx
|
|
/* From here %ecx is the base of the selected work area and %ebp holds bo. */
74 .L49:
|
|
75 testl $1,%ebp
|
|
76 je .L50 /* bo even -> .L50, bo odd -> fall through */
|
|
77 movl %ecx,%ebx /* bo odd: ebx = base */
|
|
78 movl %ebp,16(%esp) /* bo1 = bo */
|
|
79 pushl %eax /* pushed first: bandPtr (last dct64_i386 argument under cdecl) */
|
|
80 movl 20(%esp),%edx /* edx = bo1; the slot is at 20 now because of the push above */
|
|
81 leal (%ebx,%edx,4),%eax /* base + bo*4 */
|
|
82 pushl %eax /* pushed second: middle dct64_i386 argument */
|
|
83 movl 24(%esp),%eax /* eax = bo1 again; two pushes deep, so offset 24 */
|
|
84 incl %eax
|
|
85 andl $15,%eax /* (bo + 1) & 15 */
|
|
86 leal 1088(,%eax,4),%eax
|
|
87 addl %ebx,%eax /* base + 1088 + ((bo+1)&15)*4; pushed at .L74 */
|
|
88 jmp .L74
|
|
89 .L50: /* bo even */
|
|
90 leal 1088(%ecx),%ebx /* ebx = base + 1088 */
|
|
91 leal 1(%ebp),%edx
|
|
92 movl %edx,16(%esp) /* bo1 = bo + 1 */
|
|
93 pushl %eax /* pushed first: bandPtr (last dct64_i386 argument under cdecl) */
|
|
94 leal 1092(%ecx,%ebp,4),%eax /* base + 1088 + (bo+1)*4 */
|
|
95 pushl %eax /* pushed second: middle dct64_i386 argument */
|
|
96 leal (%ecx,%ebp,4),%eax /* base + bo*4; pushed at .L74 */
|
|
97 .L74:
|
|
98 pushl %eax /* pushed last, so lowest on the stack: first dct64_i386 argument under cdecl */
|
|
99 call ASM_NAME(dct64_i386)
|
|
100 addl $12,%esp /* drop the three pushed arguments */
|
|
101 /* stack now back on track */
|
|
102 movl 16(%esp),%edx /* edx = bo1 */
|
|
103 leal 0(,%edx,4),%edx /* edx = bo1 * 4 (byte offset) */
|
|
104 movl 52(%esp),%eax /* decwin */
|
|
105 addl $64,%eax
|
|
106 movl %eax,%ecx
|
|
107 subl %edx,%ecx /* ecx = decwin + 64 - bo1*4 */
|
|
108 movl $16,%ebp /* first loop produces 16 samples */
|
|
/*
 * First loop, one sample per iteration: 16 products (%ecx + 4k) * (%ebx + 4k), k = 0..15,
 * folded into a running sum alternately with fsubrp and faddp. Each fold step consumes the
 * product loaded one step earlier, and fxch brings it to the top of the x87 stack; the file
 * header attributes this ordering to avoiding pipeline stalls.
 * NOTE(review): presumably sum = p0 - p1 + p2 - ... - p15; confirm the fsubrp operand
 * convention of the assembler in use.
 */
109 .L55:
|
|
110 flds (%ecx)
|
|
111 fmuls (%ebx)
|
|
112 flds 4(%ecx)
|
|
113 fmuls 4(%ebx)
|
|
114 fxch %st(1)
|
|
115 flds 8(%ecx)
|
|
116 fmuls 8(%ebx)
|
|
117 fxch %st(2)
|
|
118 fsubrp %st,%st(1)
|
|
119 flds 12(%ecx)
|
|
120 fmuls 12(%ebx)
|
|
121 fxch %st(2)
|
|
122 faddp %st,%st(1)
|
|
123 flds 16(%ecx)
|
|
124 fmuls 16(%ebx)
|
|
125 fxch %st(2)
|
|
126 fsubrp %st,%st(1)
|
|
127 flds 20(%ecx)
|
|
128 fmuls 20(%ebx)
|
|
129 fxch %st(2)
|
|
130 faddp %st,%st(1)
|
|
131 flds 24(%ecx)
|
|
132 fmuls 24(%ebx)
|
|
133 fxch %st(2)
|
|
134 fsubrp %st,%st(1)
|
|
135 flds 28(%ecx)
|
|
136 fmuls 28(%ebx)
|
|
137 fxch %st(2)
|
|
138 faddp %st,%st(1)
|
|
139 flds 32(%ecx)
|
|
140 fmuls 32(%ebx)
|
|
141 fxch %st(2)
|
|
142 fsubrp %st,%st(1)
|
|
143 flds 36(%ecx)
|
|
144 fmuls 36(%ebx)
|
|
145 fxch %st(2)
|
|
146 faddp %st,%st(1)
|
|
147 flds 40(%ecx)
|
|
148 fmuls 40(%ebx)
|
|
149 fxch %st(2)
|
|
150 fsubrp %st,%st(1)
|
|
151 flds 44(%ecx)
|
|
152 fmuls 44(%ebx)
|
|
153 fxch %st(2)
|
|
154 faddp %st,%st(1)
|
|
155 flds 48(%ecx)
|
|
156 fmuls 48(%ebx)
|
|
157 fxch %st(2)
|
|
158 fsubrp %st,%st(1)
|
|
159 flds 52(%ecx)
|
|
160 fmuls 52(%ebx)
|
|
161 fxch %st(2)
|
|
162 faddp %st,%st(1)
|
|
163 flds 56(%ecx)
|
|
164 fmuls 56(%ebx)
|
|
165 fxch %st(2)
|
|
166 fsubrp %st,%st(1)
|
|
167 flds 60(%ecx)
|
|
168 fmuls 60(%ebx)
|
|
169 fxch %st(2)
|
|
170 subl $4,%esp /* make a 4-byte temp slot for fistpl; placed between the FP instructions */
|
|
171 faddp %st,%st(1)
|
|
172 fxch %st(1)
|
|
173 fsubrp %st,%st(1) /* last fold; the finished sum is now in %st(0) */
|
|
174 fistpl (%esp) /* store the sum as a 32-bit integer and pop it off the x87 stack */
|
|
175 popl %eax /* eax = integer sample; also releases the temp slot */
|
|
176 cmpl $32767,%eax
|
|
177 jg 1f /* too large -> clamp high */
|
|
178 cmpl $-32768,%eax
|
|
179 jl 2f /* too small -> clamp low */
|
|
180 movw %ax,(%esi) /* in range: store the low 16 bits */
|
|
181 jmp 4f
|
|
182 1: movw $32767,(%esi)
|
|
183 jmp 3f
|
|
184 2: movw $-32768,(%esi)
|
|
185 3: incl %edi /* one more clamped sample */
|
|
186 4:
|
|
187 .L54:
|
|
188 addl $64,%ebx /* advance the buffs pointer by 64 bytes */
|
|
189 subl $-128,%ecx /* ecx += 128; -128 fits a sign-extended 8-bit immediate, +128 would not */
|
|
190 addl $4,%esi /* next output slot */
|
|
191 decl %ebp
|
|
192 jnz .L55
|
|
/*
 * Single middle sample: 8 products at the even float positions (byte offsets 0, 8, ..., 56),
 * all combined with faddp, then converted and clamped exactly like the loop above.
 */
193 flds (%ecx)
|
|
194 fmuls (%ebx)
|
|
195 flds 8(%ecx)
|
|
196 fmuls 8(%ebx)
|
|
197 flds 16(%ecx)
|
|
198 fmuls 16(%ebx)
|
|
199 fxch %st(2)
|
|
200 faddp %st,%st(1)
|
|
201 flds 24(%ecx)
|
|
202 fmuls 24(%ebx)
|
|
203 fxch %st(2)
|
|
204 faddp %st,%st(1)
|
|
205 flds 32(%ecx)
|
|
206 fmuls 32(%ebx)
|
|
207 fxch %st(2)
|
|
208 faddp %st,%st(1)
|
|
209 flds 40(%ecx)
|
|
210 fmuls 40(%ebx)
|
|
211 fxch %st(2)
|
|
212 faddp %st,%st(1)
|
|
213 flds 48(%ecx)
|
|
214 fmuls 48(%ebx)
|
|
215 fxch %st(2)
|
|
216 faddp %st,%st(1)
|
|
217 flds 56(%ecx)
|
|
218 fmuls 56(%ebx)
|
|
219 fxch %st(2)
|
|
220 subl $4,%esp /* temp slot for fistpl */
|
|
221 faddp %st,%st(1)
|
|
222 fxch %st(1)
|
|
223 faddp %st,%st(1)
|
|
224 fistpl (%esp) /* sum -> 32-bit integer, popped from the x87 stack */
|
|
225 popl %eax
|
|
226 cmpl $32767,%eax
|
|
227 jg 1f
|
|
228 cmpl $-32768,%eax
|
|
229 jl 2f
|
|
230 movw %ax,(%esi)
|
|
231 jmp 4f
|
|
232 1: movw $32767,(%esi)
|
|
233 jmp 3f
|
|
234 2: movw $-32768,(%esi)
|
|
235 3: incl %edi /* one more clamped sample */
|
|
236 4:
|
|
237 .L62:
|
|
238 addl $-64,%ebx /* step the buffs pointer back by 64 bytes; the second loop walks it downwards */
|
|
239 addl $4,%esi
|
|
240 movl 16(%esp),%edx /* edx = bo1 */
|
|
241 leal -128(%ecx,%edx,8),%ecx /* ecx = ecx - 128 + bo1*8 */
|
|
242 movl $15,%ebp /* second loop produces the remaining 15 samples */
|
|
/*
 * Second loop, one sample per iteration: decwin is read at descending offsets -4, -8, ..., -60
 * and finally 0, while the buffs half is read at ascending offsets 0..60. The first product is
 * negated with fchs and every later product is folded in with fsubrp.
 * NOTE(review): presumably sum = -q0 - q1 - ... - q15; confirm the fsubrp operand convention.
 */
243 .L68:
|
|
244 flds -4(%ecx)
|
|
245 fchs
|
|
246 fmuls (%ebx)
|
|
247 flds -8(%ecx)
|
|
248 fmuls 4(%ebx)
|
|
249 fxch %st(1)
|
|
250 flds -12(%ecx)
|
|
251 fmuls 8(%ebx)
|
|
252 fxch %st(2)
|
|
253 fsubrp %st,%st(1)
|
|
254 flds -16(%ecx)
|
|
255 fmuls 12(%ebx)
|
|
256 fxch %st(2)
|
|
257 fsubrp %st,%st(1)
|
|
258 flds -20(%ecx)
|
|
259 fmuls 16(%ebx)
|
|
260 fxch %st(2)
|
|
261 fsubrp %st,%st(1)
|
|
262 flds -24(%ecx)
|
|
263 fmuls 20(%ebx)
|
|
264 fxch %st(2)
|
|
265 fsubrp %st,%st(1)
|
|
266 flds -28(%ecx)
|
|
267 fmuls 24(%ebx)
|
|
268 fxch %st(2)
|
|
269 fsubrp %st,%st(1)
|
|
270 flds -32(%ecx)
|
|
271 fmuls 28(%ebx)
|
|
272 fxch %st(2)
|
|
273 fsubrp %st,%st(1)
|
|
274 flds -36(%ecx)
|
|
275 fmuls 32(%ebx)
|
|
276 fxch %st(2)
|
|
277 fsubrp %st,%st(1)
|
|
278 flds -40(%ecx)
|
|
279 fmuls 36(%ebx)
|
|
280 fxch %st(2)
|
|
281 fsubrp %st,%st(1)
|
|
282 flds -44(%ecx)
|
|
283 fmuls 40(%ebx)
|
|
284 fxch %st(2)
|
|
285 fsubrp %st,%st(1)
|
|
286 flds -48(%ecx)
|
|
287 fmuls 44(%ebx)
|
|
288 fxch %st(2)
|
|
289 fsubrp %st,%st(1)
|
|
290 flds -52(%ecx)
|
|
291 fmuls 48(%ebx)
|
|
292 fxch %st(2)
|
|
293 fsubrp %st,%st(1)
|
|
294 flds -56(%ecx)
|
|
295 fmuls 52(%ebx)
|
|
296 fxch %st(2)
|
|
297 fsubrp %st,%st(1)
|
|
298 flds -60(%ecx)
|
|
299 fmuls 56(%ebx)
|
|
300 fxch %st(2)
|
|
301 fsubrp %st,%st(1)
|
|
302 flds (%ecx)
|
|
303 fmuls 60(%ebx)
|
|
304 fxch %st(2)
|
|
305 subl $4,%esp /* temp slot for fistpl */
|
|
306 fsubrp %st,%st(1)
|
|
307 fxch %st(1)
|
|
308 fsubrp %st,%st(1)
|
|
309 fistpl (%esp) /* sum -> 32-bit integer, popped from the x87 stack */
|
|
310 popl %eax
|
|
311 cmpl $32767,%eax
|
|
312 jg 1f
|
|
313 cmpl $-32768,%eax
|
|
314 jl 2f
|
|
315 movw %ax,(%esi)
|
|
316 jmp 4f
|
|
317 1: movw $32767,(%esi)
|
|
318 jmp 3f
|
|
319 2: movw $-32768,(%esi)
|
|
320 3: incl %edi /* one more clamped sample */
|
|
321 4:
|
|
322 .L67:
|
|
323 addl $-64,%ebx /* buffs pointer moves down 64 bytes per sample */
|
|
324 addl $-128,%ecx /* decwin pointer moves down 128 bytes per sample */
|
|
325 addl $4,%esi
|
|
326 decl %ebp
|
|
327 jnz .L68
|
|
328 movl %edi,%eax /* return value = number of clamped samples */
|
|
329 popl %ebx
|
|
330 popl %esi
|
|
331 popl %edi
|
|
332 popl %ebp
|
|
333 addl $12,%esp /* release the locals reserved on entry */
|
|
334 ret
|
|
335
|
|
336 /* Mark non-executable stack. */
|
|
/* Emitted only for Linux ELF builds: an empty .note.GNU-stack section with no flags is the marker. */
337 #if defined(__linux__) && defined(__ELF__)
|
|
338 .section .note.GNU-stack,"",%progbits
|
|
339 #endif
|