Mercurial > SDL_sound_CoreAudio
comparison decoders/libmpg123/dct64_sse.S @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems about MP3's not seeking, or most modern MP3's not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
561:f2985e08589c | 562:7e08477b0fc1 |
---|---|
1 /* | |
2 dct64_sse: MMX/SSE optimized dct64 | |
3 | |
4 copyright 2006-2007 by Zuxy Meng <zuxy.meng@gmail.com> / the mpg123 project - free software under the terms of the LGPL 2.1 | |
5 see COPYING and AUTHORS files in distribution or http://mpg123.org | |
6 initially written by the mysterious higway for MMX (apparently) | |
7 then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec | |
8 Both have agreed to distribution under LGPL 2.1 . | |
9 | |
10 Transformed back into standalone asm, with help of | |
11 gcc -S -DHAVE_CONFIG_H -I. -march=pentium3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_sse.{S,c} | |
12 | |
13 Original comment from MPlayer source follows: | |
14 */ | |
15 | |
16 /* | |
17 * Discrete Cosine Transform (DCT) for SSE | |
18 * based upon code from mp3lib/dct64.c, mp3lib/dct64_altivec.c | |
19 * and mp3lib/dct64_MMX.c | |
20 */ | |
21 | |
22 #include "mangle.h" | |
23 | |
24 #ifndef __APPLE__ | |
25 .section .rodata | |
26 #else | |
/* Apple's Mach-O assembler has no .rodata section; fall back to .data. */
27 .data | |
28 #endif | |
/* Sign-bit masks for negating packed singles via xorps:
   -2147483648 == 0x80000000, the IEEE-754 single-precision sign bit. */
29 ALIGN16 | |
30 /* .type nnnn, @object | |
31 .size nnnn, 16 */ | |
/* nnnn: flip the sign of all four lanes. */
32 nnnn: | |
33 .long -2147483648 | |
34 .long -2147483648 | |
35 .long -2147483648 | |
36 .long -2147483648 | |
37 ALIGN16 | |
38 /* .type ppnn, @object | |
39 .size ppnn, 16 */ | |
/* ppnn: pass lanes 0-1 unchanged, negate lanes 2-3. */
40 ppnn: | |
41 .long 0 | |
42 .long 0 | |
43 .long -2147483648 | |
44 .long -2147483648 | |
45 ALIGN16 | |
46 /* .type pnpn, @object | |
47 .size pnpn, 16 */ | |
/* pnpn: negate the odd lanes (1 and 3) only. */
48 pnpn: | |
49 .long 0 | |
50 .long -2147483648 | |
51 .long 0 | |
52 .long -2147483648 | |
53 ALIGN4 | |
54 /* .type one.4748, @object | |
55 .size one.4748, 4 */ | |
/* one.4748: 1065353216 == 0x3F800000 == 1.0f
   (gcc-generated local literal, name kept from the compiler output). */
56 one.4748: | |
57 .long 1065353216 | |
58 | |
59 .text | |
60 ALIGN16,,15 | |
61 .globl ASM_NAME(dct64_sse) | |
62 /* .type ASM_NAME(dct64_sse), @function */ | |
/*
 * dct64_sse(out0, out1, samples) -- IA-32 cdecl.
 * 64-point fast DCT for the mpg123 subband synthesis filter.
 * In:   arg0 -> %ecx (output buffer 0), arg1 -> %ebx (output buffer 1),
 *       arg2 -> %eax (input block of 32 floats, loaded with MOVUAPS,
 *       i.e. no alignment assumed on the input).
 * Uses: external table ASM_NAME(costab_mmxsse) and the local sign masks
 *       nnnn/ppnn/pnpn and constant one.4748 from the data section above.
 * Clobbers: eax, ecx, edx, xmm0-xmm7, x87 stack, flags.
 *       ebx is saved/restored below; ebp frames the call (cdecl-safe).
 * Locals: 256 bytes of 16-byte-aligned stack for work buffers b1/b2.
 */
63 ASM_NAME(dct64_sse): | |
64 pushl %ebp | |
65 movl %esp, %ebp | |
66 /* stack from ebp: 0=ebp 4=back 8=arg0 12=arg1 16=arg2 */ | |
67 #define ARG(n) (8+n*4)(%ebp) | |
68 andl $-16, %esp /* align the stack at 16 bytes */ | |
69 subl $256, %esp /* reserve space for local b1 and b2 */ | |
70 pushl %ebx | |
71 /* stack from esp: 0=ebx 4...131=b2 132...259=b1 */ | |
72 #define B1OFF 132 | |
73 #define B2OFF 4 | |
74 #define B1(n) (B1OFF+n)(%esp) | |
75 #define B2(n) (B2OFF+n)(%esp) | |
76 | |
77 movl ARG(2), %eax | |
78 movl ARG(0), %ecx | |
/* Stage 1: for each of four vector pairs, b1[i] = x[i] + reverse(x[31-i..]),
   b1[mirror] = (reverse(x[i..]) - x[31-i..]) * reversed costab row.
   shufps $27 (0b00011011) reverses the four lanes of a register. */
79 /* APP */ | |
80 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 1 */ | |
81 movaps ASM_NAME(costab_mmxsse), %xmm3 | |
82 shufps $27, %xmm3, %xmm3 | |
83 MOVUAPS (%eax), %xmm1 | |
84 movaps %xmm1, %xmm4 | |
85 MOVUAPS 112(%eax), %xmm2 | |
86 shufps $27, %xmm4, %xmm4 | |
87 movaps %xmm2, %xmm0 | |
88 shufps $27, %xmm0, %xmm0 | |
89 addps %xmm0, %xmm1 | |
90 movaps %xmm1, B1(0) | |
91 subps %xmm2, %xmm4 | |
92 mulps %xmm3, %xmm4 | |
93 movaps %xmm4, B1(112) | |
94 | |
95 /* NO_APP */ | |
96 movl ARG(1), %ebx | |
97 /* APP */ | |
98 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 2 */ | |
99 movaps ASM_NAME(costab_mmxsse)+16, %xmm3 | |
100 shufps $27, %xmm3, %xmm3 | |
101 MOVUAPS 16(%eax), %xmm1 | |
102 movaps %xmm1, %xmm4 | |
103 MOVUAPS 96(%eax), %xmm2 | |
104 shufps $27, %xmm4, %xmm4 | |
105 movaps %xmm2, %xmm0 | |
106 shufps $27, %xmm0, %xmm0 | |
107 addps %xmm0, %xmm1 | |
108 movaps %xmm1, B1(16) | |
109 subps %xmm2, %xmm4 | |
110 mulps %xmm3, %xmm4 | |
111 movaps %xmm4, B1(96) | |
112 | |
113 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 3 */ | |
114 movaps ASM_NAME(costab_mmxsse)+32, %xmm3 | |
115 shufps $27, %xmm3, %xmm3 | |
116 MOVUAPS 32(%eax), %xmm1 | |
117 movaps %xmm1, %xmm4 | |
118 MOVUAPS 80(%eax), %xmm2 | |
119 shufps $27, %xmm4, %xmm4 | |
120 movaps %xmm2, %xmm0 | |
121 shufps $27, %xmm0, %xmm0 | |
122 addps %xmm0, %xmm1 | |
123 movaps %xmm1, B1(32) | |
124 subps %xmm2, %xmm4 | |
125 mulps %xmm3, %xmm4 | |
126 movaps %xmm4, B1(80) | |
127 | |
128 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 4 */ | |
129 movaps ASM_NAME(costab_mmxsse)+48, %xmm3 | |
130 shufps $27, %xmm3, %xmm3 | |
131 MOVUAPS 48(%eax), %xmm1 | |
132 movaps %xmm1, %xmm4 | |
133 MOVUAPS 64(%eax), %xmm2 | |
134 shufps $27, %xmm4, %xmm4 | |
135 movaps %xmm2, %xmm0 | |
136 shufps $27, %xmm0, %xmm0 | |
137 addps %xmm0, %xmm1 | |
138 movaps %xmm1, B1(48) | |
139 subps %xmm2, %xmm4 | |
140 mulps %xmm3, %xmm4 | |
141 movaps %xmm4, B1(64) | |
142 | |
/* Stage 2: sums of b1[0..3] with reversed b1[12..15] etc. into b2
   (two 64-byte halves processed identically). */
143 movaps B1(0), %xmm1 | |
144 movaps B1(16), %xmm3 | |
145 movaps B1(32), %xmm4 | |
146 movaps B1(48), %xmm6 | |
147 movaps %xmm1, %xmm7 | |
148 shufps $27, %xmm7, %xmm7 | |
149 movaps %xmm3, %xmm5 | |
150 shufps $27, %xmm5, %xmm5 | |
151 movaps %xmm4, %xmm2 | |
152 shufps $27, %xmm2, %xmm2 | |
153 movaps %xmm6, %xmm0 | |
154 shufps $27, %xmm0, %xmm0 | |
155 addps %xmm0, %xmm1 | |
156 movaps %xmm1, B2(0) | |
157 addps %xmm2, %xmm3 | |
158 movaps %xmm3, B2(16) | |
159 subps %xmm4, %xmm5 | |
160 movaps %xmm5, B2(32) | |
161 subps %xmm6, %xmm7 | |
162 movaps %xmm7, B2(48) | |
163 | |
164 movaps B1(64), %xmm1 | |
165 movaps B1(80), %xmm3 | |
166 movaps B1(96), %xmm4 | |
167 movaps B1(112), %xmm6 | |
168 movaps %xmm1, %xmm7 | |
169 shufps $27, %xmm7, %xmm7 | |
170 movaps %xmm3, %xmm5 | |
171 shufps $27, %xmm5, %xmm5 | |
172 movaps %xmm4, %xmm2 | |
173 shufps $27, %xmm2, %xmm2 | |
174 movaps %xmm6, %xmm0 | |
175 shufps $27, %xmm0, %xmm0 | |
176 addps %xmm0, %xmm1 | |
177 movaps %xmm1, B2(64) | |
178 addps %xmm2, %xmm3 | |
179 movaps %xmm3, B2(80) | |
180 subps %xmm4, %xmm5 | |
181 movaps %xmm5, B2(96) | |
182 subps %xmm6, %xmm7 | |
183 movaps %xmm7, B2(112) | |
184 | |
/* Scale the difference vectors by reversed costab rows +64/+80; the upper
   pair is additionally negated (0 - x via the zeroed xmm6/xmm7). */
185 movaps B2(32), %xmm0 | |
186 movaps B2(48), %xmm1 | |
187 movaps ASM_NAME(costab_mmxsse)+64, %xmm4 | |
188 xorps %xmm6, %xmm6 | |
189 shufps $27, %xmm4, %xmm4 | |
190 mulps %xmm4, %xmm1 | |
191 movaps ASM_NAME(costab_mmxsse)+80, %xmm2 | |
192 xorps %xmm7, %xmm7 | |
193 shufps $27, %xmm2, %xmm2 | |
194 mulps %xmm2, %xmm0 | |
195 movaps %xmm0, B2(32) | |
196 movaps %xmm1, B2(48) | |
197 movaps B2(96), %xmm3 | |
198 mulps %xmm2, %xmm3 | |
199 subps %xmm3, %xmm6 | |
200 movaps %xmm6, B2(96) | |
201 movaps B2(112), %xmm5 | |
202 mulps %xmm4, %xmm5 | |
203 subps %xmm5, %xmm7 | |
204 movaps %xmm7, B2(112) | |
205 | |
/* Stage 3: four vector-pair butterflies b2 -> b1.  xmm5 holds the nnnn
   sign mask; "xorps %xmm5, %xmm6" toggles xmm6 between all-zero and nnnn
   each round, alternating whether the difference term is negated. */
206 movaps ASM_NAME(costab_mmxsse)+96, %xmm0 | |
207 shufps $27, %xmm0, %xmm0 | |
208 movaps nnnn, %xmm5 | |
209 movaps %xmm5, %xmm6 | |
210 | |
211 movaps B2(0), %xmm2 | |
212 movaps B2(16), %xmm3 | |
213 movaps %xmm2, %xmm4 | |
214 xorps %xmm5, %xmm6 | |
215 shufps $27, %xmm4, %xmm4 | |
216 movaps %xmm3, %xmm1 | |
217 shufps $27, %xmm1, %xmm1 | |
218 addps %xmm1, %xmm2 | |
219 movaps %xmm2, B1(0) | |
220 subps %xmm3, %xmm4 | |
221 xorps %xmm6, %xmm4 | |
222 mulps %xmm0, %xmm4 | |
223 movaps %xmm4, B1(16) | |
224 | |
225 movaps B2(32), %xmm2 | |
226 movaps B2(48), %xmm3 | |
227 movaps %xmm2, %xmm4 | |
228 xorps %xmm5, %xmm6 | |
229 shufps $27, %xmm4, %xmm4 | |
230 movaps %xmm3, %xmm1 | |
231 shufps $27, %xmm1, %xmm1 | |
232 addps %xmm1, %xmm2 | |
233 movaps %xmm2, B1(32) | |
234 subps %xmm3, %xmm4 | |
235 xorps %xmm6, %xmm4 | |
236 mulps %xmm0, %xmm4 | |
237 movaps %xmm4, B1(48) | |
238 | |
239 movaps B2(64), %xmm2 | |
240 movaps B2(80), %xmm3 | |
241 movaps %xmm2, %xmm4 | |
242 xorps %xmm5, %xmm6 | |
243 shufps $27, %xmm4, %xmm4 | |
244 movaps %xmm3, %xmm1 | |
245 shufps $27, %xmm1, %xmm1 | |
246 addps %xmm1, %xmm2 | |
247 movaps %xmm2, B1(64) | |
248 subps %xmm3, %xmm4 | |
249 xorps %xmm6, %xmm4 | |
250 mulps %xmm0, %xmm4 | |
251 movaps %xmm4, B1(80) | |
252 | |
253 movaps B2(96), %xmm2 | |
254 movaps B2(112), %xmm3 | |
255 movaps %xmm2, %xmm4 | |
256 xorps %xmm5, %xmm6 | |
257 shufps $27, %xmm4, %xmm4 | |
258 movaps %xmm3, %xmm1 | |
259 shufps $27, %xmm1, %xmm1 | |
260 addps %xmm1, %xmm2 | |
261 movaps %xmm2, B1(96) | |
262 subps %xmm3, %xmm4 | |
263 xorps %xmm6, %xmm4 | |
264 mulps %xmm0, %xmm4 | |
265 movaps %xmm4, B1(112) | |
266 | |
/* Stage 4: butterflies within each vector.  xmm0 is packed from 1.0
   (one.4748) and the costab words at byte offsets +112/+116 via the
   unpcklps sequence; ppnn negates the upper two lanes of the shuffled
   difference.  xmm1 keeps 1.0 alive for stage 5. */
267 movss one.4748, %xmm1 | |
268 movss ASM_NAME(costab_mmxsse)+112, %xmm0 | |
269 movaps %xmm1, %xmm3 | |
270 unpcklps %xmm0, %xmm3 | |
271 movss ASM_NAME(costab_mmxsse)+116, %xmm2 | |
272 movaps %xmm1, %xmm0 | |
273 unpcklps %xmm2, %xmm0 | |
274 unpcklps %xmm3, %xmm0 | |
275 movaps ppnn, %xmm2 | |
276 | |
277 movaps B1(0), %xmm3 | |
278 movaps %xmm3, %xmm4 | |
279 shufps $20, %xmm4, %xmm4 | |
280 shufps $235, %xmm3, %xmm3 | |
281 xorps %xmm2, %xmm3 | |
282 addps %xmm3, %xmm4 | |
283 mulps %xmm0, %xmm4 | |
284 movaps %xmm4, B2(0) | |
285 movaps B1(16), %xmm6 | |
286 movaps %xmm6, %xmm5 | |
287 shufps $27, %xmm5, %xmm5 | |
288 xorps %xmm2, %xmm5 | |
289 addps %xmm5, %xmm6 | |
290 mulps %xmm0, %xmm6 | |
291 movaps %xmm6, B2(16) | |
292 | |
293 movaps B1(32), %xmm3 | |
294 movaps %xmm3, %xmm4 | |
295 shufps $20, %xmm4, %xmm4 | |
296 shufps $235, %xmm3, %xmm3 | |
297 xorps %xmm2, %xmm3 | |
298 addps %xmm3, %xmm4 | |
299 mulps %xmm0, %xmm4 | |
300 movaps %xmm4, B2(32) | |
301 movaps B1(48), %xmm6 | |
302 movaps %xmm6, %xmm5 | |
303 shufps $27, %xmm5, %xmm5 | |
304 xorps %xmm2, %xmm5 | |
305 addps %xmm5, %xmm6 | |
306 mulps %xmm0, %xmm6 | |
307 movaps %xmm6, B2(48) | |
308 | |
309 movaps B1(64), %xmm3 | |
310 movaps %xmm3, %xmm4 | |
311 shufps $20, %xmm4, %xmm4 | |
312 shufps $235, %xmm3, %xmm3 | |
313 xorps %xmm2, %xmm3 | |
314 addps %xmm3, %xmm4 | |
315 mulps %xmm0, %xmm4 | |
316 movaps %xmm4, B2(64) | |
317 movaps B1(80), %xmm6 | |
318 movaps %xmm6, %xmm5 | |
319 shufps $27, %xmm5, %xmm5 | |
320 xorps %xmm2, %xmm5 | |
321 addps %xmm5, %xmm6 | |
322 mulps %xmm0, %xmm6 | |
323 movaps %xmm6, B2(80) | |
324 | |
325 movaps B1(96), %xmm3 | |
326 movaps %xmm3, %xmm4 | |
327 shufps $20, %xmm4, %xmm4 | |
328 shufps $235, %xmm3, %xmm3 | |
329 xorps %xmm2, %xmm3 | |
330 addps %xmm3, %xmm4 | |
331 mulps %xmm0, %xmm4 | |
332 movaps %xmm4, B2(96) | |
333 movaps B1(112), %xmm6 | |
334 movaps %xmm6, %xmm5 | |
335 shufps $27, %xmm5, %xmm5 | |
336 xorps %xmm2, %xmm5 | |
337 addps %xmm5, %xmm6 | |
338 mulps %xmm0, %xmm6 | |
339 movaps %xmm6, B2(112) | |
340 | |
/* Stage 5: adjacent-lane butterflies.  Multiplier xmm2 is packed from 1.0
   and the costab word at +120; pnpn negates the odd lanes of the
   swapped copy. */
341 movss ASM_NAME(costab_mmxsse)+120, %xmm0 | |
342 movaps %xmm1, %xmm2 | |
343 movaps %xmm0, %xmm7 | |
344 unpcklps %xmm1, %xmm2 | |
345 unpcklps %xmm0, %xmm7 | |
346 movaps pnpn, %xmm0 | |
347 unpcklps %xmm7, %xmm2 | |
348 | |
349 movaps B2(32), %xmm1 | |
350 movaps %xmm1, %xmm3 | |
351 shufps $224, %xmm3, %xmm3 | |
352 shufps $181, %xmm1, %xmm1 | |
353 xorps %xmm0, %xmm1 | |
354 addps %xmm1, %xmm3 | |
355 mulps %xmm2, %xmm3 | |
356 movaps %xmm3, B1(32) | |
357 movaps B2(48), %xmm4 | |
358 movaps %xmm4, %xmm5 | |
359 shufps $224, %xmm5, %xmm5 | |
360 shufps $181, %xmm4, %xmm4 | |
361 xorps %xmm0, %xmm4 | |
362 addps %xmm4, %xmm5 | |
363 mulps %xmm2, %xmm5 | |
364 movaps %xmm5, B1(48) | |
365 | |
366 movaps B2(64), %xmm1 | |
367 movaps %xmm1, %xmm3 | |
368 shufps $224, %xmm3, %xmm3 | |
369 shufps $181, %xmm1, %xmm1 | |
370 xorps %xmm0, %xmm1 | |
371 addps %xmm1, %xmm3 | |
372 mulps %xmm2, %xmm3 | |
373 movaps %xmm3, B1(64) | |
374 movaps B2(80), %xmm4 | |
375 movaps %xmm4, %xmm5 | |
376 shufps $224, %xmm5, %xmm5 | |
377 shufps $181, %xmm4, %xmm4 | |
378 xorps %xmm0, %xmm4 | |
379 addps %xmm4, %xmm5 | |
380 mulps %xmm2, %xmm5 | |
381 movaps %xmm5, B1(80) | |
382 | |
383 movaps B2(96), %xmm1 | |
384 movaps %xmm1, %xmm3 | |
385 shufps $224, %xmm3, %xmm3 | |
386 shufps $181, %xmm1, %xmm1 | |
387 xorps %xmm0, %xmm1 | |
388 addps %xmm1, %xmm3 | |
389 mulps %xmm2, %xmm3 | |
390 movaps %xmm3, B1(96) | |
391 movaps B2(112), %xmm4 | |
392 movaps %xmm4, %xmm5 | |
393 shufps $224, %xmm5, %xmm5 | |
394 shufps $181, %xmm4, %xmm4 | |
395 xorps %xmm0, %xmm4 | |
396 addps %xmm4, %xmm5 | |
397 mulps %xmm2, %xmm5 | |
398 movaps %xmm5, B1(112) | |
399 | |
400 /* NO_APP */ | |
/* Scalar x87 cleanup: fold adjacent partial sums inside b1 in place.
   edx -> b1 base, eax -> b2 base for the output pass that follows. */
401 flds B1(40) | |
402 movl %esp, %edx | |
403 addl $B1OFF, %edx | |
404 movl %esp, %eax | |
405 addl $B2OFF, %eax | |
406 fadds B1(44) | |
407 fstps B1(40) | |
408 flds B1(56) | |
409 fadds B1(60) | |
410 flds B1(48) | |
411 fadd %st(1), %st | |
412 fstps B1(48) | |
413 fadds B1(52) | |
414 fstps B1(56) | |
415 flds B1(52) | |
416 fadds B1(60) | |
417 fstps B1(52) | |
418 flds B1(72) | |
419 fadds B1(76) | |
420 fstps B1(72) | |
421 flds B1(88) | |
422 fadds B1(92) | |
423 flds B1(80) | |
424 fadd %st(1), %st | |
425 fstps B1(80) | |
426 fadds B1(84) | |
427 fstps B1(88) | |
428 flds B1(84) | |
429 fadds B1(92) | |
430 fstps B1(84) | |
431 flds B1(104) | |
432 fadds B1(108) | |
433 fstps B1(104) | |
434 flds B1(120) | |
435 fadds B1(124) | |
436 flds B1(112) | |
437 fadd %st(1), %st | |
438 fstps B1(112) | |
439 fadds B1(116) | |
440 fstps B1(120) | |
441 flds B1(116) | |
442 fadds B1(124) | |
443 fstps B1(116) | |
444 /* APP */ | |
/* Output pass: the costab word at +120 stays at the bottom of the x87
   stack as a scale factor for the fmul %st(n) instructions.  fist/fistp
   store integer (fixed-point) samples into out0 (%ecx) and out1 (%ebx);
   successive samples are 32 bytes apart in each buffer.
   NOTE(review): these integer stores carry no size suffix -- the
   assembler's default operand size applies.  The final movw copy below
   suggests 16-bit samples are intended; verify against the build's
   assembler behavior. */
445 flds ASM_NAME(costab_mmxsse)+120 | |
446 flds (%eax) | |
447 fadds 4(%eax) | |
448 fistp 512(%ecx) | |
449 flds (%eax) | |
450 fsubs 4(%eax) | |
451 fmul %st(1) | |
452 fistp (%ecx) | |
453 flds 12(%eax) | |
454 fsubs 8(%eax) | |
455 fmul %st(1) | |
456 fist 256(%ebx) | |
457 fadds 12(%eax) | |
458 fadds 8(%eax) | |
459 fistp 256(%ecx) | |
460 flds 16(%eax) | |
461 fsubs 20(%eax) | |
462 fmul %st(1) | |
463 flds 28(%eax) | |
464 fsubs 24(%eax) | |
465 fmul %st(2) | |
466 fist 384(%ebx) | |
467 fld %st(0) | |
468 fadds 24(%eax) | |
469 fadds 28(%eax) | |
470 fld %st(0) | |
471 fadds 16(%eax) | |
472 fadds 20(%eax) | |
473 fistp 384(%ecx) | |
474 fadd %st(2) | |
475 fistp 128(%ecx) | |
476 faddp %st(1) | |
477 fistp 128(%ebx) | |
478 flds 32(%edx) | |
479 fadds 48(%edx) | |
480 fistp 448(%ecx) | |
481 flds 48(%edx) | |
482 fadds 40(%edx) | |
483 fistp 320(%ecx) | |
484 flds 40(%edx) | |
485 fadds 56(%edx) | |
486 fistp 192(%ecx) | |
487 flds 56(%edx) | |
488 fadds 36(%edx) | |
489 fistp 64(%ecx) | |
490 flds 36(%edx) | |
491 fadds 52(%edx) | |
492 fistp 64(%ebx) | |
493 flds 52(%edx) | |
494 fadds 44(%edx) | |
495 fistp 192(%ebx) | |
496 flds 60(%edx) | |
497 fist 448(%ebx) | |
498 fadds 44(%edx) | |
499 fistp 320(%ebx) | |
500 flds 96(%edx) | |
501 fadds 112(%edx) | |
502 fld %st(0) | |
503 fadds 64(%edx) | |
504 fistp 480(%ecx) | |
505 fadds 80(%edx) | |
506 fistp 416(%ecx) | |
507 flds 112(%edx) | |
508 fadds 104(%edx) | |
509 fld %st(0) | |
510 fadds 80(%edx) | |
511 fistp 352(%ecx) | |
512 fadds 72(%edx) | |
513 fistp 288(%ecx) | |
514 flds 104(%edx) | |
515 fadds 120(%edx) | |
516 fld %st(0) | |
517 fadds 72(%edx) | |
518 fistp 224(%ecx) | |
519 fadds 88(%edx) | |
520 fistp 160(%ecx) | |
521 flds 120(%edx) | |
522 fadds 100(%edx) | |
523 fld %st(0) | |
524 fadds 88(%edx) | |
525 fistp 96(%ecx) | |
526 fadds 68(%edx) | |
527 fistp 32(%ecx) | |
528 flds 100(%edx) | |
529 fadds 116(%edx) | |
530 fld %st(0) | |
531 fadds 68(%edx) | |
532 fistp 32(%ebx) | |
533 fadds 84(%edx) | |
534 fistp 96(%ebx) | |
535 flds 116(%edx) | |
536 fadds 108(%edx) | |
537 fld %st(0) | |
538 fadds 84(%edx) | |
539 fistp 160(%ebx) | |
540 fadds 76(%edx) | |
541 fistp 224(%ebx) | |
542 flds 108(%edx) | |
543 fadds 124(%edx) | |
544 fld %st(0) | |
545 fadds 76(%edx) | |
546 fistp 288(%ebx) | |
547 fadds 92(%edx) | |
548 fistp 352(%ebx) | |
549 flds 124(%edx) | |
550 fist 480(%ebx) | |
551 fadds 92(%edx) | |
552 fistp 416(%ebx) | |
/* Pop the leftover scale constant so the x87 stack is empty on return. */
553 ffreep %st(0) | |
554 | |
555 /* NO_APP */ | |
/* Duplicate the first 16-bit sample of out0 into out1. */
556 movzwl (%ecx), %eax | |
557 movw %ax, (%ebx) | |
/* Epilogue: restore ebx (cdecl callee-saved) and the caller's frame. */
558 popl %ebx | |
559 movl %ebp, %esp | |
560 popl %ebp | |
561 ret | |
562 /* .size ASM_NAME(dct64_sse), .-ASM_NAME(dct64_sse) */ | |
563 | |
564 /* Mark non-executable stack. */ | |
565 #if defined(__linux__) && defined(__ELF__) | |
566 .section .note.GNU-stack,"",%progbits | |
567 #endif |