/*
	dct64_sse: MMX/SSE optimized dct64

	copyright 2006-2007 by Zuxy Meng <zuxy.meng@gmail.com> / the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by the mysterious higway for MMX (apparently)
	then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
	Both have agreed to distribution under LGPL 2.1 .

	Transformed back into standalone asm, with help of
	gcc -S -DHAVE_CONFIG_H -I. -march=pentium3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_sse.{S,c}

	Original comment from MPlayer source follows:
*/

/*
 * Discrete Cosine Transform (DCT) for SSE
 * based upon code from mp3lib/dct64.c, mp3lib/dct64_altivec.c
 * and mp3lib/dct64_MMX.c
 */

#include "mangle.h"

#ifndef __APPLE__
	.section .rodata
#else
	.data
#endif
	ALIGN16
/* .type nnnn, @object
   .size nnnn, 16 */
/* Sign-bit masks (0x80000000 per selected lane), combined with xorps
   to negate packed single-precision floats without arithmetic. */
nnnn:	/* negate all four lanes */
	.long -2147483648
	.long -2147483648
	.long -2147483648
	.long -2147483648
	ALIGN16
/* .type ppnn, @object
   .size ppnn, 16 */
ppnn:	/* keep the two low lanes, negate the two high lanes */
	.long 0
	.long 0
	.long -2147483648
	.long -2147483648
	ALIGN16
/* .type pnpn, @object
   .size pnpn, 16 */
pnpn:	/* negate lanes 1 and 3 (alternating sign) */
	.long 0
	.long -2147483648
	.long 0
	.long -2147483648
	ALIGN4
/* .type one.4748, @object
   .size one.4748, 4 */
one.4748:	/* 1065353216 = 0x3f800000 = 1.0f */
	.long 1065353216

59 .text
|
|
60 ALIGN16,,15
|
|
61 .globl ASM_NAME(dct64_sse)
|
|
62 /* .type ASM_NAME(dct64_sse), @function */
|
|
63 ASM_NAME(dct64_sse):
|
|
64 pushl %ebp
|
|
65 movl %esp, %ebp
|
|
66 /* stack from ebp: 0=ebp 4=back 8=arg0 12=arg1 16=arg2 */
|
|
67 #define ARG(n) (8+n*4)(%ebp)
|
|
68 andl $-16, %esp /* align the stack at 16 bytes */
|
|
69 subl $256, %esp /* reserve space for local b1 and b2 */
|
|
70 pushl %ebx
|
|
71 /* stack from esp: 0=ebx 4...131=b2 132...259=b1 */
|
|
72 #define B1OFF 132
|
|
73 #define B2OFF 4
|
|
74 #define B1(n) (B1OFF+n)(%esp)
|
|
75 #define B2(n) (B2OFF+n)(%esp)
|
|
76
|
|
77 movl ARG(2), %eax
|
|
78 movl ARG(0), %ecx
|
|
79 /* APP */
|
|
80 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 1 */
|
|
81 movaps ASM_NAME(costab_mmxsse), %xmm3
|
|
82 shufps $27, %xmm3, %xmm3
|
|
83 MOVUAPS (%eax), %xmm1
|
|
84 movaps %xmm1, %xmm4
|
|
85 MOVUAPS 112(%eax), %xmm2
|
|
86 shufps $27, %xmm4, %xmm4
|
|
87 movaps %xmm2, %xmm0
|
|
88 shufps $27, %xmm0, %xmm0
|
|
89 addps %xmm0, %xmm1
|
|
90 movaps %xmm1, B1(0)
|
|
91 subps %xmm2, %xmm4
|
|
92 mulps %xmm3, %xmm4
|
|
93 movaps %xmm4, B1(112)
|
|
94
|
|
95 /* NO_APP */
|
|
96 movl ARG(1), %ebx
|
|
97 /* APP */
|
|
98 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 2 */
|
|
99 movaps ASM_NAME(costab_mmxsse)+16, %xmm3
|
|
100 shufps $27, %xmm3, %xmm3
|
|
101 MOVUAPS 16(%eax), %xmm1
|
|
102 movaps %xmm1, %xmm4
|
|
103 MOVUAPS 96(%eax), %xmm2
|
|
104 shufps $27, %xmm4, %xmm4
|
|
105 movaps %xmm2, %xmm0
|
|
106 shufps $27, %xmm0, %xmm0
|
|
107 addps %xmm0, %xmm1
|
|
108 movaps %xmm1, B1(16)
|
|
109 subps %xmm2, %xmm4
|
|
110 mulps %xmm3, %xmm4
|
|
111 movaps %xmm4, B1(96)
|
|
112
|
|
113 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 3 */
|
|
114 movaps ASM_NAME(costab_mmxsse)+32, %xmm3
|
|
115 shufps $27, %xmm3, %xmm3
|
|
116 MOVUAPS 32(%eax), %xmm1
|
|
117 movaps %xmm1, %xmm4
|
|
118 MOVUAPS 80(%eax), %xmm2
|
|
119 shufps $27, %xmm4, %xmm4
|
|
120 movaps %xmm2, %xmm0
|
|
121 shufps $27, %xmm0, %xmm0
|
|
122 addps %xmm0, %xmm1
|
|
123 movaps %xmm1, B1(32)
|
|
124 subps %xmm2, %xmm4
|
|
125 mulps %xmm3, %xmm4
|
|
126 movaps %xmm4, B1(80)
|
|
127
|
|
128 /* for (i = 0; i < 0x20 / 2; i += 4) cycle 4 */
|
|
129 movaps ASM_NAME(costab_mmxsse)+48, %xmm3
|
|
130 shufps $27, %xmm3, %xmm3
|
|
131 MOVUAPS 48(%eax), %xmm1
|
|
132 movaps %xmm1, %xmm4
|
|
133 MOVUAPS 64(%eax), %xmm2
|
|
134 shufps $27, %xmm4, %xmm4
|
|
135 movaps %xmm2, %xmm0
|
|
136 shufps $27, %xmm0, %xmm0
|
|
137 addps %xmm0, %xmm1
|
|
138 movaps %xmm1, B1(48)
|
|
139 subps %xmm2, %xmm4
|
|
140 mulps %xmm3, %xmm4
|
|
141 movaps %xmm4, B1(64)
|
|
142
|
|
143 movaps B1(0), %xmm1
|
|
144 movaps B1(16), %xmm3
|
|
145 movaps B1(32), %xmm4
|
|
146 movaps B1(48), %xmm6
|
|
147 movaps %xmm1, %xmm7
|
|
148 shufps $27, %xmm7, %xmm7
|
|
149 movaps %xmm3, %xmm5
|
|
150 shufps $27, %xmm5, %xmm5
|
|
151 movaps %xmm4, %xmm2
|
|
152 shufps $27, %xmm2, %xmm2
|
|
153 movaps %xmm6, %xmm0
|
|
154 shufps $27, %xmm0, %xmm0
|
|
155 addps %xmm0, %xmm1
|
|
156 movaps %xmm1, B2(0)
|
|
157 addps %xmm2, %xmm3
|
|
158 movaps %xmm3, B2(16)
|
|
159 subps %xmm4, %xmm5
|
|
160 movaps %xmm5, B2(32)
|
|
161 subps %xmm6, %xmm7
|
|
162 movaps %xmm7, B2(48)
|
|
163
|
|
164 movaps B1(64), %xmm1
|
|
165 movaps B1(80), %xmm3
|
|
166 movaps B1(96), %xmm4
|
|
167 movaps B1(112), %xmm6
|
|
168 movaps %xmm1, %xmm7
|
|
169 shufps $27, %xmm7, %xmm7
|
|
170 movaps %xmm3, %xmm5
|
|
171 shufps $27, %xmm5, %xmm5
|
|
172 movaps %xmm4, %xmm2
|
|
173 shufps $27, %xmm2, %xmm2
|
|
174 movaps %xmm6, %xmm0
|
|
175 shufps $27, %xmm0, %xmm0
|
|
176 addps %xmm0, %xmm1
|
|
177 movaps %xmm1, B2(64)
|
|
178 addps %xmm2, %xmm3
|
|
179 movaps %xmm3, B2(80)
|
|
180 subps %xmm4, %xmm5
|
|
181 movaps %xmm5, B2(96)
|
|
182 subps %xmm6, %xmm7
|
|
183 movaps %xmm7, B2(112)
|
|
184
|
|
185 movaps B2(32), %xmm0
|
|
186 movaps B2(48), %xmm1
|
|
187 movaps ASM_NAME(costab_mmxsse)+64, %xmm4
|
|
188 xorps %xmm6, %xmm6
|
|
189 shufps $27, %xmm4, %xmm4
|
|
190 mulps %xmm4, %xmm1
|
|
191 movaps ASM_NAME(costab_mmxsse)+80, %xmm2
|
|
192 xorps %xmm7, %xmm7
|
|
193 shufps $27, %xmm2, %xmm2
|
|
194 mulps %xmm2, %xmm0
|
|
195 movaps %xmm0, B2(32)
|
|
196 movaps %xmm1, B2(48)
|
|
197 movaps B2(96), %xmm3
|
|
198 mulps %xmm2, %xmm3
|
|
199 subps %xmm3, %xmm6
|
|
200 movaps %xmm6, B2(96)
|
|
201 movaps B2(112), %xmm5
|
|
202 mulps %xmm4, %xmm5
|
|
203 subps %xmm5, %xmm7
|
|
204 movaps %xmm7, B2(112)
|
|
205
|
|
206 movaps ASM_NAME(costab_mmxsse)+96, %xmm0
|
|
207 shufps $27, %xmm0, %xmm0
|
|
208 movaps nnnn, %xmm5
|
|
209 movaps %xmm5, %xmm6
|
|
210
|
|
211 movaps B2(0), %xmm2
|
|
212 movaps B2(16), %xmm3
|
|
213 movaps %xmm2, %xmm4
|
|
214 xorps %xmm5, %xmm6
|
|
215 shufps $27, %xmm4, %xmm4
|
|
216 movaps %xmm3, %xmm1
|
|
217 shufps $27, %xmm1, %xmm1
|
|
218 addps %xmm1, %xmm2
|
|
219 movaps %xmm2, B1(0)
|
|
220 subps %xmm3, %xmm4
|
|
221 xorps %xmm6, %xmm4
|
|
222 mulps %xmm0, %xmm4
|
|
223 movaps %xmm4, B1(16)
|
|
224
|
|
225 movaps B2(32), %xmm2
|
|
226 movaps B2(48), %xmm3
|
|
227 movaps %xmm2, %xmm4
|
|
228 xorps %xmm5, %xmm6
|
|
229 shufps $27, %xmm4, %xmm4
|
|
230 movaps %xmm3, %xmm1
|
|
231 shufps $27, %xmm1, %xmm1
|
|
232 addps %xmm1, %xmm2
|
|
233 movaps %xmm2, B1(32)
|
|
234 subps %xmm3, %xmm4
|
|
235 xorps %xmm6, %xmm4
|
|
236 mulps %xmm0, %xmm4
|
|
237 movaps %xmm4, B1(48)
|
|
238
|
|
239 movaps B2(64), %xmm2
|
|
240 movaps B2(80), %xmm3
|
|
241 movaps %xmm2, %xmm4
|
|
242 xorps %xmm5, %xmm6
|
|
243 shufps $27, %xmm4, %xmm4
|
|
244 movaps %xmm3, %xmm1
|
|
245 shufps $27, %xmm1, %xmm1
|
|
246 addps %xmm1, %xmm2
|
|
247 movaps %xmm2, B1(64)
|
|
248 subps %xmm3, %xmm4
|
|
249 xorps %xmm6, %xmm4
|
|
250 mulps %xmm0, %xmm4
|
|
251 movaps %xmm4, B1(80)
|
|
252
|
|
253 movaps B2(96), %xmm2
|
|
254 movaps B2(112), %xmm3
|
|
255 movaps %xmm2, %xmm4
|
|
256 xorps %xmm5, %xmm6
|
|
257 shufps $27, %xmm4, %xmm4
|
|
258 movaps %xmm3, %xmm1
|
|
259 shufps $27, %xmm1, %xmm1
|
|
260 addps %xmm1, %xmm2
|
|
261 movaps %xmm2, B1(96)
|
|
262 subps %xmm3, %xmm4
|
|
263 xorps %xmm6, %xmm4
|
|
264 mulps %xmm0, %xmm4
|
|
265 movaps %xmm4, B1(112)
|
|
266
|
|
267 movss one.4748, %xmm1
|
|
268 movss ASM_NAME(costab_mmxsse)+112, %xmm0
|
|
269 movaps %xmm1, %xmm3
|
|
270 unpcklps %xmm0, %xmm3
|
|
271 movss ASM_NAME(costab_mmxsse)+116, %xmm2
|
|
272 movaps %xmm1, %xmm0
|
|
273 unpcklps %xmm2, %xmm0
|
|
274 unpcklps %xmm3, %xmm0
|
|
275 movaps ppnn, %xmm2
|
|
276
|
|
277 movaps B1(0), %xmm3
|
|
278 movaps %xmm3, %xmm4
|
|
279 shufps $20, %xmm4, %xmm4
|
|
280 shufps $235, %xmm3, %xmm3
|
|
281 xorps %xmm2, %xmm3
|
|
282 addps %xmm3, %xmm4
|
|
283 mulps %xmm0, %xmm4
|
|
284 movaps %xmm4, B2(0)
|
|
285 movaps B1(16), %xmm6
|
|
286 movaps %xmm6, %xmm5
|
|
287 shufps $27, %xmm5, %xmm5
|
|
288 xorps %xmm2, %xmm5
|
|
289 addps %xmm5, %xmm6
|
|
290 mulps %xmm0, %xmm6
|
|
291 movaps %xmm6, B2(16)
|
|
292
|
|
293 movaps B1(32), %xmm3
|
|
294 movaps %xmm3, %xmm4
|
|
295 shufps $20, %xmm4, %xmm4
|
|
296 shufps $235, %xmm3, %xmm3
|
|
297 xorps %xmm2, %xmm3
|
|
298 addps %xmm3, %xmm4
|
|
299 mulps %xmm0, %xmm4
|
|
300 movaps %xmm4, B2(32)
|
|
301 movaps B1(48), %xmm6
|
|
302 movaps %xmm6, %xmm5
|
|
303 shufps $27, %xmm5, %xmm5
|
|
304 xorps %xmm2, %xmm5
|
|
305 addps %xmm5, %xmm6
|
|
306 mulps %xmm0, %xmm6
|
|
307 movaps %xmm6, B2(48)
|
|
308
|
|
309 movaps B1(64), %xmm3
|
|
310 movaps %xmm3, %xmm4
|
|
311 shufps $20, %xmm4, %xmm4
|
|
312 shufps $235, %xmm3, %xmm3
|
|
313 xorps %xmm2, %xmm3
|
|
314 addps %xmm3, %xmm4
|
|
315 mulps %xmm0, %xmm4
|
|
316 movaps %xmm4, B2(64)
|
|
317 movaps B1(80), %xmm6
|
|
318 movaps %xmm6, %xmm5
|
|
319 shufps $27, %xmm5, %xmm5
|
|
320 xorps %xmm2, %xmm5
|
|
321 addps %xmm5, %xmm6
|
|
322 mulps %xmm0, %xmm6
|
|
323 movaps %xmm6, B2(80)
|
|
324
|
|
325 movaps B1(96), %xmm3
|
|
326 movaps %xmm3, %xmm4
|
|
327 shufps $20, %xmm4, %xmm4
|
|
328 shufps $235, %xmm3, %xmm3
|
|
329 xorps %xmm2, %xmm3
|
|
330 addps %xmm3, %xmm4
|
|
331 mulps %xmm0, %xmm4
|
|
332 movaps %xmm4, B2(96)
|
|
333 movaps B1(112), %xmm6
|
|
334 movaps %xmm6, %xmm5
|
|
335 shufps $27, %xmm5, %xmm5
|
|
336 xorps %xmm2, %xmm5
|
|
337 addps %xmm5, %xmm6
|
|
338 mulps %xmm0, %xmm6
|
|
339 movaps %xmm6, B2(112)
|
|
340
|
|
341 movss ASM_NAME(costab_mmxsse)+120, %xmm0
|
|
342 movaps %xmm1, %xmm2
|
|
343 movaps %xmm0, %xmm7
|
|
344 unpcklps %xmm1, %xmm2
|
|
345 unpcklps %xmm0, %xmm7
|
|
346 movaps pnpn, %xmm0
|
|
347 unpcklps %xmm7, %xmm2
|
|
348
|
|
349 movaps B2(32), %xmm1
|
|
350 movaps %xmm1, %xmm3
|
|
351 shufps $224, %xmm3, %xmm3
|
|
352 shufps $181, %xmm1, %xmm1
|
|
353 xorps %xmm0, %xmm1
|
|
354 addps %xmm1, %xmm3
|
|
355 mulps %xmm2, %xmm3
|
|
356 movaps %xmm3, B1(32)
|
|
357 movaps B2(48), %xmm4
|
|
358 movaps %xmm4, %xmm5
|
|
359 shufps $224, %xmm5, %xmm5
|
|
360 shufps $181, %xmm4, %xmm4
|
|
361 xorps %xmm0, %xmm4
|
|
362 addps %xmm4, %xmm5
|
|
363 mulps %xmm2, %xmm5
|
|
364 movaps %xmm5, B1(48)
|
|
365
|
|
366 movaps B2(64), %xmm1
|
|
367 movaps %xmm1, %xmm3
|
|
368 shufps $224, %xmm3, %xmm3
|
|
369 shufps $181, %xmm1, %xmm1
|
|
370 xorps %xmm0, %xmm1
|
|
371 addps %xmm1, %xmm3
|
|
372 mulps %xmm2, %xmm3
|
|
373 movaps %xmm3, B1(64)
|
|
374 movaps B2(80), %xmm4
|
|
375 movaps %xmm4, %xmm5
|
|
376 shufps $224, %xmm5, %xmm5
|
|
377 shufps $181, %xmm4, %xmm4
|
|
378 xorps %xmm0, %xmm4
|
|
379 addps %xmm4, %xmm5
|
|
380 mulps %xmm2, %xmm5
|
|
381 movaps %xmm5, B1(80)
|
|
382
|
|
383 movaps B2(96), %xmm1
|
|
384 movaps %xmm1, %xmm3
|
|
385 shufps $224, %xmm3, %xmm3
|
|
386 shufps $181, %xmm1, %xmm1
|
|
387 xorps %xmm0, %xmm1
|
|
388 addps %xmm1, %xmm3
|
|
389 mulps %xmm2, %xmm3
|
|
390 movaps %xmm3, B1(96)
|
|
391 movaps B2(112), %xmm4
|
|
392 movaps %xmm4, %xmm5
|
|
393 shufps $224, %xmm5, %xmm5
|
|
394 shufps $181, %xmm4, %xmm4
|
|
395 xorps %xmm0, %xmm4
|
|
396 addps %xmm4, %xmm5
|
|
397 mulps %xmm2, %xmm5
|
|
398 movaps %xmm5, B1(112)
|
|
399
|
|
400 /* NO_APP */
|
|
401 flds B1(40)
|
|
402 movl %esp, %edx
|
|
403 addl $B1OFF, %edx
|
|
404 movl %esp, %eax
|
|
405 addl $B2OFF, %eax
|
|
406 fadds B1(44)
|
|
407 fstps B1(40)
|
|
408 flds B1(56)
|
|
409 fadds B1(60)
|
|
410 flds B1(48)
|
|
411 fadd %st(1), %st
|
|
412 fstps B1(48)
|
|
413 fadds B1(52)
|
|
414 fstps B1(56)
|
|
415 flds B1(52)
|
|
416 fadds B1(60)
|
|
417 fstps B1(52)
|
|
418 flds B1(72)
|
|
419 fadds B1(76)
|
|
420 fstps B1(72)
|
|
421 flds B1(88)
|
|
422 fadds B1(92)
|
|
423 flds B1(80)
|
|
424 fadd %st(1), %st
|
|
425 fstps B1(80)
|
|
426 fadds B1(84)
|
|
427 fstps B1(88)
|
|
428 flds B1(84)
|
|
429 fadds B1(92)
|
|
430 fstps B1(84)
|
|
431 flds B1(104)
|
|
432 fadds B1(108)
|
|
433 fstps B1(104)
|
|
434 flds B1(120)
|
|
435 fadds B1(124)
|
|
436 flds B1(112)
|
|
437 fadd %st(1), %st
|
|
438 fstps B1(112)
|
|
439 fadds B1(116)
|
|
440 fstps B1(120)
|
|
441 flds B1(116)
|
|
442 fadds B1(124)
|
|
443 fstps B1(116)
|
|
444 /* APP */
|
|
445 flds ASM_NAME(costab_mmxsse)+120
|
|
446 flds (%eax)
|
|
447 fadds 4(%eax)
|
|
448 fistp 512(%ecx)
|
|
449 flds (%eax)
|
|
450 fsubs 4(%eax)
|
|
451 fmul %st(1)
|
|
452 fistp (%ecx)
|
|
453 flds 12(%eax)
|
|
454 fsubs 8(%eax)
|
|
455 fmul %st(1)
|
|
456 fist 256(%ebx)
|
|
457 fadds 12(%eax)
|
|
458 fadds 8(%eax)
|
|
459 fistp 256(%ecx)
|
|
460 flds 16(%eax)
|
|
461 fsubs 20(%eax)
|
|
462 fmul %st(1)
|
|
463 flds 28(%eax)
|
|
464 fsubs 24(%eax)
|
|
465 fmul %st(2)
|
|
466 fist 384(%ebx)
|
|
467 fld %st(0)
|
|
468 fadds 24(%eax)
|
|
469 fadds 28(%eax)
|
|
470 fld %st(0)
|
|
471 fadds 16(%eax)
|
|
472 fadds 20(%eax)
|
|
473 fistp 384(%ecx)
|
|
474 fadd %st(2)
|
|
475 fistp 128(%ecx)
|
|
476 faddp %st(1)
|
|
477 fistp 128(%ebx)
|
|
478 flds 32(%edx)
|
|
479 fadds 48(%edx)
|
|
480 fistp 448(%ecx)
|
|
481 flds 48(%edx)
|
|
482 fadds 40(%edx)
|
|
483 fistp 320(%ecx)
|
|
484 flds 40(%edx)
|
|
485 fadds 56(%edx)
|
|
486 fistp 192(%ecx)
|
|
487 flds 56(%edx)
|
|
488 fadds 36(%edx)
|
|
489 fistp 64(%ecx)
|
|
490 flds 36(%edx)
|
|
491 fadds 52(%edx)
|
|
492 fistp 64(%ebx)
|
|
493 flds 52(%edx)
|
|
494 fadds 44(%edx)
|
|
495 fistp 192(%ebx)
|
|
496 flds 60(%edx)
|
|
497 fist 448(%ebx)
|
|
498 fadds 44(%edx)
|
|
499 fistp 320(%ebx)
|
|
500 flds 96(%edx)
|
|
501 fadds 112(%edx)
|
|
502 fld %st(0)
|
|
503 fadds 64(%edx)
|
|
504 fistp 480(%ecx)
|
|
505 fadds 80(%edx)
|
|
506 fistp 416(%ecx)
|
|
507 flds 112(%edx)
|
|
508 fadds 104(%edx)
|
|
509 fld %st(0)
|
|
510 fadds 80(%edx)
|
|
511 fistp 352(%ecx)
|
|
512 fadds 72(%edx)
|
|
513 fistp 288(%ecx)
|
|
514 flds 104(%edx)
|
|
515 fadds 120(%edx)
|
|
516 fld %st(0)
|
|
517 fadds 72(%edx)
|
|
518 fistp 224(%ecx)
|
|
519 fadds 88(%edx)
|
|
520 fistp 160(%ecx)
|
|
521 flds 120(%edx)
|
|
522 fadds 100(%edx)
|
|
523 fld %st(0)
|
|
524 fadds 88(%edx)
|
|
525 fistp 96(%ecx)
|
|
526 fadds 68(%edx)
|
|
527 fistp 32(%ecx)
|
|
528 flds 100(%edx)
|
|
529 fadds 116(%edx)
|
|
530 fld %st(0)
|
|
531 fadds 68(%edx)
|
|
532 fistp 32(%ebx)
|
|
533 fadds 84(%edx)
|
|
534 fistp 96(%ebx)
|
|
535 flds 116(%edx)
|
|
536 fadds 108(%edx)
|
|
537 fld %st(0)
|
|
538 fadds 84(%edx)
|
|
539 fistp 160(%ebx)
|
|
540 fadds 76(%edx)
|
|
541 fistp 224(%ebx)
|
|
542 flds 108(%edx)
|
|
543 fadds 124(%edx)
|
|
544 fld %st(0)
|
|
545 fadds 76(%edx)
|
|
546 fistp 288(%ebx)
|
|
547 fadds 92(%edx)
|
|
548 fistp 352(%ebx)
|
|
549 flds 124(%edx)
|
|
550 fist 480(%ebx)
|
|
551 fadds 92(%edx)
|
|
552 fistp 416(%ebx)
|
|
553 ffreep %st(0)
|
|
554
|
|
555 /* NO_APP */
|
|
556 movzwl (%ecx), %eax
|
|
557 movw %ax, (%ebx)
|
|
558 popl %ebx
|
|
559 movl %ebp, %esp
|
|
560 popl %ebp
|
|
561 ret
|
|
562 /* .size ASM_NAME(dct64_sse), .-ASM_NAME(dct64_sse) */

/* Mark non-executable stack. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif