Mercurial > SDL_sound_CoreAudio
comparison decoders/libmpg123/decode_sse3d.h @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems with MP3s not seeking, or most modern MP3s not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
561:f2985e08589c | 562:7e08477b0fc1 |
---|---|
1 /* | |
2 decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic) | |
3 | |
4 copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1 | |
5 see COPYING and AUTHORS files in distribution or http://mpg123.org | |
6 initially written by the mysterious higway for MMX (apparently) | |
7 then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec | |
8 Both have agreed to distribution under LGPL 2.1 . | |
9 | |
10 Transformed back into standalone asm, with help of | |
11 gcc -S -DHAVE_CONFIG_H -I. -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c} | |
12 | |
13 The difference between SSE and 3DNowExt is the dct64 function and the synth function name. | |
14 This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S... | |
15 That's not memory efficient since there's doubled code, but it's easier than giving another function pointer. | |
16 Maybe I'll change it in future, but now I need something that works. | |
17 | |
18 Original comment from MPlayer source follows: | |
19 */ | |
20 | |
21 /* | |
22 * this code comes under GPL | |
23 * This code was taken from http://www.mpg123.org | |
24 * See ChangeLog of mpg123-0.59s-pre.1 for detail | |
25 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | |
26 * | |
27 * Local ChangeLog: | |
28 * - Partial loops unrolling and removing MOVW insn from loops | |
29 */ | |
30 | |
31 #include "mangle.h" | |
32 | |
33 .data | |
34 ALIGN8 | |
35 one_null: /* 8-byte mask, 0xFFFF0000 in each dword: ANDed with the 8 bytes already in the output so only the upper 16-bit word of each dword is kept */ | |
36 .long -65536 | |
37 .long -65536 | |
38 ALIGN8 | |
39 null_one: /* 8-byte mask, 0x0000FFFF in each dword: ANDed with the freshly computed values so only the lower 16-bit word of each dword is kept */ | |
40 .long 65535 | |
41 .long 65535 | |
42 | |
43 .text | |
44 ALIGN16,,15 | |
45 /* void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins)
   Arguments are read from the stack relative to %ebp; %edi, %esi and %ebx are saved and restored.
   Steps, as implemented below:
     1. Read *bo; when channel != 1, replace it by (*bo - 1) & 15, store it back and offset buffs by 1088 bytes.
     2. Call MPL_DCT64 with two pointers into buffs and bandPtr.
     3. Multiply-accumulate 16-bit words from decwins with 16-bit words from buffs (pmaddwd),
        shift right by 13, saturate to 16 bit and store one 16-bit word every 4 bytes starting at
        samples + 2*channel; the other 16-bit word of each 4-byte slot is left unchanged.
        The second half of the results is negated (saturating) before being stored.
   With %ebx == 16 after the call this gives 8*2 + 1 + 7*2 + 1 = 32 stored words.
   NOTE(review): this relies on MPL_DCT64 preserving %ebx - confirm against its implementation. */ | |
46 .globl SYNTH_NAME | |
47 SYNTH_NAME: | |
48 pushl %ebp | |
49 /* stack (offsets from %ebp once it is set below): 0=ebp 4=back 8=bandptr 12=channel 16=samples 20=buffs 24=bo 28=decwins */ | |
50 movl %esp, %ebp | |
51 /* Now the old stack addresses are preserved via %ebp. */ | |
52 subl $4,%esp /* What has been called temp before. */ | |
53 pushl %edi | |
54 pushl %esi | |
55 pushl %ebx /* three registers pushed: the 4-byte temp slot is now at 12(%esp), which is what TEMP names */ | |
56 #define TEMP 12(%esp) | |
57 #APP | |
58 movl 12(%ebp),%ecx /* ecx = channel */ | |
59 movl 16(%ebp),%edi /* edi = samples */ | |
60 movl $15,%ebx /* ebx = 15, the mask used for the mod-16 arithmetic on bo */ | |
61 movl 24(%ebp),%edx /* edx = bo (pointer) */ | |
62 leal (%edi,%ecx,2),%edi /* edi = samples + 2*channel bytes: output pointer */ | |
63 decl %ecx /* ecx = channel - 1 */ | |
64 movl 20(%ebp),%esi /* esi = buffs */ | |
65 movl (%edx),%eax /* eax = *bo */ | |
66 jecxz .L01 /* channel == 1: skip the bo update and the buffer offset */ | |
67 decl %eax | |
68 andl %ebx,%eax /* eax = (*bo - 1) & 15 */ | |
69 leal 1088(%esi),%esi /* esi = buffs + 1088 bytes */ | |
70 movl %eax,(%edx) /* store the updated value back to *bo */ | |
71 .L01: | |
72 leal (%esi,%eax,2),%edx /* edx = esi + 2*bo */ | |
73 movl %eax,TEMP /* TEMP = bo */ | |
74 incl %eax | |
75 andl %ebx,%eax /* eax = (bo + 1) & 15 */ | |
76 leal 544(%esi,%eax,2),%ecx /* ecx = esi + 544 + 2*((bo + 1) & 15) */ | |
77 incl %ebx /* ebx = 16 */ | |
78 testl $1, %eax | |
79 jnz .L02 /* (bo + 1) & 15 is odd: keep edx, ecx, TEMP and esi as computed */ | |
80 xchgl %edx,%ecx /* even case: swap the two pointers handed to MPL_DCT64 */ | |
81 incl TEMP /* even case: TEMP = bo + 1 */ | |
82 leal 544(%esi),%esi /* even case: esi += 544; esi is the data pointer for the loops below */ | |
83 .L02: | |
84 emms /* empty the MMX state before the call */ | |
85 pushl 8(%ebp) /* third argument: bandPtr */ | |
86 pushl %edx /* second argument */ | |
87 pushl %ecx /* first argument */ | |
88 call MPL_DCT64 | |
89 addl $12, %esp /* drop the three arguments; TEMP's offset is valid again */ | |
90 leal 1(%ebx), %ecx /* ecx = ebx + 1 (17 when ebx is still 16 after the call) */ | |
91 subl TEMP,%ebx /* ebx = ebx - TEMP */ | |
92 pushl %ecx /* keep the count on the stack; TEMP (12(%esp)) must not be used until the matching popl */ | |
93 /* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */ | |
94 movl 28(%ebp),%ecx /* ecx = decwins */ | |
95 leal (%ecx,%ebx,2), %edx /* edx = decwins + 2*ebx bytes: window pointer for the loops below */ | |
96 movl (%esp),%ecx /* restore, but leave value on stack */ | |
97 shrl $1, %ecx /* ecx = count / 2: number of two-result iterations */ | |
98 ALIGN16 | |
99 .L03: /* first loop: two results (A and B) per iteration */ | |
100 movq (%edx),%mm0 /* A: window bytes edx+0..31 are paired with data bytes esi+0..31 */ | |
101 movq 64(%edx),%mm4 /* B: window bytes edx+64..95 are paired with data bytes esi+32..63 */ | |
102 pmaddwd (%esi),%mm0 | |
103 pmaddwd 32(%esi),%mm4 | |
104 movq 8(%edx),%mm1 | |
105 movq 72(%edx),%mm5 | |
106 pmaddwd 8(%esi),%mm1 | |
107 pmaddwd 40(%esi),%mm5 | |
108 movq 16(%edx),%mm2 | |
109 movq 80(%edx),%mm6 | |
110 pmaddwd 16(%esi),%mm2 | |
111 pmaddwd 48(%esi),%mm6 | |
112 movq 24(%edx),%mm3 | |
113 movq 88(%edx),%mm7 | |
114 pmaddwd 24(%esi),%mm3 | |
115 pmaddwd 56(%esi),%mm7 | |
116 paddd %mm1,%mm0 /* accumulate the four partial sums of A into mm0 ... */ | |
117 paddd %mm5,%mm4 /* ... and those of B into mm4 */ | |
118 paddd %mm2,%mm0 | |
119 paddd %mm6,%mm4 | |
120 paddd %mm3,%mm0 | |
121 paddd %mm7,%mm4 | |
122 movq %mm0,%mm1 | |
123 movq %mm4,%mm5 | |
124 psrlq $32,%mm1 | |
125 psrlq $32,%mm5 | |
126 paddd %mm1,%mm0 /* add high dword to low dword: complete sum of A in the low dword of mm0 */ | |
127 paddd %mm5,%mm4 /* same for B in mm4 */ | |
128 psrad $13,%mm0 /* arithmetic shift right by 13 */ | |
129 psrad $13,%mm4 | |
130 packssdw %mm0,%mm0 /* saturate to signed 16 bit */ | |
131 packssdw %mm4,%mm4 | |
132 movq (%edi), %mm1 /* load the 8 bytes currently at the output position */ | |
133 punpckldq %mm4, %mm0 /* mm0 = low dword of A followed by low dword of B */ | |
134 pand one_null, %mm1 /* existing data: keep only the upper word of each dword */ | |
135 pand null_one, %mm0 /* new data: keep only the lower word of each dword */ | |
136 por %mm0, %mm1 | |
137 movq %mm1,(%edi) /* A lands at edi+0, B at edi+4; the words at edi+2 and edi+6 are unchanged */ | |
138 leal 64(%esi),%esi /* advance data pointer by 64 bytes */ | |
139 leal 128(%edx),%edx /* advance window pointer by 128 bytes */ | |
140 leal 8(%edi),%edi /* advance output pointer by 8 bytes */ | |
141 decl %ecx | |
142 jnz .L03 | |
143 popl %ecx /* take the count back off the stack; TEMP's offset is valid again */ | |
144 andl $1, %ecx | |
145 jecxz .next_loop /* even count: no single leftover result */ | |
146 movq (%edx),%mm0 /* odd count: compute one more result the same way */ | |
147 pmaddwd (%esi),%mm0 | |
148 movq 8(%edx),%mm1 | |
149 pmaddwd 8(%esi),%mm1 | |
150 movq 16(%edx),%mm2 | |
151 pmaddwd 16(%esi),%mm2 | |
152 movq 24(%edx),%mm3 | |
153 pmaddwd 24(%esi),%mm3 | |
154 paddd %mm1,%mm0 | |
155 paddd %mm2,%mm0 | |
156 paddd %mm3,%mm0 | |
157 movq %mm0,%mm1 | |
158 psrlq $32,%mm1 | |
159 paddd %mm1,%mm0 | |
160 psrad $13,%mm0 | |
161 packssdw %mm0,%mm0 | |
162 movd %mm0,%eax | |
163 movw %ax, (%edi) /* store a single 16-bit word; the neighbouring word is not touched */ | |
164 leal 32(%esi),%esi | |
165 leal 64(%edx),%edx | |
166 leal 4(%edi),%edi | |
167 .next_loop: | |
168 subl $64,%esi /* step the data pointer back by 64 bytes; from here on it moves downwards */ | |
169 movl $7,%ecx /* seven two-result iterations */ | |
170 ALIGN16 | |
171 .L04: /* second loop: like .L03, but esi decreases and each result is negated */ | |
172 movq (%edx),%mm0 | |
173 movq 64(%edx),%mm4 | |
174 pmaddwd (%esi),%mm0 | |
175 pmaddwd -32(%esi),%mm4 /* B uses the data block 32 bytes below the one used for A */ | |
176 movq 8(%edx),%mm1 | |
177 movq 72(%edx),%mm5 | |
178 pmaddwd 8(%esi),%mm1 | |
179 pmaddwd -24(%esi),%mm5 | |
180 movq 16(%edx),%mm2 | |
181 movq 80(%edx),%mm6 | |
182 pmaddwd 16(%esi),%mm2 | |
183 pmaddwd -16(%esi),%mm6 | |
184 movq 24(%edx),%mm3 | |
185 movq 88(%edx),%mm7 | |
186 pmaddwd 24(%esi),%mm3 | |
187 pmaddwd -8(%esi),%mm7 | |
188 paddd %mm1,%mm0 | |
189 paddd %mm5,%mm4 | |
190 paddd %mm2,%mm0 | |
191 paddd %mm6,%mm4 | |
192 paddd %mm3,%mm0 | |
193 paddd %mm7,%mm4 | |
194 movq %mm0,%mm1 | |
195 movq %mm4,%mm5 | |
196 psrlq $32,%mm1 | |
197 psrlq $32,%mm5 | |
198 paddd %mm0,%mm1 /* high + low dword; unlike .L03 the sum ends up in mm1 (A) ... */ | |
199 paddd %mm4,%mm5 /* ... and mm5 (B) */ | |
200 psrad $13,%mm1 | |
201 psrad $13,%mm5 | |
202 packssdw %mm1,%mm1 | |
203 packssdw %mm5,%mm5 | |
204 psubd %mm0,%mm0 /* mm0 = 0 */ | |
205 psubd %mm4,%mm4 /* mm4 = 0 */ | |
206 psubsw %mm1,%mm0 /* mm0 = 0 - A with signed 16-bit saturation */ | |
207 psubsw %mm5,%mm4 /* mm4 = 0 - B with signed 16-bit saturation */ | |
208 movq (%edi), %mm1 /* merge into the output exactly as in .L03 */ | |
209 punpckldq %mm4, %mm0 | |
210 pand one_null, %mm1 | |
211 pand null_one, %mm0 | |
212 por %mm0, %mm1 | |
213 movq %mm1,(%edi) | |
214 subl $64,%esi /* data pointer moves down by 64 bytes */ | |
215 addl $128,%edx /* window pointer moves up by 128 bytes */ | |
216 leal 8(%edi),%edi | |
217 decl %ecx | |
218 jnz .L04 | |
219 movq (%edx),%mm0 /* final single result, negated like the ones in .L04 */ | |
220 pmaddwd (%esi),%mm0 | |
221 movq 8(%edx),%mm1 | |
222 pmaddwd 8(%esi),%mm1 | |
223 movq 16(%edx),%mm2 | |
224 pmaddwd 16(%esi),%mm2 | |
225 movq 24(%edx),%mm3 | |
226 pmaddwd 24(%esi),%mm3 | |
227 paddd %mm1,%mm0 | |
228 paddd %mm2,%mm0 | |
229 paddd %mm3,%mm0 | |
230 movq %mm0,%mm1 | |
231 psrlq $32,%mm1 | |
232 paddd %mm0,%mm1 | |
233 psrad $13,%mm1 | |
234 packssdw %mm1,%mm1 | |
235 psubd %mm0,%mm0 | |
236 psubsw %mm1,%mm0 | |
237 movd %mm0,%eax | |
238 movw %ax,(%edi) /* store the last 16-bit word */ | |
239 emms /* empty the MMX state before returning */ | |
240 | |
241 #NO_APP | |
242 popl %ebx /* restore the saved registers in reverse push order */ | |
243 popl %esi | |
244 popl %edi | |
245 addl $4,%esp /* release the temp slot */ | |
246 popl %ebp | |
247 ret | |