Mercurial > SDL_sound_CoreAudio
comparison decoders/libmpg123/dct64_i386.c @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems about MP3's not seeking, or most modern MP3's not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
561:f2985e08589c | 562:7e08477b0fc1 |
---|---|
1 /* | |
2 dct64_i386.c: DCT64, a C variant for i386 | |
3 | |
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 | |
5 see COPYING and AUTHORS files in distribution or http://mpg123.org | |
6 initially written by Michael Hipp | |
7 */ | |
8 | |
9 /* | |
10 * Discrete Cosine Tansform (DCT) for subband synthesis | |
11 * optimized for machines with no auto-increment. | |
12 * The performance is highly compiler dependend. Maybe | |
13 * the dct64.c version for 'normal' processor may be faster | |
14 * even for Intel processors. | |
15 */ | |
16 | |
17 #include "mpg123lib_intern.h" | |
18 | |
19 static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples) | |
20 { | |
21 { | |
22 register real *costab = pnts[0]; | |
23 | |
24 b1[0x00] = samples[0x00] + samples[0x1F]; | |
25 b1[0x01] = samples[0x01] + samples[0x1E]; | |
26 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; | |
27 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; | |
28 | |
29 b1[0x02] = samples[0x02] + samples[0x1D]; | |
30 b1[0x03] = samples[0x03] + samples[0x1C]; | |
31 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; | |
32 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; | |
33 | |
34 b1[0x04] = samples[0x04] + samples[0x1B]; | |
35 b1[0x05] = samples[0x05] + samples[0x1A]; | |
36 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; | |
37 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; | |
38 | |
39 b1[0x06] = samples[0x06] + samples[0x19]; | |
40 b1[0x07] = samples[0x07] + samples[0x18]; | |
41 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; | |
42 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; | |
43 | |
44 b1[0x08] = samples[0x08] + samples[0x17]; | |
45 b1[0x09] = samples[0x09] + samples[0x16]; | |
46 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; | |
47 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; | |
48 | |
49 b1[0x0A] = samples[0x0A] + samples[0x15]; | |
50 b1[0x0B] = samples[0x0B] + samples[0x14]; | |
51 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; | |
52 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; | |
53 | |
54 b1[0x0C] = samples[0x0C] + samples[0x13]; | |
55 b1[0x0D] = samples[0x0D] + samples[0x12]; | |
56 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; | |
57 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; | |
58 | |
59 b1[0x0E] = samples[0x0E] + samples[0x11]; | |
60 b1[0x0F] = samples[0x0F] + samples[0x10]; | |
61 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; | |
62 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; | |
63 | |
64 } | |
65 | |
66 | |
67 { | |
68 register real *costab = pnts[1]; | |
69 | |
70 b2[0x00] = b1[0x00] + b1[0x0F]; | |
71 b2[0x01] = b1[0x01] + b1[0x0E]; | |
72 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; | |
73 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; | |
74 | |
75 b2[0x02] = b1[0x02] + b1[0x0D]; | |
76 b2[0x03] = b1[0x03] + b1[0x0C]; | |
77 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; | |
78 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; | |
79 | |
80 b2[0x04] = b1[0x04] + b1[0x0B]; | |
81 b2[0x05] = b1[0x05] + b1[0x0A]; | |
82 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; | |
83 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; | |
84 | |
85 b2[0x06] = b1[0x06] + b1[0x09]; | |
86 b2[0x07] = b1[0x07] + b1[0x08]; | |
87 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; | |
88 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; | |
89 | |
90 /* */ | |
91 | |
92 b2[0x10] = b1[0x10] + b1[0x1F]; | |
93 b2[0x11] = b1[0x11] + b1[0x1E]; | |
94 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; | |
95 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; | |
96 | |
97 b2[0x12] = b1[0x12] + b1[0x1D]; | |
98 b2[0x13] = b1[0x13] + b1[0x1C]; | |
99 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; | |
100 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; | |
101 | |
102 b2[0x14] = b1[0x14] + b1[0x1B]; | |
103 b2[0x15] = b1[0x15] + b1[0x1A]; | |
104 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; | |
105 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; | |
106 | |
107 b2[0x16] = b1[0x16] + b1[0x19]; | |
108 b2[0x17] = b1[0x17] + b1[0x18]; | |
109 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; | |
110 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; | |
111 } | |
112 | |
113 { | |
114 register real *costab = pnts[2]; | |
115 | |
116 b1[0x00] = b2[0x00] + b2[0x07]; | |
117 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; | |
118 b1[0x01] = b2[0x01] + b2[0x06]; | |
119 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; | |
120 b1[0x02] = b2[0x02] + b2[0x05]; | |
121 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; | |
122 b1[0x03] = b2[0x03] + b2[0x04]; | |
123 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; | |
124 | |
125 b1[0x08] = b2[0x08] + b2[0x0F]; | |
126 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; | |
127 b1[0x09] = b2[0x09] + b2[0x0E]; | |
128 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; | |
129 b1[0x0A] = b2[0x0A] + b2[0x0D]; | |
130 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; | |
131 b1[0x0B] = b2[0x0B] + b2[0x0C]; | |
132 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; | |
133 | |
134 b1[0x10] = b2[0x10] + b2[0x17]; | |
135 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; | |
136 b1[0x11] = b2[0x11] + b2[0x16]; | |
137 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; | |
138 b1[0x12] = b2[0x12] + b2[0x15]; | |
139 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; | |
140 b1[0x13] = b2[0x13] + b2[0x14]; | |
141 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; | |
142 | |
143 b1[0x18] = b2[0x18] + b2[0x1F]; | |
144 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; | |
145 b1[0x19] = b2[0x19] + b2[0x1E]; | |
146 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; | |
147 b1[0x1A] = b2[0x1A] + b2[0x1D]; | |
148 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; | |
149 b1[0x1B] = b2[0x1B] + b2[0x1C]; | |
150 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; | |
151 } | |
152 | |
153 { | |
154 register real const cos0 = pnts[3][0]; | |
155 register real const cos1 = pnts[3][1]; | |
156 | |
157 b2[0x00] = b1[0x00] + b1[0x03]; | |
158 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; | |
159 b2[0x01] = b1[0x01] + b1[0x02]; | |
160 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; | |
161 | |
162 b2[0x04] = b1[0x04] + b1[0x07]; | |
163 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; | |
164 b2[0x05] = b1[0x05] + b1[0x06]; | |
165 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; | |
166 | |
167 b2[0x08] = b1[0x08] + b1[0x0B]; | |
168 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; | |
169 b2[0x09] = b1[0x09] + b1[0x0A]; | |
170 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; | |
171 | |
172 b2[0x0C] = b1[0x0C] + b1[0x0F]; | |
173 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; | |
174 b2[0x0D] = b1[0x0D] + b1[0x0E]; | |
175 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; | |
176 | |
177 b2[0x10] = b1[0x10] + b1[0x13]; | |
178 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; | |
179 b2[0x11] = b1[0x11] + b1[0x12]; | |
180 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; | |
181 | |
182 b2[0x14] = b1[0x14] + b1[0x17]; | |
183 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; | |
184 b2[0x15] = b1[0x15] + b1[0x16]; | |
185 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; | |
186 | |
187 b2[0x18] = b1[0x18] + b1[0x1B]; | |
188 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; | |
189 b2[0x19] = b1[0x19] + b1[0x1A]; | |
190 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; | |
191 | |
192 b2[0x1C] = b1[0x1C] + b1[0x1F]; | |
193 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; | |
194 b2[0x1D] = b1[0x1D] + b1[0x1E]; | |
195 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; | |
196 } | |
197 | |
198 { | |
199 register real const cos0 = pnts[4][0]; | |
200 | |
201 b1[0x00] = b2[0x00] + b2[0x01]; | |
202 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; | |
203 b1[0x02] = b2[0x02] + b2[0x03]; | |
204 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; | |
205 b1[0x02] += b1[0x03]; | |
206 | |
207 b1[0x04] = b2[0x04] + b2[0x05]; | |
208 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; | |
209 b1[0x06] = b2[0x06] + b2[0x07]; | |
210 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; | |
211 b1[0x06] += b1[0x07]; | |
212 b1[0x04] += b1[0x06]; | |
213 b1[0x06] += b1[0x05]; | |
214 b1[0x05] += b1[0x07]; | |
215 | |
216 b1[0x08] = b2[0x08] + b2[0x09]; | |
217 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; | |
218 b1[0x0A] = b2[0x0A] + b2[0x0B]; | |
219 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; | |
220 b1[0x0A] += b1[0x0B]; | |
221 | |
222 b1[0x0C] = b2[0x0C] + b2[0x0D]; | |
223 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; | |
224 b1[0x0E] = b2[0x0E] + b2[0x0F]; | |
225 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; | |
226 b1[0x0E] += b1[0x0F]; | |
227 b1[0x0C] += b1[0x0E]; | |
228 b1[0x0E] += b1[0x0D]; | |
229 b1[0x0D] += b1[0x0F]; | |
230 | |
231 b1[0x10] = b2[0x10] + b2[0x11]; | |
232 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; | |
233 b1[0x12] = b2[0x12] + b2[0x13]; | |
234 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; | |
235 b1[0x12] += b1[0x13]; | |
236 | |
237 b1[0x14] = b2[0x14] + b2[0x15]; | |
238 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; | |
239 b1[0x16] = b2[0x16] + b2[0x17]; | |
240 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; | |
241 b1[0x16] += b1[0x17]; | |
242 b1[0x14] += b1[0x16]; | |
243 b1[0x16] += b1[0x15]; | |
244 b1[0x15] += b1[0x17]; | |
245 | |
246 b1[0x18] = b2[0x18] + b2[0x19]; | |
247 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; | |
248 b1[0x1A] = b2[0x1A] + b2[0x1B]; | |
249 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; | |
250 b1[0x1A] += b1[0x1B]; | |
251 | |
252 b1[0x1C] = b2[0x1C] + b2[0x1D]; | |
253 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; | |
254 b1[0x1E] = b2[0x1E] + b2[0x1F]; | |
255 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; | |
256 b1[0x1E] += b1[0x1F]; | |
257 b1[0x1C] += b1[0x1E]; | |
258 b1[0x1E] += b1[0x1D]; | |
259 b1[0x1D] += b1[0x1F]; | |
260 } | |
261 | |
262 out0[0x10*16] = b1[0x00]; | |
263 out0[0x10*12] = b1[0x04]; | |
264 out0[0x10* 8] = b1[0x02]; | |
265 out0[0x10* 4] = b1[0x06]; | |
266 out0[0x10* 0] = b1[0x01]; | |
267 out1[0x10* 0] = b1[0x01]; | |
268 out1[0x10* 4] = b1[0x05]; | |
269 out1[0x10* 8] = b1[0x03]; | |
270 out1[0x10*12] = b1[0x07]; | |
271 | |
272 #if 1 | |
273 out0[0x10*14] = b1[0x08] + b1[0x0C]; | |
274 out0[0x10*10] = b1[0x0C] + b1[0x0a]; | |
275 out0[0x10* 6] = b1[0x0A] + b1[0x0E]; | |
276 out0[0x10* 2] = b1[0x0E] + b1[0x09]; | |
277 out1[0x10* 2] = b1[0x09] + b1[0x0D]; | |
278 out1[0x10* 6] = b1[0x0D] + b1[0x0B]; | |
279 out1[0x10*10] = b1[0x0B] + b1[0x0F]; | |
280 out1[0x10*14] = b1[0x0F]; | |
281 #else | |
282 b1[0x08] += b1[0x0C]; | |
283 out0[0x10*14] = b1[0x08]; | |
284 b1[0x0C] += b1[0x0a]; | |
285 out0[0x10*10] = b1[0x0C]; | |
286 b1[0x0A] += b1[0x0E]; | |
287 out0[0x10* 6] = b1[0x0A]; | |
288 b1[0x0E] += b1[0x09]; | |
289 out0[0x10* 2] = b1[0x0E]; | |
290 b1[0x09] += b1[0x0D]; | |
291 out1[0x10* 2] = b1[0x09]; | |
292 b1[0x0D] += b1[0x0B]; | |
293 out1[0x10* 6] = b1[0x0D]; | |
294 b1[0x0B] += b1[0x0F]; | |
295 out1[0x10*10] = b1[0x0B]; | |
296 out1[0x10*14] = b1[0x0F]; | |
297 #endif | |
298 | |
299 { | |
300 real tmp; | |
301 tmp = b1[0x18] + b1[0x1C]; | |
302 out0[0x10*15] = tmp + b1[0x10]; | |
303 out0[0x10*13] = tmp + b1[0x14]; | |
304 tmp = b1[0x1C] + b1[0x1A]; | |
305 out0[0x10*11] = tmp + b1[0x14]; | |
306 out0[0x10* 9] = tmp + b1[0x12]; | |
307 tmp = b1[0x1A] + b1[0x1E]; | |
308 out0[0x10* 7] = tmp + b1[0x12]; | |
309 out0[0x10* 5] = tmp + b1[0x16]; | |
310 tmp = b1[0x1E] + b1[0x19]; | |
311 out0[0x10* 3] = tmp + b1[0x16]; | |
312 out0[0x10* 1] = tmp + b1[0x11]; | |
313 tmp = b1[0x19] + b1[0x1D]; | |
314 out1[0x10* 1] = tmp + b1[0x11]; | |
315 out1[0x10* 3] = tmp + b1[0x15]; | |
316 tmp = b1[0x1D] + b1[0x1B]; | |
317 out1[0x10* 5] = tmp + b1[0x15]; | |
318 out1[0x10* 7] = tmp + b1[0x13]; | |
319 tmp = b1[0x1B] + b1[0x1F]; | |
320 out1[0x10* 9] = tmp + b1[0x13]; | |
321 out1[0x10*11] = tmp + b1[0x17]; | |
322 out1[0x10*13] = b1[0x17] + b1[0x1F]; | |
323 out1[0x10*15] = b1[0x1F]; | |
324 } | |
325 } | |
326 | |
327 /* | |
328 * the call via dct64 is a trick to force GCC to use | |
329 * (new) registers for the b1,b2 pointer to the bufs[xx] field | |
330 */ | |
331 void dct64_i386(real *a,real *b,real *c) | |
332 { | |
333 real bufs[0x40]; | |
334 dct64_1(a,b,bufs,bufs+0x20,c); | |
335 } | |
336 |