Mercurial > SDL_sound_CoreAudio
comparison decoders/libmpg123/dct64_i486.c @ 562:7e08477b0fc1
MP3 decoder upgrade work.
Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123.
libmpg123 is a much better version of mpglib, so it should solve all the
problems about MP3s not seeking, or most modern MP3s not playing at all,
etc. Since you no longer have to make a tradeoff with SMPEG for features, and
SMPEG is basically rotting, I removed it from the project.
There is still work to be done with libmpg123...there are MMX, 3DNow, SSE,
Altivec, etc decoders which we don't have enabled at the moment, and the
build system could use some work to make this compile more cleanly, etc.
Still: huge win.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 30 Jan 2009 02:44:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
561:f2985e08589c | 562:7e08477b0fc1 |
---|---|
1 /* | |
2 dct64_i486.c: DCT64, a plain C variant for i486 | |
3 | |
4 copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 | |
5 see COPYING and AUTHORS files in distribution or http://mpg123.org | |
6 initially written by Fabrice Bellard | |
7 */ | |
8 | |
9 /* Discrete Cosine Transform (DCT) for subband synthesis. | |
10 * | |
11 * This code is optimized for 80486. It should be compiled with gcc | |
12 * 2.7.2 or higher. | |
13 * | |
14 * Note: This code does not give the necessary accuracy. Moreover, no | |
15 * overflow tests are done. | |
16 * | |
17 * (c) 1998 Fabrice Bellard. | |
18 */ | |
19 | |
20 #include "mpg123lib_intern.h" | |
21 | |
/*
 * Fixed-point butterfly coefficients, named COS_<stage>_<index>.
 *
 * The values are scaled by 32768 (both multiply macros below shift the
 * product right by 15) and match
 *     32768 / (2 * cos((2*index + 1) * pi / 2^(6 - stage)))
 * truncated to an integer, e.g. COS_4_0 = 32768 / (2*cos(pi/4)) = 23170.
 */
#define COS_0_0 16403
#define COS_0_1 16563
#define COS_0_2 16890
#define COS_0_3 17401
#define COS_0_4 18124
#define COS_0_5 19101
#define COS_0_6 20398
#define COS_0_7 22112
#define COS_0_8 24396
#define COS_0_9 27503
#define COS_0_10 31869
#define COS_0_11 38320
#define COS_0_12 48633
#define COS_0_13 67429
#define COS_0_14 111660
#define COS_0_15 333906
#define COS_1_0 16463
#define COS_1_1 17121
#define COS_1_2 18577
#define COS_1_3 21195
#define COS_1_4 25826
#define COS_1_5 34756
#define COS_1_6 56441
#define COS_1_7 167154
#define COS_2_0 16704
#define COS_2_1 19704
#define COS_2_2 29490
#define COS_2_3 83981
#define COS_3_0 17733
#define COS_3_1 42813
#define COS_4_0 23170

/* Store expr in slot n of an output column; slots lie FIR_BUFFER_SIZE ints apart. */
#define SETOUT(out,n,expr) (out)[FIR_BUFFER_SIZE*(n)]=(expr)

/* (a*b) >> 15 computed through a 64-bit product; used with the largest coefficients. */
#define MULL(a,b) (((long long)(a)*(long long)(b)) >> 15)

/*
 * (a*b) >> 15 in plain int arithmetic.
 *
 * When b has trailing zero bits they are shifted out of b before the
 * multiply and the final shift is reduced by the same amount, which keeps
 * the intermediate product smaller.  With a constant b the chain of
 * conditionals is a constant expression, so only one branch remains.
 * Both a and b are evaluated more than once: pass side-effect-free
 * arguments only.  No overflow check is made.
 */
#define MUL(a,b) \
 (\
  ((!((b) & 0x3F)) ? (((a)*((b) >> 6)) >> 9) :\
  ((!((b) & 0x1F)) ? (((a)*((b) >> 5)) >> 10) :\
  ((!((b) & 0x0F)) ? (((a)*((b) >> 4)) >> 11) :\
  ((!((b) & 0x07)) ? (((a)*((b) >> 3)) >> 12) :\
  ((!((b) & 0x03)) ? (((a)*((b) >> 2)) >> 13) :\
  ((!((b) & 0x01)) ? (((a)*((b) >> 1)) >> 14) :\
                     (((a)*((b)     )) >> 15))))))))
65 | |
66 | |
/*
 * dct64_1_486: integer butterfly network over 32 values.
 *
 *   out0 - output column; slots 0..16 are written, each slot being
 *          FIR_BUFFER_SIZE ints apart (see SETOUT).
 *   out1 - output column; slots 0..15 are written, same spacing.
 *   b1   - 32-int scratch buffer; every element is written by the first
 *          stage before anything reads it.
 *   b2   - 32 input values; overwritten as working storage.
 *
 * Five butterfly stages alternate between the two buffers
 * (b2->b1->b2->b1->b2->b1), then the partial sums in b1 are combined in
 * place and stored through SETOUT.  Coefficients are multiplied with MUL,
 * or with MULL (64-bit product) for the largest ones.  No overflow checks
 * are made anywhere in this routine.
 */
67 void dct64_1_486(int *out0,int *out1,int *b1,int *b2) | |
68 { | |
/* Stage 1 (b2 -> b1): one 32-wide butterfly.  The sum of each mirrored
 * pair goes to the low index, the COS_0_x-scaled difference to the high
 * index. */
69 b1[0x00] = b2[0x00] + b2[0x1F]; | |
70 b1[0x1F] = MUL((b2[0x00] - b2[0x1F]),COS_0_0); | |
71 | |
72 b1[0x01] = b2[0x01] + b2[0x1E]; | |
73 b1[0x1E] = MUL((b2[0x01] - b2[0x1E]),COS_0_1); | |
74 | |
75 b1[0x02] = b2[0x02] + b2[0x1D]; | |
76 b1[0x1D] = MUL((b2[0x02] - b2[0x1D]),COS_0_2); | |
77 | |
78 b1[0x03] = b2[0x03] + b2[0x1C]; | |
79 b1[0x1C] = MUL((b2[0x03] - b2[0x1C]),COS_0_3); | |
80 | |
81 b1[0x04] = b2[0x04] + b2[0x1B]; | |
82 b1[0x1B] = MUL((b2[0x04] - b2[0x1B]),COS_0_4); | |
83 | |
84 b1[0x05] = b2[0x05] + b2[0x1A]; | |
85 b1[0x1A] = MUL((b2[0x05] - b2[0x1A]),COS_0_5); | |
86 | |
87 b1[0x06] = b2[0x06] + b2[0x19]; | |
88 b1[0x19] = MUL((b2[0x06] - b2[0x19]),COS_0_6); | |
89 | |
90 b1[0x07] = b2[0x07] + b2[0x18]; | |
91 b1[0x18] = MUL((b2[0x07] - b2[0x18]),COS_0_7); | |
92 | |
93 b1[0x08] = b2[0x08] + b2[0x17]; | |
94 b1[0x17] = MUL((b2[0x08] - b2[0x17]),COS_0_8); | |
95 | |
96 b1[0x09] = b2[0x09] + b2[0x16]; | |
97 b1[0x16] = MUL((b2[0x09] - b2[0x16]),COS_0_9); | |
98 | |
99 b1[0x0A] = b2[0x0A] + b2[0x15]; | |
100 b1[0x15] = MUL((b2[0x0A] - b2[0x15]),COS_0_10); | |
101 | |
102 b1[0x0B] = b2[0x0B] + b2[0x14]; | |
103 b1[0x14] = MUL((b2[0x0B] - b2[0x14]),COS_0_11); | |
104 | |
105 b1[0x0C] = b2[0x0C] + b2[0x13]; | |
106 b1[0x13] = MUL((b2[0x0C] - b2[0x13]),COS_0_12); | |
107 | |
/* The last three stage-1 coefficients are the largest and go through MULL. */
108 b1[0x0D] = b2[0x0D] + b2[0x12]; | |
109 b1[0x12] = MULL((b2[0x0D] - b2[0x12]),COS_0_13); | |
110 | |
111 b1[0x0E] = b2[0x0E] + b2[0x11]; | |
112 b1[0x11] = MULL((b2[0x0E] - b2[0x11]),COS_0_14); | |
113 | |
114 b1[0x0F] = b2[0x0F] + b2[0x10]; | |
115 b1[0x10] = MULL((b2[0x0F] - b2[0x10]),COS_0_15); | |
116 | |
117 | |
/* Stage 2 (b1 -> b2): two 16-wide butterflies with COS_1_x.  In the upper
 * half (0x10..0x1F) the difference is taken high-minus-low, the reverse of
 * the lower half. */
118 b2[0x00] = b1[0x00] + b1[0x0F]; | |
119 b2[0x0F] = MUL((b1[0x00] - b1[0x0F]),COS_1_0); | |
120 b2[0x01] = b1[0x01] + b1[0x0E]; | |
121 b2[0x0E] = MUL((b1[0x01] - b1[0x0E]),COS_1_1); | |
122 b2[0x02] = b1[0x02] + b1[0x0D]; | |
123 b2[0x0D] = MUL((b1[0x02] - b1[0x0D]),COS_1_2); | |
124 b2[0x03] = b1[0x03] + b1[0x0C]; | |
125 b2[0x0C] = MUL((b1[0x03] - b1[0x0C]),COS_1_3); | |
126 b2[0x04] = b1[0x04] + b1[0x0B]; | |
127 b2[0x0B] = MUL((b1[0x04] - b1[0x0B]),COS_1_4); | |
128 b2[0x05] = b1[0x05] + b1[0x0A]; | |
129 b2[0x0A] = MUL((b1[0x05] - b1[0x0A]),COS_1_5); | |
130 b2[0x06] = b1[0x06] + b1[0x09]; | |
131 b2[0x09] = MUL((b1[0x06] - b1[0x09]),COS_1_6); | |
132 b2[0x07] = b1[0x07] + b1[0x08]; | |
133 b2[0x08] = MULL((b1[0x07] - b1[0x08]),COS_1_7); | |
134 | |
135 b2[0x10] = b1[0x10] + b1[0x1F]; | |
136 b2[0x1F] = MUL((b1[0x1F] - b1[0x10]),COS_1_0); | |
137 b2[0x11] = b1[0x11] + b1[0x1E]; | |
138 b2[0x1E] = MUL((b1[0x1E] - b1[0x11]),COS_1_1); | |
139 b2[0x12] = b1[0x12] + b1[0x1D]; | |
140 b2[0x1D] = MUL((b1[0x1D] - b1[0x12]),COS_1_2); | |
141 b2[0x13] = b1[0x13] + b1[0x1C]; | |
142 b2[0x1C] = MUL((b1[0x1C] - b1[0x13]),COS_1_3); | |
143 b2[0x14] = b1[0x14] + b1[0x1B]; | |
144 b2[0x1B] = MUL((b1[0x1B] - b1[0x14]),COS_1_4); | |
145 b2[0x15] = b1[0x15] + b1[0x1A]; | |
146 b2[0x1A] = MUL((b1[0x1A] - b1[0x15]),COS_1_5); | |
147 b2[0x16] = b1[0x16] + b1[0x19]; | |
148 b2[0x19] = MUL((b1[0x19] - b1[0x16]),COS_1_6); | |
149 b2[0x17] = b1[0x17] + b1[0x18]; | |
150 b2[0x18] = MULL((b1[0x18] - b1[0x17]),COS_1_7); | |
151 | |
152 | |
/* Stage 3 (b2 -> b1): four 8-wide butterflies with COS_2_x.  The groups
 * starting at 0x08 and 0x18 use the reversed (high-minus-low) difference. */
153 b1[0x00] = b2[0x00] + b2[0x07]; | |
154 b1[0x07] = MUL((b2[0x00] - b2[0x07]),COS_2_0); | |
155 b1[0x01] = b2[0x01] + b2[0x06]; | |
156 b1[0x06] = MUL((b2[0x01] - b2[0x06]),COS_2_1); | |
157 b1[0x02] = b2[0x02] + b2[0x05]; | |
158 b1[0x05] = MUL((b2[0x02] - b2[0x05]),COS_2_2); | |
159 b1[0x03] = b2[0x03] + b2[0x04]; | |
160 b1[0x04] = MULL((b2[0x03] - b2[0x04]),COS_2_3); | |
161 | |
162 b1[0x08] = b2[0x08] + b2[0x0F]; | |
163 b1[0x0F] = MUL((b2[0x0F] - b2[0x08]),COS_2_0); | |
164 b1[0x09] = b2[0x09] + b2[0x0E]; | |
165 b1[0x0E] = MUL((b2[0x0E] - b2[0x09]),COS_2_1); | |
166 b1[0x0A] = b2[0x0A] + b2[0x0D]; | |
167 b1[0x0D] = MUL((b2[0x0D] - b2[0x0A]),COS_2_2); | |
168 b1[0x0B] = b2[0x0B] + b2[0x0C]; | |
169 b1[0x0C] = MULL((b2[0x0C] - b2[0x0B]),COS_2_3); | |
170 | |
171 b1[0x10] = b2[0x10] + b2[0x17]; | |
172 b1[0x17] = MUL((b2[0x10] - b2[0x17]),COS_2_0); | |
173 b1[0x11] = b2[0x11] + b2[0x16]; | |
174 b1[0x16] = MUL((b2[0x11] - b2[0x16]),COS_2_1); | |
175 b1[0x12] = b2[0x12] + b2[0x15]; | |
176 b1[0x15] = MUL((b2[0x12] - b2[0x15]),COS_2_2); | |
177 b1[0x13] = b2[0x13] + b2[0x14]; | |
178 b1[0x14] = MULL((b2[0x13] - b2[0x14]),COS_2_3); | |
179 | |
180 b1[0x18] = b2[0x18] + b2[0x1F]; | |
181 b1[0x1F] = MUL((b2[0x1F] - b2[0x18]),COS_2_0); | |
182 b1[0x19] = b2[0x19] + b2[0x1E]; | |
183 b1[0x1E] = MUL((b2[0x1E] - b2[0x19]),COS_2_1); | |
184 b1[0x1A] = b2[0x1A] + b2[0x1D]; | |
185 b1[0x1D] = MUL((b2[0x1D] - b2[0x1A]),COS_2_2); | |
186 b1[0x1B] = b2[0x1B] + b2[0x1C]; | |
187 b1[0x1C] = MULL((b2[0x1C] - b2[0x1B]),COS_2_3); | |
188 | |
189 | |
/* Stage 4 (b1 -> b2): eight 4-wide butterflies with COS_3_x; every second
 * group (0x04, 0x0C, 0x14, 0x1C) uses the reversed difference. */
190 b2[0x00] = b1[0x00] + b1[0x03]; | |
191 b2[0x03] = MUL((b1[0x00] - b1[0x03]),COS_3_0); | |
192 b2[0x01] = b1[0x01] + b1[0x02]; | |
193 b2[0x02] = MUL((b1[0x01] - b1[0x02]),COS_3_1); | |
194 | |
195 b2[0x04] = b1[0x04] + b1[0x07]; | |
196 b2[0x07] = MUL((b1[0x07] - b1[0x04]),COS_3_0); | |
197 b2[0x05] = b1[0x05] + b1[0x06]; | |
198 b2[0x06] = MUL((b1[0x06] - b1[0x05]),COS_3_1); | |
199 | |
200 b2[0x08] = b1[0x08] + b1[0x0B]; | |
201 b2[0x0B] = MUL((b1[0x08] - b1[0x0B]),COS_3_0); | |
202 b2[0x09] = b1[0x09] + b1[0x0A]; | |
203 b2[0x0A] = MUL((b1[0x09] - b1[0x0A]),COS_3_1); | |
204 | |
205 b2[0x0C] = b1[0x0C] + b1[0x0F]; | |
206 b2[0x0F] = MUL((b1[0x0F] - b1[0x0C]),COS_3_0); | |
207 b2[0x0D] = b1[0x0D] + b1[0x0E]; | |
208 b2[0x0E] = MUL((b1[0x0E] - b1[0x0D]),COS_3_1); | |
209 | |
210 b2[0x10] = b1[0x10] + b1[0x13]; | |
211 b2[0x13] = MUL((b1[0x10] - b1[0x13]),COS_3_0); | |
212 b2[0x11] = b1[0x11] + b1[0x12]; | |
213 b2[0x12] = MUL((b1[0x11] - b1[0x12]),COS_3_1); | |
214 | |
215 b2[0x14] = b1[0x14] + b1[0x17]; | |
216 b2[0x17] = MUL((b1[0x17] - b1[0x14]),COS_3_0); | |
217 b2[0x15] = b1[0x15] + b1[0x16]; | |
218 b2[0x16] = MUL((b1[0x16] - b1[0x15]),COS_3_1); | |
219 | |
220 b2[0x18] = b1[0x18] + b1[0x1B]; | |
221 b2[0x1B] = MUL((b1[0x18] - b1[0x1B]),COS_3_0); | |
222 b2[0x19] = b1[0x19] + b1[0x1A]; | |
223 b2[0x1A] = MUL((b1[0x19] - b1[0x1A]),COS_3_1); | |
224 | |
225 b2[0x1C] = b1[0x1C] + b1[0x1F]; | |
226 b2[0x1F] = MUL((b1[0x1F] - b1[0x1C]),COS_3_0); | |
227 b2[0x1D] = b1[0x1D] + b1[0x1E]; | |
228 b2[0x1E] = MUL((b1[0x1E] - b1[0x1D]),COS_3_1); | |
229 | |
/* Stage 5 (b2 -> b1): sixteen 2-wide butterflies, all scaled by COS_4_0.
 * Within each group of four the second pair uses the reversed difference. */
230 { | |
231 int i; | |
232 for(i=0;i<32;i+=4) { | |
233 b1[i+0x00] = b2[i+0x00] + b2[i+0x01]; | |
234 b1[i+0x01] = MUL((b2[i+0x00] - b2[i+0x01]),COS_4_0); | |
235 b1[i+0x02] = b2[i+0x02] + b2[i+0x03]; | |
236 b1[i+0x03] = MUL((b2[i+0x03] - b2[i+0x02]),COS_4_0); | |
237 } | |
238 } | |
239 | |
/* Combine partial sums in place, one group of eight at a time.  Each +=
 * may read a value updated by an earlier line of the same group, so the
 * statement order must not be changed. */
240 b1[0x02] += b1[0x03]; | |
241 b1[0x06] += b1[0x07]; | |
242 b1[0x04] += b1[0x06]; | |
243 b1[0x06] += b1[0x05]; | |
244 b1[0x05] += b1[0x07]; | |
245 | |
246 b1[0x0A] += b1[0x0B]; | |
247 b1[0x0E] += b1[0x0F]; | |
248 b1[0x0C] += b1[0x0E]; | |
249 b1[0x0E] += b1[0x0D]; | |
250 b1[0x0D] += b1[0x0F]; | |
251 | |
252 b1[0x12] += b1[0x13]; | |
253 b1[0x16] += b1[0x17]; | |
254 b1[0x14] += b1[0x16]; | |
255 b1[0x16] += b1[0x15]; | |
256 b1[0x15] += b1[0x17]; | |
257 | |
258 b1[0x1A] += b1[0x1B]; | |
259 b1[0x1E] += b1[0x1F]; | |
260 b1[0x1C] += b1[0x1E]; | |
261 b1[0x1E] += b1[0x1D]; | |
262 b1[0x1D] += b1[0x1F]; | |
263 | |
/* Slots that are multiples of 4, taken from b1[0x00..0x07].  b1[0x01] is
 * stored to slot 0 of both out0 and out1. */
264 SETOUT(out0,16,b1[0x00]); | |
265 SETOUT(out0,12,b1[0x04]); | |
266 SETOUT(out0, 8,b1[0x02]); | |
267 SETOUT(out0, 4,b1[0x06]); | |
268 SETOUT(out0, 0,b1[0x01]); | |
269 SETOUT(out1, 0,b1[0x01]); | |
270 SETOUT(out1, 4,b1[0x05]); | |
271 SETOUT(out1, 8,b1[0x03]); | |
272 SETOUT(out1,12,b1[0x07]); | |
273 | |
/* Slots 2, 6, 10 and 14 of each column, from b1[0x08..0x0F].  Running sums
 * are built in place between the stores; order is significant. */
274 b1[0x08] += b1[0x0C]; | |
275 SETOUT(out0,14,b1[0x08]); | |
276 b1[0x0C] += b1[0x0a]; | |
277 SETOUT(out0,10,b1[0x0C]); | |
278 b1[0x0A] += b1[0x0E]; | |
279 SETOUT(out0, 6,b1[0x0A]); | |
280 b1[0x0E] += b1[0x09]; | |
281 SETOUT(out0, 2,b1[0x0E]); | |
282 b1[0x09] += b1[0x0D]; | |
283 SETOUT(out1, 2,b1[0x09]); | |
284 b1[0x0D] += b1[0x0B]; | |
285 SETOUT(out1, 6,b1[0x0D]); | |
286 b1[0x0B] += b1[0x0F]; | |
287 SETOUT(out1,10,b1[0x0B]); | |
288 SETOUT(out1,14,b1[0x0F]); | |
289 | |
/* Odd slots: each output adds an element of b1[0x10..0x17] to a running
 * sum kept in b1[0x18..0x1F]; only out1 slot 15 is a single element,
 * b1[0x1F]. */
290 b1[0x18] += b1[0x1C]; | |
291 SETOUT(out0,15,b1[0x10] + b1[0x18]); | |
292 SETOUT(out0,13,b1[0x18] + b1[0x14]); | |
293 b1[0x1C] += b1[0x1a]; | |
294 SETOUT(out0,11,b1[0x14] + b1[0x1C]); | |
295 SETOUT(out0, 9,b1[0x1C] + b1[0x12]); | |
296 b1[0x1A] += b1[0x1E]; | |
297 SETOUT(out0, 7,b1[0x12] + b1[0x1A]); | |
298 SETOUT(out0, 5,b1[0x1A] + b1[0x16]); | |
299 b1[0x1E] += b1[0x19]; | |
300 SETOUT(out0, 3,b1[0x16] + b1[0x1E]); | |
301 SETOUT(out0, 1,b1[0x1E] + b1[0x11]); | |
302 b1[0x19] += b1[0x1D]; | |
303 SETOUT(out1, 1,b1[0x11] + b1[0x19]); | |
304 SETOUT(out1, 3,b1[0x19] + b1[0x15]); | |
305 b1[0x1D] += b1[0x1B]; | |
306 SETOUT(out1, 5,b1[0x15] + b1[0x1D]); | |
307 SETOUT(out1, 7,b1[0x1D] + b1[0x13]); | |
308 b1[0x1B] += b1[0x1F]; | |
309 SETOUT(out1, 9,b1[0x13] + b1[0x1B]); | |
310 SETOUT(out1,11,b1[0x1B] + b1[0x17]); | |
311 SETOUT(out1,13,b1[0x17] + b1[0x1F]); | |
312 SETOUT(out1,15,b1[0x1F]); | |
313 } | |
314 | |
315 | |
316 /* | |
317 * the call via dct64 is a trick to force GCC to use | |
318 * (new) registers for the b1,b2 pointer to the bufs[xx] field | |
319 */ | |
320 void dct64_i486(int *a,int *b,real *samples) | |
321 { | |
322 int bufs[64]; | |
323 int i; | |
324 | |
325 #ifdef REAL_IS_FIXED | |
326 #define TOINT(a) ((a) * 32768 / (int)REAL_FACTOR) | |
327 | |
328 for(i=0;i<32;i++) { | |
329 bufs[i]=TOINT(samples[i]); | |
330 } | |
331 #else | |
332 int *p = bufs; | |
333 register double const scale = ((65536.0 * 32) + 1) * 65536.0; | |
334 | |
335 for(i=0;i<32;i++) { | |
336 *((double *) (p++)) = scale + *samples++; /* beware on bufs overrun: 8B store from x87 */ | |
337 } | |
338 #endif | |
339 | |
340 dct64_1_486(a,b,bufs+32,bufs); | |
341 } | |
342 |