comparison decoders/libmpg123/dct64_i486.c @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems about MP3's not seeking, or most modern MP3's not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123...there are MMX, 3DNow, SSE, Altivec, etc decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
comparison
equal deleted inserted replaced
561:f2985e08589c 562:7e08477b0fc1
1 /*
2 dct64_i486.c: DCT64, a plain C variant for i486
3
4 copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Fabrice Bellard
7 */
8
9 /* Discrete Cosine Transform (DCT) for subband synthesis.
10 *
11 * This code is optimized for 80486. It should be compiled with gcc
12 * 2.7.2 or higher.
13 *
14 * Note: This code does not give the necessary accuracy. Moreover, no
15 * overflow tests are done.
16 *
17 * (c) 1998 Fabrice Bellard.
18 */
19
20 #include "mpg123lib_intern.h"
21
/*
 * Fixed-point cosine factors, scaled by 32768 (Q15) to match the ">> 15"
 * performed by MUL()/MULL() below.
 *
 * COS_<s>_<k> is the k-th factor used by butterfly stage <s> of
 * dct64_1_486().  The values correspond to 32768 / (2*cos((2k+1)*pi/N))
 * with N = 64, 32, 16, 8, 4 for s = 0..4, truncated to an integer.
 */
#define COS_0_0 16403
#define COS_0_1 16563
#define COS_0_2 16890
#define COS_0_3 17401
#define COS_0_4 18124
#define COS_0_5 19101
#define COS_0_6 20398
#define COS_0_7 22112
#define COS_0_8 24396
#define COS_0_9 27503
#define COS_0_10 31869
#define COS_0_11 38320
#define COS_0_12 48633
#define COS_0_13 67429
#define COS_0_14 111660
#define COS_0_15 333906
#define COS_1_0 16463
#define COS_1_1 17121
#define COS_1_2 18577
#define COS_1_3 21195
#define COS_1_4 25826
#define COS_1_5 34756
#define COS_1_6 56441
#define COS_1_7 167154
#define COS_2_0 16704
#define COS_2_1 19704
#define COS_2_2 29490
#define COS_2_3 83981
#define COS_3_0 17733
#define COS_3_1 42813
#define COS_4_0 23170

/*
 * Store expr into slot n of an output array whose slots are
 * FIR_BUFFER_SIZE ints apart.  FIR_BUFFER_SIZE is not defined in this file
 * (presumably it comes from mpg123lib_intern.h).
 */
#define SETOUT(out,n,expr) ((out)[FIR_BUFFER_SIZE*(n)]=(expr))

/* (a * b) >> 15 with a long long intermediate product. */
#define MULL(a,b) (((long long)(a)*(long long)(b)) >> 15)

/*
 * (a * b) >> 15 without widening the multiply.
 *
 * Up to six trailing zero bits of b are shifted out of b before the
 * multiplication and the final shift is shortened by the same amount, so
 * the intermediate product is as small as the constant allows.  No
 * overflow check is made.  b is evaluated several times, so pass a
 * side-effect-free expression (the callers in this file pass the COS_*
 * constants, which lets the compiler pick the branch at compile time).
 *
 * Every use of a and b is parenthesized so that arguments containing
 * operators of lower precedence than & or >> are handled correctly.
 */
#define MUL(a,b) \
( \
	(!((b) & 0x3F)) ? (((a)*((b) >> 6)) >> 9) : \
	(!((b) & 0x1F)) ? (((a)*((b) >> 5)) >> 10) : \
	(!((b) & 0x0F)) ? (((a)*((b) >> 4)) >> 11) : \
	(!((b) & 0x07)) ? (((a)*((b) >> 3)) >> 12) : \
	(!((b) & 0x03)) ? (((a)*((b) >> 2)) >> 13) : \
	(!((b) & 0x01)) ? (((a)*((b) >> 1)) >> 14) : \
	(((a)*(b)) >> 15) \
)
65
66
67 void dct64_1_486(int *out0,int *out1,int *b1,int *b2)
68 {
69 b1[0x00] = b2[0x00] + b2[0x1F];
70 b1[0x1F] = MUL((b2[0x00] - b2[0x1F]),COS_0_0);
71
72 b1[0x01] = b2[0x01] + b2[0x1E];
73 b1[0x1E] = MUL((b2[0x01] - b2[0x1E]),COS_0_1);
74
75 b1[0x02] = b2[0x02] + b2[0x1D];
76 b1[0x1D] = MUL((b2[0x02] - b2[0x1D]),COS_0_2);
77
78 b1[0x03] = b2[0x03] + b2[0x1C];
79 b1[0x1C] = MUL((b2[0x03] - b2[0x1C]),COS_0_3);
80
81 b1[0x04] = b2[0x04] + b2[0x1B];
82 b1[0x1B] = MUL((b2[0x04] - b2[0x1B]),COS_0_4);
83
84 b1[0x05] = b2[0x05] + b2[0x1A];
85 b1[0x1A] = MUL((b2[0x05] - b2[0x1A]),COS_0_5);
86
87 b1[0x06] = b2[0x06] + b2[0x19];
88 b1[0x19] = MUL((b2[0x06] - b2[0x19]),COS_0_6);
89
90 b1[0x07] = b2[0x07] + b2[0x18];
91 b1[0x18] = MUL((b2[0x07] - b2[0x18]),COS_0_7);
92
93 b1[0x08] = b2[0x08] + b2[0x17];
94 b1[0x17] = MUL((b2[0x08] - b2[0x17]),COS_0_8);
95
96 b1[0x09] = b2[0x09] + b2[0x16];
97 b1[0x16] = MUL((b2[0x09] - b2[0x16]),COS_0_9);
98
99 b1[0x0A] = b2[0x0A] + b2[0x15];
100 b1[0x15] = MUL((b2[0x0A] - b2[0x15]),COS_0_10);
101
102 b1[0x0B] = b2[0x0B] + b2[0x14];
103 b1[0x14] = MUL((b2[0x0B] - b2[0x14]),COS_0_11);
104
105 b1[0x0C] = b2[0x0C] + b2[0x13];
106 b1[0x13] = MUL((b2[0x0C] - b2[0x13]),COS_0_12);
107
108 b1[0x0D] = b2[0x0D] + b2[0x12];
109 b1[0x12] = MULL((b2[0x0D] - b2[0x12]),COS_0_13);
110
111 b1[0x0E] = b2[0x0E] + b2[0x11];
112 b1[0x11] = MULL((b2[0x0E] - b2[0x11]),COS_0_14);
113
114 b1[0x0F] = b2[0x0F] + b2[0x10];
115 b1[0x10] = MULL((b2[0x0F] - b2[0x10]),COS_0_15);
116
117
118 b2[0x00] = b1[0x00] + b1[0x0F];
119 b2[0x0F] = MUL((b1[0x00] - b1[0x0F]),COS_1_0);
120 b2[0x01] = b1[0x01] + b1[0x0E];
121 b2[0x0E] = MUL((b1[0x01] - b1[0x0E]),COS_1_1);
122 b2[0x02] = b1[0x02] + b1[0x0D];
123 b2[0x0D] = MUL((b1[0x02] - b1[0x0D]),COS_1_2);
124 b2[0x03] = b1[0x03] + b1[0x0C];
125 b2[0x0C] = MUL((b1[0x03] - b1[0x0C]),COS_1_3);
126 b2[0x04] = b1[0x04] + b1[0x0B];
127 b2[0x0B] = MUL((b1[0x04] - b1[0x0B]),COS_1_4);
128 b2[0x05] = b1[0x05] + b1[0x0A];
129 b2[0x0A] = MUL((b1[0x05] - b1[0x0A]),COS_1_5);
130 b2[0x06] = b1[0x06] + b1[0x09];
131 b2[0x09] = MUL((b1[0x06] - b1[0x09]),COS_1_6);
132 b2[0x07] = b1[0x07] + b1[0x08];
133 b2[0x08] = MULL((b1[0x07] - b1[0x08]),COS_1_7);
134
135 b2[0x10] = b1[0x10] + b1[0x1F];
136 b2[0x1F] = MUL((b1[0x1F] - b1[0x10]),COS_1_0);
137 b2[0x11] = b1[0x11] + b1[0x1E];
138 b2[0x1E] = MUL((b1[0x1E] - b1[0x11]),COS_1_1);
139 b2[0x12] = b1[0x12] + b1[0x1D];
140 b2[0x1D] = MUL((b1[0x1D] - b1[0x12]),COS_1_2);
141 b2[0x13] = b1[0x13] + b1[0x1C];
142 b2[0x1C] = MUL((b1[0x1C] - b1[0x13]),COS_1_3);
143 b2[0x14] = b1[0x14] + b1[0x1B];
144 b2[0x1B] = MUL((b1[0x1B] - b1[0x14]),COS_1_4);
145 b2[0x15] = b1[0x15] + b1[0x1A];
146 b2[0x1A] = MUL((b1[0x1A] - b1[0x15]),COS_1_5);
147 b2[0x16] = b1[0x16] + b1[0x19];
148 b2[0x19] = MUL((b1[0x19] - b1[0x16]),COS_1_6);
149 b2[0x17] = b1[0x17] + b1[0x18];
150 b2[0x18] = MULL((b1[0x18] - b1[0x17]),COS_1_7);
151
152
153 b1[0x00] = b2[0x00] + b2[0x07];
154 b1[0x07] = MUL((b2[0x00] - b2[0x07]),COS_2_0);
155 b1[0x01] = b2[0x01] + b2[0x06];
156 b1[0x06] = MUL((b2[0x01] - b2[0x06]),COS_2_1);
157 b1[0x02] = b2[0x02] + b2[0x05];
158 b1[0x05] = MUL((b2[0x02] - b2[0x05]),COS_2_2);
159 b1[0x03] = b2[0x03] + b2[0x04];
160 b1[0x04] = MULL((b2[0x03] - b2[0x04]),COS_2_3);
161
162 b1[0x08] = b2[0x08] + b2[0x0F];
163 b1[0x0F] = MUL((b2[0x0F] - b2[0x08]),COS_2_0);
164 b1[0x09] = b2[0x09] + b2[0x0E];
165 b1[0x0E] = MUL((b2[0x0E] - b2[0x09]),COS_2_1);
166 b1[0x0A] = b2[0x0A] + b2[0x0D];
167 b1[0x0D] = MUL((b2[0x0D] - b2[0x0A]),COS_2_2);
168 b1[0x0B] = b2[0x0B] + b2[0x0C];
169 b1[0x0C] = MULL((b2[0x0C] - b2[0x0B]),COS_2_3);
170
171 b1[0x10] = b2[0x10] + b2[0x17];
172 b1[0x17] = MUL((b2[0x10] - b2[0x17]),COS_2_0);
173 b1[0x11] = b2[0x11] + b2[0x16];
174 b1[0x16] = MUL((b2[0x11] - b2[0x16]),COS_2_1);
175 b1[0x12] = b2[0x12] + b2[0x15];
176 b1[0x15] = MUL((b2[0x12] - b2[0x15]),COS_2_2);
177 b1[0x13] = b2[0x13] + b2[0x14];
178 b1[0x14] = MULL((b2[0x13] - b2[0x14]),COS_2_3);
179
180 b1[0x18] = b2[0x18] + b2[0x1F];
181 b1[0x1F] = MUL((b2[0x1F] - b2[0x18]),COS_2_0);
182 b1[0x19] = b2[0x19] + b2[0x1E];
183 b1[0x1E] = MUL((b2[0x1E] - b2[0x19]),COS_2_1);
184 b1[0x1A] = b2[0x1A] + b2[0x1D];
185 b1[0x1D] = MUL((b2[0x1D] - b2[0x1A]),COS_2_2);
186 b1[0x1B] = b2[0x1B] + b2[0x1C];
187 b1[0x1C] = MULL((b2[0x1C] - b2[0x1B]),COS_2_3);
188
189
190 b2[0x00] = b1[0x00] + b1[0x03];
191 b2[0x03] = MUL((b1[0x00] - b1[0x03]),COS_3_0);
192 b2[0x01] = b1[0x01] + b1[0x02];
193 b2[0x02] = MUL((b1[0x01] - b1[0x02]),COS_3_1);
194
195 b2[0x04] = b1[0x04] + b1[0x07];
196 b2[0x07] = MUL((b1[0x07] - b1[0x04]),COS_3_0);
197 b2[0x05] = b1[0x05] + b1[0x06];
198 b2[0x06] = MUL((b1[0x06] - b1[0x05]),COS_3_1);
199
200 b2[0x08] = b1[0x08] + b1[0x0B];
201 b2[0x0B] = MUL((b1[0x08] - b1[0x0B]),COS_3_0);
202 b2[0x09] = b1[0x09] + b1[0x0A];
203 b2[0x0A] = MUL((b1[0x09] - b1[0x0A]),COS_3_1);
204
205 b2[0x0C] = b1[0x0C] + b1[0x0F];
206 b2[0x0F] = MUL((b1[0x0F] - b1[0x0C]),COS_3_0);
207 b2[0x0D] = b1[0x0D] + b1[0x0E];
208 b2[0x0E] = MUL((b1[0x0E] - b1[0x0D]),COS_3_1);
209
210 b2[0x10] = b1[0x10] + b1[0x13];
211 b2[0x13] = MUL((b1[0x10] - b1[0x13]),COS_3_0);
212 b2[0x11] = b1[0x11] + b1[0x12];
213 b2[0x12] = MUL((b1[0x11] - b1[0x12]),COS_3_1);
214
215 b2[0x14] = b1[0x14] + b1[0x17];
216 b2[0x17] = MUL((b1[0x17] - b1[0x14]),COS_3_0);
217 b2[0x15] = b1[0x15] + b1[0x16];
218 b2[0x16] = MUL((b1[0x16] - b1[0x15]),COS_3_1);
219
220 b2[0x18] = b1[0x18] + b1[0x1B];
221 b2[0x1B] = MUL((b1[0x18] - b1[0x1B]),COS_3_0);
222 b2[0x19] = b1[0x19] + b1[0x1A];
223 b2[0x1A] = MUL((b1[0x19] - b1[0x1A]),COS_3_1);
224
225 b2[0x1C] = b1[0x1C] + b1[0x1F];
226 b2[0x1F] = MUL((b1[0x1F] - b1[0x1C]),COS_3_0);
227 b2[0x1D] = b1[0x1D] + b1[0x1E];
228 b2[0x1E] = MUL((b1[0x1E] - b1[0x1D]),COS_3_1);
229
230 {
231 int i;
232 for(i=0;i<32;i+=4) {
233 b1[i+0x00] = b2[i+0x00] + b2[i+0x01];
234 b1[i+0x01] = MUL((b2[i+0x00] - b2[i+0x01]),COS_4_0);
235 b1[i+0x02] = b2[i+0x02] + b2[i+0x03];
236 b1[i+0x03] = MUL((b2[i+0x03] - b2[i+0x02]),COS_4_0);
237 }
238 }
239
240 b1[0x02] += b1[0x03];
241 b1[0x06] += b1[0x07];
242 b1[0x04] += b1[0x06];
243 b1[0x06] += b1[0x05];
244 b1[0x05] += b1[0x07];
245
246 b1[0x0A] += b1[0x0B];
247 b1[0x0E] += b1[0x0F];
248 b1[0x0C] += b1[0x0E];
249 b1[0x0E] += b1[0x0D];
250 b1[0x0D] += b1[0x0F];
251
252 b1[0x12] += b1[0x13];
253 b1[0x16] += b1[0x17];
254 b1[0x14] += b1[0x16];
255 b1[0x16] += b1[0x15];
256 b1[0x15] += b1[0x17];
257
258 b1[0x1A] += b1[0x1B];
259 b1[0x1E] += b1[0x1F];
260 b1[0x1C] += b1[0x1E];
261 b1[0x1E] += b1[0x1D];
262 b1[0x1D] += b1[0x1F];
263
264 SETOUT(out0,16,b1[0x00]);
265 SETOUT(out0,12,b1[0x04]);
266 SETOUT(out0, 8,b1[0x02]);
267 SETOUT(out0, 4,b1[0x06]);
268 SETOUT(out0, 0,b1[0x01]);
269 SETOUT(out1, 0,b1[0x01]);
270 SETOUT(out1, 4,b1[0x05]);
271 SETOUT(out1, 8,b1[0x03]);
272 SETOUT(out1,12,b1[0x07]);
273
274 b1[0x08] += b1[0x0C];
275 SETOUT(out0,14,b1[0x08]);
276 b1[0x0C] += b1[0x0a];
277 SETOUT(out0,10,b1[0x0C]);
278 b1[0x0A] += b1[0x0E];
279 SETOUT(out0, 6,b1[0x0A]);
280 b1[0x0E] += b1[0x09];
281 SETOUT(out0, 2,b1[0x0E]);
282 b1[0x09] += b1[0x0D];
283 SETOUT(out1, 2,b1[0x09]);
284 b1[0x0D] += b1[0x0B];
285 SETOUT(out1, 6,b1[0x0D]);
286 b1[0x0B] += b1[0x0F];
287 SETOUT(out1,10,b1[0x0B]);
288 SETOUT(out1,14,b1[0x0F]);
289
290 b1[0x18] += b1[0x1C];
291 SETOUT(out0,15,b1[0x10] + b1[0x18]);
292 SETOUT(out0,13,b1[0x18] + b1[0x14]);
293 b1[0x1C] += b1[0x1a];
294 SETOUT(out0,11,b1[0x14] + b1[0x1C]);
295 SETOUT(out0, 9,b1[0x1C] + b1[0x12]);
296 b1[0x1A] += b1[0x1E];
297 SETOUT(out0, 7,b1[0x12] + b1[0x1A]);
298 SETOUT(out0, 5,b1[0x1A] + b1[0x16]);
299 b1[0x1E] += b1[0x19];
300 SETOUT(out0, 3,b1[0x16] + b1[0x1E]);
301 SETOUT(out0, 1,b1[0x1E] + b1[0x11]);
302 b1[0x19] += b1[0x1D];
303 SETOUT(out1, 1,b1[0x11] + b1[0x19]);
304 SETOUT(out1, 3,b1[0x19] + b1[0x15]);
305 b1[0x1D] += b1[0x1B];
306 SETOUT(out1, 5,b1[0x15] + b1[0x1D]);
307 SETOUT(out1, 7,b1[0x1D] + b1[0x13]);
308 b1[0x1B] += b1[0x1F];
309 SETOUT(out1, 9,b1[0x13] + b1[0x1B]);
310 SETOUT(out1,11,b1[0x1B] + b1[0x17]);
311 SETOUT(out1,13,b1[0x17] + b1[0x1F]);
312 SETOUT(out1,15,b1[0x1F]);
313 }
314
315
316 /*
317 * the call via dct64 is a trick to force GCC to use
318 * (new) registers for the b1,b2 pointer to the bufs[xx] field
319 */
320 void dct64_i486(int *a,int *b,real *samples)
321 {
322 int bufs[64];
323 int i;
324
325 #ifdef REAL_IS_FIXED
326 #define TOINT(a) ((a) * 32768 / (int)REAL_FACTOR)
327
328 for(i=0;i<32;i++) {
329 bufs[i]=TOINT(samples[i]);
330 }
331 #else
332 int *p = bufs;
333 register double const scale = ((65536.0 * 32) + 1) * 65536.0;
334
335 for(i=0;i<32;i++) {
336 *((double *) (p++)) = scale + *samples++; /* beware on bufs overrun: 8B store from x87 */
337 }
338 #endif
339
340 dct64_1_486(a,b,bufs+32,bufs);
341 }
342