comparison decoders/libmpg123/dct64_i386.c @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems about MP3's not seeking, or most modern MP3's not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123...there are MMX, 3DNow, SSE, Altivec, etc decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
comparison
equal deleted inserted replaced
561:f2985e08589c 562:7e08477b0fc1
1 /*
2 dct64_i386.c: DCT64, a C variant for i386
3
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Michael Hipp
7 */
8
9 /*
10 * Discrete Cosine Transform (DCT) for subband synthesis
11 * optimized for machines with no auto-increment.
12 * The performance is highly compiler dependent. Maybe
13 * the dct64.c version for 'normal' processors may be faster
14 * even for Intel processors.
15 */
16
17 #include "mpg123lib_intern.h"
18
/*
 * dct64_1: core of the DCT used for subband synthesis (see file header).
 *
 * samples : input; elements 0x00..0x1F are read.
 * b1, b2  : caller-supplied work buffers; elements 0x00..0x1F of each are
 *           overwritten, the stages alternating between the two.
 * out0    : output, written at a stride of 0x10 elements
 *           (indices 0x10*0 .. 0x10*16).
 * out1    : output, written at a stride of 0x10 elements
 *           (indices 0x10*0 .. 0x10*15).
 *
 * Five stages of "sum / scaled difference" pairs use the coefficient tables
 * pnts[0]..pnts[4]. pnts is declared outside this file; presumably these are
 * cosine tables, going by the local name costab -- confirm against their
 * definition. The group size halves with every stage: 32, 16, 8, 4, 2.
 */
19 static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples)
20 {
/* Stage 1: samples -> b1. Element i is paired with element 0x1F-i;
 * the sum goes to b1[i], the scaled difference to b1[0x1F-i].
 * Uses 16 coefficients from pnts[0]. */
21 {
22 register real *costab = pnts[0];
23
24 b1[0x00] = samples[0x00] + samples[0x1F];
25 b1[0x01] = samples[0x01] + samples[0x1E];
26 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0];
27 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1];
28
29 b1[0x02] = samples[0x02] + samples[0x1D];
30 b1[0x03] = samples[0x03] + samples[0x1C];
31 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2];
32 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3];
33
34 b1[0x04] = samples[0x04] + samples[0x1B];
35 b1[0x05] = samples[0x05] + samples[0x1A];
36 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4];
37 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5];
38
39 b1[0x06] = samples[0x06] + samples[0x19];
40 b1[0x07] = samples[0x07] + samples[0x18];
41 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6];
42 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7];
43
44 b1[0x08] = samples[0x08] + samples[0x17];
45 b1[0x09] = samples[0x09] + samples[0x16];
46 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8];
47 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9];
48
49 b1[0x0A] = samples[0x0A] + samples[0x15];
50 b1[0x0B] = samples[0x0B] + samples[0x14];
51 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA];
52 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB];
53
54 b1[0x0C] = samples[0x0C] + samples[0x13];
55 b1[0x0D] = samples[0x0D] + samples[0x12];
56 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC];
57 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD];
58
59 b1[0x0E] = samples[0x0E] + samples[0x11];
60 b1[0x0F] = samples[0x0F] + samples[0x10];
61 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE];
62 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF];
63
64 }
65
66
/* Stage 2: b1 -> b2, two groups of 16. Each element is paired with its
 * mirror inside the group. The 8 coefficients from pnts[1] are used for
 * both groups. First group (0x00..0x0F): difference is low minus high. */
67 {
68 register real *costab = pnts[1];
69
70 b2[0x00] = b1[0x00] + b1[0x0F];
71 b2[0x01] = b1[0x01] + b1[0x0E];
72 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0];
73 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1];
74
75 b2[0x02] = b1[0x02] + b1[0x0D];
76 b2[0x03] = b1[0x03] + b1[0x0C];
77 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2];
78 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3];
79
80 b2[0x04] = b1[0x04] + b1[0x0B];
81 b2[0x05] = b1[0x05] + b1[0x0A];
82 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4];
83 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5];
84
85 b2[0x06] = b1[0x06] + b1[0x09];
86 b2[0x07] = b1[0x07] + b1[0x08];
87 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6];
88 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7];
89
90 /* second group (0x10..0x1F): same pairing, but the difference is high minus low */
91
92 b2[0x10] = b1[0x10] + b1[0x1F];
93 b2[0x11] = b1[0x11] + b1[0x1E];
94 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0];
95 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1];
96
97 b2[0x12] = b1[0x12] + b1[0x1D];
98 b2[0x13] = b1[0x13] + b1[0x1C];
99 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2];
100 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3];
101
102 b2[0x14] = b1[0x14] + b1[0x1B];
103 b2[0x15] = b1[0x15] + b1[0x1A];
104 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4];
105 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5];
106
107 b2[0x16] = b1[0x16] + b1[0x19];
108 b2[0x17] = b1[0x17] + b1[0x18];
109 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6];
110 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7];
111 }
112
/* Stage 3: b2 -> b1, four groups of 8, with 4 coefficients from pnts[2]
 * shared by all groups. The groups starting at 0x00 and 0x10 take the
 * difference low minus high; those at 0x08 and 0x18 take high minus low. */
113 {
114 register real *costab = pnts[2];
115
116 b1[0x00] = b2[0x00] + b2[0x07];
117 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0];
118 b1[0x01] = b2[0x01] + b2[0x06];
119 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1];
120 b1[0x02] = b2[0x02] + b2[0x05];
121 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2];
122 b1[0x03] = b2[0x03] + b2[0x04];
123 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3];
124
125 b1[0x08] = b2[0x08] + b2[0x0F];
126 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0];
127 b1[0x09] = b2[0x09] + b2[0x0E];
128 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1];
129 b1[0x0A] = b2[0x0A] + b2[0x0D];
130 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2];
131 b1[0x0B] = b2[0x0B] + b2[0x0C];
132 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3];
133
134 b1[0x10] = b2[0x10] + b2[0x17];
135 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0];
136 b1[0x11] = b2[0x11] + b2[0x16];
137 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1];
138 b1[0x12] = b2[0x12] + b2[0x15];
139 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2];
140 b1[0x13] = b2[0x13] + b2[0x14];
141 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3];
142
143 b1[0x18] = b2[0x18] + b2[0x1F];
144 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0];
145 b1[0x19] = b2[0x19] + b2[0x1E];
146 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1];
147 b1[0x1A] = b2[0x1A] + b2[0x1D];
148 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2];
149 b1[0x1B] = b2[0x1B] + b2[0x1C];
150 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3];
151 }
152
/* Stage 4: b1 -> b2, eight groups of 4, using the two coefficients
 * pnts[3][0] and pnts[3][1]. Every second group (0x04, 0x0C, 0x14, 0x1C)
 * takes the difference high minus low. */
153 {
154 register real const cos0 = pnts[3][0];
155 register real const cos1 = pnts[3][1];
156
157 b2[0x00] = b1[0x00] + b1[0x03];
158 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0;
159 b2[0x01] = b1[0x01] + b1[0x02];
160 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1;
161
162 b2[0x04] = b1[0x04] + b1[0x07];
163 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0;
164 b2[0x05] = b1[0x05] + b1[0x06];
165 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1;
166
167 b2[0x08] = b1[0x08] + b1[0x0B];
168 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0;
169 b2[0x09] = b1[0x09] + b1[0x0A];
170 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1;
171
172 b2[0x0C] = b1[0x0C] + b1[0x0F];
173 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0;
174 b2[0x0D] = b1[0x0D] + b1[0x0E];
175 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1;
176
177 b2[0x10] = b1[0x10] + b1[0x13];
178 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0;
179 b2[0x11] = b1[0x11] + b1[0x12];
180 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1;
181
182 b2[0x14] = b1[0x14] + b1[0x17];
183 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0;
184 b2[0x15] = b1[0x15] + b1[0x16];
185 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1;
186
187 b2[0x18] = b1[0x18] + b1[0x1B];
188 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0;
189 b2[0x19] = b1[0x19] + b1[0x1A];
190 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1;
191
192 b2[0x1C] = b1[0x1C] + b1[0x1F];
193 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0;
194 b2[0x1D] = b1[0x1D] + b1[0x1E];
195 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1;
196 }
197
/* Stage 5: b2 -> b1, sixteen adjacent pairs, all scaled by pnts[4][0].
 * Within each group of four, the second pair takes the difference high
 * minus low. The "+=" lines then fold partial sums together in place;
 * their order matters because later lines read elements that the earlier
 * lines have just updated. */
198 {
199 register real const cos0 = pnts[4][0];
200
201 b1[0x00] = b2[0x00] + b2[0x01];
202 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0;
203 b1[0x02] = b2[0x02] + b2[0x03];
204 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0;
205 b1[0x02] += b1[0x03];
206
207 b1[0x04] = b2[0x04] + b2[0x05];
208 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0;
209 b1[0x06] = b2[0x06] + b2[0x07];
210 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0;
211 b1[0x06] += b1[0x07];
212 b1[0x04] += b1[0x06];
213 b1[0x06] += b1[0x05];
214 b1[0x05] += b1[0x07];
215
216 b1[0x08] = b2[0x08] + b2[0x09];
217 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0;
218 b1[0x0A] = b2[0x0A] + b2[0x0B];
219 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0;
220 b1[0x0A] += b1[0x0B];
221
222 b1[0x0C] = b2[0x0C] + b2[0x0D];
223 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0;
224 b1[0x0E] = b2[0x0E] + b2[0x0F];
225 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0;
226 b1[0x0E] += b1[0x0F];
227 b1[0x0C] += b1[0x0E];
228 b1[0x0E] += b1[0x0D];
229 b1[0x0D] += b1[0x0F];
230
231 b1[0x10] = b2[0x10] + b2[0x11];
232 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0;
233 b1[0x12] = b2[0x12] + b2[0x13];
234 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0;
235 b1[0x12] += b1[0x13];
236
237 b1[0x14] = b2[0x14] + b2[0x15];
238 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0;
239 b1[0x16] = b2[0x16] + b2[0x17];
240 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0;
241 b1[0x16] += b1[0x17];
242 b1[0x14] += b1[0x16];
243 b1[0x16] += b1[0x15];
244 b1[0x15] += b1[0x17];
245
246 b1[0x18] = b2[0x18] + b2[0x19];
247 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0;
248 b1[0x1A] = b2[0x1A] + b2[0x1B];
249 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0;
250 b1[0x1A] += b1[0x1B];
251
252 b1[0x1C] = b2[0x1C] + b2[0x1D];
253 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0;
254 b1[0x1E] = b2[0x1E] + b2[0x1F];
255 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0;
256 b1[0x1E] += b1[0x1F];
257 b1[0x1C] += b1[0x1E];
258 b1[0x1E] += b1[0x1D];
259 b1[0x1D] += b1[0x1F];
260 }
261
/* Output, part 1: b1[0x00..0x07] are stored unchanged at every fourth
 * output slot. Note that b1[0x01] is written to slot 0 of both out0
 * and out1. */
262 out0[0x10*16] = b1[0x00];
263 out0[0x10*12] = b1[0x04];
264 out0[0x10* 8] = b1[0x02];
265 out0[0x10* 4] = b1[0x06];
266 out0[0x10* 0] = b1[0x01];
267 out1[0x10* 0] = b1[0x01];
268 out1[0x10* 4] = b1[0x05];
269 out1[0x10* 8] = b1[0x03];
270 out1[0x10*12] = b1[0x07];
271
/* Output, part 2: sums of two neighbours from b1[0x08..0x0F] fill output
 * slots 2, 6, 10 and 14. The "#if 1" branch is the one compiled; it
 * leaves b1 untouched. */
272 #if 1
273 out0[0x10*14] = b1[0x08] + b1[0x0C];
274 out0[0x10*10] = b1[0x0C] + b1[0x0a];
275 out0[0x10* 6] = b1[0x0A] + b1[0x0E];
276 out0[0x10* 2] = b1[0x0E] + b1[0x09];
277 out1[0x10* 2] = b1[0x09] + b1[0x0D];
278 out1[0x10* 6] = b1[0x0D] + b1[0x0B];
279 out1[0x10*10] = b1[0x0B] + b1[0x0F];
280 out1[0x10*14] = b1[0x0F];
281 #else
/* Disabled alternative: stores the same sums, but accumulates each one
 * into b1 first and then copies it out. */
282 b1[0x08] += b1[0x0C];
283 out0[0x10*14] = b1[0x08];
284 b1[0x0C] += b1[0x0a];
285 out0[0x10*10] = b1[0x0C];
286 b1[0x0A] += b1[0x0E];
287 out0[0x10* 6] = b1[0x0A];
288 b1[0x0E] += b1[0x09];
289 out0[0x10* 2] = b1[0x0E];
290 b1[0x09] += b1[0x0D];
291 out1[0x10* 2] = b1[0x09];
292 b1[0x0D] += b1[0x0B];
293 out1[0x10* 6] = b1[0x0D];
294 b1[0x0B] += b1[0x0F];
295 out1[0x10*10] = b1[0x0B];
296 out1[0x10*14] = b1[0x0F];
297 #endif
298
/* Output, part 3: the odd output slots are built from b1[0x10..0x1F].
 * tmp holds a sum of two elements from b1[0x18..0x1F] that is shared by
 * two consecutive stores, each adding one element from b1[0x10..0x17]. */
299 {
300 real tmp;
301 tmp = b1[0x18] + b1[0x1C];
302 out0[0x10*15] = tmp + b1[0x10];
303 out0[0x10*13] = tmp + b1[0x14];
304 tmp = b1[0x1C] + b1[0x1A];
305 out0[0x10*11] = tmp + b1[0x14];
306 out0[0x10* 9] = tmp + b1[0x12];
307 tmp = b1[0x1A] + b1[0x1E];
308 out0[0x10* 7] = tmp + b1[0x12];
309 out0[0x10* 5] = tmp + b1[0x16];
310 tmp = b1[0x1E] + b1[0x19];
311 out0[0x10* 3] = tmp + b1[0x16];
312 out0[0x10* 1] = tmp + b1[0x11];
313 tmp = b1[0x19] + b1[0x1D];
314 out1[0x10* 1] = tmp + b1[0x11];
315 out1[0x10* 3] = tmp + b1[0x15];
316 tmp = b1[0x1D] + b1[0x1B];
317 out1[0x10* 5] = tmp + b1[0x15];
318 out1[0x10* 7] = tmp + b1[0x13];
319 tmp = b1[0x1B] + b1[0x1F];
320 out1[0x10* 9] = tmp + b1[0x13];
321 out1[0x10*11] = tmp + b1[0x17];
/* the last two slots of out1 need no shared sum */
322 out1[0x10*13] = b1[0x17] + b1[0x1F];
323 out1[0x10*15] = b1[0x1F];
324 }
325 }
326
327 /*
328 * the call via dct64_i386 is a trick to force GCC to use
329 * (new) registers for the b1,b2 pointers into the bufs[xx] array
330 */
/*
 * Public entry point: forwards to dct64_1() with a = out0, b = out1 and
 * c = samples, supplying a local scratch array of 0x40 elements whose two
 * 0x20-element halves serve as the b1 and b2 work buffers.
 */
331 void dct64_i386(real *a,real *b,real *c)
332 {
333 real scratch[0x20 + 0x20];
334 dct64_1(a, b, &scratch[0x00], &scratch[0x20], c);
335 }
336