comparison decoders/libmpg123/decode_i486.c @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems about MP3s not seeking, or most modern MP3s not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123... there are MMX, 3DNow, SSE, Altivec, etc. decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
comparison
equal deleted inserted replaced
561:f2985e08589c 562:7e08477b0fc1
1 /*
2 decode_i486.c: i486 decode
3
4 copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Fabrice Bellard
7
8 One has to see if the modification for non-static memory kills this optimization (cache locality?).
9 */
10
11 /*
12 * Subband Synthesis for MPEG Audio.
13 *
14 * Version optimized for 80486 by using integer arithmetic,
15 * multiplications by shift and add, and by increasing locality in
16 * order to fit the 8KB L1 cache. This code should be compiled with gcc
17 * 2.7.2 or higher.
18 *
19 * Note: this version does not guarantee good accuracy. The filter
20 * coefficients are quantized to 14 bits.
21 *
22 * (c) 1998 Fabrice Bellard
23 */
24
25 #include "mpg123lib_intern.h"
26
/*
 * FIR16_1: compute one output sample from 16 consecutive integer taps.
 *
 * Relies on two names being in scope at the point of use:
 *   b0      - int pointer to the 16 taps; advanced by FIR_BUFFER_SIZE afterwards
 *   samples - output array; the result is stored at index 2*(pos)
 *
 * The weighted sum is rounded by adding 1<<13 and shifting right by 14,
 * then clamped to [-32768, 32767] before it is stored.
 * Intended to be used as a statement, followed by a semicolon.
 */
#define FIR16_1(pos,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15) \
do {\
	int acc=(c0)*b0[0];\
	acc+=(c1)*b0[1];\
	acc+=(c2)*b0[2];\
	acc+=(c3)*b0[3];\
	acc+=(c4)*b0[4];\
	acc+=(c5)*b0[5];\
	acc+=(c6)*b0[6];\
	acc+=(c7)*b0[7];\
	acc+=(c8)*b0[8];\
	acc+=(c9)*b0[9];\
	acc+=(c10)*b0[10];\
	acc+=(c11)*b0[11];\
	acc+=(c12)*b0[12];\
	acc+=(c13)*b0[13];\
	acc+=(c14)*b0[14];\
	acc+=(c15)*b0[15];\
	acc=(acc+(1 << 13))>>14;\
	samples[2*(pos)]=(acc<-32768) ? -32768 : ((acc>32767) ? 32767 : acc);\
	b0+=FIR_BUFFER_SIZE;\
} while(0)
40
/*
 * FIR16_2_TAP: private helper for FIR16_2. Loads tap number idx once and
 * feeds it into both running sums with its two coefficients.
 */
#define FIR16_2_TAP(idx,c,d) \
	tap=b0[idx];\
	acc1+=(c)*tap;\
	acc2+=(d)*tap;

/*
 * FIR16_2: compute two output samples from the same 16 integer taps.
 *
 * Relies on two names being in scope at the point of use:
 *   b0      - int pointer to the 16 taps; advanced by FIR_BUFFER_SIZE afterwards
 *   samples - output array; results go to indices (pos1)*2 and (pos2)*2
 *
 * Coefficients c0..c15 produce the first sample, d0..d15 the second. Each sum
 * is rounded by adding 1<<13 and shifting right by 14, then clamped to
 * [-32768, 32767] before it is stored.
 * Intended to be used as a statement, followed by a semicolon.
 */
#define FIR16_2(pos1,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,\
pos2,d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15) \
do {\
	int acc1=0,acc2=0,tap;\
	\
	FIR16_2_TAP(0,c0,d0)\
	FIR16_2_TAP(1,c1,d1)\
	FIR16_2_TAP(2,c2,d2)\
	FIR16_2_TAP(3,c3,d3)\
	FIR16_2_TAP(4,c4,d4)\
	FIR16_2_TAP(5,c5,d5)\
	FIR16_2_TAP(6,c6,d6)\
	FIR16_2_TAP(7,c7,d7)\
	FIR16_2_TAP(8,c8,d8)\
	FIR16_2_TAP(9,c9,d9)\
	FIR16_2_TAP(10,c10,d10)\
	FIR16_2_TAP(11,c11,d11)\
	FIR16_2_TAP(12,c12,d12)\
	FIR16_2_TAP(13,c13,d13)\
	FIR16_2_TAP(14,c14,d14)\
	FIR16_2_TAP(15,c15,d15)\
	\
	acc1=(acc1+(1<<13))>>14;\
	acc2=(acc2+(1<<13))>>14;\
	\
	samples[(pos1)*2]=(acc1<-32768) ? -32768 : ((acc1>32767) ? 32767 : acc1);\
	samples[(pos2)*2]=(acc2<-32768) ? -32768 : ((acc2>32767) ? 32767 : acc2);\
	b0+=FIR_BUFFER_SIZE;\
} while(0)
107
108 int synth_1to1_486(real *bandPtr, int channel, mpg123_handle *fr, int nb_blocks)
109 {
110 short *samples = (short *) (fr->buffer.data+fr->buffer.fill);
111 int *b0,**buf;
112 int clip = 0;
113 int block,b,bo_start;
114
115 /* samples address */
116 samples+=channel;
117
118 bo_start=fr->bo[channel];
119 buf = fr->int_buffs[channel];
120
121 b=bo_start;
122 for(block=0;block<nb_blocks;block++) {
123
124 /* FIR offset */
125 b++;
126 if (b >= FIR_BUFFER_SIZE) {
127 int *p,*q;
128 int c,i,j;
129
130 /* we shift the buffers */
131 for(c=0;c<2;c++) {
132 p=&buf[c][0]+1;
133 q=p+(FIR_BUFFER_SIZE-FIR_SIZE);
134 for(i=0;i<17;i++) {
135 for(j=0;j<FIR_SIZE-1;j++) p[j]=q[j];
136 p+=FIR_BUFFER_SIZE;
137 q+=FIR_BUFFER_SIZE;
138 }
139 }
140 /* we update 'bo' accordingly */
141 b=fr->bo[channel]=FIR_SIZE;
142 }
143
144 if(b & 1) {
145 dct64_i486(buf[1]+b,buf[0]+b,bandPtr);
146 } else {
147 dct64_i486(buf[0]+b,buf[1]+b,bandPtr);
148 }
149 bandPtr+=32;
150 }
151 fr->bo[channel]=b;
152
153 /* filter bank: part 1 */
154 b=bo_start;
155 for(block=0;block<nb_blocks;block++) {
156 b++;
157 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
158 if(b & 1) {
159 b0 = buf[0] + b - (FIR_SIZE-1);
160 } else {
161 b0 = buf[1] + b - (FIR_SIZE-1);
162 }
163
164 FIR16_1(0,-7,53,-114,509,-1288,1643,-9372,18759,9372,1643,1288,509,114,53,7,0);
165 FIR16_2(1,-6,52,-100,515,-1197,1783,-8910,18748,9834,1489,1379,500,129,54,7,0,
166 31,0,-7,54,-129,500,-1379,1489,-9834,18748,8910,1783,1197,515,100,52,6);
167 FIR16_2(2,-6,50,-86,520,-1106,1910,-8447,18714,10294,1322,1469,488,145,55,8,0,
168 30,0,-8,55,-145,488,-1469,1322,-10294,18714,8447,1910,1106,520,86,50,6);
169 FIR16_2(3,-5,49,-73,521,-1015,2023,-7986,18657,10751,1140,1559,473,161,56,9,0,
170 29,0,-9,56,-161,473,-1559,1140,-10751,18657,7986,2023,1015,521,73,49,5);
171 samples+=64;
172 }
173 samples-=64*nb_blocks;
174
175 /* filter bank: part 2 */
176
177 b=bo_start;
178 for(block=0;block<nb_blocks;block++) {
179 b++;
180 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
181 if(b & 1) {
182 b0 = buf[0] + b - (FIR_SIZE-1) + 4*FIR_BUFFER_SIZE;
183 } else {
184 b0 = buf[1] + b - (FIR_SIZE-1) + 4*FIR_BUFFER_SIZE;
185 }
186
187 FIR16_2(4,-4,47,-61,521,-926,2123,-7528,18578,11205,944,1647,455,177,56,10,0,
188 28,0,-10,56,-177,455,-1647,944,-11205,18578,7528,2123,926,521,61,47,4);
189 FIR16_2(5,-4,45,-49,518,-837,2210,-7072,18477,11654,733,1733,434,194,57,11,0,
190 27,0,-11,57,-194,434,-1733,733,-11654,18477,7072,2210,837,518,49,45,4);
191 FIR16_2(6,-4,44,-38,514,-751,2284,-6620,18353,12097,509,1817,411,212,57,12,0,
192 26,0,-12,57,-212,411,-1817,509,-12097,18353,6620,2284,751,514,38,44,4);
193 FIR16_2(7,-3,42,-27,508,-665,2347,-6173,18208,12534,270,1899,383,229,56,13,0,
194 25,0,-13,56,-229,383,-1899,270,-12534,18208,6173,2347,665,508,27,42,3);
195
196 samples+=64;
197 }
198 samples-=64*nb_blocks;
199
200 /* filter bank: part 3 */
201
202 b=bo_start;
203 for(block=0;block<nb_blocks;block++) {
204 b++;
205 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
206 if(b & 1) {
207 b0 = buf[0] + b - (FIR_SIZE-1) + 8*FIR_BUFFER_SIZE;
208 } else {
209 b0 = buf[1] + b - (FIR_SIZE-1) + 8*FIR_BUFFER_SIZE;
210 }
211
212 FIR16_2(8,-3,40,-18,500,-582,2398,-5732,18042,12963,17,1977,353,247,56,14,0,
213 24,0,-14,56,-247,353,-1977,17,-12963,18042,5732,2398,582,500,18,40,3);
214 FIR16_2(9,-2,38,-9,490,-501,2437,-5297,17855,13383,-249,2052,320,266,55,15,0,
215 23,0,-15,55,-266,320,-2052,-249,-13383,17855,5297,2437,501,490,9,38,2);
216 FIR16_2(10,-2,36,0,479,-423,2465,-4869,17647,13794,-530,2122,282,284,53,17,0,
217 22,0,-17,53,-284,282,-2122,-530,-13794,17647,4869,2465,423,479,0,36,2);
218 FIR16_2(11,-2,34,7,467,-347,2483,-4449,17419,14194,-825,2188,242,302,52,18,0,
219 21,0,-18,52,-302,242,-2188,-825,-14194,17419,4449,2483,347,467,-7,34,2);
220
221 samples+=64;
222 }
223 samples-=64*nb_blocks;
224
225 /* filter bank: part 4 */
226
227 b=bo_start;
228 for(block=0;block<nb_blocks;block++) {
229 b++;
230 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
231 if(b & 1) {
232 b0 = buf[0] + b - (FIR_SIZE-1) + 12*FIR_BUFFER_SIZE;
233 } else {
234 b0 = buf[1] + b - (FIR_SIZE-1) + 12*FIR_BUFFER_SIZE;
235 }
236
237 FIR16_2(12,-2,33,14,454,-273,2491,-4038,17173,14583,-1133,2249,198,320,50,19,0,
238 20,0,-19,50,-320,198,-2249,-1133,-14583,17173,4038,2491,273,454,-14,33,2);
239 FIR16_2(13,-1,31,20,439,-203,2489,-3637,16907,14959,-1454,2304,151,339,47,21,-1,
240 19,-1,-21,47,-339,151,-2304,-1454,-14959,16907,3637,2489,203,439,-20,31,1);
241 FIR16_2(14,-1,29,26,424,-136,2479,-3245,16623,15322,-1788,2354,100,357,44,22,-1,
242 18,-1,-22,44,-357,100,-2354,-1788,-15322,16623,3245,2479,136,424,-26,29,1);
243 FIR16_2(15,-1,27,31,408,-72,2459,-2863,16322,15671,-2135,2396,46,374,40,24,-1,
244 17,-1,-24,40,-374,46,-2396,-2135,-15671,16322,2863,2459,72,408,-31,27,1);
245 FIR16_1(16,-1,0,36,0,-11,0,-2493,0,16004,0,2431,0,391,0,26,0);
246
247 samples+=64;
248 }
249
250 return clip;
251 }
252