562
|
1 /*
|
|
2 decode_i486.c: i486 decode
|
|
3
|
|
4 copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
|
|
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
|
|
6 initially written by Fabrice Bellard
|
|
7
|
|
8 One has to see if the modification for non-static memory kills this optimization (cache locality?).
|
|
9 */
|
|
10
|
|
11 /*
|
|
12 * Subband Synthesis for MPEG Audio.
|
|
13 *
|
|
14 * Version optimized for 80486 by using integer arithmetic,
|
|
15 * multiplications by shift and add, and by increasing locality in
|
|
16 * order to fit the 8KB L1 cache. This code should be compiled with gcc
|
|
17 * 2.7.2 or higher.
|
|
18 *
|
|
19 * Note: this version does not guarantee good accuracy. The filter
|
|
20 * coefficients are quantized to 14 bits.
|
|
21 *
|
|
22 * (c) 1998 Fabrice Bellard
|
|
23 */
|
|
24
|
|
25 #include "mpg123lib_intern.h"
|
|
26
|
|
/*
 * FIR16_1: produce one output sample from a 16-tap integer FIR filter.
 *
 *   pos      output slot; the result is stored to samples[2*(pos)]
 *   c0..c15  integer coefficients, multiplied with b0[0]..b0[15]
 *
 * The macro uses the names `b0` and `samples` from the invoking scope and
 * needs FIR_BUFFER_SIZE to be defined there as well.
 *
 * The weighted sum is rounded (1<<13 is added) and shifted right by 14 bits,
 * then clamped to [-32768, 32767] before being stored.  As a side effect,
 * b0 is advanced by FIR_BUFFER_SIZE elements.
 *
 * The body is wrapped in do { } while (0) so that `FIR16_1(...);` is exactly
 * one statement and stays valid inside an unbraced if/else.
 */
#define FIR16_1(pos,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15) \
do {\
	int sum;\
	sum=(c0)*b0[0]+(c1)*b0[1]+(c2)*b0[2]+(c3)*b0[3]+\
	    (c4)*b0[4]+(c5)*b0[5]+(c6)*b0[6]+(c7)*b0[7]+\
	    (c8)*b0[8]+(c9)*b0[9]+(c10)*b0[10]+(c11)*b0[11]+\
	    (c12)*b0[12]+(c13)*b0[13]+(c14)*b0[14]+(c15)*b0[15];\
	sum=(sum+(1 << 13))>>14;\
	if (sum<-32768) sum=-32768;\
	else if (sum>32767) sum=32767;\
	samples[2*(pos)]=sum;\
	b0+=FIR_BUFFER_SIZE;\
} while (0)
|
|
40
|
|
/*
 * FIR16_2: produce two output samples from the same 16 input values.
 *
 *   pos1, c0..c15  first output slot and its coefficients;
 *                  the result is stored to samples[(pos1)*2]
 *   pos2, d0..d15  second output slot and its coefficients;
 *                  the result is stored to samples[(pos2)*2]
 *
 * The macro uses the names `b0` and `samples` from the invoking scope and
 * needs FIR_BUFFER_SIZE to be defined there as well.
 *
 * Each b0[k] is loaded once into `v` and fed to both accumulators.  Both
 * sums are rounded (1<<13 is added), shifted right by 14 bits and clamped to
 * [-32768, 32767] before being stored.  As a side effect, b0 is advanced by
 * FIR_BUFFER_SIZE elements.
 *
 * The body is wrapped in do { } while (0) so that `FIR16_2(...);` is exactly
 * one statement and stays valid inside an unbraced if/else.
 */
#define FIR16_2(pos1,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,\
                pos2,d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15) \
do {\
	int sum1,sum2,v;\
\
	v=b0[0];\
	sum1=(c0)*v;\
	sum2=(d0)*v;\
	v=b0[1];\
	sum1+=(c1)*v;\
	sum2+=(d1)*v;\
	v=b0[2];\
	sum1+=(c2)*v;\
	sum2+=(d2)*v;\
	v=b0[3];\
	sum1+=(c3)*v;\
	sum2+=(d3)*v;\
	v=b0[4];\
	sum1+=(c4)*v;\
	sum2+=(d4)*v;\
	v=b0[5];\
	sum1+=(c5)*v;\
	sum2+=(d5)*v;\
	v=b0[6];\
	sum1+=(c6)*v;\
	sum2+=(d6)*v;\
	v=b0[7];\
	sum1+=(c7)*v;\
	sum2+=(d7)*v;\
	v=b0[8];\
	sum1+=(c8)*v;\
	sum2+=(d8)*v;\
	v=b0[9];\
	sum1+=(c9)*v;\
	sum2+=(d9)*v;\
	v=b0[10];\
	sum1+=(c10)*v;\
	sum2+=(d10)*v;\
	v=b0[11];\
	sum1+=(c11)*v;\
	sum2+=(d11)*v;\
	v=b0[12];\
	sum1+=(c12)*v;\
	sum2+=(d12)*v;\
	v=b0[13];\
	sum1+=(c13)*v;\
	sum2+=(d13)*v;\
	v=b0[14];\
	sum1+=(c14)*v;\
	sum2+=(d14)*v;\
	v=b0[15];\
	sum1+=(c15)*v;\
	sum2+=(d15)*v;\
\
	sum1=(sum1+(1<<13))>>14;\
	sum2=(sum2+(1<<13))>>14;\
\
	if (sum1<-32768) sum1=-32768;\
	else if (sum1>32767) sum1=32767;\
	samples[(pos1)*2]=sum1;\
\
	if (sum2<-32768) sum2=-32768;\
	else if (sum2>32767) sum2=32767;\
	samples[(pos2)*2]=sum2;\
	b0+=FIR_BUFFER_SIZE;\
} while (0)
|
|
107
|
|
108 int synth_1to1_486(real *bandPtr, int channel, mpg123_handle *fr, int nb_blocks)
|
|
109 {
|
|
110 short *samples = (short *) (fr->buffer.data+fr->buffer.fill);
|
|
111 int *b0,**buf;
|
|
112 int clip = 0;
|
|
113 int block,b,bo_start;
|
|
114
|
|
115 /* samples address */
|
|
116 samples+=channel;
|
|
117
|
|
118 bo_start=fr->bo[channel];
|
|
119 buf = fr->int_buffs[channel];
|
|
120
|
|
121 b=bo_start;
|
|
122 for(block=0;block<nb_blocks;block++) {
|
|
123
|
|
124 /* FIR offset */
|
|
125 b++;
|
|
126 if (b >= FIR_BUFFER_SIZE) {
|
|
127 int *p,*q;
|
|
128 int c,i,j;
|
|
129
|
|
130 /* we shift the buffers */
|
|
131 for(c=0;c<2;c++) {
|
|
132 p=&buf[c][0]+1;
|
|
133 q=p+(FIR_BUFFER_SIZE-FIR_SIZE);
|
|
134 for(i=0;i<17;i++) {
|
|
135 for(j=0;j<FIR_SIZE-1;j++) p[j]=q[j];
|
|
136 p+=FIR_BUFFER_SIZE;
|
|
137 q+=FIR_BUFFER_SIZE;
|
|
138 }
|
|
139 }
|
|
140 /* we update 'bo' accordingly */
|
|
141 b=fr->bo[channel]=FIR_SIZE;
|
|
142 }
|
|
143
|
|
144 if(b & 1) {
|
|
145 dct64_i486(buf[1]+b,buf[0]+b,bandPtr);
|
|
146 } else {
|
|
147 dct64_i486(buf[0]+b,buf[1]+b,bandPtr);
|
|
148 }
|
|
149 bandPtr+=32;
|
|
150 }
|
|
151 fr->bo[channel]=b;
|
|
152
|
|
153 /* filter bank: part 1 */
|
|
154 b=bo_start;
|
|
155 for(block=0;block<nb_blocks;block++) {
|
|
156 b++;
|
|
157 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
|
|
158 if(b & 1) {
|
|
159 b0 = buf[0] + b - (FIR_SIZE-1);
|
|
160 } else {
|
|
161 b0 = buf[1] + b - (FIR_SIZE-1);
|
|
162 }
|
|
163
|
|
164 FIR16_1(0,-7,53,-114,509,-1288,1643,-9372,18759,9372,1643,1288,509,114,53,7,0);
|
|
165 FIR16_2(1,-6,52,-100,515,-1197,1783,-8910,18748,9834,1489,1379,500,129,54,7,0,
|
|
166 31,0,-7,54,-129,500,-1379,1489,-9834,18748,8910,1783,1197,515,100,52,6);
|
|
167 FIR16_2(2,-6,50,-86,520,-1106,1910,-8447,18714,10294,1322,1469,488,145,55,8,0,
|
|
168 30,0,-8,55,-145,488,-1469,1322,-10294,18714,8447,1910,1106,520,86,50,6);
|
|
169 FIR16_2(3,-5,49,-73,521,-1015,2023,-7986,18657,10751,1140,1559,473,161,56,9,0,
|
|
170 29,0,-9,56,-161,473,-1559,1140,-10751,18657,7986,2023,1015,521,73,49,5);
|
|
171 samples+=64;
|
|
172 }
|
|
173 samples-=64*nb_blocks;
|
|
174
|
|
175 /* filter bank: part 2 */
|
|
176
|
|
177 b=bo_start;
|
|
178 for(block=0;block<nb_blocks;block++) {
|
|
179 b++;
|
|
180 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
|
|
181 if(b & 1) {
|
|
182 b0 = buf[0] + b - (FIR_SIZE-1) + 4*FIR_BUFFER_SIZE;
|
|
183 } else {
|
|
184 b0 = buf[1] + b - (FIR_SIZE-1) + 4*FIR_BUFFER_SIZE;
|
|
185 }
|
|
186
|
|
187 FIR16_2(4,-4,47,-61,521,-926,2123,-7528,18578,11205,944,1647,455,177,56,10,0,
|
|
188 28,0,-10,56,-177,455,-1647,944,-11205,18578,7528,2123,926,521,61,47,4);
|
|
189 FIR16_2(5,-4,45,-49,518,-837,2210,-7072,18477,11654,733,1733,434,194,57,11,0,
|
|
190 27,0,-11,57,-194,434,-1733,733,-11654,18477,7072,2210,837,518,49,45,4);
|
|
191 FIR16_2(6,-4,44,-38,514,-751,2284,-6620,18353,12097,509,1817,411,212,57,12,0,
|
|
192 26,0,-12,57,-212,411,-1817,509,-12097,18353,6620,2284,751,514,38,44,4);
|
|
193 FIR16_2(7,-3,42,-27,508,-665,2347,-6173,18208,12534,270,1899,383,229,56,13,0,
|
|
194 25,0,-13,56,-229,383,-1899,270,-12534,18208,6173,2347,665,508,27,42,3);
|
|
195
|
|
196 samples+=64;
|
|
197 }
|
|
198 samples-=64*nb_blocks;
|
|
199
|
|
200 /* filter bank: part 3 */
|
|
201
|
|
202 b=bo_start;
|
|
203 for(block=0;block<nb_blocks;block++) {
|
|
204 b++;
|
|
205 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
|
|
206 if(b & 1) {
|
|
207 b0 = buf[0] + b - (FIR_SIZE-1) + 8*FIR_BUFFER_SIZE;
|
|
208 } else {
|
|
209 b0 = buf[1] + b - (FIR_SIZE-1) + 8*FIR_BUFFER_SIZE;
|
|
210 }
|
|
211
|
|
212 FIR16_2(8,-3,40,-18,500,-582,2398,-5732,18042,12963,17,1977,353,247,56,14,0,
|
|
213 24,0,-14,56,-247,353,-1977,17,-12963,18042,5732,2398,582,500,18,40,3);
|
|
214 FIR16_2(9,-2,38,-9,490,-501,2437,-5297,17855,13383,-249,2052,320,266,55,15,0,
|
|
215 23,0,-15,55,-266,320,-2052,-249,-13383,17855,5297,2437,501,490,9,38,2);
|
|
216 FIR16_2(10,-2,36,0,479,-423,2465,-4869,17647,13794,-530,2122,282,284,53,17,0,
|
|
217 22,0,-17,53,-284,282,-2122,-530,-13794,17647,4869,2465,423,479,0,36,2);
|
|
218 FIR16_2(11,-2,34,7,467,-347,2483,-4449,17419,14194,-825,2188,242,302,52,18,0,
|
|
219 21,0,-18,52,-302,242,-2188,-825,-14194,17419,4449,2483,347,467,-7,34,2);
|
|
220
|
|
221 samples+=64;
|
|
222 }
|
|
223 samples-=64*nb_blocks;
|
|
224
|
|
225 /* filter bank: part 4 */
|
|
226
|
|
227 b=bo_start;
|
|
228 for(block=0;block<nb_blocks;block++) {
|
|
229 b++;
|
|
230 if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
|
|
231 if(b & 1) {
|
|
232 b0 = buf[0] + b - (FIR_SIZE-1) + 12*FIR_BUFFER_SIZE;
|
|
233 } else {
|
|
234 b0 = buf[1] + b - (FIR_SIZE-1) + 12*FIR_BUFFER_SIZE;
|
|
235 }
|
|
236
|
|
237 FIR16_2(12,-2,33,14,454,-273,2491,-4038,17173,14583,-1133,2249,198,320,50,19,0,
|
|
238 20,0,-19,50,-320,198,-2249,-1133,-14583,17173,4038,2491,273,454,-14,33,2);
|
|
239 FIR16_2(13,-1,31,20,439,-203,2489,-3637,16907,14959,-1454,2304,151,339,47,21,-1,
|
|
240 19,-1,-21,47,-339,151,-2304,-1454,-14959,16907,3637,2489,203,439,-20,31,1);
|
|
241 FIR16_2(14,-1,29,26,424,-136,2479,-3245,16623,15322,-1788,2354,100,357,44,22,-1,
|
|
242 18,-1,-22,44,-357,100,-2354,-1788,-15322,16623,3245,2479,136,424,-26,29,1);
|
|
243 FIR16_2(15,-1,27,31,408,-72,2459,-2863,16322,15671,-2135,2396,46,374,40,24,-1,
|
|
244 17,-1,-24,40,-374,46,-2396,-2135,-15671,16322,2863,2459,72,408,-31,27,1);
|
|
245 FIR16_1(16,-1,0,36,0,-11,0,-2493,0,16004,0,2431,0,391,0,26,0);
|
|
246
|
|
247 samples+=64;
|
|
248 }
|
|
249
|
|
250 return clip;
|
|
251 }
|
|
252
|