562
|
1 /*
|
|
2 dct64.c: DCT64, the plain C version
|
|
3
|
|
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
|
|
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
|
|
6 initially written by Michael Hipp
|
|
7 */
|
|
8
|
|
9 /*
|
|
10 * Discrete Cosine Tansform (DCT) for subband synthesis
|
|
11 *
|
|
12 * -funroll-loops (for gcc) will remove the loops for better performance
|
|
13 * using loops in the source-code enhances readabillity
|
|
14 *
|
|
15 *
|
|
16 * TODO: write an optimized version for the down-sampling modes
|
|
17 * (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero
|
|
18 */
|
|
19
|
|
20 #include "mpg123lib_intern.h"
|
|
21
|
|
22 void dct64(real *out0,real *out1,real *samples)
|
|
23 {
|
|
24 real bufs[64];
|
|
25
|
|
26 {
|
|
27 register int i,j;
|
|
28 register real *b1,*b2,*bs,*costab;
|
|
29
|
|
30 b1 = samples;
|
|
31 bs = bufs;
|
|
32 costab = pnts[0]+16;
|
|
33 b2 = b1 + 32;
|
|
34
|
|
35 for(i=15;i>=0;i--)
|
|
36 *bs++ = (*b1++ + *--b2);
|
|
37 for(i=15;i>=0;i--)
|
|
38 *bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
|
|
39
|
|
40 b1 = bufs;
|
|
41 costab = pnts[1]+8;
|
|
42 b2 = b1 + 16;
|
|
43
|
|
44 {
|
|
45 for(i=7;i>=0;i--)
|
|
46 *bs++ = (*b1++ + *--b2);
|
|
47 for(i=7;i>=0;i--)
|
|
48 *bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
|
|
49 b2 += 32;
|
|
50 costab += 8;
|
|
51 for(i=7;i>=0;i--)
|
|
52 *bs++ = (*b1++ + *--b2);
|
|
53 for(i=7;i>=0;i--)
|
|
54 *bs++ = REAL_MUL((*b1++ - *--b2), *--costab);
|
|
55 b2 += 32;
|
|
56 }
|
|
57
|
|
58 bs = bufs;
|
|
59 costab = pnts[2];
|
|
60 b2 = b1 + 8;
|
|
61
|
|
62 for(j=2;j;j--)
|
|
63 {
|
|
64 for(i=3;i>=0;i--)
|
|
65 *bs++ = (*b1++ + *--b2);
|
|
66 for(i=3;i>=0;i--)
|
|
67 *bs++ = REAL_MUL((*--b2 - *b1++), costab[i]);
|
|
68 b2 += 16;
|
|
69 for(i=3;i>=0;i--)
|
|
70 *bs++ = (*b1++ + *--b2);
|
|
71 for(i=3;i>=0;i--)
|
|
72 *bs++ = REAL_MUL((*b1++ - *--b2), costab[i]);
|
|
73 b2 += 16;
|
|
74 }
|
|
75
|
|
76 b1 = bufs;
|
|
77 costab = pnts[3];
|
|
78 b2 = b1 + 4;
|
|
79
|
|
80 for(j=4;j;j--)
|
|
81 {
|
|
82 *bs++ = (*b1++ + *--b2);
|
|
83 *bs++ = (*b1++ + *--b2);
|
|
84 *bs++ = REAL_MUL((*--b2 - *b1++), costab[1]);
|
|
85 *bs++ = REAL_MUL((*--b2 - *b1++), costab[0]);
|
|
86 b2 += 8;
|
|
87 *bs++ = (*b1++ + *--b2);
|
|
88 *bs++ = (*b1++ + *--b2);
|
|
89 *bs++ = REAL_MUL((*b1++ - *--b2), costab[1]);
|
|
90 *bs++ = REAL_MUL((*b1++ - *--b2), costab[0]);
|
|
91 b2 += 8;
|
|
92 }
|
|
93 bs = bufs;
|
|
94 costab = pnts[4];
|
|
95
|
|
96 for(j=8;j;j--)
|
|
97 {
|
|
98 real v0,v1;
|
|
99 v0=*b1++; v1 = *b1++;
|
|
100 *bs++ = (v0 + v1);
|
|
101 *bs++ = REAL_MUL((v0 - v1), (*costab));
|
|
102 v0=*b1++; v1 = *b1++;
|
|
103 *bs++ = (v0 + v1);
|
|
104 *bs++ = REAL_MUL((v1 - v0), (*costab));
|
|
105 }
|
|
106
|
|
107 }
|
|
108
|
|
109
|
|
110 {
|
|
111 register real *b1;
|
|
112 register int i;
|
|
113
|
|
114 for(b1=bufs,i=8;i;i--,b1+=4)
|
|
115 b1[2] += b1[3];
|
|
116
|
|
117 for(b1=bufs,i=4;i;i--,b1+=8)
|
|
118 {
|
|
119 b1[4] += b1[6];
|
|
120 b1[6] += b1[5];
|
|
121 b1[5] += b1[7];
|
|
122 }
|
|
123
|
|
124 for(b1=bufs,i=2;i;i--,b1+=16)
|
|
125 {
|
|
126 b1[8] += b1[12];
|
|
127 b1[12] += b1[10];
|
|
128 b1[10] += b1[14];
|
|
129 b1[14] += b1[9];
|
|
130 b1[9] += b1[13];
|
|
131 b1[13] += b1[11];
|
|
132 b1[11] += b1[15];
|
|
133 }
|
|
134 }
|
|
135
|
|
136
|
|
137 out0[0x10*16] = bufs[0];
|
|
138 out0[0x10*15] = bufs[16+0] + bufs[16+8];
|
|
139 out0[0x10*14] = bufs[8];
|
|
140 out0[0x10*13] = bufs[16+8] + bufs[16+4];
|
|
141 out0[0x10*12] = bufs[4];
|
|
142 out0[0x10*11] = bufs[16+4] + bufs[16+12];
|
|
143 out0[0x10*10] = bufs[12];
|
|
144 out0[0x10* 9] = bufs[16+12] + bufs[16+2];
|
|
145 out0[0x10* 8] = bufs[2];
|
|
146 out0[0x10* 7] = bufs[16+2] + bufs[16+10];
|
|
147 out0[0x10* 6] = bufs[10];
|
|
148 out0[0x10* 5] = bufs[16+10] + bufs[16+6];
|
|
149 out0[0x10* 4] = bufs[6];
|
|
150 out0[0x10* 3] = bufs[16+6] + bufs[16+14];
|
|
151 out0[0x10* 2] = bufs[14];
|
|
152 out0[0x10* 1] = bufs[16+14] + bufs[16+1];
|
|
153 out0[0x10* 0] = bufs[1];
|
|
154
|
|
155 out1[0x10* 0] = bufs[1];
|
|
156 out1[0x10* 1] = bufs[16+1] + bufs[16+9];
|
|
157 out1[0x10* 2] = bufs[9];
|
|
158 out1[0x10* 3] = bufs[16+9] + bufs[16+5];
|
|
159 out1[0x10* 4] = bufs[5];
|
|
160 out1[0x10* 5] = bufs[16+5] + bufs[16+13];
|
|
161 out1[0x10* 6] = bufs[13];
|
|
162 out1[0x10* 7] = bufs[16+13] + bufs[16+3];
|
|
163 out1[0x10* 8] = bufs[3];
|
|
164 out1[0x10* 9] = bufs[16+3] + bufs[16+11];
|
|
165 out1[0x10*10] = bufs[11];
|
|
166 out1[0x10*11] = bufs[16+11] + bufs[16+7];
|
|
167 out1[0x10*12] = bufs[7];
|
|
168 out1[0x10*13] = bufs[16+7] + bufs[16+15];
|
|
169 out1[0x10*14] = bufs[15];
|
|
170 out1[0x10*15] = bufs[16+15];
|
|
171
|
|
172 }
|
|
173
|
|
174
|