comparison decoders/libmpg123/optimize.h @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems with MP3s not seeking, or most modern MP3s not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123... there are MMX, 3DNow, SSE, Altivec, etc. decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
comparison
equal deleted inserted replaced
561:f2985e08589c 562:7e08477b0fc1
1 #ifndef MPG123_H_OPTIMIZE
2 #define MPG123_H_OPTIMIZE
3 /*
4 optimize: get a grip on the different optimizations
5
6 copyright 2007 by the mpg123 project - free software under the terms of the LGPL 2.1
7 see COPYING and AUTHORS files in distribution or http://mpg123.org
8 initially written by Thomas Orgis, taking from mpg123.[hc]
9
10 for building mpg123 with one optimization only, you have to choose exclusively between
11 OPT_GENERIC (generic C code for everyone)
12 OPT_I386 (Intel i386)
13 OPT_I486 (...)
14 OPT_I586 (Intel Pentium)
15 OPT_I586_DITHER (Intel Pentium with dithering/noise shaping for enhanced quality)
16 OPT_MMX (Intel Pentium and compatibles with MMX, fast, but not the best accuracy)
17 OPT_3DNOW (AMD 3DNow!, K6-2/3, Athlon, compatibles...)
18 OPT_ALTIVEC (Motorola/IBM PPC with AltiVec under MacOSX)
19
20 or you define OPT_MULTI and give a combination which makes sense (do not include i486, do not mix altivec and x86).
21
22 I still have to examine the dynamics of this here together with REAL_IS_FIXED.
23 */
24
25 /* this is included in mpg123.h, which includes config.h */
26 #ifdef CCALIGN /* NOTE(review): CCALIGN is presumably provided by config.h - confirm */
27 #define ALIGNED(a) __attribute__((aligned(a))) /* ask the compiler to align the declared object to a bytes */
28 #else
29 #define ALIGNED(a) /* without CCALIGN, ALIGNED() expands to nothing */
30 #endif
31
32 /* the optimizations only cover the synth1to1 mode and the dct36 function */
33 /* the first two types are needed in set_synth_functions regardless of optimizations */
34 typedef int (*func_synth)(real *,int, mpg123_handle *,int ); /* same parameter list as the synth_1to1 prototypes in this header (bandPtr, channel, fr, final) */
35 typedef int (*func_synth_mono)(real *, mpg123_handle *); /* same parameter list as the synth_1to1_mono / _mono2stereo prototypes */
36 typedef void (*func_dct36)(real *,real *,real *,real *,real *); /* matches the dct36 prototype */
37 typedef void (*func_dct64)(real *,real *,real *); /* matches the dct64 prototypes; also the type of mpl_dct64 */
38 typedef void (*func_make_decode_tables)(mpg123_handle*); /* matches make_decode_tables / make_decode_tables_mmx */
39 typedef real (*func_init_layer3_gainpow2)(mpg123_handle*, int); /* matches init_layer3_gainpow2 / init_layer3_gainpow2_mmx */
40 typedef real* (*func_init_layer2_table)(mpg123_handle*, real*, double); /* matches init_layer2_table / init_layer2_table_mmx */
41 typedef int (*func_synth_pent)(real *,int,unsigned char *); /* NOTE(review): takes 3 parameters, while synth_1to1_i586_asm and friends below take 6 - confirm where this type is used */
42
43 /* generic versions of the table/gain setup routines; the MMX, SSE and 3DNowExt sections below declare *_mmx replacements (last headaches about getting mmx hardcode out) */
44 real init_layer3_gainpow2(mpg123_handle *fr, int i);
45 real* init_layer2_table(mpg123_handle *fr, real *table, double m);
46 void make_decode_tables(mpg123_handle *fr);
47 void prepare_decode_tables(void); /* perhaps not best place here */
48
49 /* default dct36; the 3DNow and 3DNowExt sections and OPT_MULTI (with OPT_3DNOW) #undef and replace opt_dct36 - it's internal to layer3.c otherwise */
50 void dct36(real *,real *,real *,real *,real *);
51 #define opt_dct36(fr) dct36 /* the fr argument is ignored by this default */
52 /* defaults below; the MMX, SSE and 3DNowExt sections and OPT_MULTI (with OPT_MMXORSSE) #undef and replace them, except opt_decwin whose overrides are all commented out in this header */
53 #define opt_make_decode_tables(fr) make_decode_tables(fr) /* expands to a call, unlike the other opt_* macros which expand to a function name */
54 #define opt_decwin(fr) (fr)->decwin /* reads the decwin member of the handle */
55 #define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2
56 #define opt_init_layer2_table(fr) init_layer2_table
57
58 #ifdef OPT_GENERIC /* generic C decoder: declares the plain dct64 and synth_1to1 family */
59 #define PENTIUM_FALLBACK
60 void dct64(real *,real *,real *);
61 int synth_1to1(real *bandPtr,int channel, mpg123_handle *fr, int final);
62 int synth_1to1_8bit(real *bandPtr,int channel, mpg123_handle *fr, int final);
63 int synth_1to1_mono(real *, mpg123_handle *fr);
64 int synth_1to1_mono2stereo (real *, mpg123_handle *fr);
65 int synth_1to1_8bit_mono (real *, mpg123_handle *fr);
66 int synth_1to1_8bit_mono2stereo (real *, mpg123_handle *fr);
67 #ifndef OPT_MULTI /* single-decoder build: bind every opt_* macro directly to the generic function */
68 #define defopt generic
69 #define opt_dct64(fr) dct64
70 #define opt_synth_1to1(fr) synth_1to1
71 #define opt_synth_1to1_mono(fr) synth_1to1_mono
72 #define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo
73 #define opt_synth_1to1_8bit(fr) synth_1to1_8bit
74 #define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono
75 #define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo
76 #endif
77 #endif
78
79 /* i486 is special... always alone! Combining it with OPT_MULTI is rejected with #error below. */
80 #ifdef OPT_I486
81 #define OPT_X86 /* pulls in the shared x86 declarations further down */
82 #define OPT_I386_SYNTH /* pulls in the synth_1to1_i386 prototype further down */
83 #define defopt ivier
84 int synth_1to1_486(real *bandPtr, int channel, mpg123_handle *fr, int nb_blocks); /* NOTE(review): declared here, but opt_synth_1to1 below maps to synth_1to1_i386 - confirm where this one is called */
85 #ifdef OPT_MULTI
86 #error "i486 can only work alone!"
87 #endif
88 #define opt_synth_1to1(fr) synth_1to1_i386
89 #define FIR_BUFFER_SIZE 128
90 #define FIR_SIZE 16
91 void dct64_i486(int *a,int *b,real *c); /* not used generally */
92 #endif
93
94 #ifdef OPT_I386
95 #define PENTIUM_FALLBACK
96 #define OPT_X86 /* pulls in the shared x86 declarations further down */
97 #define OPT_I386_SYNTH
98 #ifndef OPT_MULTI
99 #ifndef defopt /* keep a defopt already set by an earlier section (e.g. OPT_I486) */
100 #define defopt idrei
101 #endif
102 #define opt_synth_1to1(fr) synth_1to1_i386
103 #endif
104 #endif
105
106 #ifdef OPT_I386_SYNTH /* set by both the OPT_I486 and OPT_I386 sections, so the prototype is declared once for either */
107 int synth_1to1_i386(real *bandPtr, int channel, mpg123_handle *fr, int final);
108 #endif
109
110 #ifdef OPT_I586 /* Pentium decoder: synth_1to1_i586 plus an asm routine with an explicit-buffer interface */
111 #define PENTIUM_FALLBACK
112 #define OPT_PENTIUM /* enables the opt_synth_1to1_i586_asm accessor in the OPT_MULTI section */
113 #define OPT_X86
114 int synth_1to1_i586(real *bandPtr, int channel, mpg123_handle *fr, int final);
115 int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
116 #ifndef OPT_MULTI
117 #define defopt ifuenf
118 #define opt_synth_1to1(fr) synth_1to1_i586
119 #define opt_synth_1to1_i586_asm(fr) synth_1to1_i586_asm
120 #endif
121 #endif
122
123 #ifdef OPT_I586_DITHER /* same layout as the OPT_I586 section, but the asm accessor points at the dithering routine */
124 #define PENTIUM_FALLBACK
125 #define OPT_PENTIUM
126 #define OPT_X86
127 int synth_1to1_i586(real *bandPtr, int channel, mpg123_handle *fr, int final); /* identical to the prototype in the OPT_I586 section */
128 int synth_1to1_i586_asm_dither(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
129 #ifndef OPT_MULTI /* note: defining OPT_I586 as well without OPT_MULTI would redefine defopt and opt_synth_1to1_i586_asm with different values */
130 #define defopt ifuenf_dither
131 #define opt_synth_1to1(fr) synth_1to1_i586
132 #define opt_synth_1to1_i586_asm(fr) synth_1to1_i586_asm_dither
133 #endif
134 #endif
135
136 /* That one has by far the most ugly hacks to make it cooperative. */
137 #ifdef OPT_MMX
138 #define OPT_MMXORSSE /* shared flag also set by the SSE and 3DNowExt sections; checked in the OPT_MULTI section */
139 #define OPT_X86
140 real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
141 real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
142 /* I think one can optimize storage here with the normal decwin */
143 extern real decwin_mmx[512+32];
144 void dct64_mmx(real *,real *,real *);
145 int synth_1to1_mmx(real *bandPtr, int channel, mpg123_handle *fr, int final);
146 void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
147 void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
148 /* these are in asm, dct64 called directly there */
149 void dct64_MMX(short *a,short *b,real *c);
150 int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins);
151 #ifndef OPT_MULTI /* single-decoder build: replace the generic defaults with the MMX versions */
152 #define defopt mmx
153 /* #undef opt_decwin
154 #define opt_decwin(fr) decwin_mmx */
155 #define opt_dct64(fr) dct64_mmx
156 #define opt_synth_1to1(fr) synth_1to1_mmx
157 #define opt_ /* NOTE(review): looks like an unfinished directive; as written it defines an empty macro named opt_ - confirm and remove */
158 #undef opt_make_decode_tables
159 #define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
160 #undef opt_init_layer3_gainpow2
161 #define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
162 #undef opt_init_layer2_table
163 #define opt_init_layer2_table(fr) init_layer2_table_mmx
164 #define OPT_MMX_ONLY /* suppresses the pnts/decwin extern declarations further down */
165 #endif
166 #endif
167
168 /* first crude hack into our source: SSE decoder, reusing the MMX table setup and dct64_mmx */
169 #ifdef OPT_SSE
170 #define OPT_MMXORSSE
171 #define OPT_MPLAYER /* enables the costab_mmxsse extern and the opt_mpl_dct64 accessor further down */
172 #define OPT_X86
173 real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
174 real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
175 /* I think one can optimize storage here with the normal decwin */
176 extern real decwin_mmx[512+32];
177 void dct64_mmx(real *,real *,real *);
178 void dct64_sse(real *,real *,real *);
179 int synth_1to1_sse(real *bandPtr, int channel, mpg123_handle *fr, int final);
180 void synth_1to1_sse_asm(real *bandPtr, int channel, short *samples, short *buffs, int *bo, real *decwin);
181 void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
182 void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
183 /* ugly! a global dct64 function pointer */
184 extern func_dct64 mpl_dct64;
185 #ifndef OPT_MULTI /* single-decoder build: bind the opt_* macros to the SSE/MMX functions */
186 #define defopt sse
187 #define opt_mpl_dct64(fr) dct64_sse
188 /* #undef opt_decwin
189 #define opt_decwin(fr) decwin_mmx */
190 #define opt_dct64(fr) dct64_mmx /* dct64_sse is silent in downsampling modes */
191 #define opt_synth_1to1(fr) synth_1to1_sse /* that will use dct64_sse */
192 #undef opt_make_decode_tables
193 #define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
194 #undef opt_init_layer3_gainpow2
195 #define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
196 #undef opt_init_layer2_table
197 #define opt_init_layer2_table(fr) init_layer2_table_mmx
198 #define OPT_MMX_ONLY /* watch out! this suppresses the pnts/decwin extern declarations further down */
199 #endif
200 #endif
201
202 /* first crude hack into our source: 3DNowExt decoder, same layout as the SSE section plus its own dct36 */
203 #ifdef OPT_3DNOWEXT
204 #define OPT_MMXORSSE
205 #define OPT_MPLAYER /* enables the costab_mmxsse extern and the opt_mpl_dct64 accessor further down */
206 #define OPT_X86
207 real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
208 real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
209 /* I think one can optimize storage here with the normal decwin */
210 extern real decwin_mmx[512+32];
211 void dct64_mmx(real *,real *,real *);
212 void dct64_3dnowext(real *,real *,real *);
213 void dct36_3dnowext(real *,real *,real *,real *,real *);
214 int synth_1to1_3dnowext(real *bandPtr, int channel, mpg123_handle *fr, int final);
215 void synth_1to1_3dnowext_asm(real *bandPtr, int channel, short *samples, short *buffs, int *bo, real *decwin);
216 void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
217 void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
218 /* ugly! a global dct64 function pointer */
219 extern func_dct64 mpl_dct64;
220 #ifndef OPT_MULTI /* single-decoder build: bind the opt_* macros to the 3DNowExt/MMX functions */
221 #define defopt dreidnowext
222 #define opt_mpl_dct64(fr) dct64_3dnowext
223 #undef opt_dct36
224 #define opt_dct36(fr) dct36_3dnowext
225 /* #undef opt_decwin
226 #define opt_decwin(fr) decwin_mmx */
227 #define opt_dct64(fr) dct64_mmx /* NOTE(review): this comment used to read "dct64_sse is silent in downsampling modes", apparently copied from the SSE section; presumably the same holds for dct64_3dnowext - confirm */
228 #define opt_synth_1to1(fr) synth_1to1_3dnowext /* that will use dct64_3dnowext */
229 #undef opt_make_decode_tables
230 #define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
231 #undef opt_init_layer3_gainpow2
232 #define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
233 #undef opt_init_layer2_table
234 #define opt_init_layer2_table(fr) init_layer2_table_mmx
235 #define OPT_MMX_ONLY /* watch out! this suppresses the pnts/decwin extern declarations further down */
236 #endif
237 #endif
238
239
240 #ifndef OPT_MMX_ONLY /* OPT_MMX_ONLY is set by the single-decoder MMX, SSE and 3DNowExt builds above */
241 extern real *pnts[5];
242 extern real decwin[512+32];
243 #endif
244 #ifdef OPT_MPLAYER /* set by the SSE and 3DNowExt sections */
245 extern const int costab_mmxsse[];
246 #endif
247
248 /* 3dnow used to use synth_1to1_i586 for mono / 8bit conversion - was that intentional? */
249 /* I'm trying to skip the pentium code here ... until I see that that is indeed a bad idea */
250 #ifdef OPT_3DNOW
251 #define K6_FALLBACK /* a fallback for 3DNowExt */
252 #define OPT_X86 /* pulls in the shared x86 declarations below; they supply opt_dct64 and the mono/8bit macros in single-decoder builds */
253 void dct36_3dnow(real *,real *,real *,real *,real *);
254 void do_equalizer_3dnow(real *bandPtr,int channel, real equalizer[2][32]);
255 int synth_1to1_3dnow(real *bandPtr, int channel, mpg123_handle *fr, int final);
256 int synth_1to1_3dnow_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
257 #ifndef OPT_MULTI /* single-decoder build: replace the default dct36 and bind the stereo synth */
258 #define defopt dreidnow
259 #undef opt_dct36
260 #define opt_dct36(fr) dct36_3dnow
261 #define opt_synth_1to1(fr) synth_1to1_3dnow
262 #endif
263 #endif
264
265 #ifdef OPT_X86 /* shared by every x86 section above: CPU detection plus the i386 routines */
266 /* these have to be merged back into one! (declared with empty parameter lists, i.e. without prototypes) */
267 unsigned int getcpuid();
268 unsigned int getextcpuflags();
269 unsigned int getstdcpuflags();
270 unsigned int getstd2cpuflags();
271
272 void dct64_i386(real *,real *,real *);
273 int synth_1to1_mono_i386(real *, mpg123_handle *fr);
274 int synth_1to1_mono2stereo_i386(real *, mpg123_handle *fr);
275 int synth_1to1_8bit_i386(real *,int, mpg123_handle *fr, int final);
276 int synth_1to1_8bit_mono_i386(real *, mpg123_handle *fr);
277 int synth_1to1_8bit_mono2stereo_i386(real *, mpg123_handle *fr);
278 #ifndef OPT_MULTI /* single-decoder build: mono and 8bit variants always map to the i386 routines */
279 #ifndef opt_dct64 /* keep an opt_dct64 already chosen by the MMX/SSE/3DNowExt sections */
280 #define opt_dct64(fr) dct64_i386 /* default one even for 3dnow and i486 in decode_2to1, decode_ntom */
281 #endif
282 #define opt_synth_1to1_mono(fr) synth_1to1_mono_i386
283 #define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo_i386
284 #define opt_synth_1to1_8bit(fr) synth_1to1_8bit_i386
285 #define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono_i386
286 #define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo_i386
287 #endif
288 #endif
289
290 #ifdef OPT_ALTIVEC /* AltiVec decoder: provides its own dct64 and the full synth_1to1 family */
291 void dct64_altivec(real *out0,real *out1,real *samples);
292 int synth_1to1_altivec(real *,int,mpg123_handle *, int);
293 int synth_1to1_mono_altivec(real *,mpg123_handle *);
294 int synth_1to1_mono2stereo_altivec(real *, mpg123_handle *);
295 int synth_1to1_8bit_altivec(real *,int,mpg123_handle *,int);
296 int synth_1to1_8bit_mono_altivec(real *,mpg123_handle *);
297 int synth_1to1_8bit_mono2stereo_altivec(real *,mpg123_handle *);
298 #ifndef OPT_MULTI /* single-decoder build: bind every opt_* synth/dct64 macro to the AltiVec function */
299 #define defopt altivec
300 #define opt_dct64(fr) dct64_altivec
301 #define opt_synth_1to1(fr) synth_1to1_altivec
302 #define opt_synth_1to1_mono(fr) synth_1to1_mono_altivec
303 #define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo_altivec
304 #define opt_synth_1to1_8bit(fr) synth_1to1_8bit_altivec
305 #define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono_altivec
306 #define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo_altivec
307 #endif
308 #endif
309
310 /* used for multi opt mode and the single 3dnow mode to have the old 3dnow test flag still working */
311 void check_decoders(void);
312
313 #ifdef OPT_MULTI /* multi-decoder build: every opt_* macro reads a function pointer from the handle instead of naming a function */
314 #ifdef OPT_X86
315 extern struct cpuflags cf;
316 #endif
317 #define defopt nodec
318 /* NOTE(review): the original comment called this "a simple global struct to hold the decoding function pointers", but the macros below read (fr)->cpu_opts, a member of the handle - confirm and update */
319
320 #define opt_synth_1to1(fr) ((fr)->cpu_opts.synth_1to1)
321 #define opt_synth_1to1_mono(fr) ((fr)->cpu_opts.synth_1to1_mono)
322 #define opt_synth_1to1_mono2stereo(fr) ((fr)->cpu_opts.synth_1to1_mono2stereo)
323 #define opt_synth_1to1_8bit(fr) ((fr)->cpu_opts.synth_1to1_8bit)
324 #define opt_synth_1to1_8bit_mono(fr) ((fr)->cpu_opts.synth_1to1_8bit_mono)
325 #define opt_synth_1to1_8bit_mono2stereo(fr) ((fr)->cpu_opts.synth_1to1_8bit_mono2stereo)
326 #ifdef OPT_PENTIUM /* set by the OPT_I586 and OPT_I586_DITHER sections */
327 #define opt_synth_1to1_i586_asm(fr) ((fr)->cpu_opts.synth_1to1_i586_asm)
328 #endif
329 #ifdef OPT_MMXORSSE /* set by the MMX, SSE and 3DNowExt sections: replace the generic table/gain defaults */
330 #undef opt_make_decode_tables
331 #define opt_make_decode_tables(fr) ((fr)->cpu_opts.make_decode_tables)(fr)
332 /* #undef opt_decwin
333 #define opt_decwin(fr) (fr)->cpu_opts.decwin */
334 #undef opt_init_layer3_gainpow2
335 #define opt_init_layer3_gainpow2(fr) ((fr)->cpu_opts.init_layer3_gainpow2)
336 #undef opt_init_layer2_table
337 #define opt_init_layer2_table(fr) ((fr)->cpu_opts.init_layer2_table)
338 #endif
339 #ifdef OPT_3DNOW /* note: only OPT_3DNOW is checked here, so with OPT_3DNOWEXT alone opt_dct36 stays the plain dct36 */
340 #undef opt_dct36
341 #define opt_dct36(fr) ((fr)->cpu_opts.dct36)
342 #endif
343 #define opt_dct64(fr) ((fr)->cpu_opts.dct64)
344 #ifdef OPT_MPLAYER /* set by the SSE and 3DNowExt sections */
345 #define opt_mpl_dct64(fr) ((fr)->cpu_opts.mpl_dct64)
346 #endif
347 #endif
348
349 #endif /* MPG123_H_OPTIMIZE */
350