comparison decoders/libmpg123/id3.c @ 562:7e08477b0fc1

MP3 decoder upgrade work. Ripped out SMPEG and mpglib support, replaced it with "mpg123.c" and libmpg123. libmpg123 is a much better version of mpglib, so it should solve all the problems with MP3s not seeking, or most modern MP3s not playing at all, etc. Since you no longer have to make a tradeoff with SMPEG for features, and SMPEG is basically rotting, I removed it from the project. There is still work to be done with libmpg123... there are MMX, 3DNow, SSE, Altivec, etc. decoders which we don't have enabled at the moment, and the build system could use some work to make this compile more cleanly, etc. Still: huge win.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 30 Jan 2009 02:44:47 -0500
parents
children
comparison
equal deleted inserted replaced
561:f2985e08589c 562:7e08477b0fc1
1 /*
2 id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset)
3
4 copyright 2006-2008 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Thomas Orgis
7 */
8
9 #include "mpg123lib_intern.h"
10 #include "id3.h"
11 #include "debug.h"
12
13 /* UTF support definitions */
14
15 typedef void (*text_converter)(mpg123_string *sb, unsigned char* source, size_t len);
16
17 static void convert_latin1 (mpg123_string *sb, unsigned char* source, size_t len);
18 static void convert_utf16 (mpg123_string *sb, unsigned char* source, size_t len, int str_be);
19 static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len);
20 static void convert_utf16be (mpg123_string *sb, unsigned char* source, size_t len);
21 static void convert_utf8 (mpg123_string *sb, unsigned char* source, size_t len);
22
23 static const text_converter text_converters[4] =
24 {
25 convert_latin1,
26 convert_utf16bom,
27 convert_utf16be,
28 convert_utf8
29 };
30
31 const int encoding_widths[4] = { 1, 2, 2, 1 };
32
33 /* the code starts here... */
34
35 static void null_id3_links(mpg123_handle *fr)
36 {
37 fr->id3v2.title = NULL;
38 fr->id3v2.artist = NULL;
39 fr->id3v2.album = NULL;
40 fr->id3v2.year = NULL;
41 fr->id3v2.genre = NULL;
42 fr->id3v2.comment = NULL;
43 }
44
45 void init_id3(mpg123_handle *fr)
46 {
47 fr->id3v2.version = 0; /* nothing there */
48 null_id3_links(fr);
49 fr->id3v2.comments = 0;
50 fr->id3v2.comment_list = NULL;
51 fr->id3v2.texts = 0;
52 fr->id3v2.text = NULL;
53 fr->id3v2.extras = 0;
54 fr->id3v2.extra = NULL;
55 }
56
57 /* Managing of the text, comment and extra lists. */
58
59 /* Initialize one element. */
60 static void init_mpg123_text(mpg123_text *txt)
61 {
62 mpg123_init_string(&txt->text);
63 mpg123_init_string(&txt->description);
64 txt->id[0] = 0;
65 txt->id[1] = 0;
66 txt->id[2] = 0;
67 txt->id[3] = 0;
68 txt->lang[0] = 0;
69 txt->lang[1] = 0;
70 txt->lang[2] = 0;
71 }
72
73 /* Free memory of one element. */
74 static void free_mpg123_text(mpg123_text *txt)
75 {
76 mpg123_free_string(&txt->text);
77 mpg123_free_string(&txt->description);
78 }
79
80 /* Free memory of whole list. */
81 #define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
82 #define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
83 #define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
84 static void free_id3_text(mpg123_text **list, size_t *size)
85 {
86 size_t i;
87 for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i]));
88
89 free(*list);
90 *list = NULL;
91 *size = 0;
92 }
93
94 /* Add items to the list. */
95 #define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
96 #define add_text(mh) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
97 #define add_extra(mh) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
98 static mpg123_text *add_id3_text(mpg123_text **list, size_t *size)
99 {
100 mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1));
101 if(x == NULL) return NULL; /* bad */
102
103 *list = x;
104 *size += 1;
105 init_mpg123_text(&((*list)[*size-1]));
106
107 return &((*list)[*size-1]); /* Return pointer to the added text. */
108 }
109
110 /* Remove the last item. */
111 #define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
112 #define pop_text(mh) pop_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
113 #define pop_extra(mh) pop_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
114 static void pop_id3_text(mpg123_text **list, size_t *size)
115 {
116 mpg123_text *x;
117 if(*size < 1) return;
118
119 free_mpg123_text(&((*list)[*size-1]));
120 if(*size > 1)
121 {
122 x = safe_realloc(*list, sizeof(mpg123_text)*(*size-1));
123 if(x != NULL){ *list = x; *size -= 1; }
124 }
125 else
126 {
127 free(*list);
128 *list = NULL;
129 *size = 0;
130 }
131 }
132
133 /* OK, back to the higher level functions. */
134
135 void exit_id3(mpg123_handle *fr)
136 {
137 free_comment(fr);
138 free_extra(fr);
139 free_text(fr);
140 }
141
142 void reset_id3(mpg123_handle *fr)
143 {
144 exit_id3(fr);
145 init_id3(fr);
146 }
147
148 /* Set the id3v2.artist id3v2.title ... links to elements of the array. */
149 void id3_link(mpg123_handle *fr)
150 {
151 size_t i;
152 mpg123_id3v2 *v2 = &fr->id3v2;
153 debug("linking ID3v2");
154 null_id3_links(fr);
155 for(i=0; i<v2->texts; ++i)
156 {
157 mpg123_text *entry = &v2->text[i];
158 if (!strncmp("TIT2", entry->id, 4)) v2->title = &entry->text;
159 else if(!strncmp("TALB", entry->id, 4)) v2->album = &entry->text;
160 else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text;
161 else if(!strncmp("TYER", entry->id, 4)) v2->year = &entry->text;
162 else if(!strncmp("TCON", entry->id, 4)) v2->genre = &entry->text;
163 }
164 for(i=0; i<v2->comments; ++i)
165 {
166 mpg123_text *entry = &v2->comment_list[i];
167 if(entry->description.fill == 0 || entry->description.p[0] == 0)
168 v2->comment = &entry->text;
169 }
170 /* When no generic comment found, use the last non-generic one. */
171 if(v2->comment == NULL && v2->comments > 0)
172 v2->comment = &v2->comment_list[v2->comments-1].text;
173 }
174
175 /*
176 Store any text in UTF8 encoding; preserve the zero string separator (I don't need strlen for the total size).
177 ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values.
178 So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though).
179 */
180 void store_id3_text(mpg123_string *sb, char *source, size_t source_size, const int noquiet)
181 {
182 int encoding;
183 int bwidth;
184 if(!source_size)
185 {
186 debug("Empty id3 data!");
187 return;
188 }
189 encoding = source[0];
190 ++source;
191 --source_size;
192 debug1("encoding: %i", encoding);
193 /* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
194 UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
195 if(encoding > 3)
196 {
197 if(noquiet) warning1("Unknown text encoding %d, assuming ISO8859-1 - I will probably screw a bit up!", encoding);
198 encoding = 0;
199 }
200 bwidth = encoding_widths[encoding];
201 /* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */
202 while(source_size > bwidth && source[0] == 0)
203 {
204 --source_size;
205 ++source;
206 debug("skipped leading zero");
207 }
208 if(source_size % bwidth)
209 {
210 /* When we need two bytes for a character, it's strange to have an uneven bytestream length. */
211 if(noquiet) warning2("Weird tag size %d for encoding %d - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
212 source_size -= source_size % bwidth;
213 }
214 text_converters[encoding](sb, (unsigned char*)source, source_size);
215 if(sb->size) debug1("UTF-8 string (the first one): %s", sb->p);
216 else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!");
217 }
218
219 char *next_text(char* prev, int encoding, size_t limit)
220 {
221 char *text = prev;
222 unsigned long neednull = encoding_widths[encoding];
223 /* So I go lengths to find zero or double zero... */
224 while(text-prev < limit)
225 {
226 if(text[0] == 0)
227 {
228 if(neednull <= limit-(text-prev))
229 {
230 unsigned long i = 1;
231 for(; i<neednull; ++i) if(text[i] != 0) break;
232
233 if(i == neednull) /* found a null wide enough! */
234 {
235 text += neednull;
236 break;
237 }
238 }
239 else{ text = NULL; break; }
240 }
241 ++text;
242 }
243 if(text-prev == limit) text = NULL;
244
245 return text;
246 }
247
/* Human-readable name for an ID3v2 text encoding byte (for messages).
   Anything outside 0..3 yields "unknown!". */
static const char *enc_name(int enc)
{
	static const char *const names[] =
	{
		"Latin 1",
		"UTF-16 BOM",
		"UTF-16 BE",
		"UTF-8"
	};

	if(enc < 0 || enc > 3) return "unknown!";

	return names[enc];
}
259
260 static void process_text(mpg123_handle *fr, char *realdata, size_t realsize, char *id)
261 {
262 /* Text encoding $xx */
263 /* The text (encoded) ... */
264 mpg123_text *t = add_text(fr);
265 if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0]));
266 if(t == NULL)
267 {
268 if(NOQUIET) error("Unable to attach new text!");
269 return;
270 }
271 memcpy(t->id, id, 4);
272 store_id3_text(&t->text, realdata, realsize, NOQUIET);
273 if(VERBOSE4) fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p);
274 }
275
276 /* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one */
277 static void process_comment(mpg123_handle *fr, char *realdata, size_t realsize, int rva_level, char *id)
278 {
279 /* Text encoding $xx */
280 /* Language $xx xx xx */
281 /* Short description (encoded!) <text> $00 (00) */
282 /* Then the comment text (encoded) ... */
283 char encoding = realdata[0];
284 char *lang = realdata+1; /* I'll only use the 3 bytes! */
285 char *descr = realdata+4;
286 char *text = NULL;
287 mpg123_text *xcom = NULL;
288 if(realsize < descr-realdata)
289 {
290 if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
291 return;
292 }
293 xcom = add_comment(fr);
294 if(VERBOSE4) fprintf(stderr, "Note: Storing comment from %s encoding\n", enc_name(realdata[0]));
295 if(xcom == NULL)
296 {
297 if(NOQUIET) error("Unable to attach new comment!");
298 return;
299 }
300 memcpy(xcom->lang, lang, 3);
301 memcpy(xcom->id, id, 4);
302 /* Now I can abuse a byte from lang for the encoding. */
303 descr[-1] = encoding;
304 /* Be careful with finding the end of description, I have to honor encoding here. */
305 text = next_text(descr, encoding, realsize-(descr-realdata));
306 if(text == NULL)
307 {
308 if(NOQUIET) error("No comment text / valid description?");
309 pop_comment(fr);
310 return;
311 }
312 store_id3_text(&xcom->description, descr-1, text-descr+1, NOQUIET);
313 text[-1] = encoding;
314 store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET);
315
316 if(VERBOSE4)
317 {
318 fprintf(stderr, "Note: ID3 comment desc: %s\n", xcom->description.fill > 0 ? xcom->description.p : "");
319 fprintf(stderr, "Note: ID3 comment text: %s\n", xcom->text.fill > 0 ? xcom->text.p : "");
320 }
321 if(xcom->description.fill > 0 && xcom->text.fill > 0)
322 {
323 int rva_mode = -1; /* mix / album */
324 if( !strcasecmp(xcom->description.p, "rva")
325 || !strcasecmp(xcom->description.p, "rva_mix")
326 || !strcasecmp(xcom->description.p, "rva_track")
327 || !strcasecmp(xcom->description.p, "rva_radio"))
328 rva_mode = 0;
329 else if( !strcasecmp(xcom->description.p, "rva_album")
330 || !strcasecmp(xcom->description.p, "rva_audiophile")
331 || !strcasecmp(xcom->description.p, "rva_user"))
332 rva_mode = 1;
333 if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
334 {
335 fr->rva.gain[rva_mode] = atof(xcom->text.p);
336 if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
337 fr->rva.peak[rva_mode] = 0;
338 fr->rva.level[rva_mode] = rva_level;
339 }
340 }
341 }
342
343 void process_extra(mpg123_handle *fr, char* realdata, size_t realsize, int rva_level, char *id)
344 {
345 /* Text encoding $xx */
346 /* Description ... $00 (00) */
347 /* Text ... */
348 char encoding = realdata[0];
349 char *descr = realdata+1; /* remember, the encoding is descr[-1] */
350 char *text;
351 mpg123_text *xex;
352 if(realsize < descr-realdata)
353 {
354 if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
355 return;
356 }
357 text = next_text(descr, encoding, realsize-(descr-realdata));
358 if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0]));
359 if(text == NULL)
360 {
361 if(NOQUIET) error("No extra frame text / valid description?");
362 return;
363 }
364 xex = add_extra(fr);
365 if(xex == NULL)
366 {
367 if(NOQUIET) error("Unable to attach new extra text!");
368 return;
369 }
370 memcpy(xex->id, id, 4);
371 store_id3_text(&xex->description, descr-1, text-descr+1, NOQUIET);
372 text[-1] = encoding;
373 store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET);
374 if(xex->description.fill > 0)
375 {
376 int is_peak = 0;
377 int rva_mode = -1; /* mix / album */
378
379 if(!strncasecmp(xex->description.p, "replaygain_track_",17))
380 {
381 if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n");
382
383 rva_mode = 0;
384 if(!strcasecmp(xex->description.p, "replaygain_track_peak")) is_peak = 1;
385 else if(strcasecmp(xex->description.p, "replaygain_track_gain")) rva_mode = -1;
386 }
387 else
388 if(!strncasecmp(xex->description.p, "replaygain_album_",17))
389 {
390 if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n");
391
392 rva_mode = 1;
393 if(!strcasecmp(xex->description.p, "replaygain_album_peak")) is_peak = 1;
394 else if(strcasecmp(xex->description.p, "replaygain_album_gain")) rva_mode = -1;
395 }
396 if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
397 {
398 if(xex->text.fill > 0)
399 {
400 if(is_peak)
401 {
402 fr->rva.peak[rva_mode] = atof(xex->text.p);
403 if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]);
404 }
405 else
406 {
407 fr->rva.gain[rva_mode] = atof(xex->text.p);
408 if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
409 }
410 fr->rva.level[rva_mode] = rva_level;
411 }
412 }
413 }
414 }
415
416 /* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID
417 Note that not all frames survived to 2.4; the mapping goes to 2.3 .
418 A notable miss is the old RVA frame, which is very unspecific anyway.
419 This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */
420 int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */
421 {
422 size_t i;
423 char *old[] =
424 {
425 "COM", "TAL", "TBP", "TCM", "TCO", "TCR", "TDA", "TDY", "TEN", "TFT",
426 "TIM", "TKE", "TLA", "TLE", "TMT", "TOA", "TOF", "TOL", "TOR", "TOT",
427 "TP1", "TP2", "TP3", "TP4", "TPA", "TPB", "TRC", "TDA", "TRK", "TSI",
428 "TSS", "TT1", "TT2", "TT3", "TXT", "TXX", "TYE"
429 };
430 char *new[] =
431 {
432 "COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT",
433 "TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL",
434 "TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ",
435 "TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER"
436 };
437 for(i=0; i<sizeof(old)/sizeof(char*); ++i)
438 {
439 if(!strncmp(id, old[i], 3))
440 {
441 memcpy(id, new[i], 4);
442 if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]);
443 return 0;
444 }
445 }
446 if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]);
447 return -1;
448 }
449
450 /*
451 trying to parse ID3v2.3 and ID3v2.4 tags...
452
453 returns: 0: bad or just unparseable tag
454 1: good, (possibly) new tag info
455 <0: reader error (may need more data feed, try again)
456 */
457 int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
458 {
459 #define UNSYNC_FLAG 128
460 #define EXTHEAD_FLAG 64
461 #define EXP_FLAG 32
462 #define FOOTER_FLAG 16
463 #define UNKNOWN_FLAGS 15 /* 00001111*/
464 unsigned char buf[6];
465 unsigned long length=0;
466 unsigned char flags = 0;
467 int ret = 1;
468 int ret2;
469 unsigned char* tagdata = NULL;
470 unsigned char major = first4bytes & 0xff;
471 debug1("ID3v2: major tag version: %i", major);
472 if(major == 0xff) return 0; /* Invalid... */
473 if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
474 return ret2;
475
476 if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */
477
478 /* second new byte are some nice flags, if these are invalid skip the whole thing */
479 flags = buf[1];
480 debug1("ID3v2: flags 0x%08x", flags);
481 /* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
482 #define synchsafe_to_long(buf,res) \
483 ( \
484 (((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
485 (res = (((unsigned long) (buf)[0]) << 21) \
486 | (((unsigned long) (buf)[1]) << 14) \
487 | (((unsigned long) (buf)[2]) << 7) \
488 | ((unsigned long) (buf)[3]) \
489 ,1) \
490 )
491 /* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
492 #define bytes_to_long(buf,res) \
493 ( \
494 major == 3 ? \
495 (res = (((unsigned long) (buf)[0]) << 24) \
496 | (((unsigned long) (buf)[1]) << 16) \
497 | (((unsigned long) (buf)[2]) << 8) \
498 | ((unsigned long) (buf)[3]) \
499 ,1) : synchsafe_to_long(buf,res) \
500 )
501 /* for id3v2.2 only */
502 #define threebytes_to_long(buf,res) \
503 ( \
504 res = (((unsigned long) (buf)[0]) << 16) \
505 | (((unsigned long) (buf)[1]) << 8) \
506 | ((unsigned long) (buf)[2]) \
507 ,1 \
508 )
509
510 /* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number */
511 /* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
512 if(!synchsafe_to_long(buf+2,length))
513 {
514 if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]);
515 return 0;
516 }
517 debug1("ID3v2: tag data length %lu", length);
518 if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
519 /* skip if unknown version/scary flags, parse otherwise */
520 if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))
521 {
522 /* going to skip because there are unknown flags set */
523 if(NOQUIET) warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags);
524 if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */
525 ret = ret2;
526 }
527 else
528 {
529 fr->id3v2.version = major;
530 /* try to interpret that beast */
531 if((tagdata = (unsigned char*) malloc(length+1)) != NULL)
532 {
533 debug("ID3v2: analysing frames...");
534 if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0)
535 {
536 unsigned long tagpos = 0;
537 debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
538 /* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */
539 tagdata[length] = 0;
540 if(flags & EXTHEAD_FLAG)
541 {
542 debug("ID3v2: skipping extended header");
543 if(!bytes_to_long(tagdata, tagpos))
544 {
545 ret = 0;
546 if(NOQUIET) error4("Bad (non-synchsafe) tag offset: 0x%02x%02x%02x%02x", tagdata[0], tagdata[1], tagdata[2], tagdata[3]);
547 }
548 }
549 if(ret > 0)
550 {
551 char id[5];
552 unsigned long framesize;
553 unsigned long fflags; /* need 16 bits, actually */
554 id[4] = 0;
555 /* pos now advanced after ext head, now a frame has to follow */
556 while(tagpos < length-10) /* I want to read at least a full header */
557 {
558 int i = 0;
559 unsigned long pos = tagpos;
560 int head_part = fr->id3v2.version == 2 ? 3 : 4; /* bytes of frame title and of framesize value */
561 /* level 1,2,3 - 0 is info from lame/info tag! */
562 /* rva tags with ascending significance, then general frames */
563 #define KNOWN_FRAMES 3
564 const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2" }; /* plus all text frames... */
565 enum { unknown = -2, text = -1, comment, extra, rva2 } tt = unknown;
566 /* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
567 for(i=0; i< head_part; ++i)
568 if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
569 || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
570 {
571 debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
572 /* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */
573 goto tagparse_cleanup; /* Need to escape two loops here. */
574 }
575 if(ret > 0)
576 {
577 /* 4 or 3 bytes id */
578 strncpy(id, (char*) tagdata+pos, head_part);
579 pos += head_part;
580 tagpos += head_part;
581 /* size as 32 bits or 28 bits */
582 if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize);
583 else
584 if(!bytes_to_long(tagdata+pos, framesize))
585 {
586 /* Just assume that up to now there was some good data. */
587 if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
588 break;
589 }
590 if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
591 tagpos += head_part + framesize; /* the important advancement in whole tag */
592 if(tagpos > length)
593 {
594 if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag.");
595 break;
596 }
597 pos += head_part;
598 if(fr->id3v2.version > 2)
599 {
600 fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
601 pos += 2;
602 tagpos += 2;
603 }
604 else fflags = 0;
605 /* for sanity, after full parsing tagpos should be == pos */
606 /* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
607 /* %0abc0000 %0h00kmnp */
608 #define BAD_FFLAGS (unsigned long) 36784
609 #define PRES_TAG_FFLAG 16384
610 #define PRES_FILE_FFLAG 8192
611 #define READ_ONLY_FFLAG 4096
612 #define GROUP_FFLAG 64
613 #define COMPR_FFLAG 8
614 #define ENCR_FFLAG 4
615 #define UNSYNC_FFLAG 2
616 #define DATLEN_FFLAG 1
617 if(head_part < 4 && promote_framename(fr, id) != 0) continue;
618
619 /* shall not or want not handle these */
620 if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
621 {
622 if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame");
623 continue;
624 }
625
626 for(i = 0; i < KNOWN_FRAMES; ++i)
627 if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
628
629 if(id[0] == 'T' && tt != extra) tt = text;
630
631 if(tt != unknown)
632 {
633 int rva_mode = -1; /* mix / album */
634 unsigned long realsize = framesize;
635 unsigned char* realdata = tagdata+pos;
636 if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG))
637 {
638 unsigned long ipos = 0;
639 unsigned long opos = 0;
640 debug("Id3v2: going to de-unsync the frame data");
641 /* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
642 /* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
643 /* standard mandates that de-unsync should always be safe if flag is set */
644 realdata = (unsigned char*) malloc(framesize); /* will need <= bytes */
645 if(realdata == NULL)
646 {
647 if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync");
648 continue;
649 }
650 /* now going byte per byte through the data... */
651 realdata[0] = tagdata[pos];
652 opos = 1;
653 for(ipos = pos+1; ipos < pos+framesize; ++ipos)
654 {
655 if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
656 {
657 realdata[opos++] = tagdata[ipos];
658 }
659 }
660 realsize = opos;
661 debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
662 }
663 pos = 0; /* now at the beginning again... */
664 switch(tt)
665 {
666 case comment:
667 process_comment(fr, (char*)realdata, realsize, comment+1, id);
668 break;
669 case extra: /* perhaps foobar2000's work */
670 process_extra(fr, (char*)realdata, realsize, extra+1, id);
671 break;
672 case rva2: /* "the" RVA tag */
673 {
674 #ifdef HAVE_INTTYPES_H
675 /* starts with null-terminated identification */
676 if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
677 /* default: some individual value, mix mode */
678 rva_mode = 0;
679 if( !strncasecmp((char*)realdata, "album", 5)
680 || !strncasecmp((char*)realdata, "audiophile", 10)
681 || !strncasecmp((char*)realdata, "user", 4))
682 rva_mode = 1;
683 if(fr->rva.level[rva_mode] <= rva2+1)
684 {
685 pos += strlen((char*) realdata) + 1;
686 if(realdata[pos] == 1)
687 {
688 ++pos;
689 /* only handle master channel */
690 debug("ID3v2: it is for the master channel");
691 /* two bytes adjustment, one byte for bits representing peak - n bytes for peak */
692 /* 16 bit signed integer = dB * 512 */
693 /* we already assume short being 16 bit */
694 fr->rva.gain[rva_mode] = (float) ((((short) realdata[pos]) << 8) | ((short) realdata[pos+1])) / 512;
695 pos += 2;
696 if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
697 /* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
698 fr->rva.peak[rva_mode] = 0;
699 fr->rva.level[rva_mode] = rva2+1;
700 }
701 }
702 #else
703 if(NOQUIET) warning("ID3v2: Cannot parse RVA2 value because I don't have a guaranteed 16 bit signed integer type");
704 #endif
705 }
706 break;
707 /* non-rva metainfo, simply store... */
708 case text:
709 process_text(fr, (char*)realdata, realsize, id);
710 break;
711 default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt);
712 }
713 if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata);
714 }
715 #undef BAD_FFLAGS
716 #undef PRES_TAG_FFLAG
717 #undef PRES_FILE_FFLAG
718 #undef READ_ONLY_FFLAG
719 #undef GROUP_FFLAG
720 #undef COMPR_FFLAG
721 #undef ENCR_FFLAG
722 #undef UNSYNC_FFLAG
723 #undef DATLEN_FFLAG
724 }
725 else break;
726 #undef KNOWN_FRAMES
727 }
728 }
729 }
730 else
731 {
732 if(NOQUIET) error("ID3v2: Duh, not able to read ID3v2 tag data.");
733 ret = ret2;
734 }
735 tagparse_cleanup:
736 free(tagdata);
737 }
738 else
739 {
740 if(NOQUIET) error1("ID3v2: Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length);
741 if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */
742 else ret = 0;
743 }
744 }
745 /* skip footer if present */
746 if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2;
747
748 return ret;
749 #undef UNSYNC_FLAG
750 #undef EXTHEAD_FLAG
751 #undef EXP_FLAG
752 #undef FOOTER_FLAG
753 #undef UNKOWN_FLAGS
754 }
755
756 static void convert_latin1(mpg123_string *sb, unsigned char* s, size_t l)
757 {
758 size_t length = l;
759 size_t i;
760 unsigned char *p;
761 /* determine real length, a latin1 character can at most take 2 in UTF8 */
762 for(i=0; i<l; ++i)
763 if(s[i] >= 0x80) ++length;
764
765 debug1("UTF-8 length: %lu", (unsigned long)length);
766 /* one extra zero byte for paranoia */
767 if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
768
769 p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
770 for(i=0; i<l; ++i)
771 if(s[i] < 0x80){ *p = s[i]; ++p; }
772 else /* two-byte encoding */
773 {
774 *p = 0xc0 | (s[i]>>6);
775 *(p+1) = 0x80 | (s[i] & 0x3f);
776 p+=2;
777 }
778
779 sb->p[length] = 0;
780 sb->fill = length+1;
781 }
782
783 #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
784 /* Remember: There's a limit at 0x1ffff. */
785 #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
786 static void convert_utf16(mpg123_string *sb, unsigned char* s, size_t l, int str_be)
787 {
788 size_t i;
789 unsigned char *p;
790 size_t length = 0; /* the resulting UTF-8 length */
791 /* Determine real length... extreme case can be more than utf-16 length. */
792 size_t high = 0;
793 size_t low = 1;
794 debug1("convert_utf16 with length %lu", (unsigned long)l);
795 if(!str_be) /* little-endian */
796 {
797 high = 1; /* The second byte is the high byte. */
798 low = 0; /* The first byte is the low byte. */
799 }
800 /* first: get length, check for errors -- stop at first one */
801 for(i=0; i < l-1; i+=2)
802 {
803 unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
804 if((point & 0xd800) == 0xd800) /* lead surrogate */
805 {
806 unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0;
807 if((second & 0xdc00) == 0xdc00) /* good... */
808 {
809 point = FULLPOINT(point,second);
810 length += UTF8LEN(point); /* possibly 4 bytes */
811 i+=2; /* We overstepped one word. */
812 }
813 else /* if no valid pair, break here */
814 {
815 debug1("Invalid UTF16 surrogate pair at %li.", (unsigned long)i);
816 l = i; /* Forget the half pair, END! */
817 break;
818 }
819 }
820 else length += UTF8LEN(point); /* 1,2 or 3 bytes */
821 }
822
823 if(l < 1){ mpg123_set_string(sb, ""); return; }
824
825 if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
826
827 /* Now really convert, skip checks as these have been done just before. */
828 p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
829 for(i=0; i < l-1; i+=2)
830 {
831 unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
832 if((codepoint & 0xd800) == 0xd800) /* lead surrogate */
833 {
834 unsigned short second = (s[i+2+high]<<8) + s[i+2+low];
835 codepoint = FULLPOINT(codepoint,second);
836 i+=2; /* We overstepped one word. */
837 }
838 if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
839 else if(codepoint < 0x800)
840 {
841 *p++ = 0xc0 | (codepoint>>6);
842 *p++ = 0x80 | (codepoint & 0x3f);
843 }
844 else if(codepoint < 0x10000)
845 {
846 *p++ = 0xe0 | (codepoint>>12);
847 *p++ = 0x80 | ((codepoint>>6) & 0x3f);
848 *p++ = 0x80 | (codepoint & 0x3f);
849 }
850 else if (codepoint < 0x200000)
851 {
852 *p++ = 0xf0 | codepoint>>18;
853 *p++ = 0x80 | ((codepoint>>12) & 0x3f);
854 *p++ = 0x80 | ((codepoint>>6) & 0x3f);
855 *p++ = 0x80 | (codepoint & 0x3f);
856 } /* ignore bigger ones (that are not possible here anyway) */
857 }
858 sb->p[sb->size-1] = 0; /* paranoia... */
859 sb->fill = sb->size;
860 }
861 #undef UTF8LEN
862 #undef FULLPOINT
863
864 static void convert_utf16be(mpg123_string *sb, unsigned char* source, size_t len)
865 {
866 convert_utf16(sb, source, len, 1);
867 }
868
869 static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len)
870 {
871 if(len < 2){ mpg123_free_string(sb); return; }
872
873 if(source[0] == 0xff && source[1] == 0xfe) /* Little-endian */
874 convert_utf16(sb, source + 2, len - 2, 0);
875 else /* Big-endian */
876 convert_utf16(sb, source + 2, len - 2, 1);
877 }
878
879 static void convert_utf8(mpg123_string *sb, unsigned char* source, size_t len)
880 {
881 if(mpg123_resize_string(sb, len+1))
882 {
883 memcpy(sb->p, source, len);
884 sb->p[len] = 0;
885 sb->fill = len+1;
886 }
887 else mpg123_free_string(sb);
888 }