562
|
1 /*
|
|
2 id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset)
|
|
3
|
|
4 copyright 2006-2008 by the mpg123 project - free software under the terms of the LGPL 2.1
|
|
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
|
|
6 initially written by Thomas Orgis
|
|
7 */
|
|
8
|
|
9 #include "mpg123lib_intern.h"
|
|
10 #include "id3.h"
|
|
11 #include "debug.h"
|
|
12
|
|
13 /* UTF support definitions */
|
|
14
|
|
15 typedef void (*text_converter)(mpg123_string *sb, unsigned char* source, size_t len);
|
|
16
|
|
17 static void convert_latin1 (mpg123_string *sb, unsigned char* source, size_t len);
|
|
18 static void convert_utf16 (mpg123_string *sb, unsigned char* source, size_t len, int str_be);
|
|
19 static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len);
|
|
20 static void convert_utf16be (mpg123_string *sb, unsigned char* source, size_t len);
|
|
21 static void convert_utf8 (mpg123_string *sb, unsigned char* source, size_t len);
|
|
22
|
|
23 static const text_converter text_converters[4] =
|
|
24 {
|
|
25 convert_latin1,
|
|
26 convert_utf16bom,
|
|
27 convert_utf16be,
|
|
28 convert_utf8
|
|
29 };
|
|
30
|
|
31 const int encoding_widths[4] = { 1, 2, 2, 1 };
|
|
32
|
|
33 /* the code starts here... */
|
|
34
|
|
35 static void null_id3_links(mpg123_handle *fr)
|
|
36 {
|
|
37 fr->id3v2.title = NULL;
|
|
38 fr->id3v2.artist = NULL;
|
|
39 fr->id3v2.album = NULL;
|
|
40 fr->id3v2.year = NULL;
|
|
41 fr->id3v2.genre = NULL;
|
|
42 fr->id3v2.comment = NULL;
|
|
43 }
|
|
44
|
|
45 void init_id3(mpg123_handle *fr)
|
|
46 {
|
|
47 fr->id3v2.version = 0; /* nothing there */
|
|
48 null_id3_links(fr);
|
|
49 fr->id3v2.comments = 0;
|
|
50 fr->id3v2.comment_list = NULL;
|
|
51 fr->id3v2.texts = 0;
|
|
52 fr->id3v2.text = NULL;
|
|
53 fr->id3v2.extras = 0;
|
|
54 fr->id3v2.extra = NULL;
|
|
55 }
|
|
56
|
|
57 /* Managing of the text, comment and extra lists. */
|
|
58
|
|
59 /* Initialize one element. */
|
|
60 static void init_mpg123_text(mpg123_text *txt)
|
|
61 {
|
|
62 mpg123_init_string(&txt->text);
|
|
63 mpg123_init_string(&txt->description);
|
|
64 txt->id[0] = 0;
|
|
65 txt->id[1] = 0;
|
|
66 txt->id[2] = 0;
|
|
67 txt->id[3] = 0;
|
|
68 txt->lang[0] = 0;
|
|
69 txt->lang[1] = 0;
|
|
70 txt->lang[2] = 0;
|
|
71 }
|
|
72
|
|
73 /* Free memory of one element. */
|
|
74 static void free_mpg123_text(mpg123_text *txt)
|
|
75 {
|
|
76 mpg123_free_string(&txt->text);
|
|
77 mpg123_free_string(&txt->description);
|
|
78 }
|
|
79
|
|
80 /* Free memory of whole list. */
|
|
81 #define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
|
|
82 #define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
|
|
83 #define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
|
|
84 static void free_id3_text(mpg123_text **list, size_t *size)
|
|
85 {
|
|
86 size_t i;
|
|
87 for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i]));
|
|
88
|
|
89 free(*list);
|
|
90 *list = NULL;
|
|
91 *size = 0;
|
|
92 }
|
|
93
|
|
94 /* Add items to the list. */
|
|
95 #define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
|
|
96 #define add_text(mh) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
|
|
97 #define add_extra(mh) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
|
|
98 static mpg123_text *add_id3_text(mpg123_text **list, size_t *size)
|
|
99 {
|
|
100 mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1));
|
|
101 if(x == NULL) return NULL; /* bad */
|
|
102
|
|
103 *list = x;
|
|
104 *size += 1;
|
|
105 init_mpg123_text(&((*list)[*size-1]));
|
|
106
|
|
107 return &((*list)[*size-1]); /* Return pointer to the added text. */
|
|
108 }
|
|
109
|
|
110 /* Remove the last item. */
|
|
111 #define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
|
|
112 #define pop_text(mh) pop_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
|
|
113 #define pop_extra(mh) pop_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
|
|
114 static void pop_id3_text(mpg123_text **list, size_t *size)
|
|
115 {
|
|
116 mpg123_text *x;
|
|
117 if(*size < 1) return;
|
|
118
|
|
119 free_mpg123_text(&((*list)[*size-1]));
|
|
120 if(*size > 1)
|
|
121 {
|
|
122 x = safe_realloc(*list, sizeof(mpg123_text)*(*size-1));
|
|
123 if(x != NULL){ *list = x; *size -= 1; }
|
|
124 }
|
|
125 else
|
|
126 {
|
|
127 free(*list);
|
|
128 *list = NULL;
|
|
129 *size = 0;
|
|
130 }
|
|
131 }
|
|
132
|
|
133 /* OK, back to the higher level functions. */
|
|
134
|
|
135 void exit_id3(mpg123_handle *fr)
|
|
136 {
|
|
137 free_comment(fr);
|
|
138 free_extra(fr);
|
|
139 free_text(fr);
|
|
140 }
|
|
141
|
|
142 void reset_id3(mpg123_handle *fr)
|
|
143 {
|
|
144 exit_id3(fr);
|
|
145 init_id3(fr);
|
|
146 }
|
|
147
|
|
148 /* Set the id3v2.artist id3v2.title ... links to elements of the array. */
|
|
149 void id3_link(mpg123_handle *fr)
|
|
150 {
|
|
151 size_t i;
|
|
152 mpg123_id3v2 *v2 = &fr->id3v2;
|
|
153 debug("linking ID3v2");
|
|
154 null_id3_links(fr);
|
|
155 for(i=0; i<v2->texts; ++i)
|
|
156 {
|
|
157 mpg123_text *entry = &v2->text[i];
|
|
158 if (!strncmp("TIT2", entry->id, 4)) v2->title = &entry->text;
|
|
159 else if(!strncmp("TALB", entry->id, 4)) v2->album = &entry->text;
|
|
160 else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text;
|
|
161 else if(!strncmp("TYER", entry->id, 4)) v2->year = &entry->text;
|
|
162 else if(!strncmp("TCON", entry->id, 4)) v2->genre = &entry->text;
|
|
163 }
|
|
164 for(i=0; i<v2->comments; ++i)
|
|
165 {
|
|
166 mpg123_text *entry = &v2->comment_list[i];
|
|
167 if(entry->description.fill == 0 || entry->description.p[0] == 0)
|
|
168 v2->comment = &entry->text;
|
|
169 }
|
|
170 /* When no generic comment found, use the last non-generic one. */
|
|
171 if(v2->comment == NULL && v2->comments > 0)
|
|
172 v2->comment = &v2->comment_list[v2->comments-1].text;
|
|
173 }
|
|
174
|
|
175 /*
|
|
176 Store any text in UTF8 encoding; preserve the zero string separator (I don't need strlen for the total size).
|
|
177 ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values.
|
|
178 So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though).
|
|
179 */
|
|
180 void store_id3_text(mpg123_string *sb, char *source, size_t source_size, const int noquiet)
|
|
181 {
|
|
182 int encoding;
|
|
183 int bwidth;
|
|
184 if(!source_size)
|
|
185 {
|
|
186 debug("Empty id3 data!");
|
|
187 return;
|
|
188 }
|
|
189 encoding = source[0];
|
|
190 ++source;
|
|
191 --source_size;
|
|
192 debug1("encoding: %i", encoding);
|
|
193 /* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
|
|
194 UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
|
|
195 if(encoding > 3)
|
|
196 {
|
|
197 if(noquiet) warning1("Unknown text encoding %d, assuming ISO8859-1 - I will probably screw a bit up!", encoding);
|
|
198 encoding = 0;
|
|
199 }
|
|
200 bwidth = encoding_widths[encoding];
|
|
201 /* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */
|
|
202 while(source_size > bwidth && source[0] == 0)
|
|
203 {
|
|
204 --source_size;
|
|
205 ++source;
|
|
206 debug("skipped leading zero");
|
|
207 }
|
|
208 if(source_size % bwidth)
|
|
209 {
|
|
210 /* When we need two bytes for a character, it's strange to have an uneven bytestream length. */
|
|
211 if(noquiet) warning2("Weird tag size %d for encoding %d - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
|
|
212 source_size -= source_size % bwidth;
|
|
213 }
|
|
214 text_converters[encoding](sb, (unsigned char*)source, source_size);
|
|
215 if(sb->size) debug1("UTF-8 string (the first one): %s", sb->p);
|
|
216 else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!");
|
|
217 }
|
|
218
|
|
219 char *next_text(char* prev, int encoding, size_t limit)
|
|
220 {
|
|
221 char *text = prev;
|
|
222 unsigned long neednull = encoding_widths[encoding];
|
|
223 /* So I go lengths to find zero or double zero... */
|
|
224 while(text-prev < limit)
|
|
225 {
|
|
226 if(text[0] == 0)
|
|
227 {
|
|
228 if(neednull <= limit-(text-prev))
|
|
229 {
|
|
230 unsigned long i = 1;
|
|
231 for(; i<neednull; ++i) if(text[i] != 0) break;
|
|
232
|
|
233 if(i == neednull) /* found a null wide enough! */
|
|
234 {
|
|
235 text += neednull;
|
|
236 break;
|
|
237 }
|
|
238 }
|
|
239 else{ text = NULL; break; }
|
|
240 }
|
|
241 ++text;
|
|
242 }
|
|
243 if(text-prev == limit) text = NULL;
|
|
244
|
|
245 return text;
|
|
246 }
|
|
247
|
|
/* Human-readable name for an ID3v2 text encoding byte; "unknown!" for anything outside 0..3. */
static const char *enc_name(int enc)
{
	static const char *const names[4] =
	{
		"Latin 1",
		"UTF-16 BOM",
		"UTF-16 BE",
		"UTF-8"
	};

	if(enc < 0 || enc > 3) return "unknown!";

	return names[enc];
}
|
|
259
|
|
260 static void process_text(mpg123_handle *fr, char *realdata, size_t realsize, char *id)
|
|
261 {
|
|
262 /* Text encoding $xx */
|
|
263 /* The text (encoded) ... */
|
|
264 mpg123_text *t = add_text(fr);
|
|
265 if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0]));
|
|
266 if(t == NULL)
|
|
267 {
|
|
268 if(NOQUIET) error("Unable to attach new text!");
|
|
269 return;
|
|
270 }
|
|
271 memcpy(t->id, id, 4);
|
|
272 store_id3_text(&t->text, realdata, realsize, NOQUIET);
|
|
273 if(VERBOSE4) fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p);
|
|
274 }
|
|
275
|
|
276 /* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one */
|
|
277 static void process_comment(mpg123_handle *fr, char *realdata, size_t realsize, int rva_level, char *id)
|
|
278 {
|
|
279 /* Text encoding $xx */
|
|
280 /* Language $xx xx xx */
|
|
281 /* Short description (encoded!) <text> $00 (00) */
|
|
282 /* Then the comment text (encoded) ... */
|
|
283 char encoding = realdata[0];
|
|
284 char *lang = realdata+1; /* I'll only use the 3 bytes! */
|
|
285 char *descr = realdata+4;
|
|
286 char *text = NULL;
|
|
287 mpg123_text *xcom = NULL;
|
|
288 if(realsize < descr-realdata)
|
|
289 {
|
|
290 if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
|
|
291 return;
|
|
292 }
|
|
293 xcom = add_comment(fr);
|
|
294 if(VERBOSE4) fprintf(stderr, "Note: Storing comment from %s encoding\n", enc_name(realdata[0]));
|
|
295 if(xcom == NULL)
|
|
296 {
|
|
297 if(NOQUIET) error("Unable to attach new comment!");
|
|
298 return;
|
|
299 }
|
|
300 memcpy(xcom->lang, lang, 3);
|
|
301 memcpy(xcom->id, id, 4);
|
|
302 /* Now I can abuse a byte from lang for the encoding. */
|
|
303 descr[-1] = encoding;
|
|
304 /* Be careful with finding the end of description, I have to honor encoding here. */
|
|
305 text = next_text(descr, encoding, realsize-(descr-realdata));
|
|
306 if(text == NULL)
|
|
307 {
|
|
308 if(NOQUIET) error("No comment text / valid description?");
|
|
309 pop_comment(fr);
|
|
310 return;
|
|
311 }
|
|
312 store_id3_text(&xcom->description, descr-1, text-descr+1, NOQUIET);
|
|
313 text[-1] = encoding;
|
|
314 store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET);
|
|
315
|
|
316 if(VERBOSE4)
|
|
317 {
|
|
318 fprintf(stderr, "Note: ID3 comment desc: %s\n", xcom->description.fill > 0 ? xcom->description.p : "");
|
|
319 fprintf(stderr, "Note: ID3 comment text: %s\n", xcom->text.fill > 0 ? xcom->text.p : "");
|
|
320 }
|
|
321 if(xcom->description.fill > 0 && xcom->text.fill > 0)
|
|
322 {
|
|
323 int rva_mode = -1; /* mix / album */
|
|
324 if( !strcasecmp(xcom->description.p, "rva")
|
|
325 || !strcasecmp(xcom->description.p, "rva_mix")
|
|
326 || !strcasecmp(xcom->description.p, "rva_track")
|
|
327 || !strcasecmp(xcom->description.p, "rva_radio"))
|
|
328 rva_mode = 0;
|
|
329 else if( !strcasecmp(xcom->description.p, "rva_album")
|
|
330 || !strcasecmp(xcom->description.p, "rva_audiophile")
|
|
331 || !strcasecmp(xcom->description.p, "rva_user"))
|
|
332 rva_mode = 1;
|
|
333 if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
|
|
334 {
|
|
335 fr->rva.gain[rva_mode] = atof(xcom->text.p);
|
|
336 if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
|
|
337 fr->rva.peak[rva_mode] = 0;
|
|
338 fr->rva.level[rva_mode] = rva_level;
|
|
339 }
|
|
340 }
|
|
341 }
|
|
342
|
|
343 void process_extra(mpg123_handle *fr, char* realdata, size_t realsize, int rva_level, char *id)
|
|
344 {
|
|
345 /* Text encoding $xx */
|
|
346 /* Description ... $00 (00) */
|
|
347 /* Text ... */
|
|
348 char encoding = realdata[0];
|
|
349 char *descr = realdata+1; /* remember, the encoding is descr[-1] */
|
|
350 char *text;
|
|
351 mpg123_text *xex;
|
|
352 if(realsize < descr-realdata)
|
|
353 {
|
|
354 if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
|
|
355 return;
|
|
356 }
|
|
357 text = next_text(descr, encoding, realsize-(descr-realdata));
|
|
358 if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0]));
|
|
359 if(text == NULL)
|
|
360 {
|
|
361 if(NOQUIET) error("No extra frame text / valid description?");
|
|
362 return;
|
|
363 }
|
|
364 xex = add_extra(fr);
|
|
365 if(xex == NULL)
|
|
366 {
|
|
367 if(NOQUIET) error("Unable to attach new extra text!");
|
|
368 return;
|
|
369 }
|
|
370 memcpy(xex->id, id, 4);
|
|
371 store_id3_text(&xex->description, descr-1, text-descr+1, NOQUIET);
|
|
372 text[-1] = encoding;
|
|
373 store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET);
|
|
374 if(xex->description.fill > 0)
|
|
375 {
|
|
376 int is_peak = 0;
|
|
377 int rva_mode = -1; /* mix / album */
|
|
378
|
|
379 if(!strncasecmp(xex->description.p, "replaygain_track_",17))
|
|
380 {
|
|
381 if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n");
|
|
382
|
|
383 rva_mode = 0;
|
|
384 if(!strcasecmp(xex->description.p, "replaygain_track_peak")) is_peak = 1;
|
|
385 else if(strcasecmp(xex->description.p, "replaygain_track_gain")) rva_mode = -1;
|
|
386 }
|
|
387 else
|
|
388 if(!strncasecmp(xex->description.p, "replaygain_album_",17))
|
|
389 {
|
|
390 if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n");
|
|
391
|
|
392 rva_mode = 1;
|
|
393 if(!strcasecmp(xex->description.p, "replaygain_album_peak")) is_peak = 1;
|
|
394 else if(strcasecmp(xex->description.p, "replaygain_album_gain")) rva_mode = -1;
|
|
395 }
|
|
396 if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
|
|
397 {
|
|
398 if(xex->text.fill > 0)
|
|
399 {
|
|
400 if(is_peak)
|
|
401 {
|
|
402 fr->rva.peak[rva_mode] = atof(xex->text.p);
|
|
403 if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]);
|
|
404 }
|
|
405 else
|
|
406 {
|
|
407 fr->rva.gain[rva_mode] = atof(xex->text.p);
|
|
408 if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
|
|
409 }
|
|
410 fr->rva.level[rva_mode] = rva_level;
|
|
411 }
|
|
412 }
|
|
413 }
|
|
414 }
|
|
415
|
|
416 /* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID
|
|
417 Note that not all frames survived to 2.4; the mapping goes to 2.3 .
|
|
418 A notable miss is the old RVA frame, which is very unspecific anyway.
|
|
419 This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */
|
|
420 int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */
|
|
421 {
|
|
422 size_t i;
|
|
423 char *old[] =
|
|
424 {
|
|
425 "COM", "TAL", "TBP", "TCM", "TCO", "TCR", "TDA", "TDY", "TEN", "TFT",
|
|
426 "TIM", "TKE", "TLA", "TLE", "TMT", "TOA", "TOF", "TOL", "TOR", "TOT",
|
|
427 "TP1", "TP2", "TP3", "TP4", "TPA", "TPB", "TRC", "TDA", "TRK", "TSI",
|
|
428 "TSS", "TT1", "TT2", "TT3", "TXT", "TXX", "TYE"
|
|
429 };
|
|
430 char *new[] =
|
|
431 {
|
|
432 "COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT",
|
|
433 "TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL",
|
|
434 "TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ",
|
|
435 "TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER"
|
|
436 };
|
|
437 for(i=0; i<sizeof(old)/sizeof(char*); ++i)
|
|
438 {
|
|
439 if(!strncmp(id, old[i], 3))
|
|
440 {
|
|
441 memcpy(id, new[i], 4);
|
|
442 if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]);
|
|
443 return 0;
|
|
444 }
|
|
445 }
|
|
446 if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]);
|
|
447 return -1;
|
|
448 }
|
|
449
|
|
450 /*
|
|
451 trying to parse ID3v2.3 and ID3v2.4 tags...
|
|
452
|
|
453 returns: 0: bad or just unparseable tag
|
|
454 1: good, (possibly) new tag info
|
|
455 <0: reader error (may need more data feed, try again)
|
|
456 */
|
|
457 int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
|
|
458 {
|
|
459 #define UNSYNC_FLAG 128
|
|
460 #define EXTHEAD_FLAG 64
|
|
461 #define EXP_FLAG 32
|
|
462 #define FOOTER_FLAG 16
|
|
463 #define UNKNOWN_FLAGS 15 /* 00001111*/
|
|
464 unsigned char buf[6];
|
|
465 unsigned long length=0;
|
|
466 unsigned char flags = 0;
|
|
467 int ret = 1;
|
|
468 int ret2;
|
|
469 unsigned char* tagdata = NULL;
|
|
470 unsigned char major = first4bytes & 0xff;
|
|
471 debug1("ID3v2: major tag version: %i", major);
|
|
472 if(major == 0xff) return 0; /* Invalid... */
|
|
473 if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
|
|
474 return ret2;
|
|
475
|
|
476 if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */
|
|
477
|
|
478 /* second new byte are some nice flags, if these are invalid skip the whole thing */
|
|
479 flags = buf[1];
|
|
480 debug1("ID3v2: flags 0x%08x", flags);
|
|
481 /* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
|
|
482 #define synchsafe_to_long(buf,res) \
|
|
483 ( \
|
|
484 (((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
|
|
485 (res = (((unsigned long) (buf)[0]) << 21) \
|
|
486 | (((unsigned long) (buf)[1]) << 14) \
|
|
487 | (((unsigned long) (buf)[2]) << 7) \
|
|
488 | ((unsigned long) (buf)[3]) \
|
|
489 ,1) \
|
|
490 )
|
|
491 /* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
|
|
492 #define bytes_to_long(buf,res) \
|
|
493 ( \
|
|
494 major == 3 ? \
|
|
495 (res = (((unsigned long) (buf)[0]) << 24) \
|
|
496 | (((unsigned long) (buf)[1]) << 16) \
|
|
497 | (((unsigned long) (buf)[2]) << 8) \
|
|
498 | ((unsigned long) (buf)[3]) \
|
|
499 ,1) : synchsafe_to_long(buf,res) \
|
|
500 )
|
|
501 /* for id3v2.2 only */
|
|
502 #define threebytes_to_long(buf,res) \
|
|
503 ( \
|
|
504 res = (((unsigned long) (buf)[0]) << 16) \
|
|
505 | (((unsigned long) (buf)[1]) << 8) \
|
|
506 | ((unsigned long) (buf)[2]) \
|
|
507 ,1 \
|
|
508 )
|
|
509
|
|
510 /* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number */
|
|
511 /* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
|
|
512 if(!synchsafe_to_long(buf+2,length))
|
|
513 {
|
|
514 if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]);
|
|
515 return 0;
|
|
516 }
|
|
517 debug1("ID3v2: tag data length %lu", length);
|
|
518 if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
|
|
519 /* skip if unknown version/scary flags, parse otherwise */
|
|
520 if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))
|
|
521 {
|
|
522 /* going to skip because there are unknown flags set */
|
|
523 if(NOQUIET) warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags);
|
|
524 if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */
|
|
525 ret = ret2;
|
|
526 }
|
|
527 else
|
|
528 {
|
|
529 fr->id3v2.version = major;
|
|
530 /* try to interpret that beast */
|
|
531 if((tagdata = (unsigned char*) malloc(length+1)) != NULL)
|
|
532 {
|
|
533 debug("ID3v2: analysing frames...");
|
|
534 if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0)
|
|
535 {
|
|
536 unsigned long tagpos = 0;
|
|
537 debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
|
|
538 /* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */
|
|
539 tagdata[length] = 0;
|
|
540 if(flags & EXTHEAD_FLAG)
|
|
541 {
|
|
542 debug("ID3v2: skipping extended header");
|
|
543 if(!bytes_to_long(tagdata, tagpos))
|
|
544 {
|
|
545 ret = 0;
|
|
546 if(NOQUIET) error4("Bad (non-synchsafe) tag offset: 0x%02x%02x%02x%02x", tagdata[0], tagdata[1], tagdata[2], tagdata[3]);
|
|
547 }
|
|
548 }
|
|
549 if(ret > 0)
|
|
550 {
|
|
551 char id[5];
|
|
552 unsigned long framesize;
|
|
553 unsigned long fflags; /* need 16 bits, actually */
|
|
554 id[4] = 0;
|
|
555 /* pos now advanced after ext head, now a frame has to follow */
|
|
556 while(tagpos < length-10) /* I want to read at least a full header */
|
|
557 {
|
|
558 int i = 0;
|
|
559 unsigned long pos = tagpos;
|
|
560 int head_part = fr->id3v2.version == 2 ? 3 : 4; /* bytes of frame title and of framesize value */
|
|
561 /* level 1,2,3 - 0 is info from lame/info tag! */
|
|
562 /* rva tags with ascending significance, then general frames */
|
|
563 #define KNOWN_FRAMES 3
|
|
564 const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2" }; /* plus all text frames... */
|
|
565 enum { unknown = -2, text = -1, comment, extra, rva2 } tt = unknown;
|
|
566 /* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
|
|
567 for(i=0; i< head_part; ++i)
|
|
568 if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
|
|
569 || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
|
|
570 {
|
|
571 debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
|
|
572 /* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */
|
|
573 goto tagparse_cleanup; /* Need to escape two loops here. */
|
|
574 }
|
|
575 if(ret > 0)
|
|
576 {
|
|
577 /* 4 or 3 bytes id */
|
|
578 strncpy(id, (char*) tagdata+pos, head_part);
|
|
579 pos += head_part;
|
|
580 tagpos += head_part;
|
|
581 /* size as 32 bits or 28 bits */
|
|
582 if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize);
|
|
583 else
|
|
584 if(!bytes_to_long(tagdata+pos, framesize))
|
|
585 {
|
|
586 /* Just assume that up to now there was some good data. */
|
|
587 if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
|
|
588 break;
|
|
589 }
|
|
590 if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
|
|
591 tagpos += head_part + framesize; /* the important advancement in whole tag */
|
|
592 if(tagpos > length)
|
|
593 {
|
|
594 if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag.");
|
|
595 break;
|
|
596 }
|
|
597 pos += head_part;
|
|
598 if(fr->id3v2.version > 2)
|
|
599 {
|
|
600 fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
|
|
601 pos += 2;
|
|
602 tagpos += 2;
|
|
603 }
|
|
604 else fflags = 0;
|
|
605 /* for sanity, after full parsing tagpos should be == pos */
|
|
606 /* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
|
|
607 /* %0abc0000 %0h00kmnp */
|
|
608 #define BAD_FFLAGS (unsigned long) 36784
|
|
609 #define PRES_TAG_FFLAG 16384
|
|
610 #define PRES_FILE_FFLAG 8192
|
|
611 #define READ_ONLY_FFLAG 4096
|
|
612 #define GROUP_FFLAG 64
|
|
613 #define COMPR_FFLAG 8
|
|
614 #define ENCR_FFLAG 4
|
|
615 #define UNSYNC_FFLAG 2
|
|
616 #define DATLEN_FFLAG 1
|
|
617 if(head_part < 4 && promote_framename(fr, id) != 0) continue;
|
|
618
|
|
619 /* shall not or want not handle these */
|
|
620 if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
|
|
621 {
|
|
622 if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame");
|
|
623 continue;
|
|
624 }
|
|
625
|
|
626 for(i = 0; i < KNOWN_FRAMES; ++i)
|
|
627 if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
|
|
628
|
|
629 if(id[0] == 'T' && tt != extra) tt = text;
|
|
630
|
|
631 if(tt != unknown)
|
|
632 {
|
|
633 int rva_mode = -1; /* mix / album */
|
|
634 unsigned long realsize = framesize;
|
|
635 unsigned char* realdata = tagdata+pos;
|
|
636 if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG))
|
|
637 {
|
|
638 unsigned long ipos = 0;
|
|
639 unsigned long opos = 0;
|
|
640 debug("Id3v2: going to de-unsync the frame data");
|
|
641 /* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
|
|
642 /* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
|
|
643 /* standard mandates that de-unsync should always be safe if flag is set */
|
|
644 realdata = (unsigned char*) malloc(framesize); /* will need <= bytes */
|
|
645 if(realdata == NULL)
|
|
646 {
|
|
647 if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync");
|
|
648 continue;
|
|
649 }
|
|
650 /* now going byte per byte through the data... */
|
|
651 realdata[0] = tagdata[pos];
|
|
652 opos = 1;
|
|
653 for(ipos = pos+1; ipos < pos+framesize; ++ipos)
|
|
654 {
|
|
655 if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
|
|
656 {
|
|
657 realdata[opos++] = tagdata[ipos];
|
|
658 }
|
|
659 }
|
|
660 realsize = opos;
|
|
661 debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
|
|
662 }
|
|
663 pos = 0; /* now at the beginning again... */
|
|
664 switch(tt)
|
|
665 {
|
|
666 case comment:
|
|
667 process_comment(fr, (char*)realdata, realsize, comment+1, id);
|
|
668 break;
|
|
669 case extra: /* perhaps foobar2000's work */
|
|
670 process_extra(fr, (char*)realdata, realsize, extra+1, id);
|
|
671 break;
|
|
672 case rva2: /* "the" RVA tag */
|
|
673 {
|
|
674 #ifdef HAVE_INTTYPES_H
|
|
675 /* starts with null-terminated identification */
|
|
676 if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
|
|
677 /* default: some individual value, mix mode */
|
|
678 rva_mode = 0;
|
|
679 if( !strncasecmp((char*)realdata, "album", 5)
|
|
680 || !strncasecmp((char*)realdata, "audiophile", 10)
|
|
681 || !strncasecmp((char*)realdata, "user", 4))
|
|
682 rva_mode = 1;
|
|
683 if(fr->rva.level[rva_mode] <= rva2+1)
|
|
684 {
|
|
685 pos += strlen((char*) realdata) + 1;
|
|
686 if(realdata[pos] == 1)
|
|
687 {
|
|
688 ++pos;
|
|
689 /* only handle master channel */
|
|
690 debug("ID3v2: it is for the master channel");
|
|
691 /* two bytes adjustment, one byte for bits representing peak - n bytes for peak */
|
|
692 /* 16 bit signed integer = dB * 512 */
|
|
693 /* we already assume short being 16 bit */
|
|
694 fr->rva.gain[rva_mode] = (float) ((((short) realdata[pos]) << 8) | ((short) realdata[pos+1])) / 512;
|
|
695 pos += 2;
|
|
696 if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
|
|
697 /* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
|
|
698 fr->rva.peak[rva_mode] = 0;
|
|
699 fr->rva.level[rva_mode] = rva2+1;
|
|
700 }
|
|
701 }
|
|
702 #else
|
|
703 if(NOQUIET) warning("ID3v2: Cannot parse RVA2 value because I don't have a guaranteed 16 bit signed integer type");
|
|
704 #endif
|
|
705 }
|
|
706 break;
|
|
707 /* non-rva metainfo, simply store... */
|
|
708 case text:
|
|
709 process_text(fr, (char*)realdata, realsize, id);
|
|
710 break;
|
|
711 default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt);
|
|
712 }
|
|
713 if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata);
|
|
714 }
|
|
715 #undef BAD_FFLAGS
|
|
716 #undef PRES_TAG_FFLAG
|
|
717 #undef PRES_FILE_FFLAG
|
|
718 #undef READ_ONLY_FFLAG
|
|
719 #undef GROUP_FFLAG
|
|
720 #undef COMPR_FFLAG
|
|
721 #undef ENCR_FFLAG
|
|
722 #undef UNSYNC_FFLAG
|
|
723 #undef DATLEN_FFLAG
|
|
724 }
|
|
725 else break;
|
|
726 #undef KNOWN_FRAMES
|
|
727 }
|
|
728 }
|
|
729 }
|
|
730 else
|
|
731 {
|
|
732 if(NOQUIET) error("ID3v2: Duh, not able to read ID3v2 tag data.");
|
|
733 ret = ret2;
|
|
734 }
|
|
735 tagparse_cleanup:
|
|
736 free(tagdata);
|
|
737 }
|
|
738 else
|
|
739 {
|
|
740 if(NOQUIET) error1("ID3v2: Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length);
|
|
741 if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */
|
|
742 else ret = 0;
|
|
743 }
|
|
744 }
|
|
745 /* skip footer if present */
|
|
746 if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2;
|
|
747
|
|
748 return ret;
|
|
749 #undef UNSYNC_FLAG
|
|
750 #undef EXTHEAD_FLAG
|
|
751 #undef EXP_FLAG
|
|
752 #undef FOOTER_FLAG
|
|
753 #undef UNKOWN_FLAGS
|
|
754 }
|
|
755
|
|
756 static void convert_latin1(mpg123_string *sb, unsigned char* s, size_t l)
|
|
757 {
|
|
758 size_t length = l;
|
|
759 size_t i;
|
|
760 unsigned char *p;
|
|
761 /* determine real length, a latin1 character can at most take 2 in UTF8 */
|
|
762 for(i=0; i<l; ++i)
|
|
763 if(s[i] >= 0x80) ++length;
|
|
764
|
|
765 debug1("UTF-8 length: %lu", (unsigned long)length);
|
|
766 /* one extra zero byte for paranoia */
|
|
767 if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
|
|
768
|
|
769 p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
|
|
770 for(i=0; i<l; ++i)
|
|
771 if(s[i] < 0x80){ *p = s[i]; ++p; }
|
|
772 else /* two-byte encoding */
|
|
773 {
|
|
774 *p = 0xc0 | (s[i]>>6);
|
|
775 *(p+1) = 0x80 | (s[i] & 0x3f);
|
|
776 p+=2;
|
|
777 }
|
|
778
|
|
779 sb->p[length] = 0;
|
|
780 sb->fill = length+1;
|
|
781 }
|
|
782
|
|
783 #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
|
|
784 /* Remember: There's a limit at 0x1ffff. */
|
|
785 #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
|
|
786 static void convert_utf16(mpg123_string *sb, unsigned char* s, size_t l, int str_be)
|
|
787 {
|
|
788 size_t i;
|
|
789 unsigned char *p;
|
|
790 size_t length = 0; /* the resulting UTF-8 length */
|
|
791 /* Determine real length... extreme case can be more than utf-16 length. */
|
|
792 size_t high = 0;
|
|
793 size_t low = 1;
|
|
794 debug1("convert_utf16 with length %lu", (unsigned long)l);
|
|
795 if(!str_be) /* little-endian */
|
|
796 {
|
|
797 high = 1; /* The second byte is the high byte. */
|
|
798 low = 0; /* The first byte is the low byte. */
|
|
799 }
|
|
800 /* first: get length, check for errors -- stop at first one */
|
|
801 for(i=0; i < l-1; i+=2)
|
|
802 {
|
|
803 unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
|
|
804 if((point & 0xd800) == 0xd800) /* lead surrogate */
|
|
805 {
|
|
806 unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0;
|
|
807 if((second & 0xdc00) == 0xdc00) /* good... */
|
|
808 {
|
|
809 point = FULLPOINT(point,second);
|
|
810 length += UTF8LEN(point); /* possibly 4 bytes */
|
|
811 i+=2; /* We overstepped one word. */
|
|
812 }
|
|
813 else /* if no valid pair, break here */
|
|
814 {
|
|
815 debug1("Invalid UTF16 surrogate pair at %li.", (unsigned long)i);
|
|
816 l = i; /* Forget the half pair, END! */
|
|
817 break;
|
|
818 }
|
|
819 }
|
|
820 else length += UTF8LEN(point); /* 1,2 or 3 bytes */
|
|
821 }
|
|
822
|
|
823 if(l < 1){ mpg123_set_string(sb, ""); return; }
|
|
824
|
|
825 if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
|
|
826
|
|
827 /* Now really convert, skip checks as these have been done just before. */
|
|
828 p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
|
|
829 for(i=0; i < l-1; i+=2)
|
|
830 {
|
|
831 unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
|
|
832 if((codepoint & 0xd800) == 0xd800) /* lead surrogate */
|
|
833 {
|
|
834 unsigned short second = (s[i+2+high]<<8) + s[i+2+low];
|
|
835 codepoint = FULLPOINT(codepoint,second);
|
|
836 i+=2; /* We overstepped one word. */
|
|
837 }
|
|
838 if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
|
|
839 else if(codepoint < 0x800)
|
|
840 {
|
|
841 *p++ = 0xc0 | (codepoint>>6);
|
|
842 *p++ = 0x80 | (codepoint & 0x3f);
|
|
843 }
|
|
844 else if(codepoint < 0x10000)
|
|
845 {
|
|
846 *p++ = 0xe0 | (codepoint>>12);
|
|
847 *p++ = 0x80 | ((codepoint>>6) & 0x3f);
|
|
848 *p++ = 0x80 | (codepoint & 0x3f);
|
|
849 }
|
|
850 else if (codepoint < 0x200000)
|
|
851 {
|
|
852 *p++ = 0xf0 | codepoint>>18;
|
|
853 *p++ = 0x80 | ((codepoint>>12) & 0x3f);
|
|
854 *p++ = 0x80 | ((codepoint>>6) & 0x3f);
|
|
855 *p++ = 0x80 | (codepoint & 0x3f);
|
|
856 } /* ignore bigger ones (that are not possible here anyway) */
|
|
857 }
|
|
858 sb->p[sb->size-1] = 0; /* paranoia... */
|
|
859 sb->fill = sb->size;
|
|
860 }
|
|
861 #undef UTF8LEN
|
|
862 #undef FULLPOINT
|
|
863
|
|
864 static void convert_utf16be(mpg123_string *sb, unsigned char* source, size_t len)
|
|
865 {
|
|
866 convert_utf16(sb, source, len, 1);
|
|
867 }
|
|
868
|
|
869 static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len)
|
|
870 {
|
|
871 if(len < 2){ mpg123_free_string(sb); return; }
|
|
872
|
|
873 if(source[0] == 0xff && source[1] == 0xfe) /* Little-endian */
|
|
874 convert_utf16(sb, source + 2, len - 2, 0);
|
|
875 else /* Big-endian */
|
|
876 convert_utf16(sb, source + 2, len - 2, 1);
|
|
877 }
|
|
878
|
|
879 static void convert_utf8(mpg123_string *sb, unsigned char* source, size_t len)
|
|
880 {
|
|
881 if(mpg123_resize_string(sb, len+1))
|
|
882 {
|
|
883 memcpy(sb->p, source, len);
|
|
884 sb->p[len] = 0;
|
|
885 sb->fill = len+1;
|
|
886 }
|
|
887 else mpg123_free_string(sb);
|
|
888 }
|