/* mpg123 note: This is BSD-licensed code that is no problem for mpg123 usage under LGPL.
   It's Free, understood? ;-) */

/* Another note: This code is basically written by Thorsten Glaser,
   Thomas Orgis did just some rearrangements and comments. */

/*-
 * Copyright (c) 2008
 *	Thorsten Glaser <tg@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 *-
 * Convert from ICY encoding (windows-1252 codepage) to UTF-8
 */

/* Includes string and stdlib headers... */
#include "compat.h"

/* Named explicitly as well: this file calls malloc/realloc/free and strlen/memcpy. */
#include <stdlib.h>
#include <string.h>

/* ThOr: too lazy for this type check; also we use char/short all around anyway.
   Of course, it would be the proper way to use _these_ kind of types all around. */
/* Being macros, these names replace every later use of uint8_t / uint16_t in
   this file, including any typedefs of the same name that an earlier header
   may have declared. */
#define uint8_t unsigned char
#define uint16_t unsigned short

/*
 * UTF-8 encodings of all 256 windows-1252 byte values, stored back to back.
 *
 * The encoding of byte value c occupies the bytes from offset tblofs[c] up to
 * (not including) tblofs[c + 1].  The "@ n" annotations give the offset of the
 * first entry on each row.  Bytes 0x00..0x7F map to themselves (one byte
 * each), 0x80..0x9F take two or three bytes, 0xA0..0xFF take two bytes.
 * The five byte values that windows-1252 leaves unassigned (0x81, 0x8D, 0x8F,
 * 0x90, 0x9D) are stored as U+FFFD REPLACEMENT CHARACTER (EF BF BD).
 */
static const uint8_t cp1252_utf8[] = {
	/* 0x00 @   0 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	/* 0x08 @   8 */ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
	/* 0x10 @  16 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	/* 0x18 @  24 */ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
	/* 0x20 @  32 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	/* 0x28 @  40 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
	/* 0x30 @  48 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	/* 0x38 @  56 */ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
	/* 0x40 @  64 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	/* 0x48 @  72 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
	/* 0x50 @  80 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	/* 0x58 @  88 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
	/* 0x60 @  96 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	/* 0x68 @ 104 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	/* 0x70 @ 112 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	/* 0x78 @ 120 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,

	/* 0x80 @ 128 */ 0xE2, 0x82, 0xAC,	/* U+20AC */
	/* 0x81 @ 131 */ 0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x82 @ 134 */ 0xE2, 0x80, 0x9A,	/* U+201A */
	/* 0x83 @ 137 */ 0xC6, 0x92,		/* U+0192 */
	/* 0x84 @ 139 */ 0xE2, 0x80, 0x9E,	/* U+201E */
	/* 0x85 @ 142 */ 0xE2, 0x80, 0xA6,	/* U+2026 */
	/* 0x86 @ 145 */ 0xE2, 0x80, 0xA0,	/* U+2020 */
	/* 0x87 @ 148 */ 0xE2, 0x80, 0xA1,	/* U+2021 */
	/* 0x88 @ 151 */ 0xCB, 0x86,		/* U+02C6 */
	/* 0x89 @ 153 */ 0xE2, 0x80, 0xB0,	/* U+2030 */
	/* 0x8A @ 156 */ 0xC5, 0xA0,		/* U+0160 */
	/* 0x8B @ 158 */ 0xE2, 0x80, 0xB9,	/* U+2039 */
	/* 0x8C @ 161 */ 0xC5, 0x92,		/* U+0152 */
	/* 0x8D @ 163 */ 0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x8E @ 166 */ 0xC5, 0xBD,		/* U+017D */
	/* 0x8F @ 168 */ 0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x90 @ 171 */ 0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x91 @ 174 */ 0xE2, 0x80, 0x98,	/* U+2018 */
	/* 0x92 @ 177 */ 0xE2, 0x80, 0x99,	/* U+2019 */
	/* 0x93 @ 180 */ 0xE2, 0x80, 0x9C,	/* U+201C */
	/* 0x94 @ 183 */ 0xE2, 0x80, 0x9D,	/* U+201D */
	/* 0x95 @ 186 */ 0xE2, 0x80, 0xA2,	/* U+2022 */
	/* 0x96 @ 189 */ 0xE2, 0x80, 0x93,	/* U+2013 */
	/* 0x97 @ 192 */ 0xE2, 0x80, 0x94,	/* U+2014 */
	/* 0x98 @ 195 */ 0xCB, 0x9C,		/* U+02DC */
	/* 0x99 @ 197 */ 0xE2, 0x84, 0xA2,	/* U+2122 */
	/* 0x9A @ 200 */ 0xC5, 0xA1,		/* U+0161 */
	/* 0x9B @ 202 */ 0xE2, 0x80, 0xBA,	/* U+203A */
	/* 0x9C @ 205 */ 0xC5, 0x93,		/* U+0153 */
	/* 0x9D @ 207 */ 0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x9E @ 210 */ 0xC5, 0xBE,		/* U+017E */
	/* 0x9F @ 212 */ 0xC5, 0xB8,		/* U+0178 */

	/* 0xA0 @ 214 */ 0xC2, 0xA0,  0xC2, 0xA1,  0xC2, 0xA2,  0xC2, 0xA3,
	/* 0xA4 @ 222 */ 0xC2, 0xA4,  0xC2, 0xA5,  0xC2, 0xA6,  0xC2, 0xA7,
	/* 0xA8 @ 230 */ 0xC2, 0xA8,  0xC2, 0xA9,  0xC2, 0xAA,  0xC2, 0xAB,
	/* 0xAC @ 238 */ 0xC2, 0xAC,  0xC2, 0xAD,  0xC2, 0xAE,  0xC2, 0xAF,
	/* 0xB0 @ 246 */ 0xC2, 0xB0,  0xC2, 0xB1,  0xC2, 0xB2,  0xC2, 0xB3,
	/* 0xB4 @ 254 */ 0xC2, 0xB4,  0xC2, 0xB5,  0xC2, 0xB6,  0xC2, 0xB7,
	/* 0xB8 @ 262 */ 0xC2, 0xB8,  0xC2, 0xB9,  0xC2, 0xBA,  0xC2, 0xBB,
	/* 0xBC @ 270 */ 0xC2, 0xBC,  0xC2, 0xBD,  0xC2, 0xBE,  0xC2, 0xBF,

	/* 0xC0 @ 278 */ 0xC3, 0x80,  0xC3, 0x81,  0xC3, 0x82,  0xC3, 0x83,
	/* 0xC4 @ 286 */ 0xC3, 0x84,  0xC3, 0x85,  0xC3, 0x86,  0xC3, 0x87,
	/* 0xC8 @ 294 */ 0xC3, 0x88,  0xC3, 0x89,  0xC3, 0x8A,  0xC3, 0x8B,
	/* 0xCC @ 302 */ 0xC3, 0x8C,  0xC3, 0x8D,  0xC3, 0x8E,  0xC3, 0x8F,
	/* 0xD0 @ 310 */ 0xC3, 0x90,  0xC3, 0x91,  0xC3, 0x92,  0xC3, 0x93,
	/* 0xD4 @ 318 */ 0xC3, 0x94,  0xC3, 0x95,  0xC3, 0x96,  0xC3, 0x97,
	/* 0xD8 @ 326 */ 0xC3, 0x98,  0xC3, 0x99,  0xC3, 0x9A,  0xC3, 0x9B,
	/* 0xDC @ 334 */ 0xC3, 0x9C,  0xC3, 0x9D,  0xC3, 0x9E,  0xC3, 0x9F,
	/* 0xE0 @ 342 */ 0xC3, 0xA0,  0xC3, 0xA1,  0xC3, 0xA2,  0xC3, 0xA3,
	/* 0xE4 @ 350 */ 0xC3, 0xA4,  0xC3, 0xA5,  0xC3, 0xA6,  0xC3, 0xA7,
	/* 0xE8 @ 358 */ 0xC3, 0xA8,  0xC3, 0xA9,  0xC3, 0xAA,  0xC3, 0xAB,
	/* 0xEC @ 366 */ 0xC3, 0xAC,  0xC3, 0xAD,  0xC3, 0xAE,  0xC3, 0xAF,
	/* 0xF0 @ 374 */ 0xC3, 0xB0,  0xC3, 0xB1,  0xC3, 0xB2,  0xC3, 0xB3,
	/* 0xF4 @ 382 */ 0xC3, 0xB4,  0xC3, 0xB5,  0xC3, 0xB6,  0xC3, 0xB7,
	/* 0xF8 @ 390 */ 0xC3, 0xB8,  0xC3, 0xB9,  0xC3, 0xBA,  0xC3, 0xBB,
	/* 0xFC @ 398 */ 0xC3, 0xBC,  0xC3, 0xBD,  0xC3, 0xBE,  0xC3, 0xBF
};

/* Compile-time guard: the offset table that indexes this array ends at 406,
   so a dropped or duplicated byte above must not go unnoticed.  A negative
   array size makes the build fail if the table length ever changes. */
typedef char cp1252_utf8_size_check[(sizeof(cp1252_utf8) == 406) ? 1 : -1];

/*
 * Start offset into cp1252_utf8[] for each of the 256 source byte values.
 *
 * Entry c is where the UTF-8 encoding of byte value c begins; the extra 257th
 * entry holds the total table size, so the length of any encoding is simply
 * tblofs[c + 1] - tblofs[c] with no special case for 0xFF.
 */
static const uint16_t tblofs[257] = {
	/* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
	/* 0x08 */   8,   9,  10,  11,  12,  13,  14,  15,
	/* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
	/* 0x18 */  24,  25,  26,  27,  28,  29,  30,  31,
	/* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
	/* 0x28 */  40,  41,  42,  43,  44,  45,  46,  47,
	/* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
	/* 0x38 */  56,  57,  58,  59,  60,  61,  62,  63,
	/* 0x40 */  64,  65,  66,  67,  68,  69,  70,  71,
	/* 0x48 */  72,  73,  74,  75,  76,  77,  78,  79,
	/* 0x50 */  80,  81,  82,  83,  84,  85,  86,  87,
	/* 0x58 */  88,  89,  90,  91,  92,  93,  94,  95,
	/* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103,
	/* 0x68 */ 104, 105, 106, 107, 108, 109, 110, 111,
	/* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,
	/* 0x78 */ 120, 121, 122, 123, 124, 125, 126, 127,
	/* 0x80 */ 128, 131, 134, 137, 139, 142, 145, 148,
	/* 0x88 */ 151, 153, 156, 158, 161, 163, 166, 168,
	/* 0x90 */ 171, 174, 177, 180, 183, 186, 189, 192,
	/* 0x98 */ 195, 197, 200, 202, 205, 207, 210, 212,
	/* 0xA0 */ 214, 216, 218, 220, 222, 224, 226, 228,
	/* 0xA8 */ 230, 232, 234, 236, 238, 240, 242, 244,
	/* 0xB0 */ 246, 248, 250, 252, 254, 256, 258, 260,
	/* 0xB8 */ 262, 264, 266, 268, 270, 272, 274, 276,
	/* 0xC0 */ 278, 280, 282, 284, 286, 288, 290, 292,
	/* 0xC8 */ 294, 296, 298, 300, 302, 304, 306, 308,
	/* 0xD0 */ 310, 312, 314, 316, 318, 320, 322, 324,
	/* 0xD8 */ 326, 328, 330, 332, 334, 336, 338, 340,
	/* 0xE0 */ 342, 344, 346, 348, 350, 352, 354, 356,
	/* 0xE8 */ 358, 360, 362, 364, 366, 368, 370, 372,
	/* 0xF0 */ 374, 376, 378, 380, 382, 384, 386, 388,
	/* 0xF8 */ 390, 392, 394, 396, 398, 400, 402, 404,
	/* sizeof (cp1252_utf8) */ 406
};

/*
 * Check if a string qualifies as UTF-8.
 *
 * src must point to a NUL-terminated string.  Returns 1 when every byte
 * sequence in it is well-formed UTF-8 (pure ASCII and the empty string
 * included), 0 otherwise.
 *
 * Only the forms that are actually valid UTF-8 are accepted: one to four
 * bytes, no overlong encodings (C0/C1 leads, E0 80..9F, F0 80..8F), no UTF-16
 * surrogates (ED A0..BF) and nothing above U+10FFFF (F4 90.., F5..FF).  The
 * obsolete five and six byte forms are rejected too, so text that fails the
 * check gets re-encoded by the caller instead of being passed on as is.
 *
 * Two further rejections are deliberately kept from the original code:
 *  - C2 followed by 80..9F (the C1 control range U+0080..U+009F); presumably
 *    because such a pair in ICY text is far more likely to be two
 *    windows-1252 characters than a control code.
 *  - EF BF BE and EF BF BF (U+FFFE and U+FFFF).
 */
static int
is_utf8(const char *src)
{
	const uint8_t *s = (const uint8_t *)src;
	uint8_t ch;	/* current lead byte */
	uint8_t lo, hi;	/* allowed range for the first continuation byte */
	size_t extra;	/* continuation bytes after the first one */

	/* Walk the string up to the terminating NUL.  Look-ahead reads are safe:
	   each one happens only after the byte before it was seen to be non-NUL. */
	while ((ch = *s++) != 0) {
		/* Plain 7-bit ASCII is fine for anything. */
		if (ch < 0x80)
			continue;

		lo = 0x80;
		hi = 0xBF;

		if (ch < 0xC2) {
			/* Stray continuation byte, or overlong C0/C1 lead. */
			return 0;
		} else if (ch < 0xE0) {
			/* Two byte sequence. */
			extra = 0;
			if (ch == 0xC2)
				lo = 0xA0;	/* skip the C1 control range */
		} else if (ch < 0xF0) {
			/* Three byte sequence. */
			extra = 1;
			if (ch == 0xE0)
				lo = 0xA0;	/* anything lower is overlong */
			else if (ch == 0xED)
				hi = 0x9F;	/* anything higher is a surrogate */
		} else if (ch < 0xF5) {
			/* Four byte sequence. */
			extra = 2;
			if (ch == 0xF0)
				lo = 0x90;	/* anything lower is overlong */
			else if (ch == 0xF4)
				hi = 0x8F;	/* anything higher is beyond U+10FFFF */
		} else {
			/* F5..FF never occur in UTF-8. */
			return 0;
		}

		/* The first continuation byte carries the range restrictions;
		   a NUL here (truncated sequence) fails the lower bound. */
		if (s[0] < lo || s[0] > hi)
			return 0;

		/* U+FFFE and U+FFFF are not characters. */
		if (ch == 0xEF && s[0] == 0xBF && s[1] > 0xBD)
			return 0;
		s++;

		/* The remaining continuation bytes only need the 10xxxxxx shape. */
		while (extra--)
			if ((*s++ & 0xC0) != 0x80)
				return 0;
	}

	/* If no check failed, the string indeed looks like valid UTF-8. */
	return 1;
}

372 /* The main conversion routine.
|
|
373 ICY in CP-1252 (or UTF-8 alreay) to UTF-8 encoded string. */
|
|
374 char *
|
|
375 icy2utf8(const char *src)
|
|
376 {
|
|
377 const uint8_t *s = (const uint8_t *)src;
|
|
378 size_t srclen, dstlen, i, k;
|
|
379 uint8_t ch, *d;
|
|
380 char *dst;
|
|
381
|
|
382 /* Some funny streams from Apple/iTunes give ICY info in UTF-8 already.
|
|
383 So, be prepared and don't try to re-encode such. */
|
|
384 if(is_utf8(src)) return (strdup(src));
|
|
385
|
|
386 srclen = strlen(src) + 1;
|
|
387 /* allocate conservatively */
|
|
388 if ((d = malloc(srclen * 3)) == NULL)
|
|
389 return (NULL);
|
|
390
|
|
391 i = 0;
|
|
392 dstlen = 0;
|
|
393 while (i < srclen) {
|
|
394 ch = s[i++];
|
|
395 k = tblofs[ch];
|
|
396 while (k < tblofs[ch + 1])
|
|
397 d[dstlen++] = cp1252_utf8[k++];
|
|
398 }
|
|
399
|
|
400 /* dstlen includes trailing NUL since srclen also does */
|
|
401 if ((dst = realloc(d, dstlen)) == NULL) {
|
|
402 free(d);
|
|
403 return (NULL);
|
|
404 }
|
|
405 return (dst);
|
|
406 }
|
|
407
|
|
/* This stuff is for testing only. */
#ifdef TEST
/* windows-1252 sample text; the octal escapes are bytes 0x80 and above. */
static const char intext[] = "\225 Gr\374\337e kosten 0,55 \200\205";

#include <stdio.h>

/*
 * Convert the sample text once, then feed the result through the converter a
 * second time, and print both strings for comparison.
 * Returns 0 on success, 1 if a conversion returned NULL.
 */
int
main(void)
{
	char *t, *t2;

	if ((t = icy2utf8(intext)) == NULL) {
		fprintf(stderr, "out of memory\n");
		return (1);
	}

	/* make sure it won't be converted twice */
	if ((t2 = icy2utf8(t)) == NULL) {
		fprintf(stderr, "out of memory\n");
		/* Do not leak the first result on this path. */
		free(t);
		return (1);
	}

	printf("Result is:\t\343\200\214%s\343\200\215\n"
	    "\t\t\343\200\214%s\343\200\215\n", t, t2);

	free(t);
	free(t2);
	return (0);
}
#endif