Mercurial > trustbridge > nss-cmake-static
comparison nss/lib/util/utf8.c @ 0:1e5118fa0cb1
This is NSS with a CMake build system
To compile a static NSS library for Windows we've used the
Chromium-NSS fork and added a Cmake buildsystem to compile
it statically for Windows. See README.chromium for chromium
changes and README.trustbridge for our modifications.
author | Andre Heinecke <andre.heinecke@intevation.de> |
---|---|
date | Mon, 28 Jul 2014 10:47:06 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1e5118fa0cb1 |
---|---|
1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
4 | |
5 #include "seccomon.h" | |
6 #include "secport.h" | |
7 | |
8 #ifdef TEST_UTF8 | |
9 #include <assert.h> | |
10 #undef PORT_Assert | |
11 #define PORT_Assert assert | |
12 #endif | |
13 | |
14 /* | |
15 * From RFC 2044: | |
16 * | |
17 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | |
18 * 0000 0000-0000 007F 0xxxxxxx | |
19 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | |
20 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | |
21 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
22 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | |
23 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx | |
24 */ | |
25 | |
26 /* | |
27 * From http://www.imc.org/draft-hoffman-utf16 | |
28 * | |
29 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 | |
30 * | |
31 * U' = yyyyyyyyyyxxxxxxxxxx | |
32 * W1 = 110110yyyyyyyyyy | |
33 * W2 = 110111xxxxxxxxxx | |
34 */ | |
35 | |
36 /* | |
37 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit | |
38 * character values. If you wish to use this code for working with | |
39 * host byte order values, define the following: | |
40 * | |
41 * #if IS_BIG_ENDIAN | |
42 * #define L_0 0 | |
43 * #define L_1 1 | |
44 * #define L_2 2 | |
45 * #define L_3 3 | |
46 * #define H_0 0 | |
47 * #define H_1 1 | |
48 * #else / * not everyone has elif * / | |
49 * #if IS_LITTLE_ENDIAN | |
50 * #define L_0 3 | |
51 * #define L_1 2 | |
52 * #define L_2 1 | |
53 * #define L_3 0 | |
54 * #define H_0 1 | |
55 * #define H_1 0 | |
56 * #else | |
57 * #error "PDP and NUXI support deferred" | |
58 * #endif / * IS_LITTLE_ENDIAN * / | |
59 * #endif / * IS_BIG_ENDIAN * / | |
60 */ | |
61 | |
62 #define L_0 0 /* byte offset of the most significant byte of a 4-byte UCS-4 value */ | |
63 #define L_1 1 | |
64 #define L_2 2 | |
65 #define L_3 3 /* byte offset of the least significant byte of a 4-byte UCS-4 value */ | |
66 #define H_0 0 /* byte offset of the high byte of a 2-byte UCS-2/UTF-16 unit */ | |
67 #define H_1 1 /* byte offset of the low byte of a 2-byte UCS-2/UTF-16 unit */ | |
68 | |
69 #define BAD_UTF8 ((PRUint32)-1) /* failure value returned by sec_port_read_utf8 */ | |
70 | |
71 /* | |
72 * Parse a single UTF-8 character per the spec. in section 3.9 (D36) | |
73 * of Unicode 4.0.0. | |
74 * | |
75 * Parameters: | |
76 * index - Points to the byte offset in inBuf of character to read. On success, | |
77 * updated to the offset of the following character. | |
78 * inBuf - Input buffer, UTF-8 encoded | |
79 * inbufLen - Length of input buffer, in bytes. | |
80 * | |
81 * Returns: | |
82 * Success - The UCS4 encoded character | |
83 * Failure - BAD_UTF8 | |
84 */ | |
85 static PRUint32 | |
86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen) | |
87 { | |
88 PRUint32 result; | |
89 unsigned int i = *index; | |
90 int bytes_left; | |
91 PRUint32 min_value; | |
92 | |
93 PORT_Assert(i < inBufLen); | |
94 | |
95 if ( (inBuf[i] & 0x80) == 0x00 ) { | |
96 result = inBuf[i++]; | |
97 bytes_left = 0; | |
98 min_value = 0; | |
99 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) { | |
100 result = inBuf[i++] & 0x1F; | |
101 bytes_left = 1; | |
102 min_value = 0x80; | |
103 } else if ( (inBuf[i] & 0xF0) == 0xE0) { | |
104 result = inBuf[i++] & 0x0F; | |
105 bytes_left = 2; | |
106 min_value = 0x800; | |
107 } else if ( (inBuf[i] & 0xF8) == 0xF0) { | |
108 result = inBuf[i++] & 0x07; | |
109 bytes_left = 3; | |
110 min_value = 0x10000; | |
111 } else { | |
112 return BAD_UTF8; | |
113 } | |
114 | |
115 while (bytes_left--) { | |
116 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8; | |
117 result = (result << 6) | (inBuf[i++] & 0x3F); | |
118 } | |
119 | |
120 /* Check for overlong sequences, surrogates, and outside unicode range */ | |
121 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) { | |
122 return BAD_UTF8; | |
123 } | |
124 | |
125 *index = i; | |
126 return result; | |
127 } | |
128 | |
129 PRBool | |
130 sec_port_ucs4_utf8_conversion_function | |
131 ( | |
132 PRBool toUnicode, | |
133 unsigned char *inBuf, | |
134 unsigned int inBufLen, | |
135 unsigned char *outBuf, | |
136 unsigned int maxOutBufLen, | |
137 unsigned int *outBufLen | |
138 ) | |
139 { | |
140 PORT_Assert((unsigned int *)NULL != outBufLen); | |
141 | |
142 if( toUnicode ) { | |
143 unsigned int i, len = 0; | |
144 | |
145 for( i = 0; i < inBufLen; ) { | |
146 if( (inBuf[i] & 0x80) == 0x00 ) i += 1; | |
147 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2; | |
148 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3; | |
149 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4; | |
150 else return PR_FALSE; | |
151 | |
152 len += 4; | |
153 } | |
154 | |
155 if( len > maxOutBufLen ) { | |
156 *outBufLen = len; | |
157 return PR_FALSE; | |
158 } | |
159 | |
160 len = 0; | |
161 | |
162 for( i = 0; i < inBufLen; ) { | |
163 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
164 | |
165 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
166 | |
167 outBuf[len+L_0] = 0x00; | |
168 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16); | |
169 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8); | |
170 outBuf[len+L_3] = (unsigned char)ucs4; | |
171 | |
172 len += 4; | |
173 } | |
174 | |
175 *outBufLen = len; | |
176 return PR_TRUE; | |
177 } else { | |
178 unsigned int i, len = 0; | |
179 PORT_Assert((inBufLen % 4) == 0); | |
180 if ((inBufLen % 4) != 0) { | |
181 *outBufLen = 0; | |
182 return PR_FALSE; | |
183 } | |
184 | |
185 for( i = 0; i < inBufLen; i += 4 ) { | |
186 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) { | |
187 *outBufLen = 0; | |
188 return PR_FALSE; | |
189 } else if( inBuf[i+L_1] >= 0x01 ) len += 4; | |
190 else if( inBuf[i+L_2] >= 0x08 ) len += 3; | |
191 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2; | |
192 else len += 1; | |
193 } | |
194 | |
195 if( len > maxOutBufLen ) { | |
196 *outBufLen = len; | |
197 return PR_FALSE; | |
198 } | |
199 | |
200 len = 0; | |
201 | |
202 for( i = 0; i < inBufLen; i += 4 ) { | |
203 if( inBuf[i+L_1] >= 0x01 ) { | |
204 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
205 /* 00000000 000abcde fghijklm nopqrstu -> | |
206 11110abc 10defghi 10jklmno 10pqrstu */ | |
207 | |
208 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2); | |
209 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) | |
210 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
211 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
212 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
213 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
214 | |
215 len += 4; | |
216 } else if( inBuf[i+L_2] >= 0x08 ) { | |
217 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
218 /* 00000000 00000000 abcdefgh ijklmnop -> | |
219 1110abcd 10efghij 10klmnop */ | |
220 | |
221 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
222 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
223 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
224 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
225 | |
226 len += 3; | |
227 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) { | |
228 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ | |
229 /* 00000000 00000000 00000abc defghijk -> | |
230 110abcde 10fghijk */ | |
231 | |
232 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2) | |
233 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
234 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
235 | |
236 len += 2; | |
237 } else { | |
238 /* 0000 0000-0000 007F -> 0xxxxxx */ | |
239 /* 00000000 00000000 00000000 0abcdefg -> | |
240 0abcdefg */ | |
241 | |
242 outBuf[len+0] = (inBuf[i+L_3] & 0x7F); | |
243 | |
244 len += 1; | |
245 } | |
246 } | |
247 | |
248 *outBufLen = len; | |
249 return PR_TRUE; | |
250 } | |
251 } | |
252 | |
253 PRBool | |
254 sec_port_ucs2_utf8_conversion_function | |
255 ( | |
256 PRBool toUnicode, | |
257 unsigned char *inBuf, | |
258 unsigned int inBufLen, | |
259 unsigned char *outBuf, | |
260 unsigned int maxOutBufLen, | |
261 unsigned int *outBufLen | |
262 ) | |
263 { | |
264 PORT_Assert((unsigned int *)NULL != outBufLen); | |
265 | |
266 if( toUnicode ) { | |
267 unsigned int i, len = 0; | |
268 | |
269 for( i = 0; i < inBufLen; ) { | |
270 if( (inBuf[i] & 0x80) == 0x00 ) { | |
271 i += 1; | |
272 len += 2; | |
273 } else if( (inBuf[i] & 0xE0) == 0xC0 ) { | |
274 i += 2; | |
275 len += 2; | |
276 } else if( (inBuf[i] & 0xF0) == 0xE0 ) { | |
277 i += 3; | |
278 len += 2; | |
279 } else if( (inBuf[i] & 0xF8) == 0xF0 ) { | |
280 i += 4; | |
281 len += 4; | |
282 } else return PR_FALSE; | |
283 } | |
284 | |
285 if( len > maxOutBufLen ) { | |
286 *outBufLen = len; | |
287 return PR_FALSE; | |
288 } | |
289 | |
290 len = 0; | |
291 | |
292 for( i = 0; i < inBufLen; ) { | |
293 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
294 | |
295 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
296 | |
297 if( ucs4 < 0x10000) { | |
298 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8); | |
299 outBuf[len+H_1] = (unsigned char)ucs4; | |
300 len += 2; | |
301 } else { | |
302 ucs4 -= 0x10000; | |
303 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); | |
304 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10); | |
305 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); | |
306 outBuf[len+2+H_1] = (unsigned char)ucs4; | |
307 len += 4; | |
308 } | |
309 } | |
310 | |
311 *outBufLen = len; | |
312 return PR_TRUE; | |
313 } else { | |
314 unsigned int i, len = 0; | |
315 PORT_Assert((inBufLen % 2) == 0); | |
316 if ((inBufLen % 2) != 0) { | |
317 *outBufLen = 0; | |
318 return PR_FALSE; | |
319 } | |
320 | |
321 for( i = 0; i < inBufLen; i += 2 ) { | |
322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1; | |
323 else if( inBuf[i+H_0] < 0x08 ) len += 2; | |
324 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) { | |
325 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) { | |
326 i += 2; | |
327 len += 4; | |
328 } else { | |
329 return PR_FALSE; | |
330 } | |
331 } | |
332 else len += 3; | |
333 } | |
334 | |
335 if( len > maxOutBufLen ) { | |
336 *outBufLen = len; | |
337 return PR_FALSE; | |
338 } | |
339 | |
340 len = 0; | |
341 | |
342 for( i = 0; i < inBufLen; i += 2 ) { | |
343 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) { | |
344 /* 0000-007F -> 0xxxxxx */ | |
345 /* 00000000 0abcdefg -> 0abcdefg */ | |
346 | |
347 outBuf[len] = inBuf[i+H_1] & 0x7F; | |
348 | |
349 len += 1; | |
350 } else if( inBuf[i+H_0] < 0x08 ) { | |
351 /* 0080-07FF -> 110xxxxx 10xxxxxx */ | |
352 /* 00000abc defghijk -> 110abcde 10fghijk */ | |
353 | |
354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) | |
355 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
357 | |
358 len += 2; | |
359 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) { | |
360 int abcde, BCDE; | |
361 | |
362 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2)); | |
363 | |
364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
365 /* 110110BC DEfghijk 110111lm nopqrstu -> | |
366 { Let abcde = BCDE + 1 } | |
367 11110abc 10defghi 10jklmno 10pqrstu */ | |
368 | |
369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6); | |
370 abcde = BCDE + 1; | |
371 | |
372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2); | |
373 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) | |
374 | ((inBuf[i+0+H_1] & 0x3C) >> 2); | |
375 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4) | |
376 | ((inBuf[i+2+H_0] & 0x03) << 2) | |
377 | ((inBuf[i+2+H_1] & 0xC0) >> 6); | |
378 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0); | |
379 | |
380 i += 2; | |
381 len += 4; | |
382 } else { | |
383 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
384 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ | |
385 | |
386 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4); | |
387 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) | |
388 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
389 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
390 | |
391 len += 3; | |
392 } | |
393 } | |
394 | |
395 *outBufLen = len; | |
396 return PR_TRUE; | |
397 } | |
398 } | |
399 | |
400 PRBool | |
401 sec_port_iso88591_utf8_conversion_function | |
402 ( | |
403 const unsigned char *inBuf, | |
404 unsigned int inBufLen, | |
405 unsigned char *outBuf, | |
406 unsigned int maxOutBufLen, | |
407 unsigned int *outBufLen | |
408 ) | |
409 { | |
410 unsigned int i, len = 0; | |
411 | |
412 PORT_Assert((unsigned int *)NULL != outBufLen); | |
413 | |
414 for( i = 0; i < inBufLen; i++) { | |
415 if( (inBuf[i] & 0x80) == 0x00 ) len += 1; | |
416 else len += 2; | |
417 } | |
418 | |
419 if( len > maxOutBufLen ) { | |
420 *outBufLen = len; | |
421 return PR_FALSE; | |
422 } | |
423 | |
424 len = 0; | |
425 | |
426 for( i = 0; i < inBufLen; i++) { | |
427 if( (inBuf[i] & 0x80) == 0x00 ) { | |
428 /* 00-7F -> 0xxxxxxx */ | |
429 /* 0abcdefg -> 0abcdefg */ | |
430 | |
431 outBuf[len] = inBuf[i]; | |
432 len += 1; | |
433 } else { | |
434 /* 80-FF <- 110xxxxx 10xxxxxx */ | |
435 /* 00000000 abcdefgh -> 110000ab 10cdefgh */ | |
436 | |
437 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); | |
438 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); | |
439 | |
440 len += 2; | |
441 } | |
442 } | |
443 | |
444 *outBufLen = len; | |
445 return PR_TRUE; | |
446 } | |
447 | |
448 #ifdef TEST_UTF8 | |
449 | |
450 #include <stdio.h> | |
451 #include <string.h> | |
452 #include <stdlib.h> | |
453 #include <netinet/in.h> /* for htonl and htons */ | |
454 | |
455 /* | |
456 * UCS-4 vectors | |
457 */ | |
458 | |
459 struct ucs4 { /* one UCS-4 test vector */ | |
460 PRUint32 c; /* code point value */ | |
461 char *utf8; /* its expected UTF-8 encoding, as a NUL-terminated string */ | |
462 }; | |
463 | |
464 /* | |
465 * UCS-2 vectors | |
466 */ | |
467 | |
468 struct ucs2 { /* one UCS-2 test vector */ | |
469 PRUint16 c; /* 16-bit code point value */ | |
470 char *utf8; /* its expected UTF-8 encoding, as a NUL-terminated string */ | |
471 }; | |
472 | |
473 /* | |
474 * UTF-16 vectors | |
475 */ | |
476 | |
477 struct utf16 { /* one UTF-16 test vector for a code point above 0xFFFF */ | |
478 PRUint32 c; /* code point value */ | |
479 PRUint16 w[2]; /* its expected UTF-16 form: w[0] high surrogate, w[1] low surrogate */ | |
480 }; | |
481 | |
482 | |
483 /* | |
484 * UCS-4 vectors | |
485 */ | |
486 | |
487 struct ucs4 ucs4[] = { /* code point -> expected UTF-8; blank rows separate the 1-, 2-, 3- and 4-byte encodings */ | |
488 { 0x00000001, "\x01" }, | |
489 { 0x00000002, "\x02" }, | |
490 { 0x00000003, "\x03" }, | |
491 { 0x00000004, "\x04" }, | |
492 { 0x00000007, "\x07" }, | |
493 { 0x00000008, "\x08" }, | |
494 { 0x0000000F, "\x0F" }, | |
495 { 0x00000010, "\x10" }, | |
496 { 0x0000001F, "\x1F" }, | |
497 { 0x00000020, "\x20" }, | |
498 { 0x0000003F, "\x3F" }, | |
499 { 0x00000040, "\x40" }, | |
500 { 0x0000007F, "\x7F" }, | |
501 | |
502 { 0x00000080, "\xC2\x80" }, | |
503 { 0x00000081, "\xC2\x81" }, | |
504 { 0x00000082, "\xC2\x82" }, | |
505 { 0x00000084, "\xC2\x84" }, | |
506 { 0x00000088, "\xC2\x88" }, | |
507 { 0x00000090, "\xC2\x90" }, | |
508 { 0x000000A0, "\xC2\xA0" }, | |
509 { 0x000000C0, "\xC3\x80" }, | |
510 { 0x000000FF, "\xC3\xBF" }, | |
511 { 0x00000100, "\xC4\x80" }, | |
512 { 0x00000101, "\xC4\x81" }, | |
513 { 0x00000102, "\xC4\x82" }, | |
514 { 0x00000104, "\xC4\x84" }, | |
515 { 0x00000108, "\xC4\x88" }, | |
516 { 0x00000110, "\xC4\x90" }, | |
517 { 0x00000120, "\xC4\xA0" }, | |
518 { 0x00000140, "\xC5\x80" }, | |
519 { 0x00000180, "\xC6\x80" }, | |
520 { 0x000001FF, "\xC7\xBF" }, | |
521 { 0x00000200, "\xC8\x80" }, | |
522 { 0x00000201, "\xC8\x81" }, | |
523 { 0x00000202, "\xC8\x82" }, | |
524 { 0x00000204, "\xC8\x84" }, | |
525 { 0x00000208, "\xC8\x88" }, | |
526 { 0x00000210, "\xC8\x90" }, | |
527 { 0x00000220, "\xC8\xA0" }, | |
528 { 0x00000240, "\xC9\x80" }, | |
529 { 0x00000280, "\xCA\x80" }, | |
530 { 0x00000300, "\xCC\x80" }, | |
531 { 0x000003FF, "\xCF\xBF" }, | |
532 { 0x00000400, "\xD0\x80" }, | |
533 { 0x00000401, "\xD0\x81" }, | |
534 { 0x00000402, "\xD0\x82" }, | |
535 { 0x00000404, "\xD0\x84" }, | |
536 { 0x00000408, "\xD0\x88" }, | |
537 { 0x00000410, "\xD0\x90" }, | |
538 { 0x00000420, "\xD0\xA0" }, | |
539 { 0x00000440, "\xD1\x80" }, | |
540 { 0x00000480, "\xD2\x80" }, | |
541 { 0x00000500, "\xD4\x80" }, | |
542 { 0x00000600, "\xD8\x80" }, | |
543 { 0x000007FF, "\xDF\xBF" }, | |
544 | |
545 { 0x00000800, "\xE0\xA0\x80" }, | |
546 { 0x00000801, "\xE0\xA0\x81" }, | |
547 { 0x00000802, "\xE0\xA0\x82" }, | |
548 { 0x00000804, "\xE0\xA0\x84" }, | |
549 { 0x00000808, "\xE0\xA0\x88" }, | |
550 { 0x00000810, "\xE0\xA0\x90" }, | |
551 { 0x00000820, "\xE0\xA0\xA0" }, | |
552 { 0x00000840, "\xE0\xA1\x80" }, | |
553 { 0x00000880, "\xE0\xA2\x80" }, | |
554 { 0x00000900, "\xE0\xA4\x80" }, | |
555 { 0x00000A00, "\xE0\xA8\x80" }, | |
556 { 0x00000C00, "\xE0\xB0\x80" }, | |
557 { 0x00000FFF, "\xE0\xBF\xBF" }, | |
558 { 0x00001000, "\xE1\x80\x80" }, | |
559 { 0x00001001, "\xE1\x80\x81" }, | |
560 { 0x00001002, "\xE1\x80\x82" }, | |
561 { 0x00001004, "\xE1\x80\x84" }, | |
562 { 0x00001008, "\xE1\x80\x88" }, | |
563 { 0x00001010, "\xE1\x80\x90" }, | |
564 { 0x00001020, "\xE1\x80\xA0" }, | |
565 { 0x00001040, "\xE1\x81\x80" }, | |
566 { 0x00001080, "\xE1\x82\x80" }, | |
567 { 0x00001100, "\xE1\x84\x80" }, | |
568 { 0x00001200, "\xE1\x88\x80" }, | |
569 { 0x00001400, "\xE1\x90\x80" }, | |
570 { 0x00001800, "\xE1\xA0\x80" }, | |
571 { 0x00001FFF, "\xE1\xBF\xBF" }, | |
572 { 0x00002000, "\xE2\x80\x80" }, | |
573 { 0x00002001, "\xE2\x80\x81" }, | |
574 { 0x00002002, "\xE2\x80\x82" }, | |
575 { 0x00002004, "\xE2\x80\x84" }, | |
576 { 0x00002008, "\xE2\x80\x88" }, | |
577 { 0x00002010, "\xE2\x80\x90" }, | |
578 { 0x00002020, "\xE2\x80\xA0" }, | |
579 { 0x00002040, "\xE2\x81\x80" }, | |
580 { 0x00002080, "\xE2\x82\x80" }, | |
581 { 0x00002100, "\xE2\x84\x80" }, | |
582 { 0x00002200, "\xE2\x88\x80" }, | |
583 { 0x00002400, "\xE2\x90\x80" }, | |
584 { 0x00002800, "\xE2\xA0\x80" }, | |
585 { 0x00003000, "\xE3\x80\x80" }, | |
586 { 0x00003FFF, "\xE3\xBF\xBF" }, | |
587 { 0x00004000, "\xE4\x80\x80" }, | |
588 { 0x00004001, "\xE4\x80\x81" }, | |
589 { 0x00004002, "\xE4\x80\x82" }, | |
590 { 0x00004004, "\xE4\x80\x84" }, | |
591 { 0x00004008, "\xE4\x80\x88" }, | |
592 { 0x00004010, "\xE4\x80\x90" }, | |
593 { 0x00004020, "\xE4\x80\xA0" }, | |
594 { 0x00004040, "\xE4\x81\x80" }, | |
595 { 0x00004080, "\xE4\x82\x80" }, | |
596 { 0x00004100, "\xE4\x84\x80" }, | |
597 { 0x00004200, "\xE4\x88\x80" }, | |
598 { 0x00004400, "\xE4\x90\x80" }, | |
599 { 0x00004800, "\xE4\xA0\x80" }, | |
600 { 0x00005000, "\xE5\x80\x80" }, | |
601 { 0x00006000, "\xE6\x80\x80" }, | |
602 { 0x00007FFF, "\xE7\xBF\xBF" }, | |
603 { 0x00008000, "\xE8\x80\x80" }, | |
604 { 0x00008001, "\xE8\x80\x81" }, | |
605 { 0x00008002, "\xE8\x80\x82" }, | |
606 { 0x00008004, "\xE8\x80\x84" }, | |
607 { 0x00008008, "\xE8\x80\x88" }, | |
608 { 0x00008010, "\xE8\x80\x90" }, | |
609 { 0x00008020, "\xE8\x80\xA0" }, | |
610 { 0x00008040, "\xE8\x81\x80" }, | |
611 { 0x00008080, "\xE8\x82\x80" }, | |
612 { 0x00008100, "\xE8\x84\x80" }, | |
613 { 0x00008200, "\xE8\x88\x80" }, | |
614 { 0x00008400, "\xE8\x90\x80" }, | |
615 { 0x00008800, "\xE8\xA0\x80" }, | |
616 { 0x00009000, "\xE9\x80\x80" }, | |
617 { 0x0000A000, "\xEA\x80\x80" }, | |
618 { 0x0000C000, "\xEC\x80\x80" }, | |
619 { 0x0000FFFF, "\xEF\xBF\xBF" }, | |
620 | |
621 { 0x00010000, "\xF0\x90\x80\x80" }, | |
622 { 0x00010001, "\xF0\x90\x80\x81" }, | |
623 { 0x00010002, "\xF0\x90\x80\x82" }, | |
624 { 0x00010004, "\xF0\x90\x80\x84" }, | |
625 { 0x00010008, "\xF0\x90\x80\x88" }, | |
626 { 0x00010010, "\xF0\x90\x80\x90" }, | |
627 { 0x00010020, "\xF0\x90\x80\xA0" }, | |
628 { 0x00010040, "\xF0\x90\x81\x80" }, | |
629 { 0x00010080, "\xF0\x90\x82\x80" }, | |
630 { 0x00010100, "\xF0\x90\x84\x80" }, | |
631 { 0x00010200, "\xF0\x90\x88\x80" }, | |
632 { 0x00010400, "\xF0\x90\x90\x80" }, | |
633 { 0x00010800, "\xF0\x90\xA0\x80" }, | |
634 { 0x00011000, "\xF0\x91\x80\x80" }, | |
635 { 0x00012000, "\xF0\x92\x80\x80" }, | |
636 { 0x00014000, "\xF0\x94\x80\x80" }, | |
637 { 0x00018000, "\xF0\x98\x80\x80" }, | |
638 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" }, | |
639 { 0x00020000, "\xF0\xA0\x80\x80" }, | |
640 { 0x00020001, "\xF0\xA0\x80\x81" }, | |
641 { 0x00020002, "\xF0\xA0\x80\x82" }, | |
642 { 0x00020004, "\xF0\xA0\x80\x84" }, | |
643 { 0x00020008, "\xF0\xA0\x80\x88" }, | |
644 { 0x00020010, "\xF0\xA0\x80\x90" }, | |
645 { 0x00020020, "\xF0\xA0\x80\xA0" }, | |
646 { 0x00020040, "\xF0\xA0\x81\x80" }, | |
647 { 0x00020080, "\xF0\xA0\x82\x80" }, | |
648 { 0x00020100, "\xF0\xA0\x84\x80" }, | |
649 { 0x00020200, "\xF0\xA0\x88\x80" }, | |
650 { 0x00020400, "\xF0\xA0\x90\x80" }, | |
651 { 0x00020800, "\xF0\xA0\xA0\x80" }, | |
652 { 0x00021000, "\xF0\xA1\x80\x80" }, | |
653 { 0x00022000, "\xF0\xA2\x80\x80" }, | |
654 { 0x00024000, "\xF0\xA4\x80\x80" }, | |
655 { 0x00028000, "\xF0\xA8\x80\x80" }, | |
656 { 0x00030000, "\xF0\xB0\x80\x80" }, | |
657 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" }, | |
658 { 0x00040000, "\xF1\x80\x80\x80" }, | |
659 { 0x00040001, "\xF1\x80\x80\x81" }, | |
660 { 0x00040002, "\xF1\x80\x80\x82" }, | |
661 { 0x00040004, "\xF1\x80\x80\x84" }, | |
662 { 0x00040008, "\xF1\x80\x80\x88" }, | |
663 { 0x00040010, "\xF1\x80\x80\x90" }, | |
664 { 0x00040020, "\xF1\x80\x80\xA0" }, | |
665 { 0x00040040, "\xF1\x80\x81\x80" }, | |
666 { 0x00040080, "\xF1\x80\x82\x80" }, | |
667 { 0x00040100, "\xF1\x80\x84\x80" }, | |
668 { 0x00040200, "\xF1\x80\x88\x80" }, | |
669 { 0x00040400, "\xF1\x80\x90\x80" }, | |
670 { 0x00040800, "\xF1\x80\xA0\x80" }, | |
671 { 0x00041000, "\xF1\x81\x80\x80" }, | |
672 { 0x00042000, "\xF1\x82\x80\x80" }, | |
673 { 0x00044000, "\xF1\x84\x80\x80" }, | |
674 { 0x00048000, "\xF1\x88\x80\x80" }, | |
675 { 0x00050000, "\xF1\x90\x80\x80" }, | |
676 { 0x00060000, "\xF1\xA0\x80\x80" }, | |
677 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" }, | |
678 { 0x00080000, "\xF2\x80\x80\x80" }, | |
679 { 0x00080001, "\xF2\x80\x80\x81" }, | |
680 { 0x00080002, "\xF2\x80\x80\x82" }, | |
681 { 0x00080004, "\xF2\x80\x80\x84" }, | |
682 { 0x00080008, "\xF2\x80\x80\x88" }, | |
683 { 0x00080010, "\xF2\x80\x80\x90" }, | |
684 { 0x00080020, "\xF2\x80\x80\xA0" }, | |
685 { 0x00080040, "\xF2\x80\x81\x80" }, | |
686 { 0x00080080, "\xF2\x80\x82\x80" }, | |
687 { 0x00080100, "\xF2\x80\x84\x80" }, | |
688 { 0x00080200, "\xF2\x80\x88\x80" }, | |
689 { 0x00080400, "\xF2\x80\x90\x80" }, | |
690 { 0x00080800, "\xF2\x80\xA0\x80" }, | |
691 { 0x00081000, "\xF2\x81\x80\x80" }, | |
692 { 0x00082000, "\xF2\x82\x80\x80" }, | |
693 { 0x00084000, "\xF2\x84\x80\x80" }, | |
694 { 0x00088000, "\xF2\x88\x80\x80" }, | |
695 { 0x00090000, "\xF2\x90\x80\x80" }, | |
696 { 0x000A0000, "\xF2\xA0\x80\x80" }, | |
697 { 0x000C0000, "\xF3\x80\x80\x80" }, | |
698 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" }, | |
699 { 0x00100000, "\xF4\x80\x80\x80" }, | |
700 { 0x00100001, "\xF4\x80\x80\x81" }, | |
701 { 0x00100002, "\xF4\x80\x80\x82" }, | |
702 { 0x00100004, "\xF4\x80\x80\x84" }, | |
703 { 0x00100008, "\xF4\x80\x80\x88" }, | |
704 { 0x00100010, "\xF4\x80\x80\x90" }, | |
705 { 0x00100020, "\xF4\x80\x80\xA0" }, | |
706 { 0x00100040, "\xF4\x80\x81\x80" }, | |
707 { 0x00100080, "\xF4\x80\x82\x80" }, | |
708 { 0x00100100, "\xF4\x80\x84\x80" }, | |
709 { 0x00100200, "\xF4\x80\x88\x80" }, | |
710 { 0x00100400, "\xF4\x80\x90\x80" }, | |
711 { 0x00100800, "\xF4\x80\xA0\x80" }, | |
712 { 0x00101000, "\xF4\x81\x80\x80" }, | |
713 { 0x00102000, "\xF4\x82\x80\x80" }, | |
714 { 0x00104000, "\xF4\x84\x80\x80" }, | |
715 { 0x00108000, "\xF4\x88\x80\x80" }, | |
716 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" }, | |
717 }; | |
718 | |
719 /* | |
720 * UCS-2 vectors | |
721 */ | |
722 | |
723 struct ucs2 ucs2[] = { /* 16-bit code point -> expected UTF-8; blank rows separate the 1-, 2- and 3-byte encodings */ | |
724 { 0x0001, "\x01" }, | |
725 { 0x0002, "\x02" }, | |
726 { 0x0003, "\x03" }, | |
727 { 0x0004, "\x04" }, | |
728 { 0x0007, "\x07" }, | |
729 { 0x0008, "\x08" }, | |
730 { 0x000F, "\x0F" }, | |
731 { 0x0010, "\x10" }, | |
732 { 0x001F, "\x1F" }, | |
733 { 0x0020, "\x20" }, | |
734 { 0x003F, "\x3F" }, | |
735 { 0x0040, "\x40" }, | |
736 { 0x007F, "\x7F" }, | |
737 | |
738 { 0x0080, "\xC2\x80" }, | |
739 { 0x0081, "\xC2\x81" }, | |
740 { 0x0082, "\xC2\x82" }, | |
741 { 0x0084, "\xC2\x84" }, | |
742 { 0x0088, "\xC2\x88" }, | |
743 { 0x0090, "\xC2\x90" }, | |
744 { 0x00A0, "\xC2\xA0" }, | |
745 { 0x00C0, "\xC3\x80" }, | |
746 { 0x00FF, "\xC3\xBF" }, | |
747 { 0x0100, "\xC4\x80" }, | |
748 { 0x0101, "\xC4\x81" }, | |
749 { 0x0102, "\xC4\x82" }, | |
750 { 0x0104, "\xC4\x84" }, | |
751 { 0x0108, "\xC4\x88" }, | |
752 { 0x0110, "\xC4\x90" }, | |
753 { 0x0120, "\xC4\xA0" }, | |
754 { 0x0140, "\xC5\x80" }, | |
755 { 0x0180, "\xC6\x80" }, | |
756 { 0x01FF, "\xC7\xBF" }, | |
757 { 0x0200, "\xC8\x80" }, | |
758 { 0x0201, "\xC8\x81" }, | |
759 { 0x0202, "\xC8\x82" }, | |
760 { 0x0204, "\xC8\x84" }, | |
761 { 0x0208, "\xC8\x88" }, | |
762 { 0x0210, "\xC8\x90" }, | |
763 { 0x0220, "\xC8\xA0" }, | |
764 { 0x0240, "\xC9\x80" }, | |
765 { 0x0280, "\xCA\x80" }, | |
766 { 0x0300, "\xCC\x80" }, | |
767 { 0x03FF, "\xCF\xBF" }, | |
768 { 0x0400, "\xD0\x80" }, | |
769 { 0x0401, "\xD0\x81" }, | |
770 { 0x0402, "\xD0\x82" }, | |
771 { 0x0404, "\xD0\x84" }, | |
772 { 0x0408, "\xD0\x88" }, | |
773 { 0x0410, "\xD0\x90" }, | |
774 { 0x0420, "\xD0\xA0" }, | |
775 { 0x0440, "\xD1\x80" }, | |
776 { 0x0480, "\xD2\x80" }, | |
777 { 0x0500, "\xD4\x80" }, | |
778 { 0x0600, "\xD8\x80" }, | |
779 { 0x07FF, "\xDF\xBF" }, | |
780 | |
781 { 0x0800, "\xE0\xA0\x80" }, | |
782 { 0x0801, "\xE0\xA0\x81" }, | |
783 { 0x0802, "\xE0\xA0\x82" }, | |
784 { 0x0804, "\xE0\xA0\x84" }, | |
785 { 0x0808, "\xE0\xA0\x88" }, | |
786 { 0x0810, "\xE0\xA0\x90" }, | |
787 { 0x0820, "\xE0\xA0\xA0" }, | |
788 { 0x0840, "\xE0\xA1\x80" }, | |
789 { 0x0880, "\xE0\xA2\x80" }, | |
790 { 0x0900, "\xE0\xA4\x80" }, | |
791 { 0x0A00, "\xE0\xA8\x80" }, | |
792 { 0x0C00, "\xE0\xB0\x80" }, | |
793 { 0x0FFF, "\xE0\xBF\xBF" }, | |
794 { 0x1000, "\xE1\x80\x80" }, | |
795 { 0x1001, "\xE1\x80\x81" }, | |
796 { 0x1002, "\xE1\x80\x82" }, | |
797 { 0x1004, "\xE1\x80\x84" }, | |
798 { 0x1008, "\xE1\x80\x88" }, | |
799 { 0x1010, "\xE1\x80\x90" }, | |
800 { 0x1020, "\xE1\x80\xA0" }, | |
801 { 0x1040, "\xE1\x81\x80" }, | |
802 { 0x1080, "\xE1\x82\x80" }, | |
803 { 0x1100, "\xE1\x84\x80" }, | |
804 { 0x1200, "\xE1\x88\x80" }, | |
805 { 0x1400, "\xE1\x90\x80" }, | |
806 { 0x1800, "\xE1\xA0\x80" }, | |
807 { 0x1FFF, "\xE1\xBF\xBF" }, | |
808 { 0x2000, "\xE2\x80\x80" }, | |
809 { 0x2001, "\xE2\x80\x81" }, | |
810 { 0x2002, "\xE2\x80\x82" }, | |
811 { 0x2004, "\xE2\x80\x84" }, | |
812 { 0x2008, "\xE2\x80\x88" }, | |
813 { 0x2010, "\xE2\x80\x90" }, | |
814 { 0x2020, "\xE2\x80\xA0" }, | |
815 { 0x2040, "\xE2\x81\x80" }, | |
816 { 0x2080, "\xE2\x82\x80" }, | |
817 { 0x2100, "\xE2\x84\x80" }, | |
818 { 0x2200, "\xE2\x88\x80" }, | |
819 { 0x2400, "\xE2\x90\x80" }, | |
820 { 0x2800, "\xE2\xA0\x80" }, | |
821 { 0x3000, "\xE3\x80\x80" }, | |
822 { 0x3FFF, "\xE3\xBF\xBF" }, | |
823 { 0x4000, "\xE4\x80\x80" }, | |
824 { 0x4001, "\xE4\x80\x81" }, | |
825 { 0x4002, "\xE4\x80\x82" }, | |
826 { 0x4004, "\xE4\x80\x84" }, | |
827 { 0x4008, "\xE4\x80\x88" }, | |
828 { 0x4010, "\xE4\x80\x90" }, | |
829 { 0x4020, "\xE4\x80\xA0" }, | |
830 { 0x4040, "\xE4\x81\x80" }, | |
831 { 0x4080, "\xE4\x82\x80" }, | |
832 { 0x4100, "\xE4\x84\x80" }, | |
833 { 0x4200, "\xE4\x88\x80" }, | |
834 { 0x4400, "\xE4\x90\x80" }, | |
835 { 0x4800, "\xE4\xA0\x80" }, | |
836 { 0x5000, "\xE5\x80\x80" }, | |
837 { 0x6000, "\xE6\x80\x80" }, | |
838 { 0x7FFF, "\xE7\xBF\xBF" }, | |
839 { 0x8000, "\xE8\x80\x80" }, | |
840 { 0x8001, "\xE8\x80\x81" }, | |
841 { 0x8002, "\xE8\x80\x82" }, | |
842 { 0x8004, "\xE8\x80\x84" }, | |
843 { 0x8008, "\xE8\x80\x88" }, | |
844 { 0x8010, "\xE8\x80\x90" }, | |
845 { 0x8020, "\xE8\x80\xA0" }, | |
846 { 0x8040, "\xE8\x81\x80" }, | |
847 { 0x8080, "\xE8\x82\x80" }, | |
848 { 0x8100, "\xE8\x84\x80" }, | |
849 { 0x8200, "\xE8\x88\x80" }, | |
850 { 0x8400, "\xE8\x90\x80" }, | |
851 { 0x8800, "\xE8\xA0\x80" }, | |
852 { 0x9000, "\xE9\x80\x80" }, | |
853 { 0xA000, "\xEA\x80\x80" }, | |
854 { 0xC000, "\xEC\x80\x80" }, | |
855 { 0xFFFF, "\xEF\xBF\xBF" } | |
856 | |
857 }; | |
858 | |
859 /* | |
860 * UTF-16 vectors | |
861 */ | |
862 | |
863 struct utf16 utf16[] = { | |
864 { 0x00010000, { 0xD800, 0xDC00 } }, | |
865 { 0x00010001, { 0xD800, 0xDC01 } }, | |
866 { 0x00010002, { 0xD800, 0xDC02 } }, | |
867 { 0x00010003, { 0xD800, 0xDC03 } }, | |
868 { 0x00010004, { 0xD800, 0xDC04 } }, | |
869 { 0x00010007, { 0xD800, 0xDC07 } }, | |
870 { 0x00010008, { 0xD800, 0xDC08 } }, | |
871 { 0x0001000F, { 0xD800, 0xDC0F } }, | |
872 { 0x00010010, { 0xD800, 0xDC10 } }, | |
873 { 0x0001001F, { 0xD800, 0xDC1F } }, | |
874 { 0x00010020, { 0xD800, 0xDC20 } }, | |
875 { 0x0001003F, { 0xD800, 0xDC3F } }, | |
876 { 0x00010040, { 0xD800, 0xDC40 } }, | |
877 { 0x0001007F, { 0xD800, 0xDC7F } }, | |
878 { 0x00010080, { 0xD800, 0xDC80 } }, | |
879 { 0x00010081, { 0xD800, 0xDC81 } }, | |
880 { 0x00010082, { 0xD800, 0xDC82 } }, | |
881 { 0x00010084, { 0xD800, 0xDC84 } }, | |
882 { 0x00010088, { 0xD800, 0xDC88 } }, | |
883 { 0x00010090, { 0xD800, 0xDC90 } }, | |
884 { 0x000100A0, { 0xD800, 0xDCA0 } }, | |
885 { 0x000100C0, { 0xD800, 0xDCC0 } }, | |
886 { 0x000100FF, { 0xD800, 0xDCFF } }, | |
887 { 0x00010100, { 0xD800, 0xDD00 } }, | |
888 { 0x00010101, { 0xD800, 0xDD01 } }, | |
889 { 0x00010102, { 0xD800, 0xDD02 } }, | |
890 { 0x00010104, { 0xD800, 0xDD04 } }, | |
891 { 0x00010108, { 0xD800, 0xDD08 } }, | |
892 { 0x00010110, { 0xD800, 0xDD10 } }, | |
893 { 0x00010120, { 0xD800, 0xDD20 } }, | |
894 { 0x00010140, { 0xD800, 0xDD40 } }, | |
895 { 0x00010180, { 0xD800, 0xDD80 } }, | |
896 { 0x000101FF, { 0xD800, 0xDDFF } }, | |
897 { 0x00010200, { 0xD800, 0xDE00 } }, | |
898 { 0x00010201, { 0xD800, 0xDE01 } }, | |
899 { 0x00010202, { 0xD800, 0xDE02 } }, | |
900 { 0x00010204, { 0xD800, 0xDE04 } }, | |
901 { 0x00010208, { 0xD800, 0xDE08 } }, | |
902 { 0x00010210, { 0xD800, 0xDE10 } }, | |
903 { 0x00010220, { 0xD800, 0xDE20 } }, | |
904 { 0x00010240, { 0xD800, 0xDE40 } }, | |
905 { 0x00010280, { 0xD800, 0xDE80 } }, | |
906 { 0x00010300, { 0xD800, 0xDF00 } }, | |
907 { 0x000103FF, { 0xD800, 0xDFFF } }, | |
908 { 0x00010400, { 0xD801, 0xDC00 } }, | |
909 { 0x00010401, { 0xD801, 0xDC01 } }, | |
910 { 0x00010402, { 0xD801, 0xDC02 } }, | |
911 { 0x00010404, { 0xD801, 0xDC04 } }, | |
912 { 0x00010408, { 0xD801, 0xDC08 } }, | |
913 { 0x00010410, { 0xD801, 0xDC10 } }, | |
914 { 0x00010420, { 0xD801, 0xDC20 } }, | |
915 { 0x00010440, { 0xD801, 0xDC40 } }, | |
916 { 0x00010480, { 0xD801, 0xDC80 } }, | |
917 { 0x00010500, { 0xD801, 0xDD00 } }, | |
918 { 0x00010600, { 0xD801, 0xDE00 } }, | |
919 { 0x000107FF, { 0xD801, 0xDFFF } }, | |
920 { 0x00010800, { 0xD802, 0xDC00 } }, | |
921 { 0x00010801, { 0xD802, 0xDC01 } }, | |
922 { 0x00010802, { 0xD802, 0xDC02 } }, | |
923 { 0x00010804, { 0xD802, 0xDC04 } }, | |
924 { 0x00010808, { 0xD802, 0xDC08 } }, | |
925 { 0x00010810, { 0xD802, 0xDC10 } }, | |
926 { 0x00010820, { 0xD802, 0xDC20 } }, | |
927 { 0x00010840, { 0xD802, 0xDC40 } }, | |
928 { 0x00010880, { 0xD802, 0xDC80 } }, | |
929 { 0x00010900, { 0xD802, 0xDD00 } }, | |
930 { 0x00010A00, { 0xD802, 0xDE00 } }, | |
931 { 0x00010C00, { 0xD803, 0xDC00 } }, | |
932 { 0x00010FFF, { 0xD803, 0xDFFF } }, | |
933 { 0x00011000, { 0xD804, 0xDC00 } }, | |
934 { 0x00011001, { 0xD804, 0xDC01 } }, | |
935 { 0x00011002, { 0xD804, 0xDC02 } }, | |
936 { 0x00011004, { 0xD804, 0xDC04 } }, | |
937 { 0x00011008, { 0xD804, 0xDC08 } }, | |
938 { 0x00011010, { 0xD804, 0xDC10 } }, | |
939 { 0x00011020, { 0xD804, 0xDC20 } }, | |
940 { 0x00011040, { 0xD804, 0xDC40 } }, | |
941 { 0x00011080, { 0xD804, 0xDC80 } }, | |
942 { 0x00011100, { 0xD804, 0xDD00 } }, | |
943 { 0x00011200, { 0xD804, 0xDE00 } }, | |
944 { 0x00011400, { 0xD805, 0xDC00 } }, | |
945 { 0x00011800, { 0xD806, 0xDC00 } }, | |
946 { 0x00011FFF, { 0xD807, 0xDFFF } }, | |
947 { 0x00012000, { 0xD808, 0xDC00 } }, | |
948 { 0x00012001, { 0xD808, 0xDC01 } }, | |
949 { 0x00012002, { 0xD808, 0xDC02 } }, | |
950 { 0x00012004, { 0xD808, 0xDC04 } }, | |
951 { 0x00012008, { 0xD808, 0xDC08 } }, | |
952 { 0x00012010, { 0xD808, 0xDC10 } }, | |
953 { 0x00012020, { 0xD808, 0xDC20 } }, | |
954 { 0x00012040, { 0xD808, 0xDC40 } }, | |
955 { 0x00012080, { 0xD808, 0xDC80 } }, | |
956 { 0x00012100, { 0xD808, 0xDD00 } }, | |
957 { 0x00012200, { 0xD808, 0xDE00 } }, | |
958 { 0x00012400, { 0xD809, 0xDC00 } }, | |
959 { 0x00012800, { 0xD80A, 0xDC00 } }, | |
960 { 0x00013000, { 0xD80C, 0xDC00 } }, | |
961 { 0x00013FFF, { 0xD80F, 0xDFFF } }, | |
962 { 0x00014000, { 0xD810, 0xDC00 } }, | |
963 { 0x00014001, { 0xD810, 0xDC01 } }, | |
964 { 0x00014002, { 0xD810, 0xDC02 } }, | |
965 { 0x00014004, { 0xD810, 0xDC04 } }, | |
966 { 0x00014008, { 0xD810, 0xDC08 } }, | |
967 { 0x00014010, { 0xD810, 0xDC10 } }, | |
968 { 0x00014020, { 0xD810, 0xDC20 } }, | |
969 { 0x00014040, { 0xD810, 0xDC40 } }, | |
970 { 0x00014080, { 0xD810, 0xDC80 } }, | |
971 { 0x00014100, { 0xD810, 0xDD00 } }, | |
972 { 0x00014200, { 0xD810, 0xDE00 } }, | |
973 { 0x00014400, { 0xD811, 0xDC00 } }, | |
974 { 0x00014800, { 0xD812, 0xDC00 } }, | |
975 { 0x00015000, { 0xD814, 0xDC00 } }, | |
976 { 0x00016000, { 0xD818, 0xDC00 } }, | |
977 { 0x00017FFF, { 0xD81F, 0xDFFF } }, | |
978 { 0x00018000, { 0xD820, 0xDC00 } }, | |
979 { 0x00018001, { 0xD820, 0xDC01 } }, | |
980 { 0x00018002, { 0xD820, 0xDC02 } }, | |
981 { 0x00018004, { 0xD820, 0xDC04 } }, | |
982 { 0x00018008, { 0xD820, 0xDC08 } }, | |
983 { 0x00018010, { 0xD820, 0xDC10 } }, | |
984 { 0x00018020, { 0xD820, 0xDC20 } }, | |
985 { 0x00018040, { 0xD820, 0xDC40 } }, | |
986 { 0x00018080, { 0xD820, 0xDC80 } }, | |
987 { 0x00018100, { 0xD820, 0xDD00 } }, | |
988 { 0x00018200, { 0xD820, 0xDE00 } }, | |
989 { 0x00018400, { 0xD821, 0xDC00 } }, | |
990 { 0x00018800, { 0xD822, 0xDC00 } }, | |
991 { 0x00019000, { 0xD824, 0xDC00 } }, | |
992 { 0x0001A000, { 0xD828, 0xDC00 } }, | |
993 { 0x0001C000, { 0xD830, 0xDC00 } }, | |
994 { 0x0001FFFF, { 0xD83F, 0xDFFF } }, | |
995 { 0x00020000, { 0xD840, 0xDC00 } }, | |
996 { 0x00020001, { 0xD840, 0xDC01 } }, | |
997 { 0x00020002, { 0xD840, 0xDC02 } }, | |
998 { 0x00020004, { 0xD840, 0xDC04 } }, | |
999 { 0x00020008, { 0xD840, 0xDC08 } }, | |
1000 { 0x00020010, { 0xD840, 0xDC10 } }, | |
1001 { 0x00020020, { 0xD840, 0xDC20 } }, | |
1002 { 0x00020040, { 0xD840, 0xDC40 } }, | |
1003 { 0x00020080, { 0xD840, 0xDC80 } }, | |
1004 { 0x00020100, { 0xD840, 0xDD00 } }, | |
1005 { 0x00020200, { 0xD840, 0xDE00 } }, | |
1006 { 0x00020400, { 0xD841, 0xDC00 } }, | |
1007 { 0x00020800, { 0xD842, 0xDC00 } }, | |
1008 { 0x00021000, { 0xD844, 0xDC00 } }, | |
1009 { 0x00022000, { 0xD848, 0xDC00 } }, | |
1010 { 0x00024000, { 0xD850, 0xDC00 } }, | |
1011 { 0x00028000, { 0xD860, 0xDC00 } }, | |
1012 { 0x0002FFFF, { 0xD87F, 0xDFFF } }, | |
1013 { 0x00030000, { 0xD880, 0xDC00 } }, | |
1014 { 0x00030001, { 0xD880, 0xDC01 } }, | |
1015 { 0x00030002, { 0xD880, 0xDC02 } }, | |
1016 { 0x00030004, { 0xD880, 0xDC04 } }, | |
1017 { 0x00030008, { 0xD880, 0xDC08 } }, | |
1018 { 0x00030010, { 0xD880, 0xDC10 } }, | |
1019 { 0x00030020, { 0xD880, 0xDC20 } }, | |
1020 { 0x00030040, { 0xD880, 0xDC40 } }, | |
1021 { 0x00030080, { 0xD880, 0xDC80 } }, | |
1022 { 0x00030100, { 0xD880, 0xDD00 } }, | |
1023 { 0x00030200, { 0xD880, 0xDE00 } }, | |
1024 { 0x00030400, { 0xD881, 0xDC00 } }, | |
1025 { 0x00030800, { 0xD882, 0xDC00 } }, | |
1026 { 0x00031000, { 0xD884, 0xDC00 } }, | |
1027 { 0x00032000, { 0xD888, 0xDC00 } }, | |
1028 { 0x00034000, { 0xD890, 0xDC00 } }, | |
1029 { 0x00038000, { 0xD8A0, 0xDC00 } }, | |
1030 { 0x0003FFFF, { 0xD8BF, 0xDFFF } }, | |
1031 { 0x00040000, { 0xD8C0, 0xDC00 } }, | |
1032 { 0x00040001, { 0xD8C0, 0xDC01 } }, | |
1033 { 0x00040002, { 0xD8C0, 0xDC02 } }, | |
1034 { 0x00040004, { 0xD8C0, 0xDC04 } }, | |
1035 { 0x00040008, { 0xD8C0, 0xDC08 } }, | |
1036 { 0x00040010, { 0xD8C0, 0xDC10 } }, | |
1037 { 0x00040020, { 0xD8C0, 0xDC20 } }, | |
1038 { 0x00040040, { 0xD8C0, 0xDC40 } }, | |
1039 { 0x00040080, { 0xD8C0, 0xDC80 } }, | |
1040 { 0x00040100, { 0xD8C0, 0xDD00 } }, | |
1041 { 0x00040200, { 0xD8C0, 0xDE00 } }, | |
1042 { 0x00040400, { 0xD8C1, 0xDC00 } }, | |
1043 { 0x00040800, { 0xD8C2, 0xDC00 } }, | |
1044 { 0x00041000, { 0xD8C4, 0xDC00 } }, | |
1045 { 0x00042000, { 0xD8C8, 0xDC00 } }, | |
1046 { 0x00044000, { 0xD8D0, 0xDC00 } }, | |
1047 { 0x00048000, { 0xD8E0, 0xDC00 } }, | |
1048 { 0x0004FFFF, { 0xD8FF, 0xDFFF } }, | |
1049 { 0x00050000, { 0xD900, 0xDC00 } }, | |
1050 { 0x00050001, { 0xD900, 0xDC01 } }, | |
1051 { 0x00050002, { 0xD900, 0xDC02 } }, | |
1052 { 0x00050004, { 0xD900, 0xDC04 } }, | |
1053 { 0x00050008, { 0xD900, 0xDC08 } }, | |
1054 { 0x00050010, { 0xD900, 0xDC10 } }, | |
1055 { 0x00050020, { 0xD900, 0xDC20 } }, | |
1056 { 0x00050040, { 0xD900, 0xDC40 } }, | |
1057 { 0x00050080, { 0xD900, 0xDC80 } }, | |
1058 { 0x00050100, { 0xD900, 0xDD00 } }, | |
1059 { 0x00050200, { 0xD900, 0xDE00 } }, | |
1060 { 0x00050400, { 0xD901, 0xDC00 } }, | |
1061 { 0x00050800, { 0xD902, 0xDC00 } }, | |
1062 { 0x00051000, { 0xD904, 0xDC00 } }, | |
1063 { 0x00052000, { 0xD908, 0xDC00 } }, | |
1064 { 0x00054000, { 0xD910, 0xDC00 } }, | |
1065 { 0x00058000, { 0xD920, 0xDC00 } }, | |
1066 { 0x00060000, { 0xD940, 0xDC00 } }, | |
1067 { 0x00070000, { 0xD980, 0xDC00 } }, | |
1068 { 0x0007FFFF, { 0xD9BF, 0xDFFF } }, | |
1069 { 0x00080000, { 0xD9C0, 0xDC00 } }, | |
1070 { 0x00080001, { 0xD9C0, 0xDC01 } }, | |
1071 { 0x00080002, { 0xD9C0, 0xDC02 } }, | |
1072 { 0x00080004, { 0xD9C0, 0xDC04 } }, | |
1073 { 0x00080008, { 0xD9C0, 0xDC08 } }, | |
1074 { 0x00080010, { 0xD9C0, 0xDC10 } }, | |
1075 { 0x00080020, { 0xD9C0, 0xDC20 } }, | |
1076 { 0x00080040, { 0xD9C0, 0xDC40 } }, | |
1077 { 0x00080080, { 0xD9C0, 0xDC80 } }, | |
1078 { 0x00080100, { 0xD9C0, 0xDD00 } }, | |
1079 { 0x00080200, { 0xD9C0, 0xDE00 } }, | |
1080 { 0x00080400, { 0xD9C1, 0xDC00 } }, | |
1081 { 0x00080800, { 0xD9C2, 0xDC00 } }, | |
1082 { 0x00081000, { 0xD9C4, 0xDC00 } }, | |
1083 { 0x00082000, { 0xD9C8, 0xDC00 } }, | |
1084 { 0x00084000, { 0xD9D0, 0xDC00 } }, | |
1085 { 0x00088000, { 0xD9E0, 0xDC00 } }, | |
1086 { 0x0008FFFF, { 0xD9FF, 0xDFFF } }, | |
1087 { 0x00090000, { 0xDA00, 0xDC00 } }, | |
1088 { 0x00090001, { 0xDA00, 0xDC01 } }, | |
1089 { 0x00090002, { 0xDA00, 0xDC02 } }, | |
1090 { 0x00090004, { 0xDA00, 0xDC04 } }, | |
1091 { 0x00090008, { 0xDA00, 0xDC08 } }, | |
1092 { 0x00090010, { 0xDA00, 0xDC10 } }, | |
1093 { 0x00090020, { 0xDA00, 0xDC20 } }, | |
1094 { 0x00090040, { 0xDA00, 0xDC40 } }, | |
1095 { 0x00090080, { 0xDA00, 0xDC80 } }, | |
1096 { 0x00090100, { 0xDA00, 0xDD00 } }, | |
1097 { 0x00090200, { 0xDA00, 0xDE00 } }, | |
1098 { 0x00090400, { 0xDA01, 0xDC00 } }, | |
1099 { 0x00090800, { 0xDA02, 0xDC00 } }, | |
1100 { 0x00091000, { 0xDA04, 0xDC00 } }, | |
1101 { 0x00092000, { 0xDA08, 0xDC00 } }, | |
1102 { 0x00094000, { 0xDA10, 0xDC00 } }, | |
1103 { 0x00098000, { 0xDA20, 0xDC00 } }, | |
1104 { 0x000A0000, { 0xDA40, 0xDC00 } }, | |
1105 { 0x000B0000, { 0xDA80, 0xDC00 } }, | |
1106 { 0x000C0000, { 0xDAC0, 0xDC00 } }, | |
1107 { 0x000D0000, { 0xDB00, 0xDC00 } }, | |
1108 { 0x000FFFFF, { 0xDBBF, 0xDFFF } }, | |
1109 { 0x0010FFFF, { 0xDBFF, 0xDFFF } } | |
1110 | |
1111 }; | |
1112 | |
/*
 * Byte strings the UTF-8 decoders must refuse.
 *
 * test_utf8_bad_chars() hands every entry to both the UCS-2 and the UCS-4
 * decoder and counts a successful conversion as a test failure.  Entries
 * are ordinary NUL-terminated strings; their length is taken with strlen().
 */
char *utf8_bad[] = {
    /* overlong two-byte forms (value would fit in one byte) */
    "\xC0\x80",
    "\xC1\xBF",

    /* overlong three-byte forms */
    "\xE0\x80\x80",
    "\xE0\x9F\xBF",

    /* overlong four-byte forms */
    "\xF0\x80\x80\x80",
    "\xF0\x8F\xBF\xBF",

    /* four-byte forms above U+10FFFF */
    "\xF4\x90\x80\x80",
    "\xF7\xBF\xBF\xBF",

    /* five-byte forms (lead bytes F8..FB) */
    "\xF8\x80\x80\x80\x80",
    "\xF8\x88\x80\x80\x80",
    "\xF8\x92\x80\x80\x80",
    "\xF8\x9F\xBF\xBF\xBF",
    "\xF8\xA0\x80\x80\x80",
    "\xF8\xA8\x80\x80\x80",
    "\xF8\xB0\x80\x80\x80",
    "\xF8\xBF\xBF\xBF\xBF",
    "\xF9\x80\x80\x80\x88",
    "\xF9\x84\x80\x80\x80",
    "\xF9\xBF\xBF\xBF\xBF",
    "\xFA\x80\x80\x80\x80",
    "\xFA\x90\x80\x80\x80",
    "\xFB\xBF\xBF\xBF\xBF",

    /* six-byte forms (lead bytes FC..FD) */
    "\xFC\x84\x80\x80\x80\x81",
    "\xFC\x85\x80\x80\x80\x80",
    "\xFC\x86\x80\x80\x80\x80",
    "\xFC\x87\xBF\xBF\xBF\xBF",
    "\xFC\x88\xA0\x80\x80\x80",
    "\xFC\x89\x80\x80\x80\x80",
    "\xFC\x8A\x80\x80\x80\x80",
    "\xFC\x90\x80\x80\x80\x82",
    "\xFD\x80\x80\x80\x80\x80",
    "\xFD\xBF\xBF\xBF\xBF\xBF",

    /* stray continuation byte, truncated sequence, lead byte where a
     * continuation byte is required */
    "\x80",
    "\xC3",
    "\xC3\xC3\x80",

    /* encoded surrogate code points (U+D800..U+DFFF) */
    "\xED\xA0\x80",
    "\xED\xBF\x80",
    "\xED\xBF\xBF",
    "\xED\xA0\x80\xE0\xBF\xBF",
};
1155 | |
/*
 * Print a labelled hex dump of a NUL-terminated byte string to stdout.
 *
 *   word  text printed first, followed by one space
 *   utf8  bytes to dump; each is printed as two hex digits plus a space,
 *         stopping at the first zero byte
 *   end   text printed last, verbatim (callers use it for separators
 *         and newlines)
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
    const unsigned char *cursor = utf8;

    printf("%s ", word);

    while (*cursor != 0) {
        printf("%02.2x ", (unsigned int)*cursor);
        cursor++;
    }

    printf("%s", end);
}
1170 | |
1171 static PRBool | |
1172 test_ucs4_chars | |
1173 ( | |
1174 void | |
1175 ) | |
1176 { | |
1177 PRBool rv = PR_TRUE; | |
1178 int i; | |
1179 | |
1180 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1181 struct ucs4 *e = &ucs4[i]; | |
1182 PRBool result; | |
1183 unsigned char utf8[8]; | |
1184 unsigned int len = 0; | |
1185 PRUint32 back = 0; | |
1186 | |
1187 (void)memset(utf8, 0, sizeof(utf8)); | |
1188 | |
1189 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1190 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1191 | |
1192 if( !result ) { | |
1193 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c); | |
1194 rv = PR_FALSE; | |
1195 continue; | |
1196 } | |
1197 | |
1198 if( (len >= sizeof(utf8)) || | |
1199 (strlen(e->utf8) != len) || | |
1200 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1201 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c); | |
1202 dump_utf8("expected", e->utf8, ", "); | |
1203 dump_utf8("received", utf8, "\n"); | |
1204 rv = PR_FALSE; | |
1205 continue; | |
1206 } | |
1207 | |
1208 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1209 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
1210 | |
1211 if( !result ) { | |
1212 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n"); | |
1213 rv = PR_FALSE; | |
1214 continue; | |
1215 } | |
1216 | |
1217 if( (sizeof(back) != len) || (e->c != back) ) { | |
1218 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:"); | |
1219 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
1220 rv = PR_FALSE; | |
1221 continue; | |
1222 } | |
1223 } | |
1224 | |
1225 return rv; | |
1226 } | |
1227 | |
1228 static PRBool | |
1229 test_ucs2_chars | |
1230 ( | |
1231 void | |
1232 ) | |
1233 { | |
1234 PRBool rv = PR_TRUE; | |
1235 int i; | |
1236 | |
1237 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1238 struct ucs2 *e = &ucs2[i]; | |
1239 PRBool result; | |
1240 unsigned char utf8[8]; | |
1241 unsigned int len = 0; | |
1242 PRUint16 back = 0; | |
1243 | |
1244 (void)memset(utf8, 0, sizeof(utf8)); | |
1245 | |
1246 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1247 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1248 | |
1249 if( !result ) { | |
1250 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c); | |
1251 rv = PR_FALSE; | |
1252 continue; | |
1253 } | |
1254 | |
1255 if( (len >= sizeof(utf8)) || | |
1256 (strlen(e->utf8) != len) || | |
1257 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1258 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c); | |
1259 dump_utf8("expected", e->utf8, ", "); | |
1260 dump_utf8("received", utf8, "\n"); | |
1261 rv = PR_FALSE; | |
1262 continue; | |
1263 } | |
1264 | |
1265 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1266 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
1267 | |
1268 if( !result ) { | |
1269 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n"); | |
1270 rv = PR_FALSE; | |
1271 continue; | |
1272 } | |
1273 | |
1274 if( (sizeof(back) != len) || (e->c != back) ) { | |
1275 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:"); | |
1276 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
1277 rv = PR_FALSE; | |
1278 continue; | |
1279 } | |
1280 } | |
1281 | |
1282 return rv; | |
1283 } | |
1284 | |
1285 static PRBool | |
1286 test_utf16_chars | |
1287 ( | |
1288 void | |
1289 ) | |
1290 { | |
1291 PRBool rv = PR_TRUE; | |
1292 int i; | |
1293 | |
1294 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
1295 struct utf16 *e = &utf16[i]; | |
1296 PRBool result; | |
1297 unsigned char utf8[8]; | |
1298 unsigned int len = 0; | |
1299 PRUint32 back32 = 0; | |
1300 PRUint16 back[2]; | |
1301 | |
1302 (void)memset(utf8, 0, sizeof(utf8)); | |
1303 | |
1304 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1305 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len); | |
1306 | |
1307 if( !result ) { | |
1308 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n", | |
1309 e->w[0], e->w[1]); | |
1310 rv = PR_FALSE; | |
1311 continue; | |
1312 } | |
1313 | |
1314 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1315 utf8, len, (unsigned char *)&back32, sizeof(back32), &len); | |
1316 | |
1317 if( 4 != len ) { | |
1318 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: " | |
1319 "unexpected len %d\n", e->w[0], e->w[1], len); | |
1320 rv = PR_FALSE; | |
1321 continue; | |
1322 } | |
1323 | |
1324 utf8[len] = '\0'; /* null-terminate for printing */ | |
1325 | |
1326 if( !result ) { | |
1327 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n"); | |
1328 rv = PR_FALSE; | |
1329 continue; | |
1330 } | |
1331 | |
1332 if( (sizeof(back32) != len) || (e->c != back32) ) { | |
1333 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ", | |
1334 e->w[0], e->w[1]); | |
1335 dump_utf8("to UTF-8", utf8, "and then to UCS-4: "); | |
1336 if( sizeof(back32) != len ) { | |
1337 fprintf(stdout, "len is %d\n", len); | |
1338 } else { | |
1339 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32); | |
1340 } | |
1341 rv = PR_FALSE; | |
1342 continue; | |
1343 } | |
1344 | |
1345 (void)memset(utf8, 0, sizeof(utf8)); | |
1346 back[0] = back[1] = 0; | |
1347 | |
1348 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1349 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1350 | |
1351 if( !result ) { | |
1352 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n", | |
1353 e->c); | |
1354 rv = PR_FALSE; | |
1355 continue; | |
1356 } | |
1357 | |
1358 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1359 utf8, len, (unsigned char *)&back[0], sizeof(back), &len); | |
1360 | |
1361 if( 4 != len ) { | |
1362 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: " | |
1363 "unexpected len %d\n", e->c, len); | |
1364 rv = PR_FALSE; | |
1365 continue; | |
1366 } | |
1367 | |
1368 utf8[len] = '\0'; /* null-terminate for printing */ | |
1369 | |
1370 if( !result ) { | |
1371 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n"); | |
1372 rv = PR_FALSE; | |
1373 continue; | |
1374 } | |
1375 | |
1376 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) { | |
1377 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c); | |
1378 dump_utf8("", utf8, "and then to UTF-16:"); | |
1379 if( sizeof(back) != len ) { | |
1380 fprintf(stdout, "len is %d\n", len); | |
1381 } else { | |
1382 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n", | |
1383 e->w[0], e->w[1], back[0], back[1]); | |
1384 } | |
1385 rv = PR_FALSE; | |
1386 continue; | |
1387 } | |
1388 } | |
1389 | |
1390 return rv; | |
1391 } | |
1392 | |
1393 static PRBool | |
1394 test_utf8_bad_chars | |
1395 ( | |
1396 void | |
1397 ) | |
1398 { | |
1399 PRBool rv = PR_TRUE; | |
1400 int i; | |
1401 | |
1402 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) { | |
1403 PRBool result; | |
1404 unsigned char destbuf[30]; | |
1405 unsigned int len = 0; | |
1406 | |
1407 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1408 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len); | |
1409 | |
1410 if( result ) { | |
1411 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n"); | |
1412 rv = PR_FALSE; | |
1413 continue; | |
1414 } | |
1415 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1416 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len); | |
1417 | |
1418 if( result ) { | |
1419 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n"); | |
1420 rv = PR_FALSE; | |
1421 continue; | |
1422 } | |
1423 | |
1424 } | |
1425 | |
1426 return rv; | |
1427 } | |
1428 | |
1429 static PRBool | |
1430 test_iso88591_chars | |
1431 ( | |
1432 void | |
1433 ) | |
1434 { | |
1435 PRBool rv = PR_TRUE; | |
1436 int i; | |
1437 | |
1438 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1439 struct ucs2 *e = &ucs2[i]; | |
1440 PRBool result; | |
1441 unsigned char iso88591; | |
1442 unsigned char utf8[3]; | |
1443 unsigned int len = 0; | |
1444 | |
1445 if (ntohs(e->c) > 0xFF) continue; | |
1446 | |
1447 (void)memset(utf8, 0, sizeof(utf8)); | |
1448 iso88591 = ntohs(e->c); | |
1449 | |
1450 result = sec_port_iso88591_utf8_conversion_function(&iso88591, | |
1451 1, utf8, sizeof(utf8), &len); | |
1452 | |
1453 if( !result ) { | |
1454 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591); | |
1455 rv = PR_FALSE; | |
1456 continue; | |
1457 } | |
1458 | |
1459 if( (len >= sizeof(utf8)) || | |
1460 (strlen(e->utf8) != len) || | |
1461 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1462 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591); | |
1463 dump_utf8("expected", e->utf8, ", "); | |
1464 dump_utf8("received", utf8, "\n"); | |
1465 rv = PR_FALSE; | |
1466 continue; | |
1467 } | |
1468 | |
1469 } | |
1470 | |
1471 return rv; | |
1472 } | |
1473 | |
1474 static PRBool | |
1475 test_zeroes | |
1476 ( | |
1477 void | |
1478 ) | |
1479 { | |
1480 PRBool rv = PR_TRUE; | |
1481 PRBool result; | |
1482 PRUint32 lzero = 0; | |
1483 PRUint16 szero = 0; | |
1484 unsigned char utf8[8]; | |
1485 unsigned int len = 0; | |
1486 PRUint32 lback = 1; | |
1487 PRUint16 sback = 1; | |
1488 | |
1489 (void)memset(utf8, 1, sizeof(utf8)); | |
1490 | |
1491 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1492 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len); | |
1493 | |
1494 if( !result ) { | |
1495 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n"); | |
1496 rv = PR_FALSE; | |
1497 } else if( 1 != len ) { | |
1498 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len); | |
1499 rv = PR_FALSE; | |
1500 } else if( '\0' != *utf8 ) { | |
1501 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ," | |
1502 "received %02.2x\n", (unsigned int)*utf8); | |
1503 rv = PR_FALSE; | |
1504 } | |
1505 | |
1506 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1507 "", 1, (unsigned char *)&lback, sizeof(lback), &len); | |
1508 | |
1509 if( !result ) { | |
1510 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n"); | |
1511 rv = PR_FALSE; | |
1512 } else if( 4 != len ) { | |
1513 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len); | |
1514 rv = PR_FALSE; | |
1515 } else if( 0 != lback ) { | |
1516 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: " | |
1517 "expected 0x00000000, received 0x%08.8x\n", lback); | |
1518 rv = PR_FALSE; | |
1519 } | |
1520 | |
1521 (void)memset(utf8, 1, sizeof(utf8)); | |
1522 | |
1523 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1524 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len); | |
1525 | |
1526 if( !result ) { | |
1527 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n"); | |
1528 rv = PR_FALSE; | |
1529 } else if( 1 != len ) { | |
1530 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len); | |
1531 rv = PR_FALSE; | |
1532 } else if( '\0' != *utf8 ) { | |
1533 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ," | |
1534 "received %02.2x\n", (unsigned int)*utf8); | |
1535 rv = PR_FALSE; | |
1536 } | |
1537 | |
1538 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1539 "", 1, (unsigned char *)&sback, sizeof(sback), &len); | |
1540 | |
1541 if( !result ) { | |
1542 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n"); | |
1543 rv = PR_FALSE; | |
1544 } else if( 2 != len ) { | |
1545 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len); | |
1546 rv = PR_FALSE; | |
1547 } else if( 0 != sback ) { | |
1548 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: " | |
1549 "expected 0x0000, received 0x%04.4x\n", sback); | |
1550 rv = PR_FALSE; | |
1551 } | |
1552 | |
1553 return rv; | |
1554 } | |
1555 | |
1556 static PRBool | |
1557 test_multichars | |
1558 ( | |
1559 void | |
1560 ) | |
1561 { | |
1562 int i; | |
1563 unsigned int len, lenout; | |
1564 PRUint32 *ucs4s; | |
1565 char *ucs4_utf8; | |
1566 PRUint16 *ucs2s; | |
1567 char *ucs2_utf8; | |
1568 void *tmp; | |
1569 PRBool result; | |
1570 | |
1571 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32)); | |
1572 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16)); | |
1573 | |
1574 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) { | |
1575 fprintf(stderr, "out of memory\n"); | |
1576 exit(1); | |
1577 } | |
1578 | |
1579 len = 0; | |
1580 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1581 ucs4s[i] = ucs4[i].c; | |
1582 len += strlen(ucs4[i].utf8); | |
1583 } | |
1584 | |
1585 ucs4_utf8 = (char *)malloc(len); | |
1586 | |
1587 len = 0; | |
1588 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1589 ucs2s[i] = ucs2[i].c; | |
1590 len += strlen(ucs2[i].utf8); | |
1591 } | |
1592 | |
1593 ucs2_utf8 = (char *)malloc(len); | |
1594 | |
1595 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) { | |
1596 fprintf(stderr, "out of memory\n"); | |
1597 exit(1); | |
1598 } | |
1599 | |
1600 *ucs4_utf8 = '\0'; | |
1601 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1602 strcat(ucs4_utf8, ucs4[i].utf8); | |
1603 } | |
1604 | |
1605 *ucs2_utf8 = '\0'; | |
1606 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1607 strcat(ucs2_utf8, ucs2[i].utf8); | |
1608 } | |
1609 | |
1610 /* UTF-8 -> UCS-4 */ | |
1611 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32); | |
1612 tmp = calloc(len, 1); | |
1613 if( (void *)NULL == tmp ) { | |
1614 fprintf(stderr, "out of memory\n"); | |
1615 exit(1); | |
1616 } | |
1617 | |
1618 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1619 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout); | |
1620 if( !result ) { | |
1621 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n"); | |
1622 goto done; | |
1623 } | |
1624 | |
1625 if( lenout != len ) { | |
1626 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n"); | |
1627 goto loser; | |
1628 } | |
1629 | |
1630 if( 0 != memcmp(ucs4s, tmp, len) ) { | |
1631 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n"); | |
1632 goto loser; | |
1633 } | |
1634 | |
1635 free(tmp); tmp = (void *)NULL; | |
1636 | |
1637 /* UCS-4 -> UTF-8 */ | |
1638 len = strlen(ucs4_utf8); | |
1639 tmp = calloc(len, 1); | |
1640 if( (void *)NULL == tmp ) { | |
1641 fprintf(stderr, "out of memory\n"); | |
1642 exit(1); | |
1643 } | |
1644 | |
1645 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1646 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), | |
1647 tmp, len, &lenout); | |
1648 if( !result ) { | |
1649 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n"); | |
1650 goto done; | |
1651 } | |
1652 | |
1653 if( lenout != len ) { | |
1654 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n"); | |
1655 goto loser; | |
1656 } | |
1657 | |
1658 if( 0 != strncmp(ucs4_utf8, tmp, len) ) { | |
1659 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n"); | |
1660 goto loser; | |
1661 } | |
1662 | |
1663 free(tmp); tmp = (void *)NULL; | |
1664 | |
1665 /* UTF-8 -> UCS-2 */ | |
1666 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16); | |
1667 tmp = calloc(len, 1); | |
1668 if( (void *)NULL == tmp ) { | |
1669 fprintf(stderr, "out of memory\n"); | |
1670 exit(1); | |
1671 } | |
1672 | |
1673 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1674 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout); | |
1675 if( !result ) { | |
1676 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n"); | |
1677 goto done; | |
1678 } | |
1679 | |
1680 if( lenout != len ) { | |
1681 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n"); | |
1682 goto loser; | |
1683 } | |
1684 | |
1685 if( 0 != memcmp(ucs2s, tmp, len) ) { | |
1686 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n"); | |
1687 goto loser; | |
1688 } | |
1689 | |
1690 free(tmp); tmp = (void *)NULL; | |
1691 | |
1692 /* UCS-2 -> UTF-8 */ | |
1693 len = strlen(ucs2_utf8); | |
1694 tmp = calloc(len, 1); | |
1695 if( (void *)NULL == tmp ) { | |
1696 fprintf(stderr, "out of memory\n"); | |
1697 exit(1); | |
1698 } | |
1699 | |
1700 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1701 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), | |
1702 tmp, len, &lenout); | |
1703 if( !result ) { | |
1704 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n"); | |
1705 goto done; | |
1706 } | |
1707 | |
1708 if( lenout != len ) { | |
1709 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n"); | |
1710 goto loser; | |
1711 } | |
1712 | |
1713 if( 0 != strncmp(ucs2_utf8, tmp, len) ) { | |
1714 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n"); | |
1715 goto loser; | |
1716 } | |
1717 | |
1718 /* implement UTF16 */ | |
1719 | |
1720 result = PR_TRUE; | |
1721 goto done; | |
1722 | |
1723 loser: | |
1724 result = PR_FALSE; | |
1725 done: | |
1726 free(ucs4s); | |
1727 free(ucs4_utf8); | |
1728 free(ucs2s); | |
1729 free(ucs2_utf8); | |
1730 if( (void *)NULL != tmp ) free(tmp); | |
1731 return result; | |
1732 } | |
1733 | |
1734 void | |
1735 byte_order | |
1736 ( | |
1737 void | |
1738 ) | |
1739 { | |
1740 /* | |
1741 * The implementation (now) expects the 16- and 32-bit characters | |
1742 * to be in network byte order, not host byte order. Therefore I | |
1743 * have to byteswap all those test vectors above. hton[ls] may be | |
1744 * functions, so I have to do this dynamically. If you want to | |
1745 * use this code to do host byte order conversions, just remove | |
1746 * the call in main() to this function. | |
1747 */ | |
1748 | |
1749 int i; | |
1750 | |
1751 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1752 struct ucs4 *e = &ucs4[i]; | |
1753 e->c = htonl(e->c); | |
1754 } | |
1755 | |
1756 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1757 struct ucs2 *e = &ucs2[i]; | |
1758 e->c = htons(e->c); | |
1759 } | |
1760 | |
1761 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
1762 struct utf16 *e = &utf16[i]; | |
1763 e->c = htonl(e->c); | |
1764 e->w[0] = htons(e->w[0]); | |
1765 e->w[1] = htons(e->w[1]); | |
1766 } | |
1767 | |
1768 return; | |
1769 } | |
1770 | |
1771 int | |
1772 main | |
1773 ( | |
1774 int argc, | |
1775 char *argv[] | |
1776 ) | |
1777 { | |
1778 byte_order(); | |
1779 | |
1780 if( test_ucs4_chars() && | |
1781 test_ucs2_chars() && | |
1782 test_utf16_chars() && | |
1783 test_utf8_bad_chars() && | |
1784 test_iso88591_chars() && | |
1785 test_zeroes() && | |
1786 test_multichars() && | |
1787 PR_TRUE ) { | |
1788 fprintf(stderr, "PASS\n"); | |
1789 return 1; | |
1790 } else { | |
1791 fprintf(stderr, "FAIL\n"); | |
1792 return 0; | |
1793 } | |
1794 } | |
1795 | |
1796 #endif /* TEST_UTF8 */ |